Discuss / Python / 练习

练习

Topic source
from html.parser import HTMLParser
from urllib import request

class MyHTMLParser(HTMLParser):
    """Print the first text node following each event-related start tag.

    A start tag is considered a target when it is one of:

    * ``<h3>`` carrying the attribute ``class="event-title"``
    * ``<time>`` carrying a ``datetime`` attribute (in any position)
    * ``<span>`` carrying the attribute ``class="event-location"``

    After a target tag is seen, the next chunk of text passed to
    ``handle_data`` is printed and the parser returns to its idle state.
    """

    def __init__(self):
        super().__init__()
        # State: 1 = the next text node belongs to a target tag, 0 = it does not.
        self.flag = 0

    def handle_starttag(self, tag, attrs):
        """Arm the flag when *tag* with *attrs* matches one of the targets.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by
        ``HTMLParser``; it is empty for a tag without attributes.
        """
        if tag == 'h3' and ('class', 'event-title') in attrs:  # event name
            self.flag = 1
        # Scan every attribute name instead of indexing attrs[0]: a bare
        # <time> tag has an empty attribute list, and ``datetime`` is not
        # guaranteed to be the first attribute.
        elif tag == 'time' and any(name == 'datetime' for name, _ in attrs):  # event time
            self.flag = 1
        elif tag == 'span' and ('class', 'event-location') in attrs:  # event location
            self.flag = 1

    def handle_data(self, data):
        """Print *data* if a target tag was just opened, then disarm the flag."""
        if self.flag:
            print(data)
            self.flag = 0  # reset the state so only one text node is printed


# Download the python.org events page and decode the response body as UTF-8.
with request.urlopen('https://www.python.org/events/python-events/') as f:
    data = f.read().decode('utf-8')

# Feed the whole document to the parser, which prints matches as it goes.
parser = MyHTMLParser()
parser.feed(data)
# feed() may hold back trailing text while waiting for more input; close()
# forces that buffered data to be processed as if end-of-file was reached.
parser.close()

  • 1

Reply