"""Print the name, time and location of events listed on python.org."""

from html.parser import HTMLParser
from urllib import request

EVENTS_URL = 'https://www.python.org/events/python-events/'


class MyHTMLParser(HTMLParser):
    """HTML parser that prints the first text chunk of each target tag.

    Target tags are:
      * ``<h3 class="event-title">``      -- event name
      * ``<time datetime="...">``         -- event time
      * ``<span class="event-location">`` -- event location
    """

    def __init__(self):
        super().__init__()
        # State: 1 = the last start tag was a target tag, 0 = it was not.
        self.flag = 0

    def handle_starttag(self, tag, attrs):
        """Arm ``self.flag`` when *tag* with *attrs* is one of the targets.

        *attrs* is the list of ``(name, value)`` pairs supplied by
        ``HTMLParser``; it may be empty.
        """
        if tag == 'h3' and ('class', 'event-title') in attrs:
            # Event name.
            self.flag = 1
        elif tag == 'time' and any(name == 'datetime' for name, _ in attrs):
            # Event time.  Scan every attribute name rather than indexing
            # attrs[0]: a bare <time> tag has no attributes at all, and the
            # datetime attribute is not guaranteed to come first.
            self.flag = 1
        elif tag == 'span' and ('class', 'event-location') in attrs:
            # Event location.
            self.flag = 1

    def handle_data(self, data):
        """Print *data* if a target tag was just opened, then reset the flag."""
        if self.flag:
            print(data)
            self.flag = 0  # Reset so only the first text chunk is printed.


def main():
    """Download the python.org events page and print the event details."""
    with request.urlopen(EVENTS_URL) as f:
        data = f.read().decode('utf-8')
    parser = MyHTMLParser()
    parser.feed(data)


if __name__ == '__main__':
    main()
Sign in to make a reply
tortoise__knight