"""Print the events listed on python.org's events page.

The page is downloaded with ``urllib`` and scanned with ``HTMLParser``;
for each event the sequence number, name, time and location are printed.
"""

from html.parser import HTMLParser
from typing import List, Optional, Tuple
from urllib import request

url = 'https://www.python.org/events/python-events/'

# Output template for each non-zero value of ``MyHTMLParser.flag``.
_FIELD_TEMPLATES = {
    1: 'Event Name: %s',
    2: 'Event Time: %s',
    3: 'Event Location: %s\n',
}


class MyHTMLParser(HTMLParser):
    """Print event details found in the HTML fed to the parser.

    Attributes:
        flag: Which field the next text chunk belongs to
            (0 = none, 1 = name, 2 = time, 3 = location).
        count: Number of ``<h3 class="event-title">`` tags seen so far.
    """

    def __init__(self) -> None:
        super().__init__()
        self.flag = 0
        self.count = 0

    def handle_starttag(
        self, tag: str, attrs: List[Tuple[str, Optional[str]]]
    ) -> None:
        """Mark the upcoming text as an event field when *tag* opens one.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: ``(name, value)`` pairs of the tag's attributes.
        """
        if tag == 'h3' and ('class', 'event-title') in attrs:
            self.flag = 1
            self.count += 1
            print('Event No: %s' % self.count)
        elif tag == 'time' and any(name == 'datetime' for name, _ in attrs):
            # Look at every attribute name: indexing attrs[0] would raise
            # IndexError on a bare <time> tag and would miss a datetime
            # attribute that is not listed first.
            self.flag = 2
        elif tag == 'span' and ('class', 'event-location') in attrs:
            self.flag = 3

    def handle_data(self, data: str) -> None:
        """Print *data* as the pending field, then clear the pending flag.

        Text that arrives while no field is pending is ignored.
        """
        template = _FIELD_TEMPLATES.get(self.flag)
        if template is None:
            return
        print(template % data)
        self.flag = 0


def fetch_page(page_url: str) -> str:
    """Download *page_url* and return its body decoded as UTF-8."""
    # The context manager closes the HTTP response even if read() fails.
    with request.urlopen(page_url) as response:
        return response.read().decode('utf-8')


def main() -> None:
    """Fetch the events page and print every event found on it."""
    MyHTMLParser().feed(fetch_page(url))


if __name__ == '__main__':
    main()
Sign in to make a reply
采蘑菇的lucas_688