from html.parser import HTMLParser
from html.entities import name2codepoint
# Sliding window of the most recently opened tag names; the parser in this
# file appends to it and trims it back to two entries.
Stag: list = []
# Attribute lists of the tags tracked in ``Stag``, keyed by tag name.
Satt: dict = {}
# Collected event records; each record is a list of stripped text fragments.
events: list = []
class MyHTMLParser(HTMLParser):
    """Collect event records from HTML into the module-level ``events`` list.

    The parser keeps a two-element sliding window of the most recently
    opened tag names in ``Stag`` and the attribute lists of those tags,
    keyed by tag name, in ``Satt``. Text is recorded when the window
    matches one of these shapes:

    * ``<h3 class="event-title"><a>``: starts a new record holding the text;
    * ``<p><time>``: appends the text to the latest record;
    * ``<span class="event-location">``: appends the text to the latest record.
    """

    def handle_starttag(self, tag, attrs):
        """Push *tag* onto the window and remember its attributes.

        Args:
            tag: Name of the opened tag.
            attrs: List of ``(name, value)`` pairs supplied by ``HTMLParser``.
        """
        Stag.append(tag)
        if len(Stag) > 2:
            evicted = Stag.pop(0)
            # Attributes are keyed by tag name, so only forget them when no
            # tag of that name is left in the window; otherwise the
            # attributes of a tag that is still tracked would be lost.
            if evicted not in Stag:
                Satt.pop(evicted, None)
        Satt[tag] = attrs

    def handle_endtag(self, tag):
        """Blank the innermost tracked tag when its closing tag arrives.

        Args:
            tag: Name of the closed tag.
        """
        if len(Stag) > 1 and Stag[1] == tag:
            Satt.pop(tag, None)
            # Keep the slot but blank it so that text following the closing
            # tag is not attributed to the element that just ended.
            Stag[1] = ' '

    def handle_data(self, data):
        """Record *data* when the current tag window matches an event field.

        Args:
            data: Raw text found between tags; it is stripped before storing.
        """
        if len(Stag) < 2:
            return
        outer, inner = Stag[0], Stag[1]
        text = data.strip()
        # ``Satt.get`` avoids a KeyError when the attributes of a tracked tag
        # have already been discarded.
        if (outer == 'h3' and inner == 'a'
                and ('class', 'event-title') in Satt.get(outer, ())):
            events.append([text])
        # Time and location belong to the latest record; skip them when no
        # title has been seen yet instead of indexing an empty list.
        if outer == 'p' and inner == 'time' and events:
            events[-1].append(text)
        if (inner == 'span' and events
                and ('class', 'event-location') in Satt.get(inner, ())):
            events[-1].append(text)
# Parse the page and print every collected event record.
# NOTE(review): ``Htmldata`` is not defined in the visible part of this file;
# it presumably holds the HTML text to parse - confirm where it is assigned.
parser = MyHTMLParser()
parser.feed(Htmldata)
# Flush any text still buffered by the parser (e.g. trailing unterminated data).
parser.close()

print('----重要事件----', len(events), '件')
for event in events:
    print(event)
一雷叔一
from html.parser import HTMLParser from html.entities import name2codepoint
Stag = [] Satt = {} events = []
class MyHTMLParser(HTMLParser):
parser = MyHTMLParser() parser.feed(Htmldata)
n = 0 print('----重要事件----', len(events), '件') while n < len(events): print(events[n]) n = n + 1