练习
Topic source
from html.parser import HTMLParser
from html.entities import name2codepoint
class MyHTMLParser(HTMLParser):
    """Collect event records (title, time, location) from HTML markup.

    Three kinds of start tag are recognised:

    * ``<h3 class="event-title">``      -> the next text chunk is the title
    * ``<time datetime="...">``         -> the next text chunk is the time
    * ``<span class="event-location">`` -> the next text chunk is the location

    Results are stored in ``self.Events``, a dict mapping a running integer
    index to ``{'title': ..., 'time': ..., 'location': ...}``.  A location
    closes the current event and advances the index.
    """

    def __init__(self):
        super().__init__()
        self.Events = {}    # event index -> dict of collected fields
        self._tag = ''      # field the next text chunk belongs to ('' = none)
        self._counter = 0   # index of the event currently being filled

    @staticmethod
    def _has_class(attr_map, class_name):
        """Return True if *class_name* is one of the classes in *attr_map*."""
        # A valueless attribute is reported as None by HTMLParser.
        return class_name in (attr_map.get('class') or '').split()

    def handle_starttag(self, tag, attrs):
        """Remember which field the upcoming text chunk should fill.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by
        ``HTMLParser``.  Attributes are looked up by name, so a match does
        not depend on the attribute being the first one on the tag.
        """
        attr_map = dict(attrs)
        if tag == 'h3' and self._has_class(attr_map, 'event-title'):
            self._tag = 'title'
        elif tag == 'time' and 'datetime' in attr_map:
            self._tag = 'datetime'
        elif tag == 'span' and self._has_class(attr_map, 'event-location'):
            self._tag = 'location'

    def handle_data(self, data):
        """Store *data* in the field selected by the last matching start tag.

        The pending field marker is cleared after every chunk, so only the
        first text chunk following a recognised tag is captured.
        """
        text = data.strip("\n")
        if self._tag == 'title':
            # A title always starts a fresh record for the current index.
            self.Events[self._counter] = {'title': text}
        elif self._tag == 'datetime':
            # setdefault: tolerate a time that appears without a title.
            self.Events.setdefault(self._counter, {})['time'] = text
        elif self._tag == 'location':
            self.Events.setdefault(self._counter, {})['location'] = text
            # The location is the last field of an event; move to the next.
            self._counter += 1
        self._tag = ''

    def printEvents(self):
        """Print one line per collected event; missing fields print as ''."""
        for event in self.Events.values():
            # Format string kept byte-identical to the original output
            # (including the "Loaction" spelling) for existing consumers.
            print("title:%s Time: %s Loaction:%s" % (
                event.get('title', ''),
                event.get('time', ''),
                event.get('location', '')))
- 1
影子钧
这是目前想到比较简单直接的办法: