from html.parser import HTMLParser from html.entities import name2codepoint
class MyHTMLParser(HTMLParser):
in_title = False in_loca = False in_time = False def handle_starttag(self,tag,attrs): if ('class','event-title') in attrs: self.in_title = True elif ('class','event-location') in attrs: self.in_loca = True elif tag == 'time': self.in_time = True self.times = [] def handle_data(self,data): if self.in_title: print('-'*50) print('Title:'+data.strip()) if self.in_loca: print('Location:'+data.strip()) if self.in_time: self.times.append(data) def handle_endtag(self,tag): if tag == 'h3':self.in_title = False if tag == 'span':self.in_loca = False if tag == 'time': self.in_time = False print('Time:'+'-'.join(self.times))
parser = MyHTMLParser() with open('index.html') as html: parser.feed(html.read())
想问一下,你这个得出了结果吗?为什么我复制过来后运行了一下,什么都没打印出来,当然,也没有报错。
后来我把所给网页的一小部分所需要的程序合成一个html再试的时候就可以成功打印了。
感谢
Sign in to make a reply
乔帮主Zzz
from html.parser import HTMLParser from html.entities import name2codepoint
class MyHTMLParser(HTMLParser):
parser = MyHTMLParser() with open('index.html') as html: parser.feed(html.read())