from html.parser import HTMLParser
class MyHTMLParser(HTMLParser):
    """Print event names, times and locations found in an HTML document.

    The parser reacts to three kinds of markup:

    * text directly following an ``<a>`` start tag inside
      ``<h3 class="event-title">`` is printed as the event name;
    * text inside ``<time>`` is printed as the event time;
    * text inside ``<span class="event-location">`` is printed as the
      event location.

    All output goes to stdout via ``print``; nothing is returned or stored.
    """

    def __init__(self):
        super().__init__()
        # True while inside <time> or <span class="event-location">.
        self.flag = False
        # True while inside <h3 class="event-title">.
        self.h3_flag = False

    def _attrs(self, attrlist, attrname):
        """Return the value of the first attribute named *attrname*.

        *attrlist* is the list of ``(name, value)`` pairs that
        ``handle_starttag`` receives. Returns ``None`` when no attribute
        with that name is present.
        """
        for name, value in attrlist:
            if name == attrname:
                return value
        return None

    def handle_starttag(self, tag, attrs):
        """Set the capture flags and print the heading for a start tag."""
        css_class = self._attrs(attrs, 'class')
        if tag == 'h3' and css_class == "event-title":
            self.h3_flag = True
        if tag == 'time':
            print("会议时间:")
            self.flag = True
        if tag == 'span' and css_class == 'event-location':
            print("会议地点:")
            self.flag = True

    def handle_endtag(self, tag):
        """Stop capturing text as soon as any element is closed."""
        self.flag = False
        self.h3_flag = False

    def handle_startendtag(self, tag, attrs):
        """Ignore self-closing tags such as ``<br/>``."""
        # Overridden on purpose: the base implementation would forward to
        # handle_starttag/handle_endtag, which would reset the flags.
        pass

    def handle_data(self, data):
        """Print text that falls inside one of the tracked elements."""
        # The event name is the link text inside the title heading, so the
        # most recently opened tag must be <a>.
        if self.h3_flag and self.lasttag == 'a':
            print('会议名称:', data)
        if self.flag:
            print(data)

    def handle_comment(self, data):
        """Ignore HTML comments."""
        pass

    # NOTE: the base class is initialised with its default
    # convert_charrefs=True, in which case the parser decodes character
    # references itself and does not call the two handlers below.
    def handle_entityref(self, name):
        """Print a named character reference such as ``&amp;``."""
        print('&%s:' % name)

    def handle_charref(self, name):
        """Print a numeric character reference such as ``&#62;``."""
        print('&#%s:' % name)
# Parse the saved page; MyHTMLParser prints what it finds to stdout.
parser = MyHTMLParser()
with open('html.txt', 'r', encoding='utf-8') as f:
    parser.feed(f.read())
# Flush any text still buffered at end of input.
parser.close()
光靠教程里的那些内容,做题实在是太难了…… o(╥﹏╥)o
做这个题你还看了哪些东西啊?感觉看这个教程做题很困难啊
Sign in to make a reply
阿萌QVQ
from html.parser import HTMLParser
class MyHTMLParser(HTMLParser):
# Parse the saved page; MyHTMLParser prints what it finds to stdout.
parser = MyHTMLParser()
with open('html.txt', 'r', encoding='utf-8') as f:
    parser.feed(f.read())
# Flush any text still buffered at end of input.
parser.close()