from html.parser import HTMLParser
from html.entities import name2codepoint


class MyHTMLParser(HTMLParser):
    """Collect event text fragments from HTML into the module-level ``event`` list.

    The parser remembers the most recently opened tag and its attributes.
    When text arrives it is stored according to that tag:

    * ``<a>`` whose first attribute value starts with
      ``/events/python-events/`` and does not end with ``/past/``:
      starts a new record ``[text]`` in ``event``.
    * ``<time>`` whose first attribute is named ``datetime``:
      text is appended to the latest record.
    * ``<span>`` whose first attribute value is ``event-location``:
      text is appended to the latest record.

    Only the FIRST attribute of a tag is inspected; attribute order matters.
    """

    # Class-level defaults so handle_data is safe before any start tag is seen.
    tagname = None
    attrs = None

    def handle_starttag(self, tagname, attrs):
        """Remember the tag that was just opened and its attribute list."""
        self.tagname, self.attrs = tagname, attrs

    def handle_endtag(self, name):
        """Forget the remembered tag as soon as any end tag is seen."""
        self.tagname, self.attrs = None, None

    def handle_data(self, text):
        """Store ``text`` in ``event`` if the remembered tag is one of interest.

        Text is ignored when no tag is remembered, when the tag has no
        attributes, or when a date/location arrives before any event link
        has created a record to attach it to.
        """
        if not self.attrs:
            # No open tag remembered, or the tag carries no attributes.
            return
        first_name, first_value = self.attrs[0]

        if self.tagname == 'a':
            # A valueless attribute (e.g. <a href>) yields None as its value.
            if (first_value is not None
                    and first_value.startswith('/events/python-events/')
                    and not first_value.endswith('/past/')):
                event.append([text])
            return

        if not event:
            # Nothing to attach a date or location to yet.
            return

        if self.tagname == 'time' and first_name == 'datetime':
            event[-1].append(text)
        elif self.tagname == 'span' and first_value == 'event-location':
            event[-1].append(text)

    def handle_comment(self, text):
        """Ignore HTML comments."""
        pass

    def handle_startendtag(self, tagname, attrs):
        """Ignore self-closing tags; they do not change the remembered tag."""
        pass

    def handle_charref(self, name):
        """Ignore numeric character references."""
        pass

    def handle_entityref(self, name):
        """Ignore named entity references."""
        pass


parser = MyHTMLParser()
# Shared result list filled by MyHTMLParser.handle_data: one inner list per event.
event = []
html = r''' <html></html> '''
parser.feed(html)
print(event)
Sign in to make a reply
儒生脱尘