Discuss / Python / 练习

练习

Topic source

影子钧

#1 Created at ... [Delete] [Delete and Lock User]

这是目前想到比较简单直接的办法:

class MyHTMLParser(HTMLParser):
    """Collect text from an HTML document into time/location/event lists.

    The category of each text node is decided by the *first* attribute of
    the most recent start tag that had attributes:

    * attribute name ``datetime``            -> ``info['time']``
    * attribute value ``event-location``     -> ``info['location']``
    * attribute value containing ``events``  -> ``info['event']``
    * anything else                          -> text is ignored

    Results accumulate in ``self.info`` as the document is fed.
    """

    def __init__(self):
        super().__init__()
        # Category for upcoming text nodes; None means "do not record".
        self._tag = None
        # Collected, whitespace-stripped text per category.
        self.info = {'time': [], 'location': [], 'event': []}

    def handle_starttag(self, tag, attrs):
        """Select the capture category from the first attribute of *tag*.

        Args:
            tag: Lower-cased element name (unused; only attributes matter).
            attrs: List of ``(name, value)`` pairs; ``value`` is ``None``
                for attributes written without a value.
        """
        if not attrs:
            # A tag without attributes leaves the current category as is,
            # so text inside plain nested elements is still recorded.
            return

        name, value = attrs[0]
        # Valueless attributes (e.g. <p hidden>) arrive as None; treat them
        # as an empty string so the substring test below cannot raise.
        value = value or ''

        if name == 'datetime':
            self._tag = 'time'
        elif value == 'event-location':
            self._tag = 'location'
        elif 'events' in value:
            self._tag = 'event'
        else:
            self._tag = None

    def handle_data(self, data):
        """Append non-blank *data* to the list of the current category."""
        text = data.strip()
        if self._tag is not None and text:
            self.info[self._tag].append(text)

都在流浪

#2 Created at ... [Delete] [Delete and Lock User]
from html.parser import HTMLParser
from html.entities import name2codepoint

class MyHTMLParser(HTMLParser):
    """Extract events (title, time, location) from an HTML document.

    Recognised elements:

    * ``<h3 class="event-title">``      -> starts a new event (its title)
    * ``<time datetime="...">``         -> time text of the current event
    * ``<span class="event-location">`` -> location text; closes the event

    Parsed events are stored in ``self.Events`` as
    ``{index: {'title': ..., 'time': ..., 'location': ...}}`` where
    ``'time'`` and ``'location'`` are present only if they were found.
    """

    # Element that opens each capture state; used to cancel a pending
    # capture when that element closes without yielding any text.
    _CAPTURE_ELEMENTS = {'title': 'h3', 'datetime': 'time', 'location': 'span'}

    def __init__(self):
        super().__init__()
        # Events keyed by a running integer index, in document order.
        self.Events = {}
        # Which field the next non-blank text node belongs to ('' = none).
        self._tag = ''
        # Index of the event currently being filled.
        self._counter = 0

    def handle_starttag(self, tag, attrs):
        """Arm text capture when *tag* is one of the recognised elements.

        Args:
            tag: Lower-cased element name.
            attrs: List of ``(name, value)`` attribute pairs.
        """
        # Look attributes up by name so their order in the markup is
        # irrelevant (the markers need not be the first attribute).
        attributes = dict(attrs)
        if tag == 'h3' and attributes.get('class') == 'event-title':
            self._tag = 'title'
        elif tag == 'time' and 'datetime' in attributes:
            self._tag = 'datetime'
        elif tag == 'span' and attributes.get('class') == 'event-location':
            self._tag = 'location'

    def handle_endtag(self, tag):
        """Cancel a pending capture once its opening element is closed."""
        if self._CAPTURE_ELEMENTS.get(self._tag) == tag:
            self._tag = ''

    def handle_data(self, data):
        """Store *data* in the field selected by the last recognised tag.

        Whitespace-only text is skipped without consuming the pending
        capture, so indentation between ``<h3>`` and a nested ``<a>`` does
        not become the title.
        """
        text = data.strip()
        if not text:
            return

        if self._tag == 'title':
            # The previous event never received a location (which is what
            # normally advances the counter): keep it instead of
            # overwriting it with the new title.
            if self._counter in self.Events:
                self._counter += 1
            self.Events[self._counter] = {'title': text}
        elif self._tag in ('datetime', 'location'):
            event = self.Events.get(self._counter)
            # Time/location text seen before any title has no event to
            # attach to and is ignored rather than raising KeyError.
            if event is not None:
                if self._tag == 'datetime':
                    event['time'] = text
                else:
                    event['location'] = text
                    # A location completes the event.
                    self._counter += 1
        self._tag = ''

    def printEvents(self):
        """Print one line per collected event; missing fields print empty."""
        for event in self.Events.values():
            # NOTE: output text (including its spelling) is kept unchanged
            # for compatibility with existing consumers of this output.
            print("title:%s  Time: %s  Loaction:%s" % (
                event.get('title', ''),
                event.get('time', ''),
                event.get('location', '')))

  • 1

Reply