Discuss / Python / 练习

练习

Topic source
from urllib import requestfrom html.parser import HTMLParserclass MyHTMLParser(HTMLParser):    def __init__(self):        super().__init__()        self.__tag = ''        self.__info = list()    def handle_starttag(self, tag, attrs):        if tag == 'span' and 'event-location' in str(attrs):            self.__tag = '地点'        elif tag == 'time':            self.__tag = '时间'        elif tag == 'span' and 'say-no-more' in str(attrs):            self.__tag = '年'        elif tag == 'h3' and 'event-title' in str(attrs):            self.__tag = '名称'    def handle_endtag(self, tag):        self.__tag = ''    def handle_data(self, data):        # print(date)        if self.__tag:            self.__info.append({self.__tag: data})    def output_info(self):        self.__info = self.__info[:-2]        for x in range(len(self.__info) // 4):            temp = self.__info[x * 4:(x + 1) * 4]            print('名称:{0}\n时间:{1} {2}\n地点:{3}'.format(                temp[0].get('名称'),                temp[2].get('年'),                temp[1].get('时间'),                temp[3].get('地点')            ))            print('----------')

```

from urllib import request

from html.parser import HTMLParser

class MyHTMLParser(HTMLParser):
    """Collect event name, date, year and location text from an HTML page.

    While parsing, text found after one of the recognised start tags is
    stored as a single-key dict (``{'名称': ...}``, ``{'时间': ...}``,
    ``{'年': ...}`` or ``{'地点': ...}``) in document order.
    ``output_info`` then prints the stored entries in groups of four.
    """

    def __init__(self):
        super().__init__()
        # Label applied to the next text node; empty string means "ignore".
        self.__tag = ''
        # Captured entries, one single-key dict per text node, in page order.
        self.__info = []

    def handle_starttag(self, tag, attrs):
        """Choose the label for upcoming text based on the opening tag.

        A start tag that matches none of the rules leaves the current label
        untouched, so text inside a nested element (for example a link
        inside the title heading) is still captured under the outer label.
        """
        # Substring match against the repr of the whole attribute list, so
        # the marker may appear in any attribute name or value.
        attr_text = str(attrs)
        if tag == 'span' and 'event-location' in attr_text:
            self.__tag = '地点'
        elif tag == 'time':
            self.__tag = '时间'
        elif tag == 'span' and 'say-no-more' in attr_text:
            self.__tag = '年'
        elif tag == 'h3' and 'event-title' in attr_text:
            self.__tag = '名称'

    def handle_endtag(self, tag):
        """Stop capturing text as soon as any element closes."""
        self.__tag = ''

    def handle_data(self, data):
        """Store *data* under the current label, if one is active."""
        if self.__tag:
            self.__info.append({self.__tag: data})

    def output_info(self):
        """Print every captured event as name / time / location.

        The last two captured entries are ignored (presumably trailing
        matches on the target page that do not belong to an event -- TODO
        confirm). The remaining entries are read in groups of four, in the
        order name, date, year, location; an incomplete trailing group is
        skipped.

        The slice is taken into a local variable so the stored entries are
        not modified: calling this method repeatedly prints the same output
        each time instead of dropping two more entries per call.
        """
        entries = self.__info[:-2]
        for index in range(len(entries) // 4):
            name, when, year, place = entries[index * 4:(index + 1) * 4]
            print('名称:{0}\n时间:{1} {2}\n地点:{3}'.format(
                name.get('名称'),
                year.get('年'),
                when.get('时间'),
                place.get('地点')
            ))
            print('----------')

```


  • 1

Reply