# Constructor: `kw` is the value substituted into the `kw=` query parameter of
# base_url, and `max_pn` is the exclusive upper bound used for the `pn` offset.
# NOTE(review): the assignments to self.kw / self.max_pn that other lines read
# are not visible in this excerpt - confirm they exist in the full file.
def __init__(self,kw,max_pn):
# URL template with two placeholders, filled in as format(self.kw, pn).
self.base_url = "https://tieba.baidu.com/f?kw={}&ie=utf-8&pn={}"
# NOTE(review): presumably one entry of a request-headers dict whose enclosing
# assignment and braces are not visible here - TODO confirm.
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"
# Step the paging offset `pn` from 0 up to (but not including) self.max_pn,
# 50 at a time.
for pn in range(0,self.max_pn,50):
# Fill the URL template with the keyword and the current offset.
url = self.base_url.format(self.kw,pn)
# Return one formatted URL per offset, as a list.
# NOTE(review): this comprehension covers exactly the same range as the loop
# above, so the loop looks redundant as shown - confirm against the full file
# (it may be a leftover of an earlier, longer implementation).
return [self.base_url.format(self.kw,pn) for pn in range(0,self.max_pn,50)]
# Takes a single page URL.
# NOTE(review): the body is not visible in this excerpt; presumably it fetches
# the page at `url` and returns its raw content - TODO confirm.
def get_content(self,url):
# Takes the page `content` and a page number `idx` (the visible caller passes
# the URL's list position plus one, i.e. a 1-based index).
def get_items(self,content,idx):
# Open the file '08-<idx>.html' for binary writing.
# NOTE(review): presumably `content` is written to it, but the write itself is
# not visible in this excerpt - confirm.
with open('08-{}.html'.format(idx),'wb') as f:
# Takes the `items` value produced by get_items.
# NOTE(review): the body is not visible in this excerpt; presumably it persists
# the items somewhere - TODO confirm.
def save_items(self,items):
# Build the complete list of page URLs up front.
url_list = self.get_url_list()
# NOTE(review): `url` is not bound by any visible line; presumably an enclosing
# `for url in url_list:` header exists in the full file - TODO confirm.
content = self.get_content(url)
# The second argument is the URL's 1-based position within url_list.
# NOTE(review): list.index() is a linear scan and returns the first match, so
# duplicate URLs would share a number; if a loop encloses this line,
# `for idx, url in enumerate(url_list, 1)` would avoid both issues.
items = self.get_items(content,url_list.index(url) + 1)
# Script entry point: runs only when the file is executed directly.
if __name__ == '__main__':
# Create a spider for the given keyword with max_pn=150; with the step of 50
# used for paging this yields the offsets 0, 50 and 100.
# NOTE(review): no method call on `spider` is visible in this excerpt, so as
# shown nothing is started - confirm against the full file.
spider = TiebaSpider("英雄联盟",150)