-
Notifications
You must be signed in to change notification settings - Fork 512
/
Novel.py
109 lines (59 loc) · 2.49 KB
/
Novel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 28 17:02:49 2019
@author: Administrator
"""
import requests
from bs4 import BeautifulSoup
import sys
class download_txt(object):
    """Download a web novel from biqukan.com chapter by chapter.

    Workflow: ``get_url()`` scrapes the table-of-contents page to fill
    ``self.names`` / ``self.urls``; ``get_contents(url)`` fetches one
    chapter's text; ``save_txt(...)`` appends it to a local file.
    """

    def __init__(self):
        self.url = 'https://www.biqukan.com'               # site root, prefixed to relative chapter hrefs
        self.new_url = 'https://www.biqukan.com/3_3026/'   # table-of-contents page for this novel
        self.names = []   # chapter titles
        self.urls = []    # absolute URL of each chapter
        self.nums = 0     # number of chapters found

    def get_url(self):
        """Fetch the TOC page and populate names/urls/nums.

        Returns None on success, or an error-message string on failure
        (original error-string interface preserved for callers).
        """
        try:
            r = requests.get(url=self.new_url, timeout=30)
            r.raise_for_status()
            # Server's declared charset is often wrong; trust the detected one.
            r.encoding = r.apparent_encoding
            soup = BeautifulSoup(r.text, 'html.parser')
            div = soup.find_all('div', class_='listmain')
            # The found Tag is directly searchable; no need to re-parse
            # str(div[0]) through a second BeautifulSoup pass.
            anchors = div[0].find_all('a')
            # The first 12 anchors are "latest chapters" shortcuts that
            # duplicate real entries — skip them.
            chapters = anchors[12:]
            self.nums = len(chapters)
            for each in chapters:
                self.names.append(each.string)
                self.urls.append(self.url + each.get('href'))
        except (requests.RequestException, IndexError, AttributeError, TypeError):
            # Was a bare `except:` which also swallowed KeyboardInterrupt;
            # narrowed to network/parse failures only.
            return "爬取链接失败"

    def get_contents(self, url):
        """Fetch one chapter page and return its text content.

        Returns the chapter text, or an error-message string on failure
        (original error-string interface preserved).
        """
        try:
            r = requests.get(url, timeout=30)
            r.raise_for_status()
            r.encoding = r.apparent_encoding
            soup = BeautifulSoup(r.text, 'html.parser')
            texts = soup.find_all('div', class_='showtxt')
            # The site indents paragraphs with 8 non-breaking spaces;
            # convert each run into a blank-line paragraph break.
            return texts[0].text.replace('\xa0' * 8, '\n\n')
        except (requests.RequestException, IndexError, AttributeError):
            # Was a bare `except:`; narrowed (see get_url).
            return "爬取文章内容失败"

    def save_txt(self, name, path, text):
        """Append one chapter to the file at ``path``.

        Format: chapter title, newline, chapter text, blank line.
        Opened in append mode so successive chapters accumulate.
        """
        with open(path, 'a', encoding='utf-8') as f:
            f.write(name + '\n')
            f.writelines(text)
            f.write('\n\n')
if __name__ == '__main__':
    d = download_txt()
    d.get_url()
    print('开始下载小说《斗罗大陆》')
    for i in range(d.nums):
        d.save_txt(d.names[i], '斗罗大陆.txt', d.get_contents(d.urls[i]))
        # Use (i + 1) so the last chapter reports 100% (the original used i,
        # which started at 0% and never reached 100%).
        sys.stdout.write(" 已经下载:%.2f%%" % ((i + 1) / d.nums * 100) + '\r')
        # Flush is required for the carriage-return progress line to actually
        # appear on a line-buffered terminal (it was commented out before).
        sys.stdout.flush()
    print('《斗罗大陆》下载完成')