Skip to content

Commit

Permalink
added required library list and changed formatting of RELATED WIKI TOPICS in text
Browse files Browse the repository at this point in the history
  • Loading branch information
kohjiaxuan committed May 10, 2020
1 parent db02129 commit bd17172
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 2 deletions.
4 changes: 4 additions & 0 deletions pip_install.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
requests
beautifulsoup4
matplotlib
nltk
19 changes: 19 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
beautifulsoup4==4.9.0
certifi==2020.4.5.1
chardet==3.0.4
click==7.1.2
cycler==0.10.0
idna==2.9
joblib==0.14.1
kiwisolver==1.2.0
matplotlib==3.2.1
nltk==3.5
numpy==1.18.4
pyparsing==2.4.7
python-dateutil==2.8.1
regex==2020.5.7
requests==2.23.0
six==1.14.0
soupsieve==2.0
tqdm==4.46.0
urllib3==1.25.9
4 changes: 2 additions & 2 deletions wikiscrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def __init__(self,title,option='Yes',lang='en',checknltk='No',lemmatize='No'):
for paragraph in self.wordcorpus: #append paragraphs starting with <p>
self.para.append(paragraph)

- self.relatedtopic = ",*RELATED WIKI TOPIC*" #Identify topics in Wikipedia with an URL to point out to user
+ self.relatedtopic = "RELATED WIKI TOPIC" #Identify topics in Wikipedia with an URL to point out to user
for paragraph in self.wordcorpus2: #append paragraphs starting with <li>
if str(paragraph).find('<li><a href=') != -1:
if str(paragraph).find('</a></li>') != -1 or str(paragraph).find('</a></sup></li>') != -1:
Expand Down Expand Up @@ -303,7 +303,7 @@ def __init__(self,title,option='Yes',lang='en',checknltk='No',lemmatize='No'):
'other', 'than', 'then', 'now', 'look', 'only', 'come', 'its', 'over', 'think', 'also', 'back',
'after', 'use', 'two', 'how', 'our', 'work', 'first', 'well', 'way', 'even', 'new', 'want','topic',
'because', 'any', 'these', 'give', 'day', 'most', 'us','retrieved','^','archived',"•",'related',
- "',*related","wiki","topic*',","is","are",'was','since','such','articles','has','&amp;','p','b',
+ 'wiki','topic',"is","are",'was','since','such','articles','has','&amp;','p','b',
'january','february','march','april','may','june','july','august','september','october','november',
'december','1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','20',
'21','22','23','24','25','26','27','28','29','30','31','i','wa')
Expand Down

0 comments on commit bd17172

Please sign in to comment.