Skip to content

Commit

Permalink
Allow search by name as well as URL
Browse files Browse the repository at this point in the history
  • Loading branch information
Jon Saad-Falcon committed Jun 13, 2021
1 parent a31c719 commit 3b9bed8
Show file tree
Hide file tree
Showing 15 changed files with 11,002 additions and 38 deletions.
36 changes: 32 additions & 4 deletions Preprocessing/NewGoogleQueryCited.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def newGenerateCitedGoogleScholarCSV(list_of_researchers):

#researcher = list_of_researchers.iloc[i][0]
researcher = ""
researcher_URL = list_of_researchers.iloc[i][0]
researcher_URL = list_of_researchers.iloc[i][1]

researcherID = researcher_URL.replace("https://scholar.google.com/citations?user=", "")
researcherID = researcherID.replace("&hl=en&oi=ao", "")
Expand Down Expand Up @@ -72,10 +72,38 @@ def newGenerateCitedGoogleScholarCSV(list_of_researchers):
outputGrid.append([researcher, str(researcher_URL), "False"])

except:
search_query = 'error'
author = 'error'

outputGrid.append([researcher, str(researcher_URL), "False"])
try:

search_query = next(scholarly.search_author(list_of_researchers.iloc[i][0]))
author = search_query.fill()



researcher = author.name

print(researcher)



if author != None:

publications = author.publications

outputGrid.append([researcher, str(researcher_URL), "True"])

else:
search_query = 'error'
author = 'error'

outputGrid.append([researcher, str(researcher_URL), "False"])

except:

search_query = 'error'
author = 'error'

outputGrid.append([researcher, str(researcher_URL), "False"])


if search_query != 'error':
Expand Down
35 changes: 31 additions & 4 deletions Preprocessing/NewGoogleQueryRecent.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def newGenerateRecentGoogleScholarCSV(list_of_researchers):

#researcher = list_of_researchers.iloc[i][0]
researcher = ""
researcher_URL = list_of_researchers.iloc[i][0]
researcher_URL = list_of_researchers.iloc[i][1]

researcherID = researcher_URL.replace("https://scholar.google.com/citations?user=", "")
researcherID = researcherID.replace("&hl=en&oi=ao", "")
Expand Down Expand Up @@ -214,10 +214,37 @@ def newGenerateRecentGoogleScholarCSV(list_of_researchers):

except:

search_query = 'error'
author = 'error'
try:

outputGrid.append([researcher, str(researcher_URL), "False"])
search_query = next(scholarly.search_author(list_of_researchers.iloc[i][0]))
author = search_query.fill()



researcher = author.name

print(researcher)



if author != None:

publications = author.publications

outputGrid.append([researcher, str(researcher_URL), "True"])

else:
search_query = 'error'
author = 'error'

outputGrid.append([researcher, str(researcher_URL), "False"])

except:

search_query = 'error'
author = 'error'

outputGrid.append([researcher, str(researcher_URL), "False"])


if search_query != 'error':
Expand Down
32 changes: 21 additions & 11 deletions ResearchersDataset.csv
Original file line number Diff line number Diff line change
@@ -1,11 +1,21 @@
URL
https://scholar.google.com/citations?user=B3U7yvcAAAAJ&hl=it
https://scholar.google.com/citations?user=JnkeH28AAAAJ&hl=it
https://scholar.google.co.uk/citations?user=sbcJOSoAAAAJ&hl=en
https://scholar.google.co.uk/citations?user=JXTpQXIAAAAJ&hl=en
https://scholar.google.com.mx/citations?user=SPrHADoAAAAJ&hl=en
https://scholar.google.co.uk/citations?user=o2gCskMAAAAJ&hl=en
https://scholar.google.ch/citations?user=IoKBsjcAAAAJ&hl=en
https://scholar.google.com/citations?user=uE1oO1gAAAAJ&hl=en
https://scholar.google.com/citations?user=vNvRYksAAAAJ&hl=en
https://scholar.google.com/citations?user=wIFFAosAAAAJ&hl=en
Name,URL
Guido Noto La Diega,https://scholar.google.com/citations?user=B3U7yvcAAAAJ&hl=it
Rossana Ducato,https://scholar.google.com/citations?user=JnkeH28AAAAJ&hl=it
Martin Kretschmer,https://scholar.google.co.uk/citations?user=sbcJOSoAAAAJ&hl=en
Marta Iljadica,https://scholar.google.co.uk/citations?user=JXTpQXIAAAAJ&hl=en
Israel Cedillo Lazcano,https://scholar.google.com.mx/citations?user=SPrHADoAAAAJ&hl=en
Dr Lachlan Urquhart,https://scholar.google.co.uk/citations?user=o2gCskMAAAAJ&hl=en
Mariela de Amstalden,https://scholar.google.ch/citations?user=IoKBsjcAAAAJ&hl=en
Jade Kouletakis,https://scholar.google.com/citations?user=uE1oO1gAAAAJ&hl=en
"Irene Couzigou, PhD, LLM",https://scholar.google.com/citations?user=vNvRYksAAAAJ&hl=en
Patricia Živković,https://scholar.google.com/citations?user=wIFFAosAAAAJ&hl=en
Emma Milne,https://scholar.google.co.uk/citations?user=iStd81MAAAAJ&hl=en
C William R Webster,https://scholar.google.co.uk/citations?user=mdypkb4AAAAJ&hl=en
Péter Cserne,https://scholar.google.com/citations?user=qlvq1lwAAAAJ&hl=en
Dr Mo Egan,https://scholar.google.co.uk/citations?user=6o0j57EAAAAJ&hl=en
Lilian Edwards,https://scholar.google.com/citations?user=a6-onLoAAAAJ&hl=en
Matthew Jewell,https://scholar.google.co.uk/citations?user=OkUv-F4AAAAJ&hl=en
Laurence Diver,https://scholar.google.co.uk/citations?user=9L-xODcAAAAJ&hl=en
Gianluca Andresani,https://scholar.google.com/citations?user=poIiHR0AAAAJ&hl=en
Burkhard Schafer,https://scholar.google.com/citations?user=vD2DRqoAAAAJ&hl=en
Paolo Cavaliere,https://scholar.google.com/citations?user=HOYJ86sAAAAJ&hl=en
20 changes: 15 additions & 5 deletions ResearchersScrapedCited.csv
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@
Guido Noto La Diega,https://scholar.google.com/citations?user=B3U7yvcAAAAJ&hl=it,True
Rossana Ducato,https://scholar.google.com/citations?user=JnkeH28AAAAJ&hl=it,True
,https://scholar.google.co.uk/citations?user=sbcJOSoAAAAJ&hl=en,False
,https://scholar.google.co.uk/citations?user=JXTpQXIAAAAJ&hl=en,False
,https://scholar.google.com.mx/citations?user=SPrHADoAAAAJ&hl=en,False
,https://scholar.google.co.uk/citations?user=o2gCskMAAAAJ&hl=en,False
,https://scholar.google.ch/citations?user=IoKBsjcAAAAJ&hl=en,False
Martin Kretschmer,https://scholar.google.co.uk/citations?user=sbcJOSoAAAAJ&hl=en,True
Marta Iljadica,https://scholar.google.co.uk/citations?user=JXTpQXIAAAAJ&hl=en,True
Israel Cedillo Lazcano,https://scholar.google.com.mx/citations?user=SPrHADoAAAAJ&hl=en,True
Dr Lachlan Urquhart,https://scholar.google.co.uk/citations?user=o2gCskMAAAAJ&hl=en,True
Mariela de Amstalden,https://scholar.google.ch/citations?user=IoKBsjcAAAAJ&hl=en,True
Jade Kouletakis,https://scholar.google.com/citations?user=uE1oO1gAAAAJ&hl=en,True
"Irene Couzigou, PhD, LLM",https://scholar.google.com/citations?user=vNvRYksAAAAJ&hl=en,True
Patricia Živković,https://scholar.google.com/citations?user=wIFFAosAAAAJ&hl=en,True
"Dafouz Milne, Emma",https://scholar.google.co.uk/citations?user=iStd81MAAAAJ&hl=en,True
C William R Webster,https://scholar.google.co.uk/citations?user=mdypkb4AAAAJ&hl=en,True
Péter Cserne,https://scholar.google.com/citations?user=qlvq1lwAAAAJ&hl=en,True
Dr Mo Egan,https://scholar.google.co.uk/citations?user=6o0j57EAAAAJ&hl=en,True
Lilian Edwards,https://scholar.google.com/citations?user=a6-onLoAAAAJ&hl=en,True
Matthew Jewell,https://scholar.google.co.uk/citations?user=OkUv-F4AAAAJ&hl=en,True
Dominik Ziegler,https://scholar.google.co.uk/citations?user=9L-xODcAAAAJ&hl=en,True
Gianluca Andresani,https://scholar.google.com/citations?user=poIiHR0AAAAJ&hl=en,True
Burkhard Schafer,https://scholar.google.com/citations?user=vD2DRqoAAAAJ&hl=en,True
Paolo Cavaliere,https://scholar.google.com/citations?user=HOYJ86sAAAAJ&hl=en,True
20 changes: 15 additions & 5 deletions ResearchersScrapedRecent.csv
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@
Guido Noto La Diega,https://scholar.google.com/citations?user=B3U7yvcAAAAJ&hl=it,True
Rossana Ducato,https://scholar.google.com/citations?user=JnkeH28AAAAJ&hl=it,True
,https://scholar.google.co.uk/citations?user=sbcJOSoAAAAJ&hl=en,False
,https://scholar.google.co.uk/citations?user=JXTpQXIAAAAJ&hl=en,False
,https://scholar.google.com.mx/citations?user=SPrHADoAAAAJ&hl=en,False
,https://scholar.google.co.uk/citations?user=o2gCskMAAAAJ&hl=en,False
,https://scholar.google.ch/citations?user=IoKBsjcAAAAJ&hl=en,False
Martin Kretschmer,https://scholar.google.co.uk/citations?user=sbcJOSoAAAAJ&hl=en,True
Marta Iljadica,https://scholar.google.co.uk/citations?user=JXTpQXIAAAAJ&hl=en,True
Israel Cedillo Lazcano,https://scholar.google.com.mx/citations?user=SPrHADoAAAAJ&hl=en,True
Dr Lachlan Urquhart,https://scholar.google.co.uk/citations?user=o2gCskMAAAAJ&hl=en,True
Mariela de Amstalden,https://scholar.google.ch/citations?user=IoKBsjcAAAAJ&hl=en,True
Jade Kouletakis,https://scholar.google.com/citations?user=uE1oO1gAAAAJ&hl=en,True
"Irene Couzigou, PhD, LLM",https://scholar.google.com/citations?user=vNvRYksAAAAJ&hl=en,True
Patricia Živković,https://scholar.google.com/citations?user=wIFFAosAAAAJ&hl=en,True
"Dafouz Milne, Emma",https://scholar.google.co.uk/citations?user=iStd81MAAAAJ&hl=en,True
C William R Webster,https://scholar.google.co.uk/citations?user=mdypkb4AAAAJ&hl=en,True
Péter Cserne,https://scholar.google.com/citations?user=qlvq1lwAAAAJ&hl=en,True
Dr Mo Egan,https://scholar.google.co.uk/citations?user=6o0j57EAAAAJ&hl=en,True
Lilian Edwards,https://scholar.google.com/citations?user=a6-onLoAAAAJ&hl=en,True
Matthew Jewell,https://scholar.google.co.uk/citations?user=OkUv-F4AAAAJ&hl=en,True
Dominik Ziegler,https://scholar.google.co.uk/citations?user=9L-xODcAAAAJ&hl=en,True
Gianluca Andresani,https://scholar.google.com/citations?user=poIiHR0AAAAJ&hl=en,True
Burkhard Schafer,https://scholar.google.com/citations?user=vD2DRqoAAAAJ&hl=en,True
Paolo Cavaliere,https://scholar.google.com/citations?user=HOYJ86sAAAAJ&hl=en,True
Binary file modified citedScholarDataset.gzip
Binary file not shown.
6 changes: 3 additions & 3 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file modified recentScholarDataset.gzip
Binary file not shown.
Loading

0 comments on commit 3b9bed8

Please sign in to comment.