Skip to content

Commit

Permalink
improvements
Browse files Browse the repository at this point in the history
build the dev environment and try to solve the mismatch query.
  • Loading branch information
nico-zck committed Mar 28, 2022
1 parent 23ebe04 commit 6817352
Show file tree
Hide file tree
Showing 7 changed files with 441 additions and 438 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,6 @@ typings/
# Custom Misc
build/
builds/
.DS_Store
package.json
package-lock.json
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@

New features:
- Improve the process of dealing with reCAPTCHA, especially in China;
- Fix URL errors when the paper title contains special characters;
- When updating many items, a random time delay is added, which may avoid Google Scholar traffic detection and reduce the frequency of reCAPTCHA;
- Fix URL errors when the paper title contains special characters;
- Add a rough time range to the generated query url:
> For example, if a paper titled "Explaining and Harnessing Adversarial Examples" has the year "2013" in Zotero, the original version produces a URL restricted to that single year, which results in an incorrect query (https://scholar.google.com/scholar?q=Explaining+and+Harnessing+Adversarial+Examples&hl=en&as_sdt=0%2C5&as_ylo=2013&as_yhi=2013), while a looser range, such as 2012 - 2014, will hit the correct query (https://scholar.google.com/scholar?q=Explaining+and+Harnessing+Adversarial+Examples&hl=en&as_sdt=0%2C5&as_ylo=2012&as_yhi=2014).

v2.0.5
- Adaptation for Zotero 6.0;
Expand Down
Empty file modified build.sh
100755 → 100644
Empty file.
33 changes: 29 additions & 4 deletions chrome/content/zsc.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ let zsc = {
_extraEntrySep: "\n",
};

zsc._extraRegex = new RegExp(zsc._extraPrefix + ".*");
zsc._extraRegex = new RegExp(zsc._extraPrefix + ".{0,20}");

let isDebug = function () {
return (
Expand Down Expand Up @@ -184,20 +184,43 @@ zsc.retrieveCitationData = function (item, cb) {
let citeCount;
let xhr = new XMLHttpRequest();
xhr.open("GET", url, true);
// xhr.responseType = "document";
xhr.onreadystatechange = function () {
if (this.readyState == 4 && this.status == 200) {
// // debug on response text
// if (isDebug()) Zotero.debug(this.responseText);

// check if response includes meaningful content
if (this.responseText.indexOf('class="gs_r gs_or gs_scl"') != -1) {
if (isDebug())
if (isDebug()) {
Zotero.debug(
"[scholar-citations] received non-captcha scholar results!"
);

// check whether the returned title matches the item title
var title1 = item
.getField("title")
.trim()
.toLowerCase()
.replace(/ +/g, " ");
Zotero.debug("[scholar-citations] the item title: " + title1);
var parser = new DOMParser();
var htmlDoc = parser.parseFromString(this.responseText, "text/html");
var title2 = htmlDoc
.getElementsByClassName("gs_rt")[0]
.innerText.trim()
.toLowerCase()
.replace(/ +/g, " ");
Zotero.debug("[scholar-citations] the queried title: " + title2);
Zotero.debug(
"[scholar-citations] received non-captcha scholar results"
"[scholar-citations] will item title equals to queried title?" +
(title1 === title2)
);
}

cb(item, zsc.getCiteCount(this.responseText));
// check if response includes captcha
} else if (
// check if response includes captcha
this.responseText.indexOf("www.google.com/recaptcha/api.js") != -1
) {
if (isDebug())
Expand Down Expand Up @@ -225,6 +248,7 @@ zsc.retrieveCitationData = function (item, cb) {
}
} else if (this.readyState == 4 && this.status == 429) {
if (this.responseText.indexOf("www.google.com/recaptcha/api.js") == -1) {
// failed without captcha
if (isDebug())
Zotero.debug(
"[scholar-citations] could not retrieve the google scholar data. Server returned: [" +
Expand All @@ -237,6 +261,7 @@ zsc.retrieveCitationData = function (item, cb) {
" seconds before sending further requests."
);
} else {
// failed with captcha
if (isDebug())
Zotero.debug(
"[scholar-citations] received a captcha instead of a scholar result"
Expand Down
22 changes: 12 additions & 10 deletions test/http/429.js
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
let http = require('http');
let http = require("http");

let port = 8080;

http.createServer(function(req, res) {
console.log('incomming request!');
console.log('method: ', req.method);
console.log('url: ', req.url);
console.log('header: ', req.headers);
res.writeHead(429, {'Content-Type': 'text/plain', 'Retry-After': 3600});
res.end('Yikes! Your\'re blocked!');
}).listen(port);
http
.createServer(function (req, res) {
console.log("incomming request!");
console.log("method: ", req.method);
console.log("url: ", req.url);
console.log("header: ", req.headers);
res.writeHead(429, { "Content-Type": "text/plain", "Retry-After": 3600 });
res.end("Yikes! Your're blocked!");
})
.listen(port);

console.log('Starting super simple http server on localhost:' + port + '!');
console.log("Starting super simple http server on localhost:" + port + "!");
22 changes: 12 additions & 10 deletions test/http/captcha.js
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
let http = require('http');
let http = require("http");

let port = 8080;

http.createServer(function(req, res) {
console.log('incomming request!');
console.log('method: ', req.method);
console.log('url: ', req.url);
console.log('header: ', req.headers);
res.writeHead(200, {'Content-Type': 'text/plain'});
res.end('Such Captcha! Much Protec! www.google.com/recaptcha/api.js Wow!');
}).listen(port);
http
.createServer(function (req, res) {
console.log("incomming request!");
console.log("method: ", req.method);
console.log("url: ", req.url);
console.log("header: ", req.headers);
res.writeHead(200, { "Content-Type": "text/plain" });
res.end("Such Captcha! Much Protec! www.google.com/recaptcha/api.js Wow!");
})
.listen(port);

console.log('Starting super simple http server on localhost:' + port + '!');
console.log("Starting super simple http server on localhost:" + port + "!");
Loading

0 comments on commit 6817352

Please sign in to comment.