From 8aedd078fa5a9585dc75ded6187c514c05f5ee96 Mon Sep 17 00:00:00 2001 From: Kiko Beats Date: Mon, 1 Jan 2024 21:47:51 +0000 Subject: [PATCH] build(update): crawlers --- index.json | 192 +++++++++++++++++++++++++++++++++++++++---- scripts/generate.mjs | 2 +- 2 files changed, 179 insertions(+), 15 deletions(-) diff --git a/index.json b/index.json index 626aa46..f46eb78 100644 --- a/index.json +++ b/index.json @@ -3,18 +3,182 @@ "LinkedInBot/1.0 (compatible; Mozilla/5.0; Jakarta Commons-HttpClient/3.1 +http://www.linkedin.com)", "LinkedInBot/1.0 (compatible; Mozilla/5.0; Jakarta Commons-HttpClient/4.3 +http://www.linkedin.com)", "LinkedInBot/1.0 (compatible; Mozilla/5.0; Apache-HttpClient +http://www.linkedin.com)", - "adidxbot/1.1 (+http://search.msn.com/msnbot.htm)", - "adidxbot/2.0 (+http://search.msn.com/msnbot.htm)", - "librabot/1.0 (+http://search.msn.com/msnbot.htm)", - "librabot/2.0 (+http://search.msn.com/msnbot.htm)", - "msnbot-NewsBlogs/2.0b (+http://search.msn.com/msnbot.htm)", - "msnbot-UDiscovery/2.0b (+http://search.msn.com/msnbot.htm)", - "msnbot-media/1.0 (+http://search.msn.com/msnbot.htm)", - "msnbot-media/1.1 (+http://search.msn.com/msnbot.htm)", - "msnbot-media/2.0b (+http://search.msn.com/msnbot.htm)", - "msnbot/1.0 (+http://search.msn.com/msnbot.htm)", - "msnbot/1.1 (+http://search.msn.com/msnbot.htm)", - "msnbot/2.0b (+http://search.msn.com/msnbot.htm)", - "msnbot/2.0b (+http://search.msn.com/msnbot.htm).", - "msnbot/2.0b (+http://search.msn.com/msnbot.htm)._" + "yacybot (/global; amd64 FreeBSD 10.3-RELEASE; java 1.8.0_77; GMT/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 FreeBSD 10.3-RELEASE-p7; java 1.7.0_95; GMT/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 FreeBSD 9.2-RELEASE-p10; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 2.6.32-042stab093.4; java 1.7.0_65; Etc/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 2.6.32-042stab094.8; java 1.7.0_79; America/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 2.6.32-042stab108.8; java 1.7.0_91; America/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 2.6.32-042stab111.11; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 2.6.32-042stab116.1; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 2.6.32-573.3.1.el6.x86_64; java 1.7.0_85; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 3.10.0-229.4.2.el7.x86_64; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 3.10.0-229.4.2.el7.x86_64; java 1.8.0_45; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.10.0-229.7.2.el7.x86_64; java 1.8.0_45; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.10.0-327.22.2.el7.x86_64; java 1.7.0_101; Etc/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.11.10-21-desktop; java 1.7.0_51; America/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.12.1; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-042stab093.4; java 1.7.0_79; Europe/de) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-042stab093.4; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-45-generic; java 1.7.0_75; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 3.13.0-61-generic; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-74-generic; java 1.7.0_91; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-83-generic; java 1.7.0_95; Europe/de) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-83-generic; java 1.7.0_95; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-85-generic; java 1.7.0_101; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-85-generic; java 1.7.0_95; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-88-generic; java 1.7.0_101; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.14-0.bpo.1-amd64; java 1.7.0_55; Europe/de) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.14.32-xxxx-grs-ipv6-64; java 1.7.0_75; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 3.14.32-xxxx-grs-ipv6-64; java 1.8.0_111; Europe/de) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_111; Europe/de) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_75; America/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_75; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_75; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_79; Europe/de) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_91; Europe/de) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_95; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.8.0_111; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16-0.bpo.2-amd64; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 3.19.0-15-generic; java 1.8.0_45-internal; Europe/de) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 3.2.0-4-amd64; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 3.2.0-4-amd64; java 1.7.0_67; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 4.4.0-57-generic; java 9-internal; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Windows 8.1 6.3; java 1.7.0_55; Europe/de) http://yacy.net/bot.html", + "yacybot (-global; amd64 Windows 8 6.2; java 1.7.0_55; Europe/de) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 5.2.8-Jinsol; java 12.0.2; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 5.2.9-Jinsol; java 12.0.2; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 5.2.11-Jinsol; java 12.0.2; Europe/en) http://yacy.net/bot.html", + "MJ12bot/v1.2.0 (http://majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.2.1; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.2.3; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.2.4; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.2.5; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.3.0; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.3.1; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.3.2; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.3.3; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.4.0; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.4.1; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.4.2; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.4.3; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.4.4 (domain ownership verifier); http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.4.4; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.4.5; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.4.6; http://mj12bot.com/)", + "Mozilla/5.0 (compatible; MJ12bot/v1.4.7; http://mj12bot.com/)", + "Mozilla/5.0 (compatible; MJ12bot/v1.4.7; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.4.8; http://mj12bot.com/)", + "Mozilla/5.0 (compatible; spbot/1.0; +http://www.seoprofiler.com/bot/ )", + "Mozilla/5.0 (compatible; spbot/1.1; +http://www.seoprofiler.com/bot/ )", + "Mozilla/5.0 (compatible; spbot/1.2; +http://www.seoprofiler.com/bot/ )", + "Mozilla/5.0 (compatible; spbot/2.0.1; +http://www.seoprofiler.com/bot/ )", + "Mozilla/5.0 (compatible; spbot/2.0.2; +http://www.seoprofiler.com/bot/ )", + "Mozilla/5.0 (compatible; spbot/2.0.3; +http://www.seoprofiler.com/bot/ )", + "Mozilla/5.0 (compatible; spbot/2.0.4; +http://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/2.0; +http://www.seoprofiler.com/bot/ )", + "Mozilla/5.0 (compatible; spbot/2.1; +http://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/3.0; +http://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/3.1; +http://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/4.0.1; +http://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/4.0.2; +http://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/4.0.3; +http://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/4.0.4; +http://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/4.0.5; +http://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/4.0.6; +http://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/4.0.7; +https://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/4.0; +http://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/4.0a; +http://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/4.0b; +http://www.seoprofiler.com/bot )", + "TurnitinBot (https://turnitin.com/robot/crawlerinfo.html)", + "Mozilla/5.0 (compatible; linkdexbot/2.0; +http://www.linkdex.com/about/bots/)", + "Mozilla/5.0 (compatible; linkdexbot/2.0; +http://www.linkdex.com/bots/)", + "Mozilla/5.0 (compatible; linkdexbot/2.1; +http://www.linkdex.com/about/bots/)", + "Mozilla/5.0 (compatible; linkdexbot/2.1; +http://www.linkdex.com/bots/)", + "Mozilla/5.0 (compatible; linkdexbot/2.2; +http://www.linkdex.com/bots/)", + "Wotbox/2.01 (+http://www.wotbox.com/bot/)", + "Mozilla/5.0 (compatible; coccoc/1.0; +http://help.coccoc.com/)", + "Mozilla/5.0 (compatible; coccoc/1.0; +http://help.coccoc.com/searchengine)", + "Mozilla/5.0 (compatible; coccocbot-image/1.0; +http://help.coccoc.com/searchengine)", + "Mozilla/5.0 (compatible; coccocbot-web/1.0; +http://help.coccoc.com/searchengine)", + "Mozilla/5.0 (compatible; image.coccoc/1.0; +http://help.coccoc.com/)", + "Mozilla/5.0 (compatible; imagecoccoc/1.0; +http://help.coccoc.com/)", + "Mozilla/5.0 (compatible; imagecoccoc/1.0; +http://help.coccoc.com/searchengine)", + "coccoc/1.0 (http://help.coccoc.com/)", + "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; http://www.changedetection.com/bot.html )", + "psbot-image (+http://www.picsearch.com/bot.html)", + "psbot-page (+http://www.picsearch.com/bot.html)", + "psbot/0.1 (+http://www.picsearch.com/bot.html)", + "Mozilla/5.0 (compatible; URLAppendBot/1.0; +http://www.profound.net/urlappendbot.html)", + "Mozilla/5.0 (compatible; bnf.fr_bot; +http://bibnum.bnf.fr/robot/bnf.html)", + "Mozilla/5.0 (compatible; bnf.fr_bot; +http://www.bnf.fr/fr/outils/a.dl_web_capture_robot.html)", + "Mozilla/5.0 (compatible; memorybot/1.21.14 +http://mignify.com/bot.html)", + "Mozilla/5.0 (compatible; XoviBot/2.0; +http://www.xovibot.net/)", + "Mozilla/5.0 (compatible; Qwantify/Bleriot/1.1; +https://help.qwant.com/bot)", + "Mozilla/5.0 (compatible; Qwantify/Bleriot/1.2.1; +https://help.qwant.com/bot)", + "Mozilla/5.0 (compatible; yoozBot-2.2; http://yooz.ir; info@yooz.ir)", + "Domain Re-Animator Bot (http://domainreanimator.com) - support@domainreanimator.com", + "LivelapBot/0.2 (http://site.livelap.com/crawler)", + "Mozilla/5.0 (compatible; IstellaBot/1.23.15 +http://www.tiscali.it/)", + "Mozilla/5.0 (compatible; Cliqzbot/2.0; +http://cliqz.com/company/cliqzbot)", + "Cliqzbot/0.1 (+http://cliqz.com +cliqzbot@cliqz.com)", + "Cliqzbot/0.1 (+http://cliqz.com/company/cliqzbot)", + "Mozilla/5.0 (compatible; Cliqzbot/0.1 +http://cliqz.com/company/cliqzbot)", + "Mozilla/5.0 (compatible; Cliqzbot/1.0 +http://cliqz.com/company/cliqzbot)", + "MojeekBot/0.2 (archi; http://www.mojeek.com/bot.html)", + "Mozilla/5.0 (compatible; MojeekBot/0.2; http://www.mojeek.com/bot.html#relaunch)", + "Mozilla/5.0 (compatible; MojeekBot/0.2; http://www.mojeek.com/bot.html)", + "Mozilla/5.0 (compatible; MojeekBot/0.5; http://www.mojeek.com/bot.html)", + "Mozilla/5.0 (compatible; MojeekBot/0.6; +https://www.mojeek.com/bot.html)", + "Mozilla/5.0 (compatible; MojeekBot/0.6; http://www.mojeek.com/bot.html)", + "Mozilla/5.0 (compatible; RankActiveLinkBot; +https://rankactive.com/resources/rankactive-linkbot)", + "Slackbot-LinkExpanding (+https://api.slack.com/robots)", + "Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)", + "Slackbot 1.0 (+https://api.slack.com/robots)", + "Mozilla/5.0 (compatible; redditbot/1.0; +http://www.reddit.com/feedback)", + "datagnionbot (+http://www.datagnion.com/bot.html)", + "Mozilla/5.0 (compatible; adbeat_bot; +support@adbeat.com; support@adbeat.com)", + "adbeat_bot", + "Mozilla/5.0 (compatible; FemtosearchBot/1.0; http://femtosearch.com)", + "Pingdom.com_bot_version_1.4_(http://www.pingdom.com/)", + "PiplBot (+http://www.pipl.com/bot/)", + "Mozilla/5.0+(compatible;+PiplBot;+http://www.pipl.com/bot/)", + "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6 - James BOT - WebCrawler http://cognitiveseo.com/bot.html", + "Mozilla/5.0 (compatible; TinEye-bot/1.31; +http://www.tineye.com/crawler.html)", + "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.0; trendictionbot0.5.0; trendiction search; http://www.trendiction.de/bot; please let us know of any problems; web at trendiction.com) Gecko/20071127 Firefox/3.0.0.11", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; trendictionbot0.5.0; trendiction search; http://www.trendiction.de/bot; please let us know of any problems; web at trendiction.com) Gecko/20170101 Firefox/67.0", + "Mozilla/5.0 (compatible; EveryoneSocialBot/1.0; support@everyonesocial.com http://everyonesocial.com/)", + "ArchiveTeam ArchiveBot/20170106.02 (wpull 2.0.2)", + "Mozilla/5.0 (compatible; ScoutJet; +http://www.scoutjet.com/)", + "bot-pge.chlooe.com/1.0.0 (+http://www.chlooe.com/)", + "Mozilla/5.0 (compatible; BoxcarBot/1.1; +awesome@boxcar.io)", + "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0;.NET CLR 1.0.3705; ContextAd Bot 1.0)", + "ContextAd Bot 1.0", + "Mozilla/5.0 (compatible; Digincore bot; https://www.digincore.com/crawler.html for rules and instructions.)", + "FeedlyBot/1.0 (http://feedly.com)", + "Flamingo_SearchEngine (+http://www.flamingosearch.com/bot)", + "Landau-Media-Spider/1.0(http://bots.landaumedia.de/bot.html)", + "Mozilla/5.0 (compatible; Feedspotbot/1.0; +http://www.feedspot.com/fs/bot)", + "MoodleBot/1.0", + "jpg-newsbot/2.0; (+https://vipnytt.no/bots/)", + "Clickagy Intelligence Bot v2", + "Mozilla/5.0 (compatible; online-webceo-bot/1.0; +http://online.webceo.com)", + "Mozilla/5.0 (compatible; AddSearchBot/0.9; +http://www.addsearch.com/bot; info@addsearch.com)", + "RSSingBot (http://www.rssing.com)", + "Mozilla/5.0 (compatible; Jooblebot/2.0; Windows NT 6.1; WOW64; +http://jooble.org/jooble-bot) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36", + "Gwene/1.0 (The gwene.org rss-to-news gateway) Googlebot", + "Superfeedr bot/2.0 http://superfeedr.com - Make your feeds realtime: get in touch - feed-id:1162088860", + "Mozilla/5.0 (compatible; SurdotlyBot/1.0; +http://sur.ly/bot.html; Linux; Android 4; iPhone; CPU iPhone OS 6_0_1 like Mac OS X)", + "Mozilla/5.0 (compatible; LinkisBot/1.0; bot@linkis.com) (iPhone; CPU iPhone OS 8_4_1 like Mac OS X) Mobile/12H321", + "FreeWebMonitoring SiteChecker/0.2 (+https://www.freewebmonitoring.com/bot.html)", + "SentiBot www.sentibot.eu (compatible with Googlebot)", + "Mozilla/5.0 (compatible; NIXStatsbot/1.1; +http://www.nixstats.com/bot.html)", + "Mozilla/5.0 (compatible; Vigil/1.0; +http://vigil-app.com/bot.html)", + "Mozilla/5.0 (compatible; startmebot/1.0; +https://start.me/bot)", + "Mozilla/5.0 (X11; U; Linux Core i7-4980HQ; de; rv:32.0; compatible; JobboerseBot; http://www.jobboerse.com/bot.htm) Gecko/20100101 Firefox/38.0", + "FreshRSS/1.11.2 (Linux; https://freshrss.org) like Googlebot", + "Mozilla/5.0 (compatible; trovitBot 1.0; +http://www.trovit.com/bot.html)", + "OdklBot/1.0 (share@odnoklassniki.ru)", + "Mozilla/5.0 (compatible; OdklBot/1.0 like Linux; klass@odnoklassniki.ru)" ] \ No newline at end of file diff --git a/scripts/generate.mjs b/scripts/generate.mjs index 8c5bbf1..0239f17 100644 --- a/scripts/generate.mjs +++ b/scripts/generate.mjs @@ -18,7 +18,7 @@ const candidates = [...new Set(crawlers.flatMap(crawler => crawler.instances))] const teslaUrl = await fetch('https://api.teslahunt.io/cars?maxRecords=1', { headers: { 'x-api-key': process.env.TESLAHUNT_API_KEY } }) .then(res => res.json()) - .then(payload => payload.detailsUrl) + .then(cars => cars[0].detailsUrl) const URLS = [ 'https://twitter.com/Kikobeats/status/1687837848802578432',