diff --git a/index.json b/index.json index f46eb78..11fde5c 100644 --- a/index.json +++ b/index.json @@ -1,56 +1,59 @@ [ - "Mozilla/5.0 (compatible; MSIE or Firefox mutant; not on Windows server;) Daumoa/4.0 (Following Mediapartners-Google)", + "adbeat_bot", + "ArchiveTeam ArchiveBot/20170106.02 (wpull 2.0.2)", "Mozilla/5.0 (compatible; ScoutJet; +http://www.scoutjet.com/)", + "bot-pge.chlooe.com/1.0.0 (+http://www.chlooe.com/)", + "Clickagy Intelligence Bot v2", + "Cliqzbot/0.1 (+http://cliqz.com +cliqzbot@cliqz.com)", + "Cliqzbot/0.1 (+http://cliqz.com/company/cliqzbot)", + "coccoc", + "coccoc/1.0 ()", + "coccoc/1.0 (http://help.coccoc.com/)", + "ContextAd Bot 1.0", + "datagnionbot (+http://www.datagnion.com/bot.html)", + "Domain Re-Animator Bot (http://domainreanimator.com) - support@domainreanimator.com", "LivelapBot/0.2 (http://site.livelap.com/crawler)", + "FeedlyBot/1.0 (http://feedly.com)", + "Flamingo_SearchEngine (+http://www.flamingosearch.com/bot)", + "FreeWebMonitoring SiteChecker/0.2 (+https://www.freewebmonitoring.com/bot.html)", + "FreshRSS/1.11.2 (Linux; https://freshrss.org) like Googlebot", + "Gwene/1.0 (The gwene.org rss-to-news gateway) Googlebot", + "jpg-newsbot/2.0; (+https://vipnytt.no/bots/)", + "Landau-Media-Spider/1.0(http://bots.landaumedia.de/bot.html)", + "LinkedInBot/1.0 (compatible; Mozilla/5.0; Apache-HttpClient +http://www.linkedin.com)", "LinkedInBot/1.0 (compatible; Mozilla/5.0; Jakarta Commons-HttpClient/3.1 +http://www.linkedin.com)", "LinkedInBot/1.0 (compatible; Mozilla/5.0; Jakarta Commons-HttpClient/4.3 +http://www.linkedin.com)", - "LinkedInBot/1.0 (compatible; Mozilla/5.0; Apache-HttpClient +http://www.linkedin.com)", - "yacybot (/global; amd64 FreeBSD 10.3-RELEASE; java 1.8.0_77; GMT/en) http://yacy.net/bot.html", - "yacybot (/global; amd64 FreeBSD 10.3-RELEASE-p7; java 1.7.0_95; GMT/en) http://yacy.net/bot.html", - "yacybot (-global; amd64 FreeBSD 9.2-RELEASE-p10; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 2.6.32-042stab093.4; java 1.7.0_65; Etc/en) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 2.6.32-042stab094.8; java 1.7.0_79; America/en) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 2.6.32-042stab108.8; java 1.7.0_91; America/en) http://yacy.net/bot.html", - "yacybot (-global; amd64 Linux 2.6.32-042stab111.11; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", - "yacybot (-global; amd64 Linux 2.6.32-042stab116.1; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 2.6.32-573.3.1.el6.x86_64; java 1.7.0_85; Europe/en) http://yacy.net/bot.html", - "yacybot (-global; amd64 Linux 3.10.0-229.4.2.el7.x86_64; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", - "yacybot (-global; amd64 Linux 3.10.0-229.4.2.el7.x86_64; java 1.8.0_45; Europe/en) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 3.10.0-229.7.2.el7.x86_64; java 1.8.0_45; Europe/en) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 3.10.0-327.22.2.el7.x86_64; java 1.7.0_101; Etc/en) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 3.11.10-21-desktop; java 1.7.0_51; America/en) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 3.12.1; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 3.13.0-042stab093.4; java 1.7.0_79; Europe/de) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 3.13.0-042stab093.4; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 3.13.0-45-generic; java 1.7.0_75; Europe/en) http://yacy.net/bot.html", - "yacybot (-global; amd64 Linux 3.13.0-61-generic; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 3.13.0-74-generic; java 1.7.0_91; Europe/en) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 3.13.0-83-generic; java 1.7.0_95; Europe/de) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 3.13.0-83-generic; java 1.7.0_95; Europe/en) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 3.13.0-85-generic; java 1.7.0_101; Europe/en) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 3.13.0-85-generic; java 1.7.0_95; Europe/en) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 3.13.0-88-generic; java 1.7.0_101; Europe/en) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 3.14-0.bpo.1-amd64; java 1.7.0_55; Europe/de) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 3.14.32-xxxx-grs-ipv6-64; java 1.7.0_75; Europe/en) http://yacy.net/bot.html", - "yacybot (-global; amd64 Linux 3.14.32-xxxx-grs-ipv6-64; java 1.8.0_111; Europe/de) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_111; Europe/de) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_75; America/en) http://yacy.net/bot.html", - "yacybot (-global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_75; Europe/en) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_75; Europe/en) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_79; Europe/de) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_91; Europe/de) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_95; Europe/en) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.8.0_111; Europe/en) http://yacy.net/bot.html", - "yacybot (/global; amd64 Linux 3.16-0.bpo.2-amd64; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", - "yacybot (-global; amd64 Linux 3.19.0-15-generic; java 1.8.0_45-internal; Europe/de) http://yacy.net/bot.html", - "yacybot (-global; amd64 Linux 3.2.0-4-amd64; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", - "yacybot (-global; amd64 Linux 3.2.0-4-amd64; java 1.7.0_67; Europe/en) http://yacy.net/bot.html", - "yacybot (-global; amd64 Linux 4.4.0-57-generic; java 9-internal; Europe/en) http://yacy.net/bot.html", - "yacybot (-global; amd64 Windows 8.1 6.3; java 1.7.0_55; Europe/de) http://yacy.net/bot.html", - "yacybot (-global; amd64 Windows 8 6.2; java 1.7.0_55; Europe/de) http://yacy.net/bot.html", - "yacybot (-global; amd64 Linux 5.2.8-Jinsol; java 12.0.2; Europe/en) http://yacy.net/bot.html", - "yacybot (-global; amd64 Linux 5.2.9-Jinsol; java 12.0.2; Europe/en) http://yacy.net/bot.html", - "yacybot (-global; amd64 Linux 5.2.11-Jinsol; java 12.0.2; Europe/en) http://yacy.net/bot.html", "MJ12bot/v1.2.0 (http://majestic12.co.uk/bot.php?+)", + "MojeekBot/0.2 (archi; http://www.mojeek.com/bot.html)", + "MoodleBot/1.0", + "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0;.NET CLR 1.0.3705; ContextAd Bot 1.0)", + "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; http://www.changedetection.com/bot.html )", + "Mozilla/5.0 (compatible; adbeat_bot; +support@adbeat.com; support@adbeat.com)", + "Mozilla/5.0 (compatible; AddSearchBot/0.9; +http://www.addsearch.com/bot; info@addsearch.com)", + "Mozilla/5.0 (compatible; bnf.fr_bot; +http://bibnum.bnf.fr/robot/bnf.html)", + "Mozilla/5.0 (compatible; bnf.fr_bot; +http://www.bnf.fr/fr/outils/a.dl_web_capture_robot.html)", + "Mozilla/5.0 (compatible; BoxcarBot/1.1; +awesome@boxcar.io)", + "Mozilla/5.0 (compatible; Cliqzbot/0.1 +http://cliqz.com/company/cliqzbot)", + "Mozilla/5.0 (compatible; Cliqzbot/1.0 +http://cliqz.com/company/cliqzbot)", + "Mozilla/5.0 (compatible; Cliqzbot/2.0; +http://cliqz.com/company/cliqzbot)", + "Mozilla/5.0 (compatible; coccoc/1.0; +http://help.coccoc.com/)", + "Mozilla/5.0 (compatible; coccoc/1.0; +http://help.coccoc.com/searchengine)", + "Mozilla/5.0 (compatible; coccocbot-web/1.0; +http://help.coccoc.com/searchengine)", + "Mozilla/5.0 (compatible; Digincore bot; https://www.digincore.com/crawler.html for rules and instructions.)", + "Mozilla/5.0 (compatible; EveryoneSocialBot/1.0; support@everyonesocial.com http://everyonesocial.com/)", + "Mozilla/5.0 (compatible; Feedspotbot/1.0; +http://www.feedspot.com/fs/bot)", + "Mozilla/5.0 (compatible; FemtosearchBot/1.0; http://femtosearch.com)", + "Mozilla/5.0 (compatible; Google-InspectionTool/1.0)", + "Mozilla/5.0 (compatible; image.coccoc/1.0; +http://help.coccoc.com/)", + "Mozilla/5.0 (compatible; imagecoccoc/1.0; +http://help.coccoc.com/)", + "Mozilla/5.0 (compatible; imagecoccoc/1.0; +http://help.coccoc.com/searchengine)", + "Mozilla/5.0 (compatible; IstellaBot/1.23.15 +http://www.tiscali.it/)", + "Mozilla/5.0 (compatible; Jooblebot/2.0; Windows NT 6.1; WOW64; +http://jooble.org/jooble-bot) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36", + "Mozilla/5.0 (compatible; linkdexbot/2.0; +http://www.linkdex.com/about/bots/)", + "Mozilla/5.0 (compatible; linkdexbot/2.0; +http://www.linkdex.com/bots/)", + "Mozilla/5.0 (compatible; linkdexbot/2.1; +http://www.linkdex.com/about/bots/)", + "Mozilla/5.0 (compatible; linkdexbot/2.1; +http://www.linkdex.com/bots/)", + "Mozilla/5.0 (compatible; linkdexbot/2.2; +http://www.linkdex.com/bots/)", + "Mozilla/5.0 (compatible; LinkisBot/1.0; bot@linkis.com) (iPhone; CPU iPhone OS 8_4_1 like Mac OS X) Mobile/12H321", + "Mozilla/5.0 (compatible; memorybot/1.21.14 +http://mignify.com/bot.html)", "Mozilla/5.0 (compatible; MJ12bot/v1.2.1; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.2.3; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.2.4; http://www.majestic12.co.uk/bot.php?+)", @@ -70,14 +73,27 @@ "Mozilla/5.0 (compatible; MJ12bot/v1.4.7; http://mj12bot.com/)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.7; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.8; http://mj12bot.com/)", + "Mozilla/5.0 (compatible; MojeekBot/0.2; http://www.mojeek.com/bot.html)", + "Mozilla/5.0 (compatible; MojeekBot/0.2; http://www.mojeek.com/bot.html#relaunch)", + "Mozilla/5.0 (compatible; MojeekBot/0.5; http://www.mojeek.com/bot.html)", + "Mozilla/5.0 (compatible; MojeekBot/0.6; +https://www.mojeek.com/bot.html)", + "Mozilla/5.0 (compatible; MojeekBot/0.6; http://www.mojeek.com/bot.html)", + "Mozilla/5.0 (compatible; MSIE or Firefox mutant; not on Windows server;) Daumoa/4.0 (Following Mediapartners-Google)", + "Mozilla/5.0 (compatible; NIXStatsbot/1.1; +http://www.nixstats.com/bot.html)", + "Mozilla/5.0 (compatible; OdklBot/1.0 like Linux; klass@odnoklassniki.ru)", + "Mozilla/5.0 (compatible; online-webceo-bot/1.0; +http://online.webceo.com)", + "Mozilla/5.0 (compatible; Qwantify/Bleriot/1.1; +https://help.qwant.com/bot)", + "Mozilla/5.0 (compatible; Qwantify/Bleriot/1.2.1; +https://help.qwant.com/bot)", + "Mozilla/5.0 (compatible; RankActiveLinkBot; +https://rankactive.com/resources/rankactive-linkbot)", + "Mozilla/5.0 (compatible; redditbot/1.0; +http://www.reddit.com/feedback)", "Mozilla/5.0 (compatible; spbot/1.0; +http://www.seoprofiler.com/bot/ )", "Mozilla/5.0 (compatible; spbot/1.1; +http://www.seoprofiler.com/bot/ )", "Mozilla/5.0 (compatible; spbot/1.2; +http://www.seoprofiler.com/bot/ )", + "Mozilla/5.0 (compatible; spbot/2.0; +http://www.seoprofiler.com/bot/ )", "Mozilla/5.0 (compatible; spbot/2.0.1; +http://www.seoprofiler.com/bot/ )", "Mozilla/5.0 (compatible; spbot/2.0.2; +http://www.seoprofiler.com/bot/ )", "Mozilla/5.0 (compatible; spbot/2.0.3; +http://www.seoprofiler.com/bot/ )", "Mozilla/5.0 (compatible; spbot/2.0.4; +http://www.seoprofiler.com/bot )", - "Mozilla/5.0 (compatible; spbot/2.0; +http://www.seoprofiler.com/bot/ )", "Mozilla/5.0 (compatible; spbot/2.1; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/3.0; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/3.1; +http://www.seoprofiler.com/bot )", @@ -87,98 +103,98 @@ "Mozilla/5.0 (compatible; spbot/4.0.4; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.0.5; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.0.6; +http://www.seoprofiler.com/bot )", - "Mozilla/5.0 (compatible; spbot/4.0.7; +https://www.seoprofiler.com/bot )", - "Mozilla/5.0 (compatible; spbot/4.0; +http://www.seoprofiler.com/bot )", - "Mozilla/5.0 (compatible; spbot/4.0a; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.0b; +http://www.seoprofiler.com/bot )", - "TurnitinBot (https://turnitin.com/robot/crawlerinfo.html)", - "Mozilla/5.0 (compatible; linkdexbot/2.0; +http://www.linkdex.com/about/bots/)", - "Mozilla/5.0 (compatible; linkdexbot/2.0; +http://www.linkdex.com/bots/)", - "Mozilla/5.0 (compatible; linkdexbot/2.1; +http://www.linkdex.com/about/bots/)", - "Mozilla/5.0 (compatible; linkdexbot/2.1; +http://www.linkdex.com/bots/)", - "Mozilla/5.0 (compatible; linkdexbot/2.2; +http://www.linkdex.com/bots/)", - "Wotbox/2.01 (+http://www.wotbox.com/bot/)", - "Mozilla/5.0 (compatible; coccoc/1.0; +http://help.coccoc.com/)", - "Mozilla/5.0 (compatible; coccoc/1.0; +http://help.coccoc.com/searchengine)", - "Mozilla/5.0 (compatible; coccocbot-image/1.0; +http://help.coccoc.com/searchengine)", - "Mozilla/5.0 (compatible; coccocbot-web/1.0; +http://help.coccoc.com/searchengine)", - "Mozilla/5.0 (compatible; image.coccoc/1.0; +http://help.coccoc.com/)", - "Mozilla/5.0 (compatible; imagecoccoc/1.0; +http://help.coccoc.com/)", - "Mozilla/5.0 (compatible; imagecoccoc/1.0; +http://help.coccoc.com/searchengine)", - "coccoc/1.0 (http://help.coccoc.com/)", - "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; http://www.changedetection.com/bot.html )", - "psbot-image (+http://www.picsearch.com/bot.html)", - "psbot-page (+http://www.picsearch.com/bot.html)", - "psbot/0.1 (+http://www.picsearch.com/bot.html)", - "Mozilla/5.0 (compatible; URLAppendBot/1.0; +http://www.profound.net/urlappendbot.html)", - "Mozilla/5.0 (compatible; bnf.fr_bot; +http://bibnum.bnf.fr/robot/bnf.html)", - "Mozilla/5.0 (compatible; bnf.fr_bot; +http://www.bnf.fr/fr/outils/a.dl_web_capture_robot.html)", - "Mozilla/5.0 (compatible; memorybot/1.21.14 +http://mignify.com/bot.html)", + "Mozilla/5.0 (compatible; startmebot/1.0; +https://start.me/bot)", + "Mozilla/5.0 (compatible; SurdotlyBot/1.0; +http://sur.ly/bot.html; Linux; Android 4; iPhone; CPU iPhone OS 6_0_1 like Mac OS X)", + "Mozilla/5.0 (compatible; TinEye-bot/1.31; +http://www.tineye.com/crawler.html)", + "Mozilla/5.0 (compatible; trovitBot 1.0; +http://www.trovit.com/bot.html)", + "Mozilla/5.0 (compatible; URLAppendBot/1.0; +http://www.profound.net/urlappendbot.html)", "Mozilla/5.0 (compatible; bnf.fr_bot; +http://bibnum.bnf.fr/robot/bnf.html)", + "Mozilla/5.0 (compatible; Vigil/1.0; +http://vigil-app.com/bot.html)", "Mozilla/5.0 (compatible; XoviBot/2.0; +http://www.xovibot.net/)", - "Mozilla/5.0 (compatible; Qwantify/Bleriot/1.1; +https://help.qwant.com/bot)", - "Mozilla/5.0 (compatible; Qwantify/Bleriot/1.2.1; +https://help.qwant.com/bot)", "Mozilla/5.0 (compatible; yoozBot-2.2; http://yooz.ir; info@yooz.ir)", - "Domain Re-Animator Bot (http://domainreanimator.com) - support@domainreanimator.com", - "LivelapBot/0.2 (http://site.livelap.com/crawler)", - "Mozilla/5.0 (compatible; IstellaBot/1.23.15 +http://www.tiscali.it/)", - "Mozilla/5.0 (compatible; Cliqzbot/2.0; +http://cliqz.com/company/cliqzbot)", - "Cliqzbot/0.1 (+http://cliqz.com +cliqzbot@cliqz.com)", - "Cliqzbot/0.1 (+http://cliqz.com/company/cliqzbot)", - "Mozilla/5.0 (compatible; Cliqzbot/0.1 +http://cliqz.com/company/cliqzbot)", - "Mozilla/5.0 (compatible; Cliqzbot/1.0 +http://cliqz.com/company/cliqzbot)", - "MojeekBot/0.2 (archi; http://www.mojeek.com/bot.html)", - "Mozilla/5.0 (compatible; MojeekBot/0.2; http://www.mojeek.com/bot.html#relaunch)", - "Mozilla/5.0 (compatible; MojeekBot/0.2; http://www.mojeek.com/bot.html)", - "Mozilla/5.0 (compatible; MojeekBot/0.5; http://www.mojeek.com/bot.html)", - "Mozilla/5.0 (compatible; MojeekBot/0.6; +https://www.mojeek.com/bot.html)", - "Mozilla/5.0 (compatible; MojeekBot/0.6; http://www.mojeek.com/bot.html)", - "Mozilla/5.0 (compatible; RankActiveLinkBot; +https://rankactive.com/resources/rankactive-linkbot)", - "Slackbot-LinkExpanding (+https://api.slack.com/robots)", - "Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)", - "Slackbot 1.0 (+https://api.slack.com/robots)", - "Mozilla/5.0 (compatible; redditbot/1.0; +http://www.reddit.com/feedback)", - "datagnionbot (+http://www.datagnion.com/bot.html)", - "Mozilla/5.0 (compatible; adbeat_bot; +support@adbeat.com; support@adbeat.com)", - "adbeat_bot", - "Mozilla/5.0 (compatible; FemtosearchBot/1.0; http://femtosearch.com)", - "Pingdom.com_bot_version_1.4_(http://www.pingdom.com/)", - "PiplBot (+http://www.pipl.com/bot/)", - "Mozilla/5.0+(compatible;+PiplBot;+http://www.pipl.com/bot/)", + "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Google-InspectionTool/1.0)", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; trendictionbot0.5.0; trendiction search; http://www.trendiction.de/bot; please let us know of any problems; web at trendiction.com) Gecko/20170101 Firefox/67.0", "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6 - James BOT - WebCrawler http://cognitiveseo.com/bot.html", - "Mozilla/5.0 (compatible; TinEye-bot/1.31; +http://www.tineye.com/crawler.html)", "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.0; trendictionbot0.5.0; trendiction search; http://www.trendiction.de/bot; please let us know of any problems; web at trendiction.com) Gecko/20071127 Firefox/3.0.0.11", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64; trendictionbot0.5.0; trendiction search; http://www.trendiction.de/bot; please let us know of any problems; web at trendiction.com) Gecko/20170101 Firefox/67.0", - "Mozilla/5.0 (compatible; EveryoneSocialBot/1.0; support@everyonesocial.com http://everyonesocial.com/)", - "ArchiveTeam ArchiveBot/20170106.02 (wpull 2.0.2)", - "Mozilla/5.0 (compatible; ScoutJet; +http://www.scoutjet.com/)", - "bot-pge.chlooe.com/1.0.0 (+http://www.chlooe.com/)", - "Mozilla/5.0 (compatible; BoxcarBot/1.1; +awesome@boxcar.io)", - "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0;.NET CLR 1.0.3705; ContextAd Bot 1.0)", - "ContextAd Bot 1.0", - "Mozilla/5.0 (compatible; Digincore bot; https://www.digincore.com/crawler.html for rules and instructions.)", - "FeedlyBot/1.0 (http://feedly.com)", - "Flamingo_SearchEngine (+http://www.flamingosearch.com/bot)", - "Landau-Media-Spider/1.0(http://bots.landaumedia.de/bot.html)", - "Mozilla/5.0 (compatible; Feedspotbot/1.0; +http://www.feedspot.com/fs/bot)", - "MoodleBot/1.0", - "jpg-newsbot/2.0; (+https://vipnytt.no/bots/)", - "Clickagy Intelligence Bot v2", - "Mozilla/5.0 (compatible; online-webceo-bot/1.0; +http://online.webceo.com)", - "Mozilla/5.0 (compatible; AddSearchBot/0.9; +http://www.addsearch.com/bot; info@addsearch.com)", - "RSSingBot (http://www.rssing.com)", - "Mozilla/5.0 (compatible; Jooblebot/2.0; Windows NT 6.1; WOW64; +http://jooble.org/jooble-bot) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36", - "Gwene/1.0 (The gwene.org rss-to-news gateway) Googlebot", - "Superfeedr bot/2.0 http://superfeedr.com - Make your feeds realtime: get in touch - feed-id:1162088860", - "Mozilla/5.0 (compatible; SurdotlyBot/1.0; +http://sur.ly/bot.html; Linux; Android 4; iPhone; CPU iPhone OS 6_0_1 like Mac OS X)", - "Mozilla/5.0 (compatible; LinkisBot/1.0; bot@linkis.com) (iPhone; CPU iPhone OS 8_4_1 like Mac OS X) Mobile/12H321", - "FreeWebMonitoring SiteChecker/0.2 (+https://www.freewebmonitoring.com/bot.html)", - "SentiBot www.sentibot.eu (compatible with Googlebot)", - "Mozilla/5.0 (compatible; NIXStatsbot/1.1; +http://www.nixstats.com/bot.html)", - "Mozilla/5.0 (compatible; Vigil/1.0; +http://vigil-app.com/bot.html)", - "Mozilla/5.0 (compatible; startmebot/1.0; +https://start.me/bot)", "Mozilla/5.0 (X11; U; Linux Core i7-4980HQ; de; rv:32.0; compatible; JobboerseBot; http://www.jobboerse.com/bot.htm) Gecko/20100101 Firefox/38.0", - "FreshRSS/1.11.2 (Linux; https://freshrss.org) like Googlebot", - "Mozilla/5.0 (compatible; trovitBot 1.0; +http://www.trovit.com/bot.html)", + "Mozilla/5.0+(compatible;+PiplBot;+http://www.pipl.com/bot/)", "OdklBot/1.0 (share@odnoklassniki.ru)", - "Mozilla/5.0 (compatible; OdklBot/1.0 like Linux; klass@odnoklassniki.ru)" + "Pingdom.com_bot_version_1.4_(http://www.pingdom.com/)", + "PiplBot (+http://www.pipl.com/bot/)", + "psbot-image (+http://www.picsearch.com/bot.html)", + "psbot-page (+http://www.picsearch.com/bot.html)", + "psbot/0.1 (+http://www.picsearch.com/bot.html)", + "RSSingBot (http://www.rssing.com)", + "SentiBot www.sentibot.eu (compatible with Googlebot)", + "Slackbot 1.0 (+https://api.slack.com/robots)", + "Slackbot-LinkExpanding (+https://api.slack.com/robots)", + "Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)", + "Superfeedr bot/2.0 http://superfeedr.com - Make your feeds realtime: get in touch - feed-id:1162088860", + "TurnitinBot (https://turnitin.com/robot/crawlerinfo.html)", + "WhatsApp", + "WhatsApp/0.3.4479 N", + "WhatsApp/0.3.4941 N", + "WhatsApp/2.12.15/i", + "WhatsApp/2.12.17/i", + "WhatsApp/2.12.449 A", + "WhatsApp/2.12.453 A", + "WhatsApp/2.12.540 A", + "WhatsApp/2.12.548 A", + "WhatsApp/2.12.556 A", + "WhatsApp/2.16.1/i", + "WhatsApp/2.16.2/i", + "WhatsApp/2.16.13 A", + "WhatsApp/2.16.42 A", + "WhatsApp/2.16.57 A", + "WhatsApp/2.19.175 A", + "WhatsApp/2.19.244 A", + "WhatsApp/2.19.258 A", + "WhatsApp/2.19.308 A", + "WhatsApp/2.19.330 A", + "Wotbox/2.01 (+http://www.wotbox.com/bot/)", + "yacybot (-global; amd64 FreeBSD 9.2-RELEASE-p10; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 2.6.32-042stab111.11; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 2.6.32-042stab116.1; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 3.2.0-4-amd64; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 3.2.0-4-amd64; java 1.7.0_67; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 3.10.0-229.4.2.el7.x86_64; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 3.10.0-229.4.2.el7.x86_64; java 1.8.0_45; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 3.13.0-61-generic; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 3.14.32-xxxx-grs-ipv6-64; java 1.8.0_111; Europe/de) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_75; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 3.19.0-15-generic; java 1.8.0_45-internal; Europe/de) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 4.4.0-57-generic; java 9-internal; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 5.2.8-Jinsol; java 12.0.2; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 5.2.11-Jinsol; java 12.0.2; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Windows 8 6.2; java 1.7.0_55; Europe/de) http://yacy.net/bot.html", + "yacybot (-global; amd64 Windows 8.1 6.3; java 1.7.0_55; Europe/de) http://yacy.net/bot.html", + "yacybot (/global; amd64 FreeBSD 10.3-RELEASE-p7; java 1.7.0_95; GMT/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 FreeBSD 10.3-RELEASE; java 1.8.0_77; GMT/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 2.6.32-042stab093.4; java 1.7.0_65; Etc/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 2.6.32-042stab094.8; java 1.7.0_79; America/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 2.6.32-042stab108.8; java 1.7.0_91; America/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 2.6.32-573.3.1.el6.x86_64; java 1.7.0_85; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.10.0-229.7.2.el7.x86_64; java 1.8.0_45; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.10.0-327.22.2.el7.x86_64; java 1.7.0_101; Etc/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.11.10-21-desktop; java 1.7.0_51; America/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.12.1; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-042stab093.4; java 1.7.0_79; Europe/de) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-042stab093.4; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-45-generic; java 1.7.0_75; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-74-generic; java 1.7.0_91; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-83-generic; java 1.7.0_95; Europe/de) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-83-generic; java 1.7.0_95; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-85-generic; java 1.7.0_95; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-85-generic; java 1.7.0_101; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-88-generic; java 1.7.0_101; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.14-0.bpo.1-amd64; java 1.7.0_55; Europe/de) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.14.32-xxxx-grs-ipv6-64; java 1.7.0_75; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16-0.bpo.2-amd64; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_75; America/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_75; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_79; Europe/de) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_91; Europe/de) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_95; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_111; Europe/de) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.8.0_111; Europe/en) http://yacy.net/bot.html" ] \ No newline at end of file diff --git a/scripts/generate.mjs b/scripts/generate.mjs index 0239f17..152d07d 100644 --- a/scripts/generate.mjs +++ b/scripts/generate.mjs @@ -8,15 +8,27 @@ import { load } from 'cheerio' import pFilter from 'p-filter' import pEvery from 'p-every' -const crawlers = createRequire(import.meta.url)('crawler-user-agents/crawler-user-agents.json') +const crawlers = createRequire(import.meta.url)( + 'crawler-user-agents/crawler-user-agents.json' +) const CHECK = { true: '✅', false: '❌' } const MAX_CONCURRENCY = 10 const REQ_TIMEOUT = 10000 -const candidates = [...new Set(crawlers.flatMap(crawler => crawler.instances))] +const shuffle = array => { + for (let index = array.length - 1; index > 0; index--) { + const newIndex = Math.floor(Math.random() * (index + 1)) + ;[array[index], array[newIndex]] = [array[newIndex], array[index]] + } + return array +} + +const candidates = shuffle([...new Set(crawlers.flatMap(crawler => crawler.instances))]) -const teslaUrl = await fetch('https://api.teslahunt.io/cars?maxRecords=1', { headers: { 'x-api-key': process.env.TESLAHUNT_API_KEY } }) +const teslaUrl = await fetch('https://api.teslahunt.io/cars?maxRecords=1', { + headers: { 'x-api-key': process.env.TESLAHUNT_API_KEY } +}) .then(res => res.json()) .then(cars => cars[0].detailsUrl) @@ -51,6 +63,7 @@ const verify = async (userAgent, index) => Promise.resolve() .then(() => pFilter(candidates, verify, { concurrency: MAX_CONCURRENCY })) .then(async result => { - await writeFile('index.json', JSON.stringify(result, null, 2)) - console.log(`\nGenerated ${result.length} crawlers ✨`) + const sorted = result.sort((a, b) => a.localeCompare(b)) + await writeFile('index.json', JSON.stringify(sorted, null, 2)) + console.log(`\nGenerated ${sorted.length} crawlers ✨`) })