From 80d9aa9e929ddd332b72223face20d00b823b7bc Mon Sep 17 00:00:00 2001 From: Jarell <91372088+jarelllama@users.noreply.github.com> Date: Fri, 7 Jun 2024 13:15:11 +0000 Subject: [PATCH] Update NRD feeds Update feeds used Remove NRD count from README Temporarily disable notification for failed NRD feed download --- SOURCES.md | 3 +-- scripts/tools.sh | 24 +++++++++++------------- scripts/update_readme.sh | 13 ------------- 3 files changed, 12 insertions(+), 28 deletions(-) diff --git a/SOURCES.md b/SOURCES.md index 883fef03..bd6daef4 100644 --- a/SOURCES.md +++ b/SOURCES.md @@ -15,7 +15,6 @@ Sources marked as inactive are not being automatically employed to retrieve doma | [Fake Website Buster](https://fakewebsitebuster.com/) | Fake | | | | [Google's Custom Search JSON API](https://developers.google.com/custom-search/v1/introduction) | Fake | | | | [GunTab](https://www.guntab.com/scam-websites) | Firearm | | Yes | -| [Hagezi's NRD List](https://github.com/hagezi/dns-blocklists?tab=readme-ov-file#nrd) | NRD | - | - | | [Jeroen Gui's phishing & scam feeds](https://jeroengui.be/anti-phishing-project/)[^1] | Phishing | | | | [PetScams.com](https://petscams.com/) | Pet | | | | [PhishStats](https://phishstats.info/)[^2] | Phishing | | | @@ -23,12 +22,12 @@ Sources marked as inactive are not being automatically employed to retrieve doma | [Scam Directory](https://scam.directory/) | Any | | | | [Scam.Delivery](https://scam.delivery/) | Non-delivery | Yes | - | | [ScamAdvisor](https://www.scamadviser.com/) | Any | | | -| [Shreshta's NRD List](https://github.com/shreshta-labs/newly-registered-domains) | NRD | - | - | | [Stop 419 Scams and Scammers](https://www.stop419scams.com/) | Any | Yes | - | | [StopGunScams.com](https://stopgunscams.com/) | Firearm | | | | [dnstwist](https://github.com/elceef/dnstwist) | Phishing | | | | [openSquat](https://github.com/atenreiro/opensquat) | Phishing | Yes | - | | [r/Scams](https://www.reddit.com/r/Scams/) | Any | Yes | - | +| [xRuffKez's NRD List](https://github.com/xRuffKez/NRD) | NRD | - | - | [^1]: Only the scam feed is used for the light version. [^2]: Only domains found in the NRD feed are used for the light version. diff --git a/scripts/tools.sh b/scripts/tools.sh index 95b55409..ccf7771b 100644 --- a/scripts/tools.sh +++ b/scripts/tools.sh @@ -112,24 +112,22 @@ download_toplist() { download_nrd_feed() { [[ -f nrd.tmp ]] && return - url1='https://raw.githubusercontent.com/shreshta-labs/newly-registered-domains/main/nrd-1m.csv' - url2='https://feeds.opensquat.com/domain-names-month.txt' - url3='https://cdn.jsdelivr.net/gh/hagezi/dns-blocklists@latest/wildcard/nrds.30-onlydomains.txt' + url1='https://raw.githubusercontent.com/xRuffKez/NRD/main/nrd-30day_part1.txt' + url2='https://raw.githubusercontent.com/xRuffKez/NRD/main/nrd-30day_part2.txt' + # Disabled due to size of the combined feeds + #url3='https://feeds.opensquat.com/domain-names-month.txt' - { - curl -sSL "$url1" || send_telegram \ - "Error occurred while downloading NRD feeds." - - # Download the bigger feeds in parallel - curl -sSLZH 'User-Agent: openSquat-2.1.0' "$url2" "$url3" - } | mawk '!/#/' > nrd.tmp + # Download the feeds in parallel + curl -sSLZ "$url1" "$url2" | mawk '!/#/' > nrd.tmp + # TODO: update method of checking if the feeds downloaded correctly + # # Appears to be the best way of checking if the bigger feeds downloaded # properly without checking each feed individually and losing # parallelization. - if (( $(wc -l < nrd.tmp) < 9000000 )); then - send_telegram "Error occurred while downloading NRD feeds." - fi + #if (( $(wc -l < nrd.tmp) < 9000000 )); then + # send_telegram "Error occurred while downloading NRD feeds." + #fi format_file nrd.tmp } diff --git a/scripts/update_readme.sh b/scripts/update_readme.sh index 8dc166b5..7f2122fb 100644 --- a/scripts/update_readme.sh +++ b/scripts/update_readme.sh @@ -12,10 +12,6 @@ The [automated retrieval](https://github.com/jarelllama/Scam-Blocklist/actions/w This blocklist aims to be an alternative to blocking all newly registered domains (NRDs) seeing how many, but not all, NRDs are malicious. A variety of sources are integrated to detect new malicious domains within a short time span of their registration date. -In the last 30 days, more than $(sum_nrds)[^1] malicious NRDs were found. - -[^1]: Number calculated using NRDs from [Hagezi's NRD 30 feed](https://cdn.jsdelivr.net/gh/hagezi/dns-blocklists@latest/wildcard/nrds.30-onlydomains.txt). The number of malicious NRDs found in reality is higher due to additional feeds being used. See the list of feeds used here: [SOURCES.md](https://github.com/jarelllama/Scam-Blocklist/blob/main/SOURCES.md) - ## Download | Format | Syntax | @@ -297,15 +293,6 @@ sum_excluded() { printf "%s" "$(( excluded_count * 100 / raw_count ))" } -# Function 'sum_nrds' is an echo wrapper that returns the number of domains in -# the blocklist found in the NRD feed. -sum_nrds() { - # Only Hagezi's NRD feed is downloaded to save processing time - curl -sSL 'https://cdn.jsdelivr.net/gh/hagezi/dns-blocklists@latest/wildcard/nrds.30-onlydomains.txt' \ - -o nrd.tmp - grep -cxFf "$RAW" nrd.tmp | sed 's/\([0-9]\{3\}\)$/,\1/' -} - # Entry point trap 'find . -maxdepth 1 -type f -name "*.tmp" -delete' EXIT