Skip to content

Commit

Permalink
Scripts: implement wildcard optimization
Browse files: browse the repository at this point in the history
  • Loading branch information
jarelllama committed May 14, 2024
1 parent 45a8db7 commit 8b56ecf
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 8 deletions.
12 changes: 12 additions & 0 deletions config/wildcards.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
185-224-83-37.cprapid.com
group-telegram.my.id
my-telegram.my.id
fyfyvfytvghv.workers.dev
group-lucah.my.id
ajsik.my.id
ktt55.my.id
cryptocurrencies-offers.com
mutuel-credible.com
serpgold.com
mutuelcredible.com
financial-offer.com
4 changes: 4 additions & 0 deletions scripts/build_lists.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ readonly RAW='data/raw.txt'
readonly RAW_LIGHT='data/raw_light.txt'
readonly ADBLOCK='lists/adblock'
readonly DOMAINS='lists/wildcard_domains'
readonly WILDCARDS='config/wildcards.txt'

main() {
# Install AdGuard's Hostlist Compiler
Expand All @@ -31,6 +32,9 @@ main() {
# Function 'build' removes redundant entries from the raw files and compiles
# them into the various blocklist formats.
build() {
# Merge the wildcard domains into the source file (sorted and deduplicated)
# to optimize the list
sort -u "$WILDCARDS" "$source" -o "$source"

# Compile blocklist. See the list of transformations here:
# https://github.com/AdguardTeam/HostlistCompiler
printf "\n"
Expand Down
14 changes: 6 additions & 8 deletions scripts/test_functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ readonly RAW='data/raw.txt'
readonly RAW_LIGHT='data/raw_light.txt'
readonly WHITELIST='config/whitelist.txt'
readonly BLACKLIST='config/blacklist.txt'
readonly WILDCARDS='config/wildcards.txt'
readonly ROOT_DOMAINS='data/root_domains.txt'
readonly SUBDOMAINS='data/subdomains.txt'
readonly SUBDOMAINS_TO_REMOVE='config/subdomains.txt'
Expand All @@ -22,13 +23,10 @@ readonly SOURCE_LOG='config/source_log.csv'

main() {
# Initialize
: > "$RAW"
: > "$DEAD_DOMAINS"
: > "$SUBDOMAINS"
: > "$ROOT_DOMAINS"
: > "$PARKED_DOMAINS"
: > "$WHITELIST"
: > "$BLACKLIST"
for file in "$RAW" "$DEAD_DOMAINS" "$SUBDOMAINS" "$ROOT_DOMAINS" \
"$PARKED_DOMAINS" "$WHITELIST" "$BLACKLIST" "$WILDCARDS"; do
: > "$file"
done
sed -i '1q' "$DOMAIN_LOG"
sed -i '1q' "$SOURCE_LOG"
error=false
Expand Down Expand Up @@ -245,7 +243,7 @@ TEST_PARKED_CHECK() {
# correctly built with the right syntax.
TEST_BUILD() {
# INPUT
printf "build-test.com\n" >> "$RAW"
printf "build-test.com\n" >> "$WILDCARDS"
printf "redundant.build-test.com\n" >> "$RAW"

# EXPECTED OUTPUT
Expand Down

0 comments on commit 8b56ecf

Please sign in to comment.