This repository has been archived by the owner on May 28, 2023. It is now read-only.
forked from AdguardTeam/HostlistCompiler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
rule.js
281 lines (248 loc) · 7.57 KB
/
rule.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
const _ = require('lodash');
const utils = require('./utils');
// Matches a dot-separated host name that is 1 to 255 characters long overall.
// Every label starts and ends with an ASCII letter or digit, may contain
// hyphens in between and is at most 63 characters long. A single trailing dot
// is accepted. A lone label without any dot also matches this expression.
const domainRegex = /^(?=.{1,255}$)[0-9A-Za-z](?:(?:[0-9A-Za-z]|-){0,61}[0-9A-Za-z])?(?:\.[0-9A-Za-z](?:(?:[0-9A-Za-z]|-){0,61}[0-9A-Za-z])?)*\.?$/;
// Matches a hosts-file style line. Capture groups:
//   1 - leading address-like token built from `a-f`, digits, `.`, `:`, `[` and `]`
//   2 - optional `%zone` suffix directly after the address
//   3 - everything after the separating whitespace up to the first `#`
//   4 - optional trailing `#` comment
// The expression has no `i` flag, so only lowercase hex digits are accepted.
const etcHostsRegex = /^([a-f0-9.:\][]+)(%[a-z0-9]+)?\s+([^#]+)(#.*)?$/;
/**
* Helper utils for working with filtering rules
*/
/**
 * Tells whether a line of a filter list is a comment.
 *
 * A line is treated as a comment when it starts with `!`, starts with `# `
 * (hash followed by a space), starts with `####`, or is exactly `#`.
 *
 * @param {String} ruleText - rule to check
 * @returns {Boolean} true if rule is a comment
 */
function isComment(ruleText) {
    if (ruleText === '#') {
        return true;
    }

    const commentPrefixes = ['!', '# ', '####'];
    return commentPrefixes.some((prefix) => _.startsWith(ruleText, prefix));
}
/**
 * Tells whether the rule is an "allowing" (exception) rule, i.e. whether its
 * text begins with the `@@` marker.
 *
 * @param {String} ruleText - rule to check
 * @returns {Boolean} true if the rule is "allowing"
 */
function isAllowRule(ruleText) {
    const allowMarker = '@@';
    return _.startsWith(ruleText, allowMarker);
}
/**
 * Tells whether the rule text is nothing more than a domain name.
 *
 * The text must contain at least one dot and must match `domainRegex`.
 *
 * @param {String} ruleText - rule to check
 * @returns {Boolean} true if the rule is just the domain name
 */
function isJustDomain(ruleText) {
    // Single-label names would pass the regex, so the dot is required separately.
    if (!_.includes(ruleText, '.')) {
        return false;
    }

    return domainRegex.test(ruleText);
}
/**
 * Tells whether the rule text has the shape of an /etc/hosts entry, as
 * described by `etcHostsRegex`.
 *
 * @param {String} ruleText - rule to check
 * @returns {Boolean} true if this is a /etc/hosts rule
 */
function isEtcHostsRule(ruleText) {
    return etcHostsRegex.exec(ruleText) !== null;
}
/**
* Represents the tokens an adblock-style rule is split into
* @typedef {Object} AdblockRuleTokens
* @property {String} pattern - rule pattern
* @property {String} options - modifiers
* @property {Boolean} whitelist - whether the rule is whitelist or not
*/
/**
 * Splits an adblock-style rule into its pattern, its raw options string and
 * its whitelist flag.
 *
 * @param {String} ruleText - original rule text
 * @returns {AdblockRuleTokens} rule tokens; `options` stays null when no
 * unescaped `$` delimiter is found
 * @throws {TypeError} thrown if nothing is left of the rule after the
 * optional `@@` prefix
 */
function parseRuleTokens(ruleText) {
    const whitelist = _.startsWith(ruleText, '@@');
    const startIndex = whitelist ? 2 : 0;

    if (ruleText.length <= startIndex) {
        throw new TypeError(`the rule is too short: ${ruleText}`);
    }

    // Until a delimiter is found, everything after the prefix is the pattern.
    const tokens = {
        pattern: ruleText.substring(startIndex),
        options: null,
        whitelist,
    };

    // Options are not parsed inside of a `/regex/` rule, unless the rule
    // mentions `replace=`.
    const looksLikeRegex = _.startsWith(tokens.pattern, '/')
        && _.endsWith(tokens.pattern, '/');
    if (looksLikeRegex && tokens.pattern.indexOf('replace=') === -1) {
        return tokens;
    }

    // Visit the `$` characters from right to left: the right-most one that is
    // not preceded by a backslash separates the pattern from the options.
    let delimiterIndex = ruleText.lastIndexOf('$');
    while (delimiterIndex >= startIndex) {
        const isEscaped = delimiterIndex > startIndex
            && ruleText[delimiterIndex - 1] === '\\';
        if (!isEscaped) {
            tokens.pattern = ruleText.substring(startIndex, delimiterIndex);
            tokens.options = ruleText.substring(delimiterIndex + 1);
            break;
        }
        delimiterIndex = ruleText.lastIndexOf('$', delimiterIndex - 1);
    }

    return tokens;
}
/**
* Represents a /etc/hosts rule
* @typedef {Object} EtcHostsRule
* @property {String} ruleText - original rule text
* @property {Array<String>} hostnames - list of hostnames in the rule
*/
/**
 * Reads the hostnames out of an /etc/hosts entry.
 *
 * The text is trimmed, a trailing `#` comment is cut off, and the remainder
 * is split on whitespace. The first field is skipped; all remaining fields
 * are returned as hostnames.
 *
 * @param {String} ruleText - rule text
 * @returns {EtcHostsRule} - rule properties (the original, untrimmed rule text
 * and the list of hostnames)
 * @throws {TypeError} thrown if no hostname follows the first field
 */
function loadEtcHostsRuleProperties(ruleText) {
    const trimmed = _.trim(ruleText);

    // Only a `#` that is not the very first character starts a trailing comment.
    const commentStart = trimmed.indexOf('#');
    const withoutComment = commentStart > 0
        ? trimmed.substring(0, commentStart)
        : trimmed;

    const fields = _.trim(withoutComment).split(/\s+/);
    const hostnames = fields.slice(1);
    if (hostnames.length === 0) {
        throw new TypeError(`Invalid /etc/hosts rule: ${ruleText}`);
    }

    return { ruleText, hostnames };
}
/**
* Represents an adblock-style rule
* @typedef {Object} AdblockRule
* @property {String} ruleText - original rule text
* @property {String} pattern - matching pattern
* @property {Array<{name: string, value: string}>} options - list of rule modifiers
* @property {Boolean} whitelist - whether this is an exception rule or not
* @property {String|null} hostname - hostname can only be extracted from the rules
* whose pattern looks like `||[a-z0-9-.]+^`; it is null for every other pattern
*/
/**
 * Pulls the hostname out of an adblock rule pattern.
 *
 * Only patterns of the exact form `||hostname^` (optionally prefixed with
 * `@@`) are recognized, where the hostname consists of lowercase letters,
 * digits, hyphens and dots.
 *
 * @param {String} pattern adblock rule pattern string
 * @returns {String} hostname or null if cannot be extracted
 */
function extractHostname(pattern) {
    // Index 2 of the match is the hostname capture group.
    const [, , hostname] = pattern.match(/^(@@)?\|\|([a-z0-9-.]+)\^$/) || [];
    return hostname || null;
}
/**
 * Extracts rule properties from an adblock-style rule.
 *
 * The trimmed rule text is tokenized with `parseRuleTokens`; the options part
 * (if any) is split on unescaped commas, and every option is split into a
 * name and a value on its FIRST `=` only, so values that themselves contain
 * `=` are kept intact.
 *
 * @param {String} ruleText - rule text
 * @returns {AdblockRule} - rule properties; `options` is null when the rule
 * has no modifiers, and an option's `value` is null when it is absent or empty
 * @throws {TypeError} propagated from `parseRuleTokens` when the rule is too short
 */
function loadAdblockRuleProperties(ruleText) {
    const tokens = parseRuleTokens(_.trim(ruleText));
    const rule = {
        ruleText,
        pattern: tokens.pattern,
        whitelist: tokens.whitelist,
        options: null, // to be filled later
        hostname: extractHostname(tokens.pattern),
    };

    if (!tokens.options) {
        return rule;
    }

    // NOTE(review): assumes the helper returns an array of option strings —
    // confirm against ./utils.
    const optionParts = utils.splitByDelimiterWithEscapeCharacter(tokens.options, ',', '\\', false);
    if (optionParts.length > 0) {
        rule.options = optionParts.map((option) => {
            // A `split('=', 2)` would drop everything after a second `=`
            // (the limit truncates the result), so cut at the first `=` by hand.
            const separatorIndex = option.indexOf('=');
            if (separatorIndex === -1) {
                return { name: option, value: null };
            }
            return {
                name: option.substring(0, separatorIndex),
                value: option.substring(separatorIndex + 1) || null,
            };
        });
    }

    return rule;
}
/**
 * Looks up a modifier by name in the AdblockRule properties.
 *
 * When several modifiers share the name, the first one in the list is returned.
 *
 * @param {AdblockRule} ruleProps - rule properties
 * @param {String} name - modifier name
 * @returns {{name: string, value: string} | null} modifier info or null if not found
 */
function findModifier(ruleProps, name) {
    const { options } = ruleProps;
    if (!options) {
        return null;
    }

    const match = options.find((option) => option.name === name);
    return match === undefined ? null : match;
}
/**
 * Deletes every modifier with the given name from the rule's modifier list.
 * The `options` array of `ruleProps` is modified in place.
 *
 * @param {AdblockRule} ruleProps - rule properties
 * @param {String} name - modifier name
 * @returns {Boolean} true if there was such a modifier and it was removed
 */
function removeModifier(ruleProps, name) {
    const { options } = ruleProps;
    if (!options) {
        return false;
    }

    const kept = options.filter((option) => option.name !== name);
    if (kept.length === options.length) {
        return false;
    }

    // Rewrite the contents of the existing array so callers holding a
    // reference to it observe the removal.
    options.splice(0, options.length, ...kept);
    return true;
}
/**
 * Serializes an {@link AdblockRule} back into rule text.
 *
 * The result is the optional `@@` prefix, the pattern and, when the rule has
 * modifiers, a `$` followed by the comma-separated modifiers. A modifier is
 * written as `name=value` when its value is truthy and as `name` otherwise.
 *
 * @param {AdblockRule} ruleProps - rule properties
 * @returns {String} rule text
 */
function adblockRuleToString(ruleProps) {
    const prefix = ruleProps.whitelist ? '@@' : '';
    const head = `${prefix}${ruleProps.pattern}`;

    if (_.isEmpty(ruleProps.options)) {
        return head;
    }

    const modifiers = ruleProps.options
        .map(({ name, value }) => (value ? `${name}=${value}` : `${name}`))
        .join(',');

    return [head, modifiers].join('$');
}
// Functions exposed to other modules. `parseRuleTokens` is not part of this
// list, so it can only be called from inside this file.
module.exports = {
isComment,
isAllowRule,
isJustDomain,
isEtcHostsRule,
loadEtcHostsRuleProperties,
extractHostname,
loadAdblockRuleProperties,
findModifier,
removeModifier,
adblockRuleToString,
};