forked from denoland/std
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(regexp): add escape function (denoland#3334)
- Loading branch information
1 parent
d4f79d8
commit b0b5185
Showing
3 changed files
with
194 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. | ||
|
||
// // For future forward-compatibility with regexp `v` flag, reservedCharMap is | ||
// // autogenerated from the ClassSetReservedDoublePunctuator, | ||
// // ClassSetSyntaxCharacter, and ClassSetReservedPunctuator categories in the | ||
// // draft spec. | ||
// // See https://github.com/tc39/proposal-regexp-v-flag#how-is-the-v-flag-different-from-the-u-flag | ||
// // and https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassSetReservedDoublePunctuator | ||
// const reservedChars = [...new Set(['ClassSetReservedDoublePunctuator', 'ClassSetSyntaxCharacter', 'ClassSetReservedPunctuator'].map(n => | ||
// document.querySelector(`[name=${n}] emu-rhs`).textContent.replaceAll(/\s/g, '') | ||
// ).join(''))] | ||
// const reservedCharMap = Object.fromEntries(reservedChars | ||
// .map(x => { | ||
// try { | ||
// for (const flag of 'gimsuy') { | ||
// new RegExp(`\\${x}`, flag) | ||
// new RegExp(`[\\${x}]`, flag) | ||
// } | ||
// return [x, `\\${x}`] | ||
// } catch (e) { | ||
// return [x, `\\x${x.codePointAt(0).toString(16).padStart(2, '0')}`] | ||
// } | ||
// })) | ||
|
||
/**
 * Maps each reserved character to the escape sequence that `escape`
 * substitutes for it.
 *
 * Regexp syntax characters and `/` use an identity escape (e.g. `\$`); every
 * other entry uses a two-digit `\xHH` hex escape. As the generator script in
 * the comment above shows, the identity form is only chosen when it is
 * accepted under every one of the `gimsuy` flags both on its own and inside a
 * character class, which is why `-` is written as `\x2d`.
 *
 * Key order is significant: `Object.values` of this map is used to build the
 * character class that finds the characters to escape.
 *
 * Declared `as const` so the table is read-only and its values keep their
 * literal types.
 */
const reservedCharMap = {
  "&": "\\x26",
  "!": "\\x21",
  "#": "\\x23",
  "$": "\\$",
  "%": "\\x25",
  "*": "\\*",
  "+": "\\+",
  ",": "\\x2c",
  ".": "\\.",
  ":": "\\x3a",
  ";": "\\x3b",
  "<": "\\x3c",
  "=": "\\x3d",
  ">": "\\x3e",
  "?": "\\?",
  "@": "\\x40",
  "^": "\\^",
  "`": "\\x60",
  "~": "\\x7e",
  "(": "\\(",
  ")": "\\)",
  "[": "\\[",
  "]": "\\]",
  "{": "\\{",
  "}": "\\}",
  "/": "\\/",
  "-": "\\x2d",
  "\\": "\\\\",
  "|": "\\|",
} as const;
|
||
/**
 * Matches any single character that has an entry in `reservedCharMap`.
 *
 * The character class is assembled from the map's escaped values rather than
 * its raw keys, so characters such as `]`, `^`, `-` and `\` are taken
 * literally instead of altering the class. The `g` flag is required by
 * `String.prototype.replaceAll`; `u` enables Unicode mode.
 */
const RX_REGEXP_ESCAPE = new RegExp(
  "[" + Object.values(reservedCharMap).join("") + "]",
  "gu",
);
|
||
/**
 * Escapes arbitrary text for interpolation into a regexp, such that it will
 * match exactly that text and nothing else.
 *
 * Every character that has an entry in `reservedCharMap` is replaced by its
 * mapped escape sequence; all other characters are returned unchanged.
 *
 * @example
 * ```ts
 * import { escape } from "https://deno.land/std@$STD_VERSION/regexp/mod.ts";
 * import { assertEquals, assertMatch, assertNotMatch } from "https://deno.land/std@$STD_VERSION/testing/asserts.ts";
 *
 * const re = new RegExp(`^${escape(".")}$`, "u");
 *
 * assertEquals(re.source, "^\\.$");
 * assertMatch(".", re);
 * assertNotMatch("a", re);
 * ```
 *
 * @param str The text to escape.
 * @returns `str` with each reserved character replaced by its escape sequence.
 */
export function escape(str: string): string {
  return str.replaceAll(
    RX_REGEXP_ESCAPE,
    // RX_REGEXP_ESCAPE is built from reservedCharMap, so every match is one of
    // its keys and the lookup cannot miss.
    (m) => reservedCharMap[m as keyof typeof reservedCharMap],
  );
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. | ||
|
||
import { escape } from "./escape.ts"; | ||
import { | ||
assertEquals, | ||
assertMatch, | ||
assertNotMatch, | ||
} from "../testing/asserts.ts"; | ||
|
||
/** Every 7-bit ASCII character (U+0000 through U+007F) in code point order. */
const ALL_ASCII = [
  // Control characters, U+0000 - U+001F.
  "\x00\x01\x02\x03\x04\x05\x06\x07\b\t\n\v\f\r\x0E\x0F",
  "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
  // Space, punctuation and digits, U+0020 - U+0040.
  " !\"#$%&'()*+,-./0123456789:;<=>?@",
  // Uppercase letters and the punctuation after them, U+0041 - U+0060.
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`",
  // Lowercase letters, closing punctuation and DEL, U+0061 - U+007F.
  "abcdefghijklmnopqrstuvwxyz{|}~\x7F",
].join("");

/** The regexp flags exercised by the tests, iterated one character at a time. */
const ALL_REGEXP_FLAGS = "gimsuy";
|
||
// Tests for `escape`: a few hand-picked examples, then an exhaustive sweep of
// every character in ALL_ASCII under each flag in ALL_REGEXP_FLAGS.
Deno.test("regexp", async (t) => {
  await t.step("escape", async (t) => {
    await t.step("examples", async (t) => {
      await t.step("`.` matches literal `.`", () => {
        const re = new RegExp(`^${escape(".")}$`, "u");

        // assertEquals takes (actual, expected).
        assertEquals(re.source, "^\\.$");
        assertMatch(".", re);
        assertNotMatch("a", re);
      });
      await t.step("`$` matches literal `$`", () => {
        const re = new RegExp(`^${escape("$")}$`);

        assertMatch("$", re);
        assertNotMatch("", re);
      });
      await t.step("`*` matches literal `*`", () => {
        const re = new RegExp(`^${escape("a*")}$`);

        assertMatch("a*", re);
        assertNotMatch("", re);
        assertNotMatch("aaa", re);
      });
      await t.step("escapes work correctly within character class", () => {
        const re = new RegExp(`^[${escape(".$*+[](){}|\\<>")}]$`);

        assertMatch(".", re);
        assertMatch("$", re);
        assertMatch("*", re);
        assertMatch("+", re);
        assertMatch("[", re);
        assertMatch("]", re);
        assertMatch("(", re);
        assertMatch(")", re);
        assertMatch("{", re);
        assertMatch("}", re);
        assertMatch("|", re);
        assertMatch("\\", re);
        assertMatch("<", re);
        assertMatch(">", re);

        assertNotMatch("a", re);
      });
    });
    await t.step("all ASCII", async (t) => {
      // Construction alone is the assertion here: an invalid escape would make
      // the RegExp constructor throw.
      await t.step("interpolates without erroring", async (t) => {
        await t.step("outside character class", () => {
          for (const char of ALL_ASCII) {
            for (const flag of ALL_REGEXP_FLAGS) {
              new RegExp(escape(char), flag);
            }
          }
        });
        await t.step("within character class", () => {
          for (const char of ALL_ASCII) {
            for (const flag of ALL_REGEXP_FLAGS) {
              new RegExp(`[${escape(char)}]`, flag);
            }
          }
        });
      });
      await t.step("matches self", () => {
        for (const char of ALL_ASCII) {
          for (const flag of ALL_REGEXP_FLAGS) {
            assertMatch(char, new RegExp(`^${escape(char)}$`, flag));
          }
        }
      });
      await t.step("doesn't match any other chars", () => {
        for (const char of ALL_ASCII) {
          for (const flag of ALL_REGEXP_FLAGS) {
            // Under `i` a letter also matches its other-case counterpart, so
            // the "nothing else matches" property does not hold for that flag.
            if (flag === "i") continue;

            // Built once per (char, flag) instead of once per comparison.
            // Reuse is safe with the stateful `g` and `y` flags because a
            // failed match resets `lastIndex` to 0.
            const re = new RegExp(`^${escape(char)}$`, flag);

            for (const char2 of ALL_ASCII) {
              if (char2 === char) continue;

              assertNotMatch(char2, re);
            }
          }
        }
      });
    });
  });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. | ||
// This module is browser compatible. | ||
|
||
/** | ||
* Functions for tasks related to regular expressions (regexps), such as | ||
* escaping text for interpolation into a regexp. | ||
* | ||
* @module | ||
*/ | ||
|
||
export * from "./escape.ts"; |