Skip to content

Commit

Permalink
feat(regexp): add escape function (denoland#3334)
Browse files Browse the repository at this point in the history
  • Loading branch information
lionel-rowe authored and mxdvl committed May 16, 2023
1 parent d4f79d8 commit b0b5185
Show file tree
Hide file tree
Showing 3 changed files with 194 additions and 0 deletions.
83 changes: 83 additions & 0 deletions regexp/escape.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.

// // For future forward-compatibility with regexp `v` flag, reservedCharMap is
// // autogenerated from the ClassSetReservedDoublePunctuator,
// // ClassSetSyntaxCharacter, and ClassSetReservedPunctuator categories in the
// // draft spec.
// // See https://github.com/tc39/proposal-regexp-v-flag#how-is-the-v-flag-different-from-the-u-flag
// // and https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassSetReservedDoublePunctuator
// const reservedChars = [...new Set(['ClassSetReservedDoublePunctuator', 'ClassSetSyntaxCharacter', 'ClassSetReservedPunctuator'].map(n =>
// document.querySelector(`[name=${n}] emu-rhs`).textContent.replaceAll(/\s/g, '')
// ).join(''))]
// const reservedCharMap = Object.fromEntries(reservedChars
// .map(x => {
// try {
// for (const flag of 'gimsuy') {
// new RegExp(`\\${x}`, flag)
// new RegExp(`[\\${x}]`, flag)
// }
// return [x, `\\${x}`]
// } catch (e) {
// return [x, `\\x${x.codePointAt(0).toString(16).padStart(2, '0')}`]
// }
// }))

// Lookup table: reserved character -> the escape sequence that replaces it.
// Some entries use a plain backslash escape (`\$`), the others a two-digit hex
// escape (`\x26`). Insertion order is significant only in that it determines
// the order of members in the character class built below.
const reservedCharMap = {
  "&": "\\x26",
  "!": "\\x21",
  "#": "\\x23",
  "$": "\\$",
  "%": "\\x25",
  "*": "\\*",
  "+": "\\+",
  ",": "\\x2c",
  ".": "\\.",
  ":": "\\x3a",
  ";": "\\x3b",
  "<": "\\x3c",
  "=": "\\x3d",
  ">": "\\x3e",
  "?": "\\?",
  "@": "\\x40",
  "^": "\\^",
  "`": "\\x60",
  "~": "\\x7e",
  "(": "\\(",
  ")": "\\)",
  "[": "\\[",
  "]": "\\]",
  "{": "\\{",
  "}": "\\}",
  "/": "\\/",
  "-": "\\x2d",
  "\\": "\\\\",
  "|": "\\|",
};

// Global, unicode-mode character class that matches any one reserved
// character. The class body is assembled from the map's *values* (the escaped
// forms), so every member is already safe to place inside `[...]`.
const RX_REGEXP_ESCAPE = new RegExp(
  "[" + Object.values(reservedCharMap).join("") + "]",
  "gu",
);

/**
 * Makes arbitrary text safe to embed in a regular expression source string,
 * so that the resulting pattern matches that text literally.
 *
 * Every character that has an entry in `reservedCharMap` is replaced by its
 * mapped escape sequence; all other characters are passed through unchanged.
 *
 * @example
 * ```ts
 * import { escape } from "https://deno.land/std@$STD_VERSION/regexp/mod.ts";
 * import { assertEquals, assertMatch, assertNotMatch } from "https://deno.land/std@$STD_VERSION/testing/asserts.ts";
 *
 * const re = new RegExp(`^${escape(".")}$`, "u");
 *
 * assertEquals("^\\.$", re.source);
 * assertMatch(".", re);
 * assertNotMatch("a", re);
 * ```
 *
 * @param str The text to escape.
 * @returns `str` with each reserved character swapped for its escape sequence.
 */
export function escape(str: string) {
  // Only characters matched by RX_REGEXP_ESCAPE reach this callback, and that
  // pattern is derived from reservedCharMap, so the lookup always hits.
  const toEscapeSequence = (reserved: string) =>
    reservedCharMap[reserved as keyof typeof reservedCharMap];

  // RX_REGEXP_ESCAPE carries the `g` flag, so `replace` rewrites every match.
  return str.replace(RX_REGEXP_ESCAPE, toEscapeSequence);
}
100 changes: 100 additions & 0 deletions regexp/escape_test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.

import { escape } from "./escape.ts";
import {
assertEquals,
assertMatch,
assertNotMatch,
} from "../testing/asserts.ts";

// Every ASCII character, in code point order from U+0000 through U+007F.
const ALL_ASCII = Array.from(
  { length: 0x80 },
  (_, codePoint) => String.fromCharCode(codePoint),
).join("");
// One character per regexp flag exercised by the tests.
const ALL_REGEXP_FLAGS = "gimsuy";

// Test suite for `escape`: a handful of worked examples, followed by
// exhaustive checks over every ASCII character combined with every flag.
Deno.test("regexp", async (t) => {
  // Calls `visit` for each (ASCII character, regexp flag) pair, iterating
  // characters in the outer loop and flags in the inner loop.
  const forEachCharAndFlag = (
    visit: (char: string, flag: string) => void,
  ) => {
    for (const char of ALL_ASCII) {
      for (const flag of ALL_REGEXP_FLAGS) {
        visit(char, flag);
      }
    }
  };

  await t.step("escape", async (t) => {
    await t.step("examples", async (t) => {
      await t.step("`.` matches literal `.`", () => {
        const pattern = new RegExp(`^${escape(".")}$`, "u");

        assertEquals("^\\.$", pattern.source);
        assertMatch(".", pattern);
        assertNotMatch("a", pattern);
      });

      await t.step("`$` matches literal `$`", () => {
        const pattern = new RegExp(`^${escape("$")}$`);

        assertMatch("$", pattern);
        assertNotMatch("", pattern);
      });

      await t.step("`*` matches literal `*`", () => {
        const pattern = new RegExp(`^${escape("a*")}$`);

        assertMatch("a*", pattern);
        assertNotMatch("", pattern);
        assertNotMatch("aaa", pattern);
      });

      await t.step("escapes work correctly within character class", () => {
        const members = ".$*+[](){}|\\<>";
        const pattern = new RegExp(`^[${escape(members)}]$`);

        // Each member of the class must match on its own...
        for (const member of members) {
          assertMatch(member, pattern);
        }

        // ...and a character outside the class must not.
        assertNotMatch("a", pattern);
      });
    });

    await t.step("all ASCII", async (t) => {
      await t.step("interpolates without erroring", async (t) => {
        await t.step("outside character class", () => {
          forEachCharAndFlag((char, flag) => {
            new RegExp(escape(char), flag);
          });
        });

        await t.step("within character class", () => {
          forEachCharAndFlag((char, flag) => {
            new RegExp(`[${escape(char)}]`, flag);
          });
        });

        await t.step("matches self", () => {
          forEachCharAndFlag((char, flag) => {
            assertMatch(char, new RegExp(`^${escape(char)}$`, flag));
          });
        });

        await t.step("doesn't match any other chars", () => {
          forEachCharAndFlag((char, flag) => {
            // Case-insensitive matching is excluded from this check.
            if (flag === "i") return;

            for (const other of ALL_ASCII) {
              if (other === char) continue;

              assertNotMatch(
                other,
                new RegExp(`^${escape(char)}$`, flag),
              );
            }
          });
        });
      });
    });
  });
});
11 changes: 11 additions & 0 deletions regexp/mod.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
// This module is browser compatible.

/**
* Functions for tasks related to regular expressions (regexps), such as
* escaping text for interpolation into a regexp.
*
* @module
*/

export * from "./escape.ts";

0 comments on commit b0b5185

Please sign in to comment.