Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow optional rewrite rules before computing similarity scores during replay #499

Merged
merged 9 commits into from
Sep 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,50 @@
import chalk from "chalk";
import program from "commander";
import fs from "fs-extra";
import { RewriteRule, RewriteRules } from "./rewrite";
import { RecordReplayServer } from "./server";

function commaSeparatedList(value: string) {
return value ? value.split(",") : [];
}

const RE_SED_INPUT_VALIDATION = /s\/(.+(?<!\\))\/(.+(?<!\\))\/([gims]*)/;
const RE_REPLACE_SED_CAPTURE_GROUPS = /(\\[1-9][0-9]*)/;

function rewriteRule(value: string, rewriteRules: RewriteRules): RewriteRules {
// Does the given value match a sed-style regex expression?
const match = RE_SED_INPUT_VALIDATION.exec(value);
if (match === null) {
throw new Error(
`Provided rewrite rule ${value} does not look like a sed-like rewrite rule.`
);
}
const [rawFind, rawReplace, rawFlags] = match.slice(1, 4);
HGyllensvard marked this conversation as resolved.
Show resolved Hide resolved

// Parse the found regex with the given regex flags.
let find: RegExp;
try {
find = new RegExp(rawFind, rawFlags);
} catch (e) {
if (e instanceof SyntaxError) {
throw new Error(`Find regex is syntactically invalid: ${e}`);
} else {
throw e;
}
}

// Convert sed-style \N capture group replacement values into JavaScript regex $N
// capture group replacement values.
const replace = rawReplace.replace(
RE_REPLACE_SED_CAPTURE_GROUPS,
(m) => "$" + m.substring(1)
);

// Append the new rule to the set of rules.
const rule = new RewriteRule(find, replace);
return rewriteRules.appendRule(rule);
}

async function main(argv: string[]) {
program
.option("-m, --mode <mode>", "Mode (record, replay or passthrough)")
Expand Down Expand Up @@ -40,6 +78,12 @@ async function main(argv: string[]) {
"--https-ca <filename.pem>",
"Enable HTTPS server where the certificate was generated by this CA. Useful if you are using a self-signed certificate. Also requires --https-key and --https-cert."
)
.option<RewriteRules>(
"--rewrite-before-diff [s/find/replace/g...]",
"Provide regex-based rewrite rules over strings before passing them to the diffing algorithm. The regex rules use sed-style syntax. s/find/replace/ with an optional regex modifier suffixes. Capture groups can be used using sed-style \\N syntax. This is only used during replaying existing tapes.",
rewriteRule,
new RewriteRules()
)
.parse(argv);

const initialMode: string = (program.mode || "").toLowerCase();
Expand All @@ -52,6 +96,7 @@ async function main(argv: string[]) {
const httpsCA: string = program.httpsCa || "";
const httpsKey: string = program.httpsKey;
const httpsCert: string = program.httpsCert;
const rewriteBeforeDiffRules: RewriteRules = program.rewriteBeforeDiff;

switch (initialMode) {
case "record":
Expand Down Expand Up @@ -102,6 +147,7 @@ async function main(argv: string[]) {
httpsCA,
httpsKey,
httpsCert,
rewriteBeforeDiffRules,
});
await server.start(port);
console.log(chalk.green(`Proxying in ${initialMode} mode on port ${port}.`));
Expand Down
7 changes: 5 additions & 2 deletions src/matcher.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { RewriteRules } from "./rewrite";
import { computeSimilarity } from "./similarity";
import { Headers, TapeRecord } from "./tape";

Expand Down Expand Up @@ -45,7 +46,8 @@ export function findRecordMatches(
requestMethod: string,
requestPath: string,
requestHeaders: Headers,
requestBody: Buffer
requestBody: Buffer,
rewriteBeforeDiffRules: RewriteRules
): TapeRecord[] {
let bestSimilarityScore = +Infinity;
let bestMatches: TapeRecord[] = [];
Expand All @@ -55,7 +57,8 @@ export function findRecordMatches(
requestPath,
requestHeaders,
requestBody,
potentialMatch
potentialMatch,
rewriteBeforeDiffRules
);

if (similarityScore < bestSimilarityScore) {
Expand Down
80 changes: 80 additions & 0 deletions src/rewrite.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import { RewriteRule, RewriteRules } from "./rewrite";

describe("RewriteRule", () => {
njday marked this conversation as resolved.
Show resolved Hide resolved
describe("without capture groups", () => {
it("applies the expected changes", () => {
const rule = new RewriteRule(/-[a-z0-9]+-?/gi, "");
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Could we add a comment explaining the regex to the expressions in these tests? Will help identify the intention and to verify the output is expected

expect(rule.apply("")).toEqual("");
expect(rule.apply("chicken")).toEqual("chicken");
expect(rule.apply("one-TWO-thRee-fOUr")).toEqual("onethRee");
HGyllensvard marked this conversation as resolved.
Show resolved Hide resolved
});
});

describe("with capture groups", () => {
it("applies the expected changes", () => {
const rule1 = new RewriteRule(/\$([0-9]+)(\.[0-9]+)?/g, "£$1");
expect(rule1.apply("")).toEqual("");
expect(rule1.apply("chicken")).toEqual("chicken");
expect(rule1.apply("They are $5, $7.90, and $1024.9876.")).toEqual(
"They are £5, £7, and £1024."
);

const rule2 = new RewriteRule(
/-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}(@example.com)$/gi,
"$1"
);
expect(
rule2.apply(
"jane.doe-some-test-6f82fbbe-d36a-4c5c-b47b-84100122fbbc@example.com"
)
).toEqual("[email protected]");
expect(
rule2.apply(
"jane.doe-some-test-6F82FBBE-D36A-4C5C-B47B-84100122FBBC@example.com"
)
).toEqual("[email protected]");
});
});
});

describe("RewriteRules", () => {
describe("when there are no rules", () => {
const rules = new RewriteRules();

it("does not mutate the given value at all", () => {
expect(rules.apply("chicken")).toEqual("chicken");
});
});

describe("when there is a simple rewrite rule", () => {
const rules = new RewriteRules().appendRule(new RewriteRule(/dog/, "cat"));

it("applies the expected changes on basic data types", () => {
expect(rules.apply("The dog dodges the doggie.")).toEqual(
"The cat dodges the doggie."
njday marked this conversation as resolved.
Show resolved Hide resolved
);
expect(rules.apply(null)).toEqual(null);
expect(rules.apply(undefined)).toEqual(undefined);
expect(rules.apply(123.4)).toEqual(123.4);
expect(rules.apply(true)).toEqual(true);
});

it("applies the expected changes on arrays", () => {
const a1 = ["cat", "dog", "fish", "bird"];
expect(rules.apply(a1)).toEqual(["cat", "cat", "fish", "bird"]);
expect(a1).toEqual(["cat", "dog", "fish", "bird"]);
expect(rules.apply([])).toEqual([]);
});

it("applies the expected changes on objects", () => {
const o1 = { dog: "woof", doggie: "wuff", xyz: "I hate dogs" };
expect(rules.apply(o1)).toEqual({
cat: "woof",
catgie: "wuff",
xyz: "I hate cats",
});
expect(o1).toEqual({ dog: "woof", doggie: "wuff", xyz: "I hate dogs" });

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"I hate dogs", blasphemy! 😂

expect(rules.apply({})).toEqual({});
});
});
});
63 changes: 63 additions & 0 deletions src/rewrite.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
export class RewriteRule {
find: RegExp;
replace: string;

constructor(find: RegExp, replace: string) {
this.find = find;
this.replace = replace;
}

apply(value: string): string {
return value.replace(this.find, this.replace);
}
}

export class RewriteRules {
private rules: RewriteRule[];

constructor() {
this.rules = [];
}

appendRule(rule: RewriteRule): RewriteRules {
this.rules.push(rule);
return this;
}

apply<T>(value: T): T {
timdawborn marked this conversation as resolved.
Show resolved Hide resolved
// Bail early if we have no rules to apply.
if (this.rules.length === 0) {
return value;
}

return this._apply(value);
}

private _apply<T>(value: T): T {
if (typeof value === "object" && value !== null) {
// If the object is an array, iterate through each element and call the function recursively
if (Array.isArray(value)) {
return (value.map((v) => this._apply(v)) as any) as T;
}

// If the object is not an array, create a new object with the same keys,
// and call the function recursively on each value
const oldObj = value as { [key: string]: any };
const newObj: { [key: string]: any } = {};
for (const key of Object.keys(oldObj)) {
const newKey = this._apply(key);
const newValue = this._apply(oldObj[key]);
newObj[newKey] = newValue;
}
return newObj as T;
} else if (typeof value === "string") {
let s = value as string;
for (const rule of this.rules) {
s = rule.apply(value);
}
return (s as any) as T;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question Why is this first converted to any to then immediately be casted to T?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The type of s was resolved as a string with our manual check typeof value === "string" and subsequent cast let s = value as string. However the top level recursed function has a generic type T which it needs to return. string and T don't overlap and the cast to any is required to allow us to cast it to T afterwards

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a limitation of TypeScript. Line 54 casts the generic type T to string. We need to cast string back to T in order to return it. The compiler does not allow casting string directly to T as there's no overlap of the types. Going via any is the way you do a "cast to anything" cast in TypeScript.

} else {
return value;
njday marked this conversation as resolved.
Show resolved Hide resolved
}
}
}
11 changes: 9 additions & 2 deletions src/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { ensureBuffer } from "./buffer";
import { findNextRecordToReplay, findRecordMatches } from "./matcher";
import { Mode } from "./modes";
import { Persistence } from "./persistence";
import { RewriteRules } from "./rewrite";
import { Request, send } from "./sender";
import { TapeRecord } from "./tape";

Expand All @@ -27,6 +28,7 @@ export class RecordReplayServer {
private defaultTape: string;
private replayedTapes: Set<TapeRecord> = new Set();
private preventConditionalRequests?: boolean;
private rewriteBeforeDiffRules: RewriteRules;

constructor(options: {
initialMode: Mode;
Expand All @@ -40,6 +42,7 @@ export class RecordReplayServer {
httpsCA?: string;
httpsKey?: string;
httpsCert?: string;
rewriteBeforeDiffRules?: RewriteRules;
}) {
this.currentTapeRecords = [];
this.mode = options.initialMode;
Expand All @@ -50,6 +53,8 @@ export class RecordReplayServer {
this.persistence = new Persistence(options.tapeDir, redactHeaders);
this.defaultTape = options.defaultTapeName;
this.preventConditionalRequests = options.preventConditionalRequests;
this.rewriteBeforeDiffRules =
options.rewriteBeforeDiffRules || new RewriteRules();
this.loadTape(this.defaultTape);

const handler = async (
Expand Down Expand Up @@ -262,7 +267,8 @@ export class RecordReplayServer {
request.method,
request.path,
request.headers,
request.body
request.body,
this.rewriteBeforeDiffRules
),
this.replayedTapes
);
Expand Down Expand Up @@ -324,7 +330,8 @@ export class RecordReplayServer {
request.method,
request.path,
request.headers,
request.body
request.body,
this.rewriteBeforeDiffRules
),
this.replayedTapes
);
Expand Down
Loading