forked from dqbd/tiktoken
-
Notifications
You must be signed in to change notification settings - Fork 2
/
redact.py
67 lines (52 loc) · 1.7 KB
/
redact.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import argparse
import re
import subprocess
from pathlib import Path
def redact_file(path: Path, dry_run: bool) -> None:
if not path.exists() or path.is_dir():
return
text = path.read_text()
if not text:
return
first_line = text.splitlines()[0]
if "redact" in first_line:
if not dry_run:
path.unlink()
print(f"Deleted {path}")
return
pattern = "|".join(
re.escape(x)
for x in [
"# ===== redact-beg =====\n",
"# ===== redact-end =====\n",
"<!--- redact-beg -->\n",
"<!--- redact-end -->\n",
]
)
if re.search(pattern, text):
redacted_text = "".join(re.split(pattern, text)[::2])
if not dry_run:
path.write_text(redacted_text)
print(f"Redacted {path}")
return
print(f"Skipped {path}")
def redact(dry_run: bool) -> None:
tiktoken_root = Path(__file__).parent.parent
assert tiktoken_root.name == "tiktoken"
assert (tiktoken_root / "pyproject.toml").exists()
try:
output = subprocess.check_output(["git", "ls-files"], cwd=tiktoken_root, text=True)
paths = [Path(p) for p in output.splitlines()]
except subprocess.CalledProcessError:
paths = list(tiktoken_root.glob("**/*"))
for path in paths:
redact_file(path, dry_run=dry_run)
def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("--dry-run", type=lambda x: not x or x[0].lower() != "f", default=True)
args = parser.parse_args()
redact(args.dry_run)
if args.dry_run:
print("Dry run, use --dry-run=false to actually redact files")
if __name__ == "__main__":
main()