-
Notifications
You must be signed in to change notification settings - Fork 0
/
approxihash.py
executable file
·112 lines (89 loc) · 4.35 KB
/
approxihash.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/env python3
import argparse
import hashlib
import sys
import json
import itertools
# Hash algorithm names accepted by this tool. "ALL" is a pseudo-entry that
# expands to every real algorithm in this list.
AVAILABLE_HASH_TYPES = ["md5", "sha1", "sha256", "sha384", "sha512", "ALL"]


class GeneratedHash(object):
    """Hash every ordered selection of values taken from a key/value mapping."""

    def __init__(self, data_dict, hash_types):
        """ Create a new GeneratedHash object.

        Args:
            data_dict: a dict of string:string mappings only
            hash_types: list of hashlib algorithm names; if "ALL" appears
                anywhere in the list, every real entry of
                AVAILABLE_HASH_TYPES is used instead

        Returns:
            Nothing
        """
        self.data_dict = data_dict
        # Membership test rather than equality with ["ALL"]: a list such as
        # ["md5", "ALL"] must also expand, otherwise "ALL" would be handed to
        # hashlib.new() and rejected as an unsupported hash type.
        if "ALL" in hash_types:
            self.hash_types = [hashfn for hashfn in AVAILABLE_HASH_TYPES
                               if hashfn != "ALL"]
        else:
            self.hash_types = hash_types

    def generate_hashes(self, divider="", min_combo_selections=2):
        """ Generate and return all the hashes.

        Values are selected as ordered permutations, so the same values in a
        different order produce a different hash input.

        Args:
            divider: optional string to join elements with
            min_combo_selections: minimum number of elements to use to generate a hash

        Returns:
            A dictionary mapping each hex digest to
            {"fields": [values joined, in order], "hashfn": algorithm name}.
            If two selections yield the same digest, the later one wins.
        """
        hash_results = {}
        current_keys = list(self.data_dict)
        # Use from min_combo_selections up to every key at once
        for num_elements in range(min_combo_selections, len(current_keys) + 1):
            # Every ordered selection of num_elements keys
            for combination in itertools.permutations(current_keys, num_elements):
                combo_fields = [self.data_dict[elem] for elem in combination]
                # Encode once per selection; the bytes are shared by all algorithms
                joined_combo = divider.join(combo_fields).encode("utf-8")
                for hashfn in self.hash_types:
                    combo_hash = hashlib.new(hashfn, joined_combo)
                    hash_results[combo_hash.hexdigest()] = {"fields": combo_fields,
                                                            "hashfn": hashfn}
        return hash_results
def main(hashfn, input_dict, min_combo_selection, divider, verbose):
    """ Generate the hashes for input_dict and print one per line.

    Args:
        hashfn: list of hash algorithm names, passed to GeneratedHash
        input_dict: the key/value data whose values get hashed
        min_combo_selection: minimum number of values joined per hash
        divider: string placed between values before hashing
        verbose: when true, each line also shows the joined values and the
            algorithm name after the digest

    Returns:
        Nothing
    """
    generator = GeneratedHash(input_dict, hashfn)
    results = generator.generate_hashes(divider, min_combo_selection)
    for digest in results:
        if not verbose:
            print(digest)
            continue
        details = results[digest]
        source_text = divider.join(details["fields"])
        print("{} {} {}".format(digest, source_text, details["hashfn"]))
def parse_keyvals(keyvals):
    """ Turn a list of "key:value" strings into a dict.

    Only the first ":" separates the key from the value, so values may
    contain colons themselves (for example "url:http://example.com").

    Args:
        keyvals: iterable of "key:value" strings

    Returns:
        A dict of key: value strings; later duplicates of a key win

    Raises:
        ValueError: if an item contains no ":" at all
    """
    parsed = {}
    for item in keyvals:
        key, separator, val = item.partition(":")
        if not separator:
            raise ValueError("expected key:value but got {!r}".format(item))
        parsed[key] = val
    return parsed


def stringify_json_values(data):
    """ Return a copy of data with integer values converted to strings.

    Args:
        data: a dict whose values should all be strings or integers

    Returns:
        A new dict with the same keys and every value as a string

    Raises:
        ValueError: if any value is neither a string nor an integer
            (booleans, floats, nulls, nested lists and dicts are rejected)
    """
    converted = {}
    for key, val in data.items():
        # bool is a subclass of int but is not accepted as hashable text here
        if isinstance(val, bool) or not isinstance(val, (str, int)):
            raise ValueError("value for {!r} is not an integer or string".format(key))
        converted[key] = str(val)
    return converted


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Generate a series of hashes based on key-value datasets')
    parser.add_argument("--hashfn", "-H", dest="hashfn", action="store",
                        help="Hashing function(s) to use.", choices=AVAILABLE_HASH_TYPES,
                        nargs="+", default=["md5"])
    parser.add_argument("--json", "-j", dest="json", action="store_true",
                        help="Parse STDIN as JSON key-value data")
    parser.add_argument("--min-combinations", "-m", dest="min_combo_selection",
                        action="store", type=int, default=1,
                        help="Minimum number of items to pick combinations of")
    parser.add_argument("--keyval", "-k", dest="keyvals", action="store", nargs="+",
                        help="A key:value pair.", default=[])
    parser.add_argument("--divider", "-D", dest="divider", action="store", default="",
                        help="Divider placed between attributes when generating hashes.")
    parser.add_argument("--verbose", "-v", dest="verbose", action="store_true",
                        help="Annotate hashes with the fields used to generate them",
                        default=False)
    args = parser.parse_args()

    input_dict = {}
    if args.json and not sys.stdin.isatty():
        try:
            input_dict.update(json.load(sys.stdin))
        except (ValueError, TypeError) as e:
            # ValueError: malformed JSON. TypeError: valid JSON that is not
            # key-value shaped (e.g. a bare number), which dict.update rejects.
            sys.stderr.write("Failed to load input - is it valid JSON? {}\n".format(e))
            sys.exit(254)
        try:
            input_dict = stringify_json_values(input_dict)
        except ValueError:
            # for now any nested dicts or lists don't fit with our model
            sys.stderr.write("Not all JSON values are integers or strings - can't operate on this data\n")
            sys.exit(253)
    else:
        try:
            input_dict.update(parse_keyvals(args.keyvals))
        except ValueError as e:
            # Report a malformed --keyval as a usage error instead of a traceback
            parser.error(str(e))

    main(args.hashfn, input_dict, args.min_combo_selection,
         args.divider, args.verbose)