-
Notifications
You must be signed in to change notification settings - Fork 69
/
sourcerer-cc.properties
36 lines (30 loc) · 829 Bytes
/
sourcerer-cc.properties
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Sourcerer-CC-D specific settings
NODE_PREFIX=NODE
QUERY_DIR_PATH=query
OUTPUT_DIR=output
DATASET_DIR_PATH=input/dataset
IS_GEN_CANDIDATE_STATISTICS=false
IS_STATUS_REPORTER_ON=true
# For recovery: log the processed line number after every X lines
LOG_PROCESSED_LINENUMBER_AFTER_X_LINES=50
# Ignore all files whose token count falls outside these bounds
MIN_TOKENS=65
MAX_TOKENS=500000
# Sharding speeds up search for very large datasets (>200K files).
# For smaller datasets, it makes little difference.
IS_SHARDING=true
SHARD_MAX_NUM_TOKENS=65,100,300,500000
# The next few variables are for tuning performance.
# Their values depend, in part, on how many cores are available.
# The default values work well for a single SourcererCC process
# on an 8-core machine.
# INDEXING
BTSQ_THREADS=4
BTIIQ_THREADS=4
BTFIQ_THREADS=4
# SEARCH
QLQ_THREADS=4
QBQ_THREADS=4
QCQ_THREADS=4
VCQ_THREADS=16
RCQ_THREADS=4