-
Notifications
You must be signed in to change notification settings - Fork 0
/
convert.sh
executable file
·128 lines (120 loc) · 3.55 KB
/
convert.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#!/bin/bash
# Default settings. Each one can be overridden by a command-line option
# (the option letter is noted next to the variable).
icd_g='https://purl.bioontology.org/ontology/ICD9CM/'       # -i
snomed_g='https://purl.bioontology.org/ontology/SNOMEDCT/'  # -s
predicate='skos:relation'                                   # -p
map_mode='one'                                              # 'one', or 'many' with -m

# These two have no default, but are initialised explicitly so that a
# variable of the same name exported in the caller's environment cannot
# silently select a generation mode or a mapping file.
mapping_file=''                                             # -f
gen_mode=''                                                 # 'icd' (-I) or 'snomed' (-S)
function usage {
echo "Usage: $0 [-i URL] [-s URL] [-p PRED] [-f FILE] [-m] [-I] [-S]"
echo " -i URL Set the ICD9CM graph URL"
echo " -s URL Set the SNOMEDCT graph URL"
echo " -p URL Set the predicate to use for annotations"
echo " -f FILE Set path for input mapping file"
echo " -m Process 1-to-many file instead of 1-to-1 file"
echo " -I Generate ICD9CM class annotations"
echo " -S Generate SNOMEDCT class annotations"
exit 0
}
# Parse the command-line options into the script's global settings.
#
# Globals written: icd_g, snomed_g, predicate, mapping_file, map_mode,
#                  gen_mode (only those whose option is actually given)
# Arguments:       the script's argument list ("$@")
# Outputs:         diagnostics on stderr for bad options
# Returns:         0 on success; exits the script with status 1 on an
#                  unknown option or a missing option argument; -h hands
#                  over to usage, which exits the script
parse_options() {
  # Local OPTIND so that parsing always starts at the first argument,
  # regardless of any earlier getopts use.
  local OPTION OPTARG OPTIND=1

  # The leading ':' puts getopts in silent mode: it reports problems via
  # OPTION='?' (unknown option) and OPTION=':' (missing argument) instead
  # of printing its own message, so both cases must be handled here.
  while getopts ':hi:s:p:f:mIS' OPTION; do
    case "${OPTION}" in
      i)
        icd_g="${OPTARG}"
        ;;
      s)
        snomed_g="${OPTARG}"
        ;;
      p)
        predicate="${OPTARG}"
        ;;
      f)
        mapping_file="${OPTARG}"
        ;;
      m)
        map_mode='many'
        ;;
      I)
        gen_mode='icd'
        ;;
      S)
        gen_mode='snomed'
        ;;
      h)
        usage
        ;;
      :)
        echo "Option -${OPTARG} requires an argument (run '$0 -h' for help)" >&2
        exit 1
        ;;
      \?)
        echo "Unknown option: -${OPTARG} (run '$0 -h' for help)" >&2
        exit 1
        ;;
    esac
  done
}

parse_options "$@"
# Work out the default mapping file name, used when no -f option is given.
# The name is the one the file has inside the NLM zip, and depends on
# whether we are in 1-to-1 or 1-to-many processing mode.
# https://www.nlm.nih.gov/research/umls/mapping_projects/icd9cm_to_snomedct.html
if [ "${map_mode}" == 'one' ]; then
  map_stub='ICD9CM_SNOMED_MAP_1TO1_'
else
  map_stub='ICD9CM_SNOMED_MAP_1TOM_'
fi
default_map_file="${map_stub}202012.txt"

# Set map_file.
#
# If -f mapping_file was provided, use it when it is an existing regular
# file and fail otherwise. The test is "-n" (a path was given), not "-s"
# (file exists and is non-empty): with "-s" a mistyped path would silently
# fall through to the default file instead of being reported.
#
# If no -f mapping_file was provided, look for the default file in the
# current directory.
if [ -n "${mapping_file}" ]; then
  if [ -f "${mapping_file}" ]; then
    map_file="${mapping_file}"
  else
    echo "Map file does not exist: ${mapping_file}" >&2
    exit 1
  fi
elif [ -f "${default_map_file}" ]; then
  map_file="${default_map_file}"
else
  echo "No map file found, expected e.g. './${default_map_file}'" >&2
  exit 1
fi
# Print the Turtle @prefix header followed by two blank lines.
# Declaring prefixes makes the output smaller and means the graph URLs do
# not have to be passed to awk.
#
# Globals read: icd_g, snomed_g
# Outputs:      header lines on stdout
emit_prefixes() {
  # The SKOS namespace IRI uses the http scheme; IRIs are compared as plain
  # strings, so an https variant would name a different vocabulary.
  printf '@prefix skos: <http://www.w3.org/2004/02/skos/core#> .\n'
  printf '@prefix icd9cm: <%s> .\n' "${icd_g}"
  printf '@prefix snomedct: <%s> .\n' "${snomed_g}"
  printf '\n\n'
}

# Convert the tab-separated mapping file into one Turtle triple per row.
#
# Arguments:
#   $1 - direction: 'icd' annotates ICD concepts with the predicate to the
#        SNOMED concepts; anything else annotates SNOMED concepts with the
#        predicate to the ICD concepts
#   $2 - cardinality: 'many' additionally drops rows whose SNOMED column is
#        the literal string NULL (the 1-to-many map file may contain such
#        rows); any other value keeps every data row
#   $3 - predicate placed between subject and object
#   $4 - path of the mapping file
# Outputs: triples on stdout
# Returns: the exit status of awk
emit_triples() {
  local direction="$1"
  local cardinality="$2"
  local pred="$3"
  local input="$4"
  local skip_null=0

  if [ "${cardinality}" == 'many' ]; then
    skip_null=1
  fi

  # Field 1 is emitted as the ICD9CM code and field 8 as the SNOMEDCT code.
  # The first line is the TSV header row and is always skipped.
  awk -v FS='\t' \
    -v PRED="${pred}" \
    -v DIRECTION="${direction}" \
    -v SKIP_NULL="${skip_null}" '
      NR == 1 { next }
      SKIP_NULL == 1 && $8 == "NULL" { next }
      DIRECTION == "icd" {
        print "icd9cm:" $1 " " PRED " snomedct:" $8 " ."
        next
      }
      { print "snomedct:" $8 " " PRED " icd9cm:" $1 " ." }
    ' "${input}"
}

# ICD and SNOMED generation both have a 1-to-1 and a 1-to-many mode.
# 1-to-1 provides a simple mapping and is easier to work with but misses
# some concepts; see the NLM page for the ICD-9-CM to SNOMED CT map for
# details.
#
# The generation mode is checked before anything is written, so that a
# missing -I/-S shows the usage text without a stray Turtle header in
# front of it.
case "${gen_mode}" in
  icd | snomed)
    emit_prefixes
    emit_triples "${gen_mode}" "${map_mode}" "${predicate}" "${map_file}"
    ;;
  *)
    usage
    ;;
esac