-
Notifications
You must be signed in to change notification settings - Fork 0
/
ehr_analysis.py
127 lines (116 loc) · 4.73 KB
/
ehr_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
"""The input data for parse_data function should be text files which entries
are separated by tabs.
"""
from logging import raiseExceptions
import sqlite3
con = sqlite3.connect("ehr.db")
cur = con.cursor()
def parse_patient_data(filename: str) -> None:
"""Read the file and parse the data files into a SQLite table.
The analysis of computational complexity is based on the assumption that
the input data is N by M.
"""
cur.execute(
"CREATE TABLE IF NOT EXISTS Patient([id] TEXT PRIMARY KEY , [gender] TEXT, [dob] TEXT, [age] FLOAT)"
) # O(1)
with open(filename, encoding="utf-8-sig") as p: # O(1)
lines = p.readlines() # O(1)
col_name = [] # O(1)
first_row = True # O(1)
for line in lines: # O(N)
line = line.strip() # O(1)
if first_row: # O(1)
col_name = line.split() # O(1)
first_row = False # O(1)
elif not first_row: # O(1)
dic = {} # O(1)
dat = line.split("\t") # O(1)
for count, ele in enumerate(dat, 0): # O(M)
dic[col_name[count]] = dat[count] # O(1)
lst = [
dic["PatientID"],
dic["PatientGender"],
dic["PatientDateOfBirth"],
0.0,
] # O(4)
cur.execute(
"INSERT or REPLACE INTO Patient VALUES (?, ?, ?, ?)", lst
) # O(1)
return
# The function has computational complexity 0(NM)
def parse_lab_data(filename: str) -> None:
"""Read the file and parse the data files into a SQLite table.
The analysis of computational complexity is based on the assumption that
the input data is N by M.
"""
# Create a table for Lab
cur.execute(
"CREATE TABLE IF NOT EXISTS Lab([id] TEXT, [admission] INTEGER, [name] TEXT, \
[value] FLOAT, [units] TEXT, [time] TEXT)"
) # O(1)
with open(filename, encoding="utf-8-sig") as p: # O(1)
lines = p.readlines() # O(1)
col_name = [] # O(1)
first_row = True # O(1)
for line in lines: # O(N)
line = line.strip() # O(1)
if first_row: # O(1)
col_name = line.split() # O(1)
first_row = False # O(1)
elif not first_row: # O(1)
dic = {} # O(1)
dat = line.split("\t") # O(1)
for count, ele in enumerate(dat, 0): # O(M)
dic[col_name[count]] = dat[count] # O(1)
lst = [
dic["PatientID"],
dic["AdmissionID"],
dic["LabName"],
dic["LabValue"],
dic["LabUnits"],
dic["LabDateTime"],
] # O(6)
cur.execute(
"INSERT or REPLACE INTO Lab VALUES (?, ?, ?, ?, ?, ?)", lst
) # O(1)
return
# The function has computational complexity 0(NM)
def num_older_than(given_age: float, cursor) -> int:
"""Returns the number of patients older than a given age."""
cursor.execute(
"UPDATE Patient SET age = (strftime('%Y', 'now') - strftime('%Y', dob))\
- (strftime('%m-%d', 'now') < strftime('%m-%d', dob))"
)
ct = cursor.execute("SELECT COUNT(*) FROM Patient WHERE age > ?", (given_age,))
return list(ct)[0][0]
# The function has computational complexity 0(1)
def sick_patients(lab: str, gt_lt: str, value: float, cursor) -> list:
"""Returns a unique list of patients who have a given test with
value above (">") or below ("<") a given value.
"""
if gt_lt == ">":
names = cursor.execute(
"SELECT DISTINCT id FROM Lab WHERE name = ? AND value > ?", (lab, value)
)
elif gt_lt == "<":
names = cursor.execute(
"SELECT DISTINCT id FROM Lab WHERE name = ? AND value < ?", (lab, value)
)
else:
raise ValueError("gt_lt should be either < or >")
return list(names)
# The function has computational complexity 0(1)
def first_admission(id: str, name: str, cursor) -> int:
"""Returns the age of the patient's (specified by id)
first admission to the lab(specified by lab_name).
"""
visit_age = cursor.execute(
"SELECT MIN((strftime('%Y', time) - strftime('%Y', (SELECT dob FROM \
Patient WHERE id = ?))) - (strftime('%m-%d', time) < \
strftime('%m-%d', (SELECT dob FROM Patient WHERE \
id = ?)))) FROM Lab WHERE id = ? AND \
admission = 1 AND name = ?",
(id, id, id, name),
)
return list(visit_age)[0][0]
# The function has computational complexity 0(1)