Skip to content

Commit

Permalink
Lint fixes using black
Browse files (browse the repository at this point in the history)
  • Loading branch information
ayan-b committed Jun 3, 2019
1 parent 76b792c commit 02b9ead
Show file tree
Hide file tree
Showing 12 changed files with 876 additions and 488 deletions.
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@

# W605: invalid escape sequence '\*'
# W503: line break before binary operator
# E203: whitespace before ':'
ignore = W605, W503
24 changes: 11 additions & 13 deletions tests/test_gdc.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,16 @@ def test_simple_and_filter():
expected = {
"content": [
{"content": {"field": "a", "value": ["b"]}, "op": "in"},
{"content": {"field": "c", "value": ["d"]}, "op": "exclude"}
{"content": {"field": "c", "value": ["d"]}, "op": "exclude"},
],
"op": "and"
"op": "and",
}
actual = gdc.simple_and_filter(in_dict_2, exclude_dict_2)
compare_dict(expected, actual)


def test_reduce_json_array():
input_1 = [{
'a': 'hello',
'b': [1, 2, 3],
'c': [10]
}]
input_1 = [{'a': 'hello', 'b': [1, 2, 3], 'c': [10]}]
input_2 = [{'a': 'b'}]
actual_1 = gdc.reduce_json_array(input_1)
expected_1 = {"a": "hello", "b": [1, 2, 3], "c": 10}
Expand Down Expand Up @@ -79,7 +75,7 @@ def test_get_project_info():
"name": "Cystadenocarcinoma",
"primary_site": "Ovary",
"program.name": "TCGA",
"project_id": "TCGA-OV"
"project_id": "TCGA-OV",
}
actual.equals(expected)

Expand All @@ -99,12 +95,13 @@ def test_search():
actual = gdc.search(endpoint=endpoint, in_filter=in_filter, fields=fields)
expected = {
"id": "d1a15919-f5e2-5e60-aed9-cb52a8b4a7a1",
"target": "TARGET-51-PAKWMM"
"target": "TARGET-51-PAKWMM",
}
actual.equals(expected)
with pytest.raises(ValueError) as exception_info:
gdc.search(endpoint=endpoint, in_filter=in_filter, fields=fields,
method="PUT")
gdc.search(
endpoint=endpoint, in_filter=in_filter, fields=fields, method="PUT"
)
error_str = 'Invalid method: PUT\n method must be either "GET" or "POST".'
assert exception_info.value.args[0] == error_str

Expand All @@ -116,7 +113,8 @@ def test_gdc_check_new(capfd):
gdc.gdc_check_new(new_file_uuids)
out, err = capfd.readouterr()
actual = pd.read_csv(StringIO(out), sep='\t')
expected = pd.read_csv("tests/fixtures/gdc_check_new_DR9.0_files_swap.csv",
sep='\t')
expected = pd.read_csv(
"tests/fixtures/gdc_check_new_DR9.0_files_swap.csv", sep='\t'
)
expected = expected.head()
actual.equals(expected)
89 changes: 54 additions & 35 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,29 @@ def test_xena_eql(self):
assert parsed.df2 == "df2"

def test_gdc_check_new(self):
parsed = self.parser.parse_args(["gdc-check-new",
"https://example.com/data.gz"])
parsed = self.parser.parse_args(
["gdc-check-new", "https://example.com/data.gz"]
)
assert parsed.subcomm == "gdc-check-new"
assert parsed.url == "https://example.com/data.gz"

def test_merge_xena(self):
parsed = self.parser.parse_args(["merge-xena", "-f", "path/to/matrix1",
"path/to/matrix2", "-t", "datatype",
"-o", "path/to/dir", "-n", "new_name",
"-c", "cohort_name"])
parsed = self.parser.parse_args(
[
"merge-xena",
"-f",
"path/to/matrix1",
"path/to/matrix2",
"-t",
"datatype",
"-o",
"path/to/dir",
"-n",
"new_name",
"-c",
"cohort_name",
]
)
assert parsed.subcomm == "merge-xena"
assert parsed.files == ["path/to/matrix1", "path/to/matrix2"]
assert parsed.datatype == "datatype"
Expand All @@ -39,46 +52,52 @@ def test_merge_xena(self):
assert parsed.cohort == "cohort_name"

def test_etl(self):
parsed = self.parser.parse_args([
"etl",
"-r",
"path/to/dir",
"-p",
"project_name",
"-t",
"datatype",
])
parsed = self.parser.parse_args(
[
"etl",
"-r",
"path/to/dir",
"-p",
"project_name",
"-t",
"datatype",
]
)
assert parsed.subcomm == "etl"
assert parsed.root == "path/to/dir"
assert parsed.projects == ["project_name"]
assert parsed.datatype == ["datatype"]
# for mutually exclusive groups
parsed = self.parser.parse_args([
"etl",
"-r",
"path/to/dir",
"-P",
"not_this_project_name",
"-T",
"not_this_datatype",
])
parsed = self.parser.parse_args(
[
"etl",
"-r",
"path/to/dir",
"-P",
"not_this_project_name",
"-T",
"not_this_datatype",
]
)
assert parsed.subcomm == "etl"
assert parsed.root == "path/to/dir"
assert parsed.not_projects == ["not_this_project_name"]
assert parsed.not_datatype == ["not_this_datatype"]

def test_metaparser(self):
parsed = self.parser.parse_args([
"metadata",
"-p",
"project_name",
"-t",
"datatype",
"-m",
"path/to/matrix",
"-r",
"10",
])
parsed = self.parser.parse_args(
[
"metadata",
"-p",
"project_name",
"-t",
"datatype",
"-m",
"path/to/matrix",
"-r",
"10",
]
)
assert parsed.subcomm == "metadata"
assert parsed.project == "project_name"
assert parsed.datatype == "datatype"
Expand Down
5 changes: 3 additions & 2 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,6 @@ def compare_dict(dict_1, dict_2):
>>> compare_dict({'a': 'b'}, {'a': 'b'})
True
"""
return json.dumps(dict_1, sort_keys=True) == json.dumps(dict_2,
sort_keys=True)
return json.dumps(dict_1, sort_keys=True) == json.dumps(
dict_2, sort_keys=True
)
41 changes: 23 additions & 18 deletions xena_gdc_etl/constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
GDC_RELEASE_URL = 'https://docs.gdc.cancer.gov/Data/Release_Notes/Data_Release_Notes/' # noqa: E501
GDC_RELEASE_URL = (
'https://docs.gdc.cancer.gov/Data/Release_Notes/Data_Release_Notes/'
) # noqa: E501

# Map GDC project_id to Xena specific cohort name.
GDC_XENA_COHORT = {
Expand Down Expand Up @@ -34,7 +36,7 @@
'TCGA-KICH': 'GDC TCGA Kidney Chromophobe (KICH)',
'TCGA-UCS': 'GDC TCGA Uterine Carcinosarcoma (UCS)',
'TCGA-CHOL': 'GDC TCGA Bile Duct Cancer (CHOL)',
'TCGA-DLBC': 'GDC TCGA Large B-cell Lymphoma (DLBC)'
'TCGA-DLBC': 'GDC TCGA Large B-cell Lymphoma (DLBC)',
}

# Map xena_dtype to corresponding metadata template.
Expand All @@ -53,36 +55,39 @@
'GDC_phenotype': 'template.phenotype.meta.json',
'survival': 'template.survival.meta.json',
'methylation27': 'template.methylation.meta.json',
'methylation450': 'template.methylation.meta.json'
'methylation450': 'template.methylation.meta.json',
}

# Jinja2 template variables for corresponding "xena_dtype".
METADATA_VARIABLES = {
'htseq_counts': {'gdc_type': 'HTSeq - Counts'},
'htseq_fpkm': {'gdc_type': 'HTSeq - FPKM',
'unit': 'fpkm'},
'htseq_fpkm-uq': {'gdc_type': 'HTSeq - FPKM-UQ',
'unit': 'fpkm-uq'},
'htseq_fpkm': {'gdc_type': 'HTSeq - FPKM', 'unit': 'fpkm'},
'htseq_fpkm-uq': {'gdc_type': 'HTSeq - FPKM-UQ', 'unit': 'fpkm-uq'},
'mirna': {'gdc_type': 'miRNA Expression Quantification'},
'mirna_isoform': {'gdc_type': 'Isoform Expression Quantification'},
'cnv': {'gdc_type': 'Copy Number Segment'},
'masked_cnv': {'gdc_type': 'Masked Copy Number Segment'},
'muse_snv': {'gdc_type': 'MuSE Variant Aggregation and Masking'},
'mutect2_snv': {
'gdc_type': 'MuTect2 Variant Aggregation and Masking'
},
'mutect2_snv': {'gdc_type': 'MuTect2 Variant Aggregation and Masking'},
'somaticsniper_snv': {
'gdc_type': 'SomaticSniper Variant Aggregation and Masking'
},
'varscan2_snv': {
'gdc_type': 'VarScan2 Variant Aggregation and Masking'
},
'varscan2_snv': {'gdc_type': 'VarScan2 Variant Aggregation and Masking'},
'methylation27': {'platform_num': '27'},
'methylation450': {'platform_num': '450'}
'methylation450': {'platform_num': '450'},
}
valid_dtype = [
'htseq_counts', 'htseq_fpkm', 'htseq_fpkm-uq', 'mirna',
'masked_cnv', 'muse_snv', 'mutect2_snv',
'somaticsniper_snv', 'varscan2_snv', 'GDC_phenotype',
'survival', 'methylation27', 'methylation450'
'htseq_counts',
'htseq_fpkm',
'htseq_fpkm-uq',
'mirna',
'masked_cnv',
'muse_snv',
'mutect2_snv',
'somaticsniper_snv',
'varscan2_snv',
'GDC_phenotype',
'survival',
'methylation27',
'methylation450',
]
Loading

0 comments on commit 02b9ead

Please sign in to comment.