Lint fixes using black

yunhailuo · Jun 3, 2019 · 02b9ead
1 parent 76b792c
commit 02b9ead
Show file tree

Hide file tree

Showing 12 changed files with 876 additions and 488 deletions.
diff --git a/setup.cfg b/setup.cfg
@@ -3,4 +3,5 @@
 
 # W605: invalid escape sequence '\*'
 # W503: line break before binary operator
+# E203: whitespace before ':'
-ignore = W605, W503
+ignore = W605, W503, E203
diff --git a/tests/test_gdc.py b/tests/test_gdc.py
@@ -21,20 +21,16 @@ def test_simple_and_filter():
  expected = {
  "content": [
  {"content": {"field": "a", "value": ["b"]}, "op": "in"},
- {"content": {"field": "c", "value": ["d"]}, "op": "exclude"}
+ {"content": {"field": "c", "value": ["d"]}, "op": "exclude"},
  ],
- "op": "and"
+ "op": "and",
  }
  actual = gdc.simple_and_filter(in_dict_2, exclude_dict_2)
  compare_dict(expected, actual)
 
 
 def test_reduce_json_array():
- input_1 = [{
- 'a': 'hello',
- 'b': [1, 2, 3],
- 'c': [10]
- }]
+ input_1 = [{'a': 'hello', 'b': [1, 2, 3], 'c': [10]}]
  input_2 = [{'a': 'b'}]
  actual_1 = gdc.reduce_json_array(input_1)
  expected_1 = {"a": "hello", "b": [1, 2, 3], "c": 10}
@@ -79,7 +75,7 @@ def test_get_project_info():
  "name": "Cystadenocarcinoma",
  "primary_site": "Ovary",
  "program.name": "TCGA",
- "project_id": "TCGA-OV"
+ "project_id": "TCGA-OV",
  }
  actual.equals(expected)
 
@@ -99,12 +95,13 @@ def test_search():
  actual = gdc.search(endpoint=endpoint, in_filter=in_filter, fields=fields)
  expected = {
  "id": "d1a15919-f5e2-5e60-aed9-cb52a8b4a7a1",
- "target": "TARGET-51-PAKWMM"
+ "target": "TARGET-51-PAKWMM",
  }
  actual.equals(expected)
  with pytest.raises(ValueError) as exception_info:
- gdc.search(endpoint=endpoint, in_filter=in_filter, fields=fields,
- method="PUT")
+ gdc.search(
+ endpoint=endpoint, in_filter=in_filter, fields=fields, method="PUT"
+ )
  error_str = 'Invalid method: PUT\n method must be either "GET" or "POST".'
  assert exception_info.value.args[0] == error_str
 
@@ -116,7 +113,8 @@ def test_gdc_check_new(capfd):
  gdc.gdc_check_new(new_file_uuids)
  out, err = capfd.readouterr()
  actual = pd.read_csv(StringIO(out), sep='\t')
- expected = pd.read_csv("tests/fixtures/gdc_check_new_DR9.0_files_swap.csv",
- sep='\t')
+ expected = pd.read_csv(
+ "tests/fixtures/gdc_check_new_DR9.0_files_swap.csv", sep='\t'
+ )
  expected = expected.head()
  actual.equals(expected)
diff --git a/tests/test_main.py b/tests/test_main.py
@@ -21,16 +21,29 @@ def test_xena_eql(self):
  assert parsed.df2 == "df2"
 
  def test_gdc_check_new(self):
- parsed = self.parser.parse_args(["gdc-check-new",
- "https://example.com/data.gz"])
+ parsed = self.parser.parse_args(
+ ["gdc-check-new", "https://example.com/data.gz"]
+ )
  assert parsed.subcomm == "gdc-check-new"
  assert parsed.url == "https://example.com/data.gz"
 
  def test_merge_xena(self):
- parsed = self.parser.parse_args(["merge-xena", "-f", "path/to/matrix1",
- "path/to/matrix2", "-t", "datatype",
- "-o", "path/to/dir", "-n", "new_name",
- "-c", "cohort_name"])
+ parsed = self.parser.parse_args(
+ [
+ "merge-xena",
+ "-f",
+ "path/to/matrix1",
+ "path/to/matrix2",
+ "-t",
+ "datatype",
+ "-o",
+ "path/to/dir",
+ "-n",
+ "new_name",
+ "-c",
+ "cohort_name",
+ ]
+ )
  assert parsed.subcomm == "merge-xena"
  assert parsed.files == ["path/to/matrix1", "path/to/matrix2"]
  assert parsed.datatype == "datatype"
@@ -39,46 +52,52 @@ def test_merge_xena(self):
  assert parsed.cohort == "cohort_name"
 
  def test_etl(self):
- parsed = self.parser.parse_args([
- "etl",
- "-r",
- "path/to/dir",
- "-p",
- "project_name",
- "-t",
- "datatype",
- ])
+ parsed = self.parser.parse_args(
+ [
+ "etl",
+ "-r",
+ "path/to/dir",
+ "-p",
+ "project_name",
+ "-t",
+ "datatype",
+ ]
+ )
  assert parsed.subcomm == "etl"
  assert parsed.root == "path/to/dir"
  assert parsed.projects == ["project_name"]
  assert parsed.datatype == ["datatype"]
  # for mutually exclusive groups
- parsed = self.parser.parse_args([
- "etl",
- "-r",
- "path/to/dir",
- "-P",
- "not_this_project_name",
- "-T",
- "not_this_datatype",
- ])
+ parsed = self.parser.parse_args(
+ [
+ "etl",
+ "-r",
+ "path/to/dir",
+ "-P",
+ "not_this_project_name",
+ "-T",
+ "not_this_datatype",
+ ]
+ )
  assert parsed.subcomm == "etl"
  assert parsed.root == "path/to/dir"
  assert parsed.not_projects == ["not_this_project_name"]
  assert parsed.not_datatype == ["not_this_datatype"]
 
  def test_metaparser(self):
- parsed = self.parser.parse_args([
- "metadata",
- "-p",
- "project_name",
- "-t",
- "datatype",
- "-m",
- "path/to/matrix",
- "-r",
- "10",
- ])
+ parsed = self.parser.parse_args(
+ [
+ "metadata",
+ "-p",
+ "project_name",
+ "-t",
+ "datatype",
+ "-m",
+ "path/to/matrix",
+ "-r",
+ "10",
+ ]
+ )
  assert parsed.subcomm == "metadata"
  assert parsed.project == "project_name"
  assert parsed.datatype == "datatype"

diff --git a/tests/utils.py b/tests/utils.py
@@ -17,5 +17,6 @@ def compare_dict(dict_1, dict_2):
  >>> compare_dict({'a': 'b'}, {'a': 'b'})
  True
  """
- return json.dumps(dict_1, sort_keys=True) == json.dumps(dict_2,
- sort_keys=True)
+ return json.dumps(dict_1, sort_keys=True) == json.dumps(
+ dict_2, sort_keys=True
+ )
diff --git a/xena_gdc_etl/constants.py b/xena_gdc_etl/constants.py
@@ -1,4 +1,6 @@
-GDC_RELEASE_URL = 'https://docs.gdc.cancer.gov/Data/Release_Notes/Data_Release_Notes/' # noqa: E501
+GDC_RELEASE_URL = (
+ 'https://docs.gdc.cancer.gov/Data/Release_Notes/Data_Release_Notes/'
+) # noqa: E501
 
 # Map GDC project_id to Xena specific cohort name.
 GDC_XENA_COHORT = {
@@ -34,7 +36,7 @@
  'TCGA-KICH': 'GDC TCGA Kidney Chromophobe (KICH)',
  'TCGA-UCS': 'GDC TCGA Uterine Carcinosarcoma (UCS)',
  'TCGA-CHOL': 'GDC TCGA Bile Duct Cancer (CHOL)',
- 'TCGA-DLBC': 'GDC TCGA Large B-cell Lymphoma (DLBC)'
+ 'TCGA-DLBC': 'GDC TCGA Large B-cell Lymphoma (DLBC)',
 }
 
 # Map xena_dtype to corresponding metadata template.
@@ -53,36 +55,39 @@
  'GDC_phenotype': 'template.phenotype.meta.json',
  'survival': 'template.survival.meta.json',
  'methylation27': 'template.methylation.meta.json',
- 'methylation450': 'template.methylation.meta.json'
+ 'methylation450': 'template.methylation.meta.json',
 }
 
 # Jinja2 template variables for corresponding "xena_dtype".
 METADATA_VARIABLES = {
  'htseq_counts': {'gdc_type': 'HTSeq - Counts'},
- 'htseq_fpkm': {'gdc_type': 'HTSeq - FPKM',
- 'unit': 'fpkm'},
- 'htseq_fpkm-uq': {'gdc_type': 'HTSeq - FPKM-UQ',
- 'unit': 'fpkm-uq'},
+ 'htseq_fpkm': {'gdc_type': 'HTSeq - FPKM', 'unit': 'fpkm'},
+ 'htseq_fpkm-uq': {'gdc_type': 'HTSeq - FPKM-UQ', 'unit': 'fpkm-uq'},
  'mirna': {'gdc_type': 'miRNA Expression Quantification'},
  'mirna_isoform': {'gdc_type': 'Isoform Expression Quantification'},
  'cnv': {'gdc_type': 'Copy Number Segment'},
  'masked_cnv': {'gdc_type': 'Masked Copy Number Segment'},
  'muse_snv': {'gdc_type': 'MuSE Variant Aggregation and Masking'},
- 'mutect2_snv': {
- 'gdc_type': 'MuTect2 Variant Aggregation and Masking'
- },
+ 'mutect2_snv': {'gdc_type': 'MuTect2 Variant Aggregation and Masking'},
  'somaticsniper_snv': {
  'gdc_type': 'SomaticSniper Variant Aggregation and Masking'
  },
- 'varscan2_snv': {
- 'gdc_type': 'VarScan2 Variant Aggregation and Masking'
- },
+ 'varscan2_snv': {'gdc_type': 'VarScan2 Variant Aggregation and Masking'},
  'methylation27': {'platform_num': '27'},
- 'methylation450': {'platform_num': '450'}
+ 'methylation450': {'platform_num': '450'},
 }
 valid_dtype = [
- 'htseq_counts', 'htseq_fpkm', 'htseq_fpkm-uq', 'mirna',
- 'masked_cnv', 'muse_snv', 'mutect2_snv',
- 'somaticsniper_snv', 'varscan2_snv', 'GDC_phenotype',
- 'survival', 'methylation27', 'methylation450'
+ 'htseq_counts',
+ 'htseq_fpkm',
+ 'htseq_fpkm-uq',
+ 'mirna',
+ 'masked_cnv',
+ 'muse_snv',
+ 'mutect2_snv',
+ 'somaticsniper_snv',
+ 'varscan2_snv',
+ 'GDC_phenotype',
+ 'survival',
+ 'methylation27',
+ 'methylation450',
 ]