Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lint fixes #37

Merged
merged 4 commits into from
Jun 3, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Lint fixes
  • Loading branch information
ayan-b committed Jun 3, 2019
commit 03c6eceaab956974eaedb9bb63310589b3b75ef3
8 changes: 3 additions & 5 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,9 @@ script:
jobs:
include:
- stage: lint
script:
- pip install flake8
- flake8
allow_failures:
- stage: lint
before_install: false
install: pip install flake8
script: flake8

notifications:
email: false
3 changes: 2 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@
# W605: invalid escape sequence '\*'
# E121: continuation line under-indented for hanging indent
# E126: continuation line over-indented for hanging indent
ignore = E722, W605, E121, E126
# W503: line break before binary operator
ignore = E722, W605, E121, E126, W503
ayan-b marked this conversation as resolved.
Show resolved Hide resolved
4 changes: 3 additions & 1 deletion tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,5 +89,7 @@ def test_version(self):
with pytest.raises(SystemExit):
self.parser.parse_args(['--version'])
out, _ = self.capfd.readouterr()
__version__ = pkg_resources.get_distribution("xena_gdc_etl").version
__version__ = pkg_resources.get_distribution(
"xena_gdc_etl"
).version
assert out == "xge " + __version__ + "\n"
2 changes: 2 additions & 0 deletions xena_gdc_etl/constants.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
GDC_RELEASE_URL = 'https://docs.gdc.cancer.gov/Data/Release_Notes/Data_Release_Notes/' # noqa: E501

# Map GDC project_id to Xena specific cohort name.
GDC_XENA_COHORT = {
'TCGA-BRCA': 'GDC TCGA Breast Cancer (BRCA)',
Expand Down
7 changes: 4 additions & 3 deletions xena_gdc_etl/gdc.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,9 +306,10 @@ def download(uuids, download_dir='.', chunk_size=4096):
for chunk in response.iter_content(chunk_size):
f.write(chunk)
downloaded = downloaded + chunk_size
print(status.format(count, total, path,
min(1, downloaded / file_size)),
end='')
print(status.format(
count, total, path, min(1, downloaded / file_size)),
end=''
)
sys.stdout.flush()
download_list.append(path)
else:
Expand Down
6 changes: 3 additions & 3 deletions xena_gdc_etl/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from .utils import handle_merge_xena
from .gdc import gdc_check_new, get_project_info
from .constants import valid_dtype
from .constants import valid_dtype, GDC_RELEASE_URL
from .xena_dataset import GDCOmicset, GDCPhenoset, GDCSurvivalset
from .gdc2xena import gdc2xena

Expand Down Expand Up @@ -84,8 +84,8 @@ def main():
dataset = GDCOmicset(options.project, options.datatype, root_dir)
dataset.matrix = options.matrix
dataset.gdc_release = (
'https://docs.gdc.cancer.gov/Data/Release_Notes/Data_Release_Notes/#data-release-' + # noqa
str(options.release).replace('.', '')
GDC_RELEASE_URL + '#data-release-'
+ str(options.release).replace('.', '')
)
dataset.metadata()

Expand Down
6 changes: 3 additions & 3 deletions xena_gdc_etl/scripts/panTCGA.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
import jinja2
import pandas as pd

from ..constants import METADATA_TEMPLATE, METADATA_VARIABLES # noqa
from ..constants import METADATA_TEMPLATE, METADATA_VARIABLES, GDC_RELEASE_URL


def metadata(matrix, xena_dtypes):
Expand Down Expand Up @@ -59,7 +59,7 @@ def metadata(matrix, xena_dtypes):
variables = {
'project_id': 'GDC-PANCAN',
'date': matrix_date,
'gdc_release': 'https://docs.gdc.cancer.gov/Data/Release_Notes/Data_Release_Notes/#data-release-90', # noqa
'gdc_release': GDC_RELEASE_URL + '#data-release-90',
'xena_cohort': 'GDC Pan-Cancer (PANCAN)'
}
try:
Expand All @@ -78,7 +78,7 @@ def main():
datatypes = ['htseq_counts', 'htseq_fpkm', 'htseq_fpkm-uq', 'mirna',
'masked_cnv', 'muse_snv', 'mutect2_snv', 'somaticsniper_snv',
'varscan2_snv', 'survival']
gdc_release = 'https://docs.gdc.cancer.gov/Data/Release_Notes/Data_Release_Notes/#data-release-100'
gdc_release = GDC_RELEASE_URL + '#data-release-100'
meta_templates_dir = os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
'Resources')
Expand Down
9 changes: 7 additions & 2 deletions xena_gdc_etl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@
import pandas as pd
import jinja2

from .constants import METADATA_TEMPLATE, METADATA_VARIABLES, valid_dtype
from .constants import (
METADATA_TEMPLATE,
METADATA_VARIABLES,
valid_dtype,
GDC_RELEASE_URL,
)


def mkdir_p(dir_name):
Expand Down Expand Up @@ -78,7 +83,7 @@ def merge(filelist, xena_dtypes, out_matrix):
k: os.path.join(meta_templates_dir, v)
for k, v in METADATA_TEMPLATE.items()
}
gdc_release = 'https://docs.gdc.cancer.gov/Data/Release_Notes/Data_Release_Notes/#data-release-90' # noqa
gdc_release = GDC_RELEASE_URL + '#data-release-90'
start_time = timeit.default_timer()
if xena_dtypes in ['htseq_counts', 'htseq_fpkm', 'htseq_fpkm-uq',
'mirna', 'methylation27']:
Expand Down
50 changes: 29 additions & 21 deletions xena_gdc_etl/xena_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,12 @@

from xena_gdc_etl import gdc
from .utils import mkdir_p
from .constants import GDC_XENA_COHORT, METADATA_TEMPLATE, METADATA_VARIABLES
from .constants import (
GDC_XENA_COHORT,
METADATA_TEMPLATE,
METADATA_VARIABLES,
GDC_RELEASE_URL,
)


def read_by_ext(filename, mode='r'):
Expand Down Expand Up @@ -706,7 +711,8 @@ class GDCOmicset(XenaDataset):
a string or a list of strings. It can be automatically derived
from ``projects`` and ``xena_dtype`` if it is not assigned
explicitly by the user when being used. Please check `GDC API
documentation <https://docs.gdc.cancer.gov/API/Users_Guide/Search_and_Retrieval/#filters-specifying-the-query>`_
documentation
<https://docs.gdc.cancer.gov/API/Users_Guide/Search_and_Retrieval/#filters-specifying-the-query>`_
for details.
gdc_prefix (str): A GDC available file field whose value will be used
in the filename of corresponding download file. It will be used by
Expand Down Expand Up @@ -779,19 +785,23 @@ class GDCOmicset(XenaDataset):
'60', '61', '99'])},
'muse_snv': {
'data_type': 'Masked Somatic Mutation',
'analysis.workflow_type': 'MuSE Variant Aggregation and Masking'
'analysis.workflow_type':
'MuSE Variant Aggregation and Masking'
},
'mutect2_snv': {
'data_type': 'Masked Somatic Mutation',
'analysis.workflow_type': 'MuTect2 Variant Aggregation and Masking'
'analysis.workflow_type':
'MuTect2 Variant Aggregation and Masking'
},
'somaticsniper_snv': {
'data_type': 'Masked Somatic Mutation',
'analysis.workflow_type': 'SomaticSniper Variant Aggregation and Masking'
'analysis.workflow_type':
'SomaticSniper Variant Aggregation and Masking'
},
'varscan2_snv': {
'data_type': 'Masked Somatic Mutation',
'analysis.workflow_type': 'VarScan2 Variant Aggregation and Masking'
'analysis.workflow_type':
'VarScan2 Variant Aggregation and Masking'
},
'methylation27': {'data_type': 'Methylation Beta Value',
'platform': 'Illumina Human Methylation 27'},
Expand Down Expand Up @@ -870,10 +880,7 @@ def gdc_release(self):
anchor = re.match(
r'(Data Release [^\s]+)\s', data_release
).group(1).replace(' ', '-').replace('.', '').lower()
self.__gdc_release = (
'https://docs.gdc.cancer.gov/Data/Release_Notes/Data_Release_Notes/#'
+ anchor
)
self.__gdc_release = GDC_RELEASE_URL + '#' + anchor
return self.__gdc_release

@gdc_release.setter
Expand Down Expand Up @@ -937,7 +944,10 @@ def download_map(self):
str(self.projects)))
return None
file_dict = {
'{}/data/{}'.format(gdc.GDC_API_BASE, uuid): os.path.join(self.raw_data_dir, name)
'{}/data/{}'.format(
gdc.GDC_API_BASE,
uuid
): os.path.join(self.raw_data_dir, name)
for uuid, name in file_dict.items()
}
self._download_map = file_dict
Expand Down Expand Up @@ -1048,7 +1058,8 @@ class GDCPhenoset(XenaDataset):
a string or a list of strings. It can be automatically derived
from ``projects`` and ``xena_dtype`` if it is not assigned
explicitly by the user when being used. Please check `GDC API
documentation <https://docs.gdc.cancer.gov/API/Users_Guide/Search_and_Retrieval/#filters-specifying-the-query>`_
documentation
<https://docs.gdc.cancer.gov/API/Users_Guide/Search_and_Retrieval/#filters-specifying-the-query>`_
for details.
download_map (dict): A dict with the key being a URL for one raw data
to be downloaded and the value being a path for saving downloaded
Expand Down Expand Up @@ -1199,10 +1210,7 @@ def gdc_release(self):
anchor = re.match(
r'(Data Release [^\s]+)\s', data_release
).group(1).replace(' ', '-').replace('.', '').lower()
self.__gdc_release = (
'https://docs.gdc.cancer.gov/Data/Release_Notes/Data_Release_Notes/#'
+ anchor
)
self.__gdc_release = GDC_RELEASE_URL + '#' + anchor
return self.__gdc_release

@gdc_release.setter
Expand Down Expand Up @@ -1252,7 +1260,10 @@ def download_map(self):
str(self.projects)))
return None
file_dict = {
'{}/data/{}'.format(gdc.GDC_API_BASE, uuid): os.path.join(self.raw_data_dir, name)
'{}/data/{}'.format(
gdc.GDC_API_BASE,
uuid
): os.path.join(self.raw_data_dir, name)
for uuid, name in file_dict.items()
}
self._download_map = file_dict
Expand Down Expand Up @@ -1499,10 +1510,7 @@ def gdc_release(self):
anchor = re.match(
r'(Data Release [^\s]+)\s', data_release
).group(1).replace(' ', '-').replace('.', '').lower()
self.__gdc_release = (
'https://docs.gdc.cancer.gov/Data/Release_Notes/Data_Release_Notes/#'
+ anchor
)
self.__gdc_release = GDC_RELEASE_URL + '#' + anchor
return self.__gdc_release

@gdc_release.setter
Expand Down