try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO
import os

import pandas as pd
import pytest

from xena_gdc_etl import gdc
from tests.utils import compare_dict


def test_simple_and_filter():
    in_dict_1 = {}
    exclude_dict_1 = {}
    output_1 = gdc.simple_and_filter(in_dict_1, exclude_dict_1)
    assert output_1 == in_dict_1
    in_dict_2 = {'a': 'b'}
    exclude_dict_2 = {'c': 'd'}
    expected = {
        "content": [
            {"content": {"field": "a", "value": ["b"]}, "op": "in"},
            {"content": {"field": "c", "value": ["d"]}, "op": "exclude"},
        ],
        "op": "and",
    }
    actual = gdc.simple_and_filter(in_dict_2, exclude_dict_2)
    compare_dict(expected, actual)


def test_reduce_json_array():
    input_1 = [{'a': 'hello', 'b': [1, 2, 3], 'c': [10]}]
    input_2 = [{'a': 'b'}]
    actual_1 = gdc.reduce_json_array(input_1)
    expected_1 = {"a": "hello", "b": [1, 2, 3], "c": 10}
    assert actual_1 == expected_1
    actual_2 = gdc.reduce_json_array(input_2)
    expected_2 = {'a': 'b'}
    compare_dict(actual_2, expected_2)


def test_get_ext():
    input_1 = "txt.vcf.xls"
    actual_1 = gdc.get_ext(input_1)
    expected_1 = "txt.vcf.xls"
    assert actual_1 == expected_1
    input_2 = "abc.xyz.pqr"
    expected_2 = "pqr"
    actual_2 = gdc.get_ext(input_2)
    assert actual_2 == expected_2
    input_3 = "name.txt.vcf.xls"
    actual_3 = gdc.get_ext(input_3)
    expected_3 = "txt.vcf.xls"
    assert actual_3 == expected_3


@pytest.mark.CI
def test_download():
    uuid = "53a637ce-8aaf-4cec-b02d-89202bbb0890"
    gdc.download(uuid, download_dir="./tests")
    file_path = "./tests/" + uuid + ".svs"
    assert os.path.isfile(file_path) is True
    os.unlink(file_path)


@pytest.mark.CI
def test_get_project_info():
    project_name = "TCGA-THCA"
    assert 'TCGA-BRCA' in gdc.get_project_info().index
    assert gdc.get_project_info(['TCGA-BRCA']).index.tolist() == ['TCGA-BRCA']
    actual = gdc.get_project_info([project_name]).head()
    expected = {
        "id": ["TCGA-OV", "Ovarian", "Serous"],
        "name": "Cystadenocarcinoma",
        "primary_site": "Ovary",
        "program.name": "TCGA",
        "project_id": "TCGA-OV",
    }
    actual.equals(expected)


@pytest.mark.CI
def test_get_samples_clinical():
    project_id = "TCGA-OV"
    actual = gdc.get_samples_clinical(project_id)
    assert "2038fd65-d8f1-4b16-af90-b1c8f9a379a7" == actual['case_id'][0]


@pytest.mark.CI
def test_search():
    endpoint = "cases"
    in_filter = {"project.project_id": "TARGET-CCSK"}
    fields = ["submitter_id"]
    actual = gdc.search(endpoint=endpoint, in_filter=in_filter, fields=fields)
    expected = {
        "id": "d1a15919-f5e2-5e60-aed9-cb52a8b4a7a1",
        "target": "TARGET-51-PAKWMM",
    }
    actual.equals(expected)
    with pytest.raises(ValueError) as exception_info:
        gdc.search(
            endpoint=endpoint, in_filter=in_filter, fields=fields, method="PUT"
        )
    error_str = 'Invalid method: PUT\n method must be either "GET" or "POST".'
    assert exception_info.value.args[0] == error_str


@pytest.mark.CI
def test_gdc_check_new(capfd):
    url = "https://docs.gdc.cancer.gov/Data/Release_Notes/DR9.0_files_swap.txt.gz"  # noqa
    new_file_uuids = pd.read_csv(url, sep='\t')['New File UUID'].tolist()
    gdc.gdc_check_new(new_file_uuids)
    out, err = capfd.readouterr()
    actual = pd.read_csv(StringIO(out), sep='\t')
    expected = pd.read_csv(
        "tests/fixtures/gdc_check_new_DR9.0_files_swap.csv", sep='\t'
    )
    expected = expected.head()
    actual.equals(expected)