Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pandas 2.1.4 from_contents and from_membership have different Index types #233

Closed
detrout opened this issue Dec 20, 2023 · 1 comment · Fixed by #238
Closed

pandas 2.1.4 from_contents and from_membership have different Index types #233

detrout opened this issue Dec 20, 2023 · 1 comment · Fixed by #238

Comments

@detrout
Copy link

detrout commented Dec 20, 2023

Hello,

while testing against pandas 2.1.4

the test_from_contents_vs_memberships tests fail in assert_frame_equal because the DataFrame constructed by from_memberships ends up with RangeIndex(start=0, stop=0, step=1) as its columns index, while the result of drop([id_column], axis=1) has Index([], dtype='object') as its columns index.

More of the output of the test failure.

../../../upsetplot/tests/test_data.py::test_from_contents_vs_memberships[id-set-None] FAILED                                     [  4%]
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> traceback >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

data = None, typ = <class 'set'>, id_column = 'id'

    @pytest.mark.parametrize('data', [None,
                                      {'attr1': [3, 4, 5, 6, 7, 8],
                                       'attr2': list('qrstuv')}])
    @pytest.mark.parametrize('typ', [set, list, tuple, iter])
    @pytest.mark.parametrize('id_column', ['id', 'blah'])
    def test_from_contents_vs_memberships(data, typ, id_column):
        contents = OrderedDict([('cat1', typ(['aa', 'bb', 'cc'])),
                                ('cat2', typ(['cc', 'dd'])),
                                ('cat3', typ(['ee']))])
        # Note that ff is not present in contents
        data_df = pd.DataFrame(data,
                               index=['aa', 'bb', 'cc', 'dd', 'ee', 'ff'])
        baseline = from_contents(contents, data=data_df,
                                 id_column=id_column)
        # compare from_contents to from_memberships
        expected = from_memberships(memberships=[{'cat1'},
                                                 {'cat1'},
                                                 {'cat1', 'cat2'},
                                                 {'cat2'},
                                                 {'cat3'},
                                                 []],
                                    data=data_df)
        assert_series_equal(baseline[id_column].reset_index(drop=True),
                            pd.Series(['aa', 'bb', 'cc', 'dd', 'ee', 'ff'],
                                      name=id_column))
>       assert_frame_equal(baseline.drop([id_column], axis=1), expected, check_column_type='equiv')

baseline   =                    id
cat1  cat2  cat3     
True  False False  aa
            False  bb
      True  False  cc
False True  False  dd
      False True   ee
            False  ff
contents   = OrderedDict([('cat1', {'bb', 'cc', 'aa'}),
             ('cat2', {'dd', 'cc'}),
             ('cat3', {'ee'})])
data       = None
data_df    = Empty DataFrame
Columns: []
Index: [aa, bb, cc, dd, ee, ff]
expected   = Empty DataFrame
Columns: []
Index: [(True, False, False), (True, False, False), (True, True, False), (False, True, False), (False, False, True), (False, False, False)]
id_column  = 'id'
typ        = <class 'set'>

../../../upsetplot/tests/test_data.py:114: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

left = Index([], dtype='object'), right = RangeIndex(start=0, stop=0, step=1), obj = 'DataFrame.columns'

    def _check_types(left, right, obj: str = "Index") -> None:
        if not exact:
            return
    
        assert_class_equal(left, right, exact=exact, obj=obj)
>       assert_attr_equal("inferred_type", left, right, obj=obj)
E       AssertionError: DataFrame.columns are different
E       
E       Attribute "inferred_type" are different
E       [left]:  empty
E       [right]: integer

check_categorical = True
exact      = 'equiv'
left       = Index([], dtype='object')
obj        = 'DataFrame.columns'
right      = RangeIndex(start=0, stop=0, step=1)

/usr/lib/python3/dist-packages/pandas/_testing/asserters.py:236: AssertionError
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> entering PDB >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

To get upsetplot fully compatible with pandas 2.1 I had to make two sets of changes: one was changing the test imports from pandas.util.testing to pandas.testing, and the other, to fix the above problem, was to skip checking the column type with assert_frame_equal(..., check_column_type=False).

There might be a way to get the column index types to match, but I wasn't able to figure it out in a few minutes.

--- a/upsetplot/tests/test_data.py
+++ b/upsetplot/tests/test_data.py
@@ -3,8 +3,8 @@
 import pandas as pd
 import numpy as np
 from packaging.version import parse
-from pandas.util.testing import (assert_series_equal, assert_frame_equal,
-                                 assert_index_equal)
+from pandas.testing import (assert_series_equal, assert_frame_equal,
+                            assert_index_equal)
 from upsetplot import (from_memberships, from_contents, from_indicators,
                        generate_data)
 
@@ -111,7 +111,7 @@
     assert_series_equal(baseline[id_column].reset_index(drop=True),
                         pd.Series(['aa', 'bb', 'cc', 'dd', 'ee', 'ff'],
                                   name=id_column))
-    assert_frame_equal(baseline.drop([id_column], axis=1), expected)
+    assert_frame_equal(baseline.drop([id_column], axis=1), expected, check_column_type=False)
 
 
 def test_from_contents(typ=set, id_column='id'):
--- a/upsetplot/tests/test_reformat.py
+++ b/upsetplot/tests/test_reformat.py
@@ -1,6 +1,6 @@
 import pytest
 import pandas as pd
-from pandas.util.testing import assert_series_equal, assert_frame_equal
+from pandas.testing import assert_series_equal, assert_frame_equal
 
 from upsetplot import generate_counts, generate_samples
 from upsetplot import query
--- a/upsetplot/tests/test_upsetplot.py
+++ b/upsetplot/tests/test_upsetplot.py
@@ -2,7 +2,7 @@
 import itertools
 
 import pytest
-from pandas.util.testing import (
+from pandas.testing import (
     assert_series_equal, assert_frame_equal, assert_index_equal)
 from numpy.testing import assert_array_equal
 import pandas as pd
@jnothman
Copy link
Owner

Thanks for the report

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
2 participants