Skip to content

Commit

Permalink
REF: remove result_index attribute from describe_agg (#626)
Browse files Browse the repository at this point in the history
* remove result_index

* intensity tests
  • Loading branch information
u3ks committed Jun 20, 2024
1 parent 81ba419 commit c3c5d5e
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 67 deletions.
23 changes: 3 additions & 20 deletions momepy/functional/_diversity.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ def _percentile_limited_group_grouper(y, group_index, q=(25, 75)):
def describe_agg(
y: NDArray[np.float64] | Series,
aggregation_key: NDArray[np.float64] | Series,
result_index: pd.Index | None = None,
q: tuple[float, float] | list[float] | None = None,
statistics: list[str] | None = None,
) -> DataFrame:
Expand All @@ -96,8 +95,6 @@ def describe_agg(
Notes
-----
The index of ``y`` must match the index along which the ``graph`` is
built.
The numba package is used extensively in this function to accelerate the computation
of statistics. Without numba, these computations may become slow on large data.
Expand All @@ -109,10 +106,6 @@ def describe_agg(
aggregation_key : Series | numpy.ndarray
The group ID of each ``y`` object. Objects that share
the same ID are aggregated into one group.
result_index : pd.Index (default None)
An index that specifies how to order the results.
Use to align the results from the grouping to an external index.
If ``None`` the index from the computations is used.
q : tuple[float, float] | list[float] | None, optional
Tuple of percentages for the percentiles to compute. Values must be between 0
and 100 inclusive. When set, values below and above the percentiles will be
Expand Down Expand Up @@ -188,21 +181,11 @@ def describe_agg(

stats = _compute_stats(grouper, to_compute=statistics)

if result_index is None:
result_index = stats.index

# post processing to have the same behaviour as describe_reached_agg
result = pd.DataFrame(
np.full((result_index.shape[0], stats.shape[1]), np.nan), index=result_index
)
result.loc[stats.index.values] = stats.values
result.columns = stats.columns
# fill only counts with zeros, other stats are NA
if "count" in result.columns:
result.loc[:, "count"] = result.loc[:, "count"].fillna(0)
result.index.names = result_index.names
if "count" in stats.columns:
stats.loc[:, "count"] = stats.loc[:, "count"].fillna(0)

return result
return stats


def describe_reached_agg(
Expand Down
57 changes: 23 additions & 34 deletions momepy/functional/tests/test_diversity.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,14 +412,9 @@ def test_describe_agg(self):
df = mm.describe_agg(
self.df_buildings["area"],
self.df_buildings["nID"],
self.df_streets.index,
)

df_noindex = mm.describe_agg(
self.df_buildings["area"],
self.df_buildings["nID"],
)

result_index = self.df_buildings["nID"].value_counts().sort_index()
# std is not tested because the two implementations differ:
# OO momepy uses ddof=0, while functional momepy uses ddof=1
expected_area_sum = {
Expand All @@ -435,17 +430,14 @@ def test_describe_agg(self):
"mean": 746.7028417890866,
}
expected_area_count = {
"min": 0,
"min": 1,
"max": 18,
"count": 35,
"mean": 4.114285714285714,
"count": 22,
"mean": 6.545454545454546,
}
assert_result(df["count"], expected_area_count, self.df_streets)
assert_result(df["sum"], expected_area_sum, self.df_streets)
assert_result(df["mean"], expected_area_mean, self.df_streets)

assert df_noindex.shape[0] == 22
assert_frame_equal(df_noindex, df[df["sum"].notna()], check_names=False)
assert_result(df["count"], expected_area_count, result_index, check_names=False)
assert_result(df["sum"], expected_area_sum, result_index, check_names=False)
assert_result(df["mean"], expected_area_mean, result_index, check_names=False)

filtered_counts = mm.describe_agg(
self.df_buildings["area"],
Expand All @@ -459,12 +451,16 @@ def test_describe_agg(self):
"count": 22,
"mean": 4.727272,
}
assert_result(filtered_counts, expected_filtered_area_count, df_noindex)
assert_result(
filtered_counts,
expected_filtered_area_count,
result_index,
check_names=False,
)

df = mm.describe_agg(
self.df_buildings["fl_area"].values,
self.df_buildings["nID"],
self.df_streets.index,
)

expected_fl_area_sum = {
Expand All @@ -479,15 +475,10 @@ def test_describe_agg(self):
"count": 22,
"mean": 3995.8307750062318,
}
expected_fl_area_count = {
"min": 0,
"max": 18,
"count": 35,
"mean": 4.114285714285714,
}
assert_result(df["count"], expected_fl_area_count, self.df_streets)
assert_result(df["sum"], expected_fl_area_sum, self.df_streets)
assert_result(df["mean"], expected_fl_area_mean, self.df_streets)

assert_result(df["count"], expected_area_count, result_index)
assert_result(df["sum"], expected_fl_area_sum, result_index)
assert_result(df["mean"], expected_fl_area_mean, result_index)

@pytest.mark.skipif(
not PD_210, reason="aggregation is different in previous pandas versions"
Expand All @@ -496,7 +487,6 @@ def test_describe_cols(self):
df = mm.describe_agg(
self.df_buildings["area"],
self.df_buildings["nID"],
self.df_streets.index,
statistics=["min", "max"],
)
assert list(df.columns) == ["min", "max"]
Expand Down Expand Up @@ -538,13 +528,12 @@ def test_describe_reached_agg(self):
)
def test_describe_reached_input_equality(self):
island_result_df = mm.describe_agg(
self.df_buildings["area"], self.df_buildings["nID"], self.df_streets.index
self.df_buildings["area"], self.df_buildings["nID"]
)

island_result_ndarray = mm.describe_agg(
self.df_buildings["area"].values,
self.df_buildings["nID"].values,
self.df_streets.index,
)

assert np.allclose(
Expand Down Expand Up @@ -574,11 +563,10 @@ def test_na_results(self):
pandas_agg_vals = mm.describe_agg(
nan_areas,
self.df_buildings["nID"],
self.df_streets.index,
)

numba_agg_vals = mm.describe_agg(
nan_areas, self.df_buildings["nID"], self.df_streets.index, q=(0, 100)
nan_areas, self.df_buildings["nID"], q=(0, 100)
)

assert_frame_equal(pandas_agg_vals, numba_agg_vals)
Expand Down Expand Up @@ -849,24 +837,25 @@ def _distance_decay_weights(group):
not PD_210, reason="aggregation is different in previous pandas versions"
)
def test_describe_reached_equality(self):
new_df = mm.describe_agg(
self.df_buildings["area"], self.df_buildings["nID"], self.df_streets.index
)
new_df = mm.describe_agg(self.df_buildings["area"], self.df_buildings["nID"])

new_count = new_df["count"]
old_count = mm.Reached(self.df_streets, self.df_buildings, "nID", "nID").series
old_count = old_count[old_count > 0]
assert_series_equal(new_count, old_count, check_names=False, check_dtype=False)

new_area = new_df["sum"]
old_area = mm.Reached(
self.df_streets, self.df_buildings, "nID", "nID", mode="sum"
).series
old_area = old_area[old_area.notna()]
assert_series_equal(new_area, old_area, check_names=False, check_dtype=False)

new_area_mean = new_df["mean"]
old_area_mean = mm.Reached(
self.df_streets, self.df_buildings, "nID", "nID", mode="mean"
).series
old_area_mean = old_area_mean[old_area_mean.notna()]
assert_series_equal(
new_area_mean, old_area_mean, check_names=False, check_dtype=False
)
Expand Down
64 changes: 51 additions & 13 deletions momepy/functional/tests/test_intensity.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,8 @@ def test_node_density(self):
not PD_210, reason="aggregation is different in previous pandas versions"
)
def test_area_ratio(self):
## change to describe_agg when merged

def area_ratio(overlay, covering, agg_key):
res = mm.describe_agg(covering, agg_key, overlay.index)
res = mm.describe_agg(covering, agg_key)
return res["sum"] / overlay

car_block = area_ratio(
Expand All @@ -103,7 +101,9 @@ def area_ratio(overlay, covering, agg_key):
"count": 8,
}

assert_result(car_block, car_block_expected, self.blocks)
assert_result(
car_block, car_block_expected, self.blocks, exact=False, check_names=False
)

car = area_ratio(
self.df_tessellation.geometry.area,
Expand All @@ -122,8 +122,16 @@ def area_ratio(overlay, covering, agg_key):
"min": 0.029097983413141276,
"count": 144,
}
assert_result(car, car_expected, self.df_tessellation)
assert_result(car2, car_expected, self.df_tessellation.set_index("uID"))
assert_result(
car, car_expected, self.df_tessellation, exact=False, check_names=False
)
assert_result(
car2,
car_expected,
self.df_tessellation.set_index("uID"),
exact=False,
check_names=False,
)

car_sel = area_ratio(
self.df_tessellation.iloc[10:20]["area"],
Expand All @@ -136,7 +144,13 @@ def area_ratio(overlay, covering, agg_key):
"min": 0.22057633949526625,
"count": 10,
}
assert_result(car_sel, car_sel_expected, self.df_tessellation.iloc[10:20])
assert_result(
car_sel,
car_sel_expected,
self.df_tessellation.iloc[10:20],
exact=False,
check_names=False,
)

far = area_ratio(
self.df_tessellation.geometry.area,
Expand All @@ -149,7 +163,9 @@ def area_ratio(overlay, covering, agg_key):
"min": 0.26188185071827147,
"count": 144,
}
assert_result(far, far_expected, self.df_tessellation)
assert_result(
far, far_expected, self.df_tessellation, exact=False, check_names=False
)


class TestIntensityEquality:
Expand Down Expand Up @@ -189,7 +205,7 @@ def test_courtyards(self):
)
def test_area_ratio(self):
def area_ratio(overlay, covering, agg_key):
res = mm.describe_agg(covering, agg_key, overlay.index)
res = mm.describe_agg(covering, agg_key)
return res["sum"] / overlay

self.blocks["area"] = self.blocks.geometry.area
Expand All @@ -202,7 +218,11 @@ def area_ratio(overlay, covering, agg_key):
self.blocks, self.df_buildings, "area", "area", "bID"
).series
assert_series_equal(
car_block_new, car_block_old, check_dtype=False, check_names=False
car_block_new,
car_block_old,
check_dtype=False,
check_names=False,
check_index_type=False,
)

car_new = area_ratio(
Expand All @@ -218,7 +238,13 @@ def area_ratio(overlay, covering, agg_key):
car_old = mm.AreaRatio(
self.df_tessellation, self.df_buildings, "area", "area", "uID"
).series
assert_series_equal(car_new, car_old, check_dtype=False, check_names=False)
assert_series_equal(
car_new,
car_old,
check_dtype=False,
check_names=False,
check_index_type=False,
)
assert_series_equal(
car_old,
car2_new.reset_index(drop=True),
Expand All @@ -236,7 +262,13 @@ def area_ratio(overlay, covering, agg_key):
self.df_tessellation.iloc[10:20]["uID"] - 1,
)

assert_series_equal(car_sel_new, car_sel, check_dtype=False, check_names=False)
assert_series_equal(
car_sel_new,
car_sel,
check_dtype=False,
check_index_type=False,
check_names=False,
)

far_new = area_ratio(
self.df_tessellation.geometry.area,
Expand All @@ -252,7 +284,13 @@ def area_ratio(overlay, covering, agg_key):
"uID",
).series

assert_series_equal(far_new, far_old, check_dtype=False, check_names=False)
assert_series_equal(
far_new,
far_old,
check_index_type=False,
check_dtype=False,
check_names=False,
)

def test_density(self):
sw = mm.sw_high(k=3, gdf=self.df_tessellation, ids="uID")
Expand Down

0 comments on commit c3c5d5e

Please sign in to comment.