REF: remove result_index attribute from describe_agg (#626)

* remove result_index
* intensity tests
pysal · Jun 20, 2024 · c3c5d5e · c3c5d5e
1 parent 81ba419
commit c3c5d5e
Show file tree

Hide file tree

Showing 3 changed files with 77 additions and 67 deletions.
diff --git a/momepy/functional/_diversity.py b/momepy/functional/_diversity.py
@@ -82,7 +82,6 @@ def _percentile_limited_group_grouper(y, group_index, q=(25, 75)):
 def describe_agg(
  y: NDArray[np.float64] | Series,
  aggregation_key: NDArray[np.float64] | Series,
- result_index: pd.Index | None = None,
  q: tuple[float, float] | list[float] | None = None,
  statistics: list[str] | None = None,
 ) -> DataFrame:
@@ -96,8 +95,6 @@ def describe_agg(
 
  Notes
  -----
- The index of ``y`` must match the index along which the ``graph`` is
- built.
 
  The numba package is used extensively in this function to accelerate the computation
  of statistics. Without numba, these computations may become slow on large data.
@@ -109,10 +106,6 @@ def describe_agg(
  aggregation_key : Series | numpy.array
  The unique ID that specifies the aggregation
  of ``y`` objects to groups.
- result_index : pd.Index (default None)
- An index that specifies how to order the results.
- Use to align the results from the grouping to an external index.
- If ``None`` the index from the computations is used.
  q : tuple[float, float] | None, optional
  Tuple of percentages for the percentiles to compute. Values must be between 0
  and 100 inclusive. When set, values below and above the percentiles will be
@@ -188,21 +181,11 @@ def describe_agg(
 
  stats = _compute_stats(grouper, to_compute=statistics)
 
- if result_index is None:
- result_index = stats.index
-
- # post processing to have the same behaviour as describe_reached_agg
- result = pd.DataFrame(
- np.full((result_index.shape[0], stats.shape[1]), np.nan), index=result_index
- )
- result.loc[stats.index.values] = stats.values
- result.columns = stats.columns
  # fill only counts with zeros, other stats are NA
- if "count" in result.columns:
- result.loc[:, "count"] = result.loc[:, "count"].fillna(0)
- result.index.names = result_index.names
+ if "count" in stats.columns:
+ stats.loc[:, "count"] = stats.loc[:, "count"].fillna(0)
 
- return result
+ return stats
 
 
 def describe_reached_agg(

diff --git a/momepy/functional/tests/test_diversity.py b/momepy/functional/tests/test_diversity.py
@@ -412,14 +412,9 @@ def test_describe_agg(self):
  df = mm.describe_agg(
  self.df_buildings["area"],
  self.df_buildings["nID"],
- self.df_streets.index,
- )
-
- df_noindex = mm.describe_agg(
- self.df_buildings["area"],
- self.df_buildings["nID"],
  )
 
+ result_index = self.df_buildings["nID"].value_counts().sort_index()
  # not testing std, there are different implementations:
  # OO momepy uses ddof=0, functional momepy - ddof=1
  expected_area_sum = {
@@ -435,17 +430,14 @@ def test_describe_agg(self):
  "mean": 746.7028417890866,
  }
  expected_area_count = {
- "min": 0,
+ "min": 1,
  "max": 18,
- "count": 35,
- "mean": 4.114285714285714,
+ "count": 22,
+ "mean": 6.545454545454546,
  }
- assert_result(df["count"], expected_area_count, self.df_streets)
- assert_result(df["sum"], expected_area_sum, self.df_streets)
- assert_result(df["mean"], expected_area_mean, self.df_streets)
-
- assert df_noindex.shape[0] == 22
- assert_frame_equal(df_noindex, df[df["sum"].notna()], check_names=False)
+ assert_result(df["count"], expected_area_count, result_index, check_names=False)
+ assert_result(df["sum"], expected_area_sum, result_index, check_names=False)
+ assert_result(df["mean"], expected_area_mean, result_index, check_names=False)
 
  filtered_counts = mm.describe_agg(
  self.df_buildings["area"],
@@ -459,12 +451,16 @@ def test_describe_agg(self):
  "count": 22,
  "mean": 4.727272,
  }
- assert_result(filtered_counts, expected_filtered_area_count, df_noindex)
+ assert_result(
+ filtered_counts,
+ expected_filtered_area_count,
+ result_index,
+ check_names=False,
+ )
 
  df = mm.describe_agg(
  self.df_buildings["fl_area"].values,
  self.df_buildings["nID"],
- self.df_streets.index,
  )
 
  expected_fl_area_sum = {
@@ -479,15 +475,10 @@ def test_describe_agg(self):
  "count": 22,
  "mean": 3995.8307750062318,
  }
- expected_fl_area_count = {
- "min": 0,
- "max": 18,
- "count": 35,
- "mean": 4.114285714285714,
- }
- assert_result(df["count"], expected_fl_area_count, self.df_streets)
- assert_result(df["sum"], expected_fl_area_sum, self.df_streets)
- assert_result(df["mean"], expected_fl_area_mean, self.df_streets)
+
+ assert_result(df["count"], expected_area_count, result_index)
+ assert_result(df["sum"], expected_fl_area_sum, result_index)
+ assert_result(df["mean"], expected_fl_area_mean, result_index)
 
  @pytest.mark.skipif(
  not PD_210, reason="aggregation is different in previous pandas versions"
@@ -496,7 +487,6 @@ def test_describe_cols(self):
  df = mm.describe_agg(
  self.df_buildings["area"],
  self.df_buildings["nID"],
- self.df_streets.index,
  statistics=["min", "max"],
  )
  assert list(df.columns) == ["min", "max"]
@@ -538,13 +528,12 @@ def test_describe_reached_agg(self):
  )
  def test_describe_reached_input_equality(self):
  island_result_df = mm.describe_agg(
- self.df_buildings["area"], self.df_buildings["nID"], self.df_streets.index
+ self.df_buildings["area"], self.df_buildings["nID"]
  )
 
  island_result_ndarray = mm.describe_agg(
  self.df_buildings["area"].values,
  self.df_buildings["nID"].values,
- self.df_streets.index,
  )
 
  assert np.allclose(
@@ -574,11 +563,10 @@ def test_na_results(self):
  pandas_agg_vals = mm.describe_agg(
  nan_areas,
  self.df_buildings["nID"],
- self.df_streets.index,
  )
 
  numba_agg_vals = mm.describe_agg(
- nan_areas, self.df_buildings["nID"], self.df_streets.index, q=(0, 100)
+ nan_areas, self.df_buildings["nID"], q=(0, 100)
  )
 
  assert_frame_equal(pandas_agg_vals, numba_agg_vals)
@@ -849,24 +837,25 @@ def _distance_decay_weights(group):
  not PD_210, reason="aggregation is different in previous pandas versions"
  )
  def test_describe_reached_equality(self):
- new_df = mm.describe_agg(
- self.df_buildings["area"], self.df_buildings["nID"], self.df_streets.index
- )
+ new_df = mm.describe_agg(self.df_buildings["area"], self.df_buildings["nID"])
 
  new_count = new_df["count"]
  old_count = mm.Reached(self.df_streets, self.df_buildings, "nID", "nID").series
+ old_count = old_count[old_count > 0]
  assert_series_equal(new_count, old_count, check_names=False, check_dtype=False)
 
  new_area = new_df["sum"]
  old_area = mm.Reached(
  self.df_streets, self.df_buildings, "nID", "nID", mode="sum"
  ).series
+ old_area = old_area[old_area.notna()]
  assert_series_equal(new_area, old_area, check_names=False, check_dtype=False)
 
  new_area_mean = new_df["mean"]
  old_area_mean = mm.Reached(
  self.df_streets, self.df_buildings, "nID", "nID", mode="mean"
  ).series
+ old_area_mean = old_area_mean[old_area_mean.notna()]
  assert_series_equal(
  new_area_mean, old_area_mean, check_names=False, check_dtype=False
  )

diff --git a/momepy/functional/tests/test_intensity.py b/momepy/functional/tests/test_intensity.py
@@ -85,10 +85,8 @@ def test_node_density(self):
  not PD_210, reason="aggregation is different in previous pandas versions"
  )
  def test_area_ratio(self):
- ## change to describe_agg when merged
-
  def area_ratio(overlay, covering, agg_key):
- res = mm.describe_agg(covering, agg_key, overlay.index)
+ res = mm.describe_agg(covering, agg_key)
  return res["sum"] / overlay
 
  car_block = area_ratio(
@@ -103,7 +101,9 @@ def area_ratio(overlay, covering, agg_key):
  "count": 8,
  }
 
- assert_result(car_block, car_block_expected, self.blocks)
+ assert_result(
+ car_block, car_block_expected, self.blocks, exact=False, check_names=False
+ )
 
  car = area_ratio(
  self.df_tessellation.geometry.area,
@@ -122,8 +122,16 @@ def area_ratio(overlay, covering, agg_key):
  "min": 0.029097983413141276,
  "count": 144,
  }
- assert_result(car, car_expected, self.df_tessellation)
- assert_result(car2, car_expected, self.df_tessellation.set_index("uID"))
+ assert_result(
+ car, car_expected, self.df_tessellation, exact=False, check_names=False
+ )
+ assert_result(
+ car2,
+ car_expected,
+ self.df_tessellation.set_index("uID"),
+ exact=False,
+ check_names=False,
+ )
 
  car_sel = area_ratio(
  self.df_tessellation.iloc[10:20]["area"],
@@ -136,7 +144,13 @@ def area_ratio(overlay, covering, agg_key):
  "min": 0.22057633949526625,
  "count": 10,
  }
- assert_result(car_sel, car_sel_expected, self.df_tessellation.iloc[10:20])
+ assert_result(
+ car_sel,
+ car_sel_expected,
+ self.df_tessellation.iloc[10:20],
+ exact=False,
+ check_names=False,
+ )
 
  far = area_ratio(
  self.df_tessellation.geometry.area,
@@ -149,7 +163,9 @@ def area_ratio(overlay, covering, agg_key):
  "min": 0.26188185071827147,
  "count": 144,
  }
- assert_result(far, far_expected, self.df_tessellation)
+ assert_result(
+ far, far_expected, self.df_tessellation, exact=False, check_names=False
+ )
 
 
 class TestIntensityEquality:
@@ -189,7 +205,7 @@ def test_courtyards(self):
  )
  def test_area_ratio(self):
  def area_ratio(overlay, covering, agg_key):
- res = mm.describe_agg(covering, agg_key, overlay.index)
+ res = mm.describe_agg(covering, agg_key)
  return res["sum"] / overlay
 
  self.blocks["area"] = self.blocks.geometry.area
@@ -202,7 +218,11 @@ def area_ratio(overlay, covering, agg_key):
  self.blocks, self.df_buildings, "area", "area", "bID"
  ).series
  assert_series_equal(
- car_block_new, car_block_old, check_dtype=False, check_names=False
+ car_block_new,
+ car_block_old,
+ check_dtype=False,
+ check_names=False,
+ check_index_type=False,
  )
 
  car_new = area_ratio(
@@ -218,7 +238,13 @@ def area_ratio(overlay, covering, agg_key):
  car_old = mm.AreaRatio(
  self.df_tessellation, self.df_buildings, "area", "area", "uID"
  ).series
- assert_series_equal(car_new, car_old, check_dtype=False, check_names=False)
+ assert_series_equal(
+ car_new,
+ car_old,
+ check_dtype=False,
+ check_names=False,
+ check_index_type=False,
+ )
  assert_series_equal(
  car_old,
  car2_new.reset_index(drop=True),
@@ -236,7 +262,13 @@ def area_ratio(overlay, covering, agg_key):
  self.df_tessellation.iloc[10:20]["uID"] - 1,
  )
 
- assert_series_equal(car_sel_new, car_sel, check_dtype=False, check_names=False)
+ assert_series_equal(
+ car_sel_new,
+ car_sel,
+ check_dtype=False,
+ check_index_type=False,
+ check_names=False,
+ )
 
  far_new = area_ratio(
  self.df_tessellation.geometry.area,
@@ -252,7 +284,13 @@ def area_ratio(overlay, covering, agg_key):
  "uID",
  ).series
 
- assert_series_equal(far_new, far_old, check_dtype=False, check_names=False)
+ assert_series_equal(
+ far_new,
+ far_old,
+ check_index_type=False,
+ check_dtype=False,
+ check_names=False,
+ )
 
  def test_density(self):
  sw = mm.sw_high(k=3, gdf=self.df_tessellation, ids="uID")