diff --git a/CHANGES.rst b/CHANGES.rst index fb862bdac..f96097e31 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -10,6 +10,8 @@ Unreleased - change chunking in energy_ratio_by_chunks to use all data points - fix warning for spkt_welch_density - adapt default settings for "value_count" and "range_count" +- added + - maxlag parameter to agg_autocorrelation function Version 0.11.1 ============== diff --git a/docs/api/tests.integrations.rst b/docs/api/tests.integrations.rst deleted file mode 100644 index 810ed13ee..000000000 --- a/docs/api/tests.integrations.rst +++ /dev/null @@ -1,38 +0,0 @@ -tests.integrations package -========================== - -Submodules ----------- - -tests.integrations.test_full_pipeline module --------------------------------------------- - -.. automodule:: tests.integrations.test_full_pipeline - :members: - :undoc-members: - :show-inheritance: - -tests.integrations.test_notebooks module ----------------------------------------- - -.. automodule:: tests.integrations.test_notebooks - :members: - :undoc-members: - :show-inheritance: - -tests.integrations.test_relevant_feature_extraction module ----------------------------------------------------------- - -.. automodule:: tests.integrations.test_relevant_feature_extraction - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: tests.integrations - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/api/tests.rst b/docs/api/tests.rst deleted file mode 100644 index 3d50aacfa..000000000 --- a/docs/api/tests.rst +++ /dev/null @@ -1,22 +0,0 @@ -tests package -============= - -Submodules ----------- - -tests.fixtures module ---------------------- - -.. automodule:: tests.fixtures - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. 
automodule:: tests - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/api/tests.units.rst b/docs/api/tests.units.rst deleted file mode 100644 index 6646a7930..000000000 --- a/docs/api/tests.units.rst +++ /dev/null @@ -1,22 +0,0 @@ -tests.units package -=================== - -Submodules ----------- - -tests.units.test_feature_significance module --------------------------------------------- - -.. automodule:: tests.units.test_feature_significance - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: tests.units - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/conf.py b/docs/conf.py index 49c01b50b..18e70b18b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -87,7 +87,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] +exclude_patterns = ['_build', 'api/tests*'] # The reST default role (used for this markup: `text`) to use for all documents. 
# default_role = None diff --git a/rdocs-requirements.txt b/rdocs-requirements.txt index f8badc78c..d8ddfc460 100644 --- a/rdocs-requirements.txt +++ b/rdocs-requirements.txt @@ -1,3 +1,3 @@ -Sphinx>=1.6.4 +Sphinx==1.6.4 sphinx_rtd_theme>=0.2.4 -r requirements.txt diff --git a/tests/units/feature_extraction/test_feature_calculations.py b/tests/units/feature_extraction/test_feature_calculations.py index ad23fddaf..1aff7f197 100644 --- a/tests/units/feature_extraction/test_feature_calculations.py +++ b/tests/units/feature_extraction/test_feature_calculations.py @@ -129,36 +129,51 @@ def test_sum(self): self.assertEqualOnAllArrayTypes(sum_values, [-1.2, -2, -3, -4], -10.2) self.assertEqualOnAllArrayTypes(sum_values, [], 0) - def test_agg_autocorrelation(self): + def test_agg_autocorrelation_returns_correct_values(self): - param = [{"f_agg": "mean"}] + param = [{"f_agg": "mean", "maxlag": 10}] x = [1, 1, 1, 1, 1, 1, 1] expected_res = 0 - res = dict(agg_autocorrelation(x, param=param))["f_agg_\"mean\""] + res = dict(agg_autocorrelation(x, param=param))["f_agg_\"mean\"__maxlag_10"] self.assertAlmostEqual(res, expected_res, places=4) x = [1, 2, -3] expected_res = 1 / np.var(x) * (((1 * 2 + 2 * (-3)) / 2 + (1 * -3)) / 2) - res = dict(agg_autocorrelation(x, param=param))["f_agg_\"mean\""] + res = dict(agg_autocorrelation(x, param=param))["f_agg_\"mean\"__maxlag_10"] self.assertAlmostEqual(res, expected_res, places=4) np.random.seed(42) x = np.random.normal(size=3000) expected_res = 0 - res = dict(agg_autocorrelation(x, param=param))["f_agg_\"mean\""] + res = dict(agg_autocorrelation(x, param=param))["f_agg_\"mean\"__maxlag_10"] self.assertAlmostEqual(res, expected_res, places=2) - param=[{"f_agg": "median"}] + param = [{"f_agg": "median", "maxlag": 10}] x = [1, 1, 1, 1, 1, 1, 1] expected_res = 0 - res = dict(agg_autocorrelation(x, param=param))["f_agg_\"median\""] + res = dict(agg_autocorrelation(x, param=param))["f_agg_\"median\"__maxlag_10"] self.assertAlmostEqual(res, 
expected_res, places=4) x = [1, 2, -3] expected_res = 1 / np.var(x) * (((1 * 2 + 2 * (-3)) / 2 + (1 * -3)) / 2) - res = dict(agg_autocorrelation(x, param=param))["f_agg_\"median\""] + res = dict(agg_autocorrelation(x, param=param))["f_agg_\"median\"__maxlag_10"] self.assertAlmostEqual(res, expected_res, places=4) + def test_agg_autocorrelation_returns_max_lag_does_not_affect_other_results(self): + + param = [{"f_agg": "mean", "maxlag": 1}, + {"f_agg": "mean", "maxlag": 10}] + x = range(10) + res1 = dict(agg_autocorrelation(x, param=param))["f_agg_\"mean\"__maxlag_1"] + res10 = dict(agg_autocorrelation(x, param=param))["f_agg_\"mean\"__maxlag_10"] + self.assertAlmostEqual(res1, 0.77777777, places=4) + self.assertAlmostEqual(res10, -0.64983164983165, places=4) + + param = [{"f_agg": "mean", "maxlag": 1}] + x = range(10) + res1 = dict(agg_autocorrelation(x, param=param))["f_agg_\"mean\"__maxlag_1"] + self.assertAlmostEqual(res1, 0.77777777, places=4) + def test_partial_autocorrelation(self): # Test for altering time series diff --git a/tsfresh/feature_extraction/feature_calculators.py b/tsfresh/feature_extraction/feature_calculators.py index a35d13fdc..feefc95e7 100644 --- a/tsfresh/feature_extraction/feature_calculators.py +++ b/tsfresh/feature_extraction/feature_calculators.py @@ -276,33 +276,48 @@ def sum_values(x): @set_property("fctype", "combiner") def agg_autocorrelation(x, param): - """ - Calculates the value of an aggregation function f_agg (e.g. var or mean) of the autocorrelation - (Compare to http://en.wikipedia.org/wiki/Autocorrelation#Estimation), taken over different all possible lags - (1 to length of x) + r""" + Calculates the value of an aggregation function :math:`f_{agg}` (e.g. the variance or the mean) over the + autocorrelation :math:`R(l)` for different lags. The autocorrelation :math:`R(l)` for lag :math:`l` is defined as .. 
math:: - \\frac{1}{n-1} \\sum_{l=1,\ldots, n} \\frac{1}{(n-l)\sigma^{2}} \\sum_{t=1}^{n-l}(X_{t}-\\mu )(X_{t+l}-\\mu) + R(l) = \frac{1}{(n-l)\sigma^{2}} \sum_{t=1}^{n-l}(X_{t}-\mu )(X_{t+l}-\mu) - where :math:`n` is the length of the time series :math:`X_i`, :math:`\sigma^2` its variance and :math:`\mu` its - mean. + where :math:`X_i` are the values of the time series, :math:`n` its length. Finally, :math:`\sigma^2` and + :math:`\mu` are estimators for its variance and mean + (See `Estimation of the Autocorrelation function <http://en.wikipedia.org/wiki/Autocorrelation#Estimation>`_). + + The :math:`R(l)` for different lags :math:`l` form a vector. This feature calculator applies the aggregation + function :math:`f_{agg}` to this vector and returns + + .. math:: + + f_{agg} \left( R(1), \ldots, R(m)\right) \quad \text{for} \quad m = \min(n - 1, maxlag). + + Here :math:`maxlag` is the second parameter passed to this function. :param x: the time series to calculate the feature of :type x: pandas.Series - :param param: contains dictionaries {"attr": x} with x str, name of a numpy function (e.g. mean, var, std, median), - the name of the aggregator function that is applied to the autocorrelations + :param param: contains dictionaries {"f_agg": x, "maxlag": n} with x str, the name of a numpy function + (e.g. mean, var, std, median), i.e. the name of the aggregator function that is applied to the + autocorrelations. Further, n is an int and the maximal number of lags to consider. 
:type param: list :return: the value of this feature :return type: float """ + # if the time series is longer than the following threshold, we use fft to calculate the acf + THRESHOLD_TO_USE_FFT = 1250 var = np.var(x) n = len(x) + max_maxlag = max([config["maxlag"] for config in param]) + if np.abs(var) < 10**-10 or n == 1: - a = 0 + a = [0] * len(x) else: - a = acf(x, unbiased=True, fft=n > 1250)[1:] - return [("f_agg_\"{}\"".format(config["f_agg"]), getattr(np, config["f_agg"])(a)) for config in param] + a = acf(x, unbiased=True, fft=n > THRESHOLD_TO_USE_FFT, nlags=max_maxlag)[1:] + return [("f_agg_\"{}\"__maxlag_{}".format(config["f_agg"], config["maxlag"]), + getattr(np, config["f_agg"])(a[:int(config["maxlag"])])) for config in param] @set_property("fctype", "combiner") diff --git a/tsfresh/feature_extraction/settings.py b/tsfresh/feature_extraction/settings.py index be5d23784..cef449a25 100644 --- a/tsfresh/feature_extraction/settings.py +++ b/tsfresh/feature_extraction/settings.py @@ -114,7 +114,7 @@ def __init__(self): "large_standard_deviation": [{"r": r * 0.05} for r in range(1, 20)], "quantile": [{"q": q} for q in [.1, .2, .3, .4, .6, .7, .8, .9]], "autocorrelation": [{"lag": lag} for lag in range(10)], - "agg_autocorrelation": [{"f_agg": s} for s in ["mean", "median", "var"]], + "agg_autocorrelation": [{"f_agg": s, "maxlag": 40} for s in ["mean", "median", "var"]], "partial_autocorrelation": [{"lag": lag} for lag in range(10)], "number_cwt_peaks": [{"n": n} for n in [1, 5]], "number_peaks": [{"n": n} for n in [1, 3, 5, 10, 50]],