Skip to content

Commit

Permalink
3_15_work1_V0.1
Browse files Browse the repository at this point in the history
  • Loading branch information
Yuhan-xue committed Mar 14, 2023
1 parent d5bd849 commit bf0d1f6
Show file tree
Hide file tree
Showing 5 changed files with 640 additions and 0 deletions.
330 changes: 330 additions & 0 deletions WORK1/WORK1.ipynb

Large diffs are not rendered by default.

68 changes: 68 additions & 0 deletions WORK1/XM.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
sta. XM
Lat:24deg. 27min. N
Lon:118deg. 40min. E
units: mm
time: 1997-08-01~1997-08-31 (GMT)
missvalue: 9999
1997 08 01 3940 4710 5350 5680 5280 4240 3130 2210 1540 1250 1390 2070
1997 08 01 2850 3770 4780 5690 5990 5450 4360 3410 2770 2290 2130 2490
1997 08 02 3270 4070 4880 5530 5730 5270 4220 2990 2080 1500 1270 1580
1997 08 02 2450 3190 4100 5200 6030 6130 5360 4200 3210 2620 2320 2290
1997 08 03 2810 3480 4260 5080 5640 5590 4790 3530 2410 1690 1230 1160
1997 08 03 1630 2470 3370 4370 5380 5960 5720 4700 3570 2740 2200 2000
1997 08 04 2260 2950 3800 4680 5440 5780 5410 4300 3040 2040 1380 1040
1997 08 04 1210 1970 2900 3850 4900 5800 6000 5260 4080 3030 2320 1910
1997 08 05 1860 2370 3200 4090 4980 5620 5720 5070 3840 2650 1790 1280
1997 08 05 1150 1550 2420 3370 4420 5450 6060 5860 4860 3630 2740 2140
1997 08 06 1770 1960 2700 3650 4630 5450 5920 5650 4580 3280 2300 1610
1997 08 06 1200 1280 1970 2960 3970 4990 5760 5980 5330 4160 3060 2320
1997 08 07 1860 1730 2200 3080 4030 4960 5640 5820 5170 3980 2820 2030
1997 08 07 1510 1230 1600 2510 3450 4430 5350 5860 5610 4660 3500 2580
1997 08 08 1970 1610 1770 2470 3310 4150 4990 5580 5470 4540 3230 2260
1997 08 08 1650 1270 1320 1910 2700 3560 4460 5230 5510 5020 3950 2900
1997 08 09 2200 1790 1540 1780 2420 3160 3990 4730 5200 4950 4020 2890
1997 08 09 2160 1700 1480 1700 2380 3160 3930 4740 5350 5310 4550 3480
1997 08 10 2600 2030 1640 1570 2030 2790 3580 4340 4970 5090 4600 3640
1997 08 10 1800 2120 1820 1770 2110 2790 3610 4440 5100 5360 5030 4140
1997 08 11 3180 2460 1890 1650 1800 2330 3040 3790 4520 5040 5020 4430
1997 08 11 3560 2850 2360 2080 2100 2540 3210 3910 4620 5100 5200 4720
1997 08 12 3930 3150 2510 2000 1770 1950 2480 3180 3920 4630 5080 5040
1997 08 12 4500 3780 3140 2660 2370 2390 2800 3450 4150 4760 5180 5230
1997 08 13 4760 3980 3190 2490 1970 1740 1880 2400 3070 3750 4470 5010
1997 08 13 5100 4680 3970 3320 2820 2490 2430 2750 3360 4010 4640 5090
1997 08 14 5230 4870 4070 3260 2500 1960 1620 1680 2220 2900 3620 4410
1997 08 14 5100 5370 5060 4370 3620 3000 2560 2360 2590 3140 3820 4530
1997 08 15 5130 5380 5120 4350 3390 2520 1840 1410 1410 1940 2720 3540
1997 08 15 4470 5350 5810 5560 4720 3750 3050 2530 2250 2420 3030 3840
1997 08 16 4710 5440 5820 5570 4690 3570 2570 1750 1200 1200 1910 2900
1997 08 16 3910 5020 6030 6490 6070 5000 3920 3150 2550 2210 2420 3280
1997 08 17 4350 3320 6130 6510 6100 4980 3690 2600 1710 1110 1180 2130
1997 08 17 3330 4480 5680 6780 7100 6350 5030 3930 3110 2460 2130 2650
1997 08 18 3820 5060 6160 7020 7170 6410 5030 3680 2550 1550 1000 1250
1997 08 18 2410 3630 4690 5780 6680 6660 5620 4110 2980 2210 1690 1640
1997 08 19 2400 3380 4250 5190 6100 6450 5750 4180 2750 1720 920 520
1997 08 19 1000 2000 2990 4240 5720 6830 6860 5630 4020 2900 2130 1520
1997 08 20 1470 2320 3440 4530 5680 6620 6690 5670 4100 2730 1680 890
1997 08 20 620 1280 2420 3540 4810 6180 7010 6630 5130 3620 2670 1900
1997 08 21 1280 1430 2540 3650 4710 5840 6700 6620 5380 3850 2560 1610
1997 08 21 950 810 1570 2670 3810 5110 6400 6970 6250 4720 3320 2370
1997 08 22 1590 1130 1550 5700 3870 5040 6200 6820 6410 4980 3540 2490
1997 08 22 1680 1040 1150 2120 3300 4370 5470 6430 6530 5540 4070 2850
1997 08 23 2010 1340 1050 1660 2730 3700 4680 5690 6220 5800 4490 3230
1997 08 23 2400 1780 1370 1610 2490 3490 4470 5430 6120 6010 5000 3630
1997 08 24 2630 1930 1340 1190 1820 2770 3710 4700 5590 5970 5480 4350
1997 08 24 3300 2580 2050 1730 2000 2820 3760 4660 5480 5910 5680 4680
1997 08 25 3520 2660 1980 1480 1400 2000 2860 3710 4590 5390 5710 5280
1997 08 25 4350 3430 2830 2360 2120 2360 3040 3880 4690 5340 5640 5340
1997 08 26 4470 3480 2690 4570 1630 9999 2100 2840 3620 4390 5090 5460
1997 08 26 5210 4480 3690 3090 2680 2450 2580 3080 3790 4520 5080 5390
1997 08 27 5190 4530 3670 2880 2310 1880 1800 2160 2840 3580 4320 5020
1997 08 27 5450 5360 4760 4050 3430 2960 2680 2690 3070 3710 4430 5030
1997 08 28 5370 5300 4780 3990 3170 2510 2070 1840 2090 2770 3570 4350
1997 08 28 5040 5520 5620 5190 4470 3770 3280 3000 2890 3160 3850 4730
1997 08 29 5460 5860 5840 5330 9999 3590 2790 2200 1890 1980 2600 3470
1997 08 29 4250 4960 5590 5750 5310 4410 3550 2890 2380 2260 2690 3360
1997 08 30 4080 4880 5610 5890 5360 4140 3010 2330 1860 1420 1680 2440
1997 08 30 3260 4160 4960 5550 5560 4930 3960 3090 2550 2180 2150 2620
1997 08 31 3390 4160 4890 5380 5340 4680 3580 2520 1730 1290 1250 1720
1997 08 31 2530 3430 4430 5350 5840 5530 4580 3480 2700 2140 1840 2060
Binary file added WORK1/__pycache__/grubbs.cpython-39.pyc
Binary file not shown.
242 changes: 242 additions & 0 deletions WORK1/grubbs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
# -*- coding: utf-8 -*-
"""
Hanxue Yu 2023/2/14
The outlier_utils package could not be installed in my Python environment, so the module's source code was downloaded and used directly.
Module source: https://github.com/c-bata/outlier-utils
"""

"""
Smirnov-Grubbs test for outlier detection.
"""

import numpy as np
from scipy import stats
from math import sqrt
from collections import defaultdict

try:
import pandas as pd
except ImportError:
pd = None

# Names exported by ``from grubbs import *``: the convenience functions, the
# three concrete test classes and the OutputType selector.
__all__ = ['test',
'two_sided_test',
'two_sided_test_indices',
'two_sided_test_outliers',
'min_test',
'min_test_indices',
'min_test_outliers',
'max_test',
'max_test_indices',
'max_test_outliers',
'TwoSidedGrubbsTest',
'MinValueGrubbsTest',
'MaxValueGrubbsTest',
'OutputType']


# Default ``alpha`` used by every public entry point in this module.  The value
# is divided by n (one-sided) or 2n (two-sided) and passed to
# ``stats.t.isf`` as an upper-tail probability, i.e. it acts as the
# significance level of the test.
# NOTE(review): 0.95 is a very permissive significance level (0.05 is the
# conventional choice) -- confirm this default is intended before relying on it.
DEFAULT_ALPHA = 0.95


# Test output types
class OutputType:
    """Selector for what ``GrubbsTest.run`` hands back to the caller."""

    # DATA     -> the data set with the outliers removed
    # OUTLIERS -> the outlier values themselves
    # INDICES  -> the indices of the outliers in the original data set
    DATA, OUTLIERS, INDICES = range(3)


class GrubbsTest(object):
    """Base class for the iterative Smirnov-Grubbs outlier test.

    Subclasses provide ``_target`` (which observation is tested and how far
    it lies from the sample mean) and ``_get_t_significance_level`` (the
    upper-tail probability used for the critical value).  ``run`` removes one
    outlier per iteration until the test no longer rejects.
    """

    def __init__(self, data):
        """Remember the data set to test.

        :param data: ``numpy.ndarray``, ``pandas.Series`` or ``list``; any
            other type is rejected with ``TypeError`` when the test is run.
        """
        self.original_data = data

    def _copy_data(self):
        """Return a working copy of the original data.

        Lists are converted to ``numpy.ndarray``.  Arrays and Series are
        copied so that the object returned by ``run`` never aliases the
        caller's input.

        :raises TypeError: if the original data has an unsupported type
        """
        source = self.original_data
        if isinstance(source, np.ndarray):
            return source.copy()
        if pd is not None and isinstance(source, pd.Series):
            return source.copy()
        if isinstance(source, list):
            return np.array(source)
        raise TypeError('Unsupported data format')

    def _get_item(self, data, index):
        """Return the element at *position* ``index`` of ``data``.

        ``argmax``/``argmin`` report positions, while ``Series[int]`` is a
        label lookup; ``iloc`` keeps both in agreement once rows have been
        removed and the labels are no longer contiguous.
        """
        if pd is not None and isinstance(data, pd.Series):
            return data.iloc[index]
        return data[index]

    def _delete_item(self, data, index):
        """Return ``data`` without the element at *position* ``index``.

        :raises TypeError: if ``data`` is neither a Series nor an ndarray
        """
        if pd is not None and isinstance(data, pd.Series):
            # Drop by position (not by label) so that custom or already
            # thinned-out indexes are handled correctly.
            keep = np.ones(len(data), dtype=bool)
            keep[index] = False
            return data.iloc[keep]
        elif isinstance(data, np.ndarray):
            return np.delete(data, index)
        else:
            raise TypeError('Unsupported data format')

    def _get_indices(self, values):
        """Map outlier values back to their positions in the original data.

        Repeated values are resolved to successive occurrences: the search
        for a value resumes right after the position found for its previous
        occurrence.

        :param values: outlier values, in the order they were removed
        :return: list of positions in the original data set
        """
        next_start = defaultdict(int)
        data = list(self.original_data)
        indices = []
        for value in values:
            index = data.index(value, next_start[value])
            indices.append(index)
            next_start[value] = index + 1
        return indices

    def _get_g_test(self, data, alpha):
        """Compute a significant value score following these steps, being alpha
        the requested significance level:
        1. Find the upper critical value of the t-distribution with n-2
           degrees of freedom and a significance level of alpha/2n
           (for two-sided tests) or alpha/n (for one-sided tests).
        2. Use this t value to find the score with the following formula:
           ((n-1) / sqrt(n)) * (sqrt(t**2 / (n-2 + t**2)))
        :param numpy.array data: data set
        :param float alpha: significance level
        :return: G_test score
        """
        n = len(data)
        significance_level = self._get_t_significance_level(alpha, n)
        t = stats.t.isf(significance_level, n-2)
        return ((n-1) / sqrt(n)) * (sqrt(t**2 / (n-2 + t**2)))

    def _test_once(self, data, alpha):
        """Perform one iteration of the Smirnov-Grubbs test.
        :param numpy.array data: data set
        :param float alpha: significance level
        :return: the index of the outlier if one is found; None otherwise
        """
        # The critical value uses n-2 degrees of freedom, so the test is
        # undefined for fewer than three observations (this also covers
        # empty input, on which argmax/argmin would raise).
        if len(data) < 3:
            return None

        target_index, value = self._target(data)

        # Grubbs' statistic is defined with the *sample* standard deviation.
        # numpy defaults to ddof=0 and pandas to ddof=1, so be explicit.
        std = data.std(ddof=1)
        if not std > 0:
            # Zero (or NaN) spread: nothing can be declared an outlier.
            return None

        g = value / std
        g_test = self._get_g_test(data, alpha)
        return target_index if g > g_test else None

    def run(self, alpha=DEFAULT_ALPHA, output_type=OutputType.DATA):
        """Run the Smirnov-Grubbs test to remove outliers in the given data set.
        :param float alpha: significance level
        :param int output_type: test output type (from OutputType class values)
        :return: depending on the value of output_type, the data set without
        outliers (DATA), the outliers themselves (OUTLIERS) or the indices of
        the outliers in the original data set (INDICES)
        :raises TypeError: if the data set has an unsupported type
        """
        data = self._copy_data()
        outliers = []

        while True:
            outlier_index = self._test_once(data, alpha)
            if outlier_index is None:
                break
            outliers.append(self._get_item(data, outlier_index))
            data = self._delete_item(data, outlier_index)

        if output_type == OutputType.OUTLIERS:
            return outliers
        if output_type == OutputType.INDICES:
            return self._get_indices(outliers)
        return data

    def _target(self, data):
        """Return ``(index, distance_to_mean)`` of the observation to test.

        Must be implemented by subclasses.
        """
        raise NotImplementedError

    def _get_t_significance_level(self, alpha, n):
        """Return the upper-tail probability for the t critical value.

        Must be implemented by subclasses.

        :param float alpha: significance level requested by the caller
        :param int n: current sample size
        """
        raise NotImplementedError


class TwoSidedGrubbsTest(GrubbsTest):
    """Grubbs test against the observation farthest from the sample mean."""

    def _target(self, data):
        """Compute the index of the farthest value from the sample mean and its
        distance.
        :param numpy.array data: data set
        :return int, float: the index of the element and its distance to the
        mean
        """
        relative_values = abs(data - data.mean())
        index = relative_values.argmax()
        # ``argmax`` reports a position.  Read the distance positionally as
        # well: on a pandas Series, ``relative_values[index]`` would be a
        # label lookup and pick the wrong row (or raise KeyError) once
        # earlier outliers have been removed.
        value = np.asarray(relative_values)[index]
        return index, value

    def _get_t_significance_level(self, alpha, n):
        """Return alpha / (2n), the per-tail probability of the two-sided test.

        :param float alpha: significance level
        :param int n: current sample size
        """
        return alpha / (2*n)


class OneSidedGrubbsTest(GrubbsTest):
    """Grubbs test against a single extreme (minimum or maximum) value.

    Subclasses choose the extreme by implementing ``_get_index``.
    """

    def _get_index(self, data):
        """Return the index of the observation to test.

        Must be implemented by subclasses.
        """
        raise NotImplementedError

    def _target(self, data):
        """Compute the index of the min/max value and its distance from the
        sample mean.
        :param numpy.array data: data set
        :return int, float: the index of the min/max value and its distance to
        the mean
        """
        index = self._get_index(data)
        # ``_get_index`` yields a position (argmin/argmax).  Read the value
        # positionally too: ``data[index]`` on a pandas Series is a label
        # lookup and goes wrong once earlier outliers have been removed.
        value = np.asarray(data)[index]
        return index, abs(value - data.mean())

    def _get_t_significance_level(self, alpha, n):
        """Return alpha / n, the tail probability of the one-sided test.

        :param float alpha: significance level
        :param int n: current sample size
        """
        return alpha / n


class MinValueGrubbsTest(OneSidedGrubbsTest):
    """One-sided Grubbs test that targets the smallest observation."""

    def _get_index(self, data):
        """Return the index reported by ``data.argmin()``."""
        smallest_at = data.argmin()
        return smallest_at


class MaxValueGrubbsTest(OneSidedGrubbsTest):
    """One-sided Grubbs test that targets the largest observation."""

    def _get_index(self, data):
        """Return the index reported by ``data.argmax()``."""
        largest_at = data.argmax()
        return largest_at


# Convenience functions to run single Grubbs tests

def _test(test_class, data, alpha, output_type):
return test_class(data).run(alpha, output_type=output_type)


def _two_sided_test(data, alpha, output_type):
    """Run the two-sided Grubbs test on ``data`` with the given output type."""
    grubbs_class = TwoSidedGrubbsTest
    return _test(grubbs_class, data, alpha, output_type)


def _min_test(data, alpha, output_type):
    """Run the minimum-value Grubbs test on ``data`` with the given output type."""
    grubbs_class = MinValueGrubbsTest
    return _test(grubbs_class, data, alpha, output_type)


def _max_test(data, alpha, output_type):
    """Run the maximum-value Grubbs test on ``data`` with the given output type."""
    grubbs_class = MaxValueGrubbsTest
    return _test(grubbs_class, data, alpha, output_type)


def two_sided_test(data, alpha=DEFAULT_ALPHA):
    """Two-sided Grubbs test; return ``data`` with the outliers removed."""
    cleaned = _two_sided_test(data, alpha, OutputType.DATA)
    return cleaned


def two_sided_test_indices(data, alpha=DEFAULT_ALPHA):
    """Two-sided Grubbs test; return the indices of the outliers in ``data``."""
    positions = _two_sided_test(data, alpha, OutputType.INDICES)
    return positions


def two_sided_test_outliers(data, alpha=DEFAULT_ALPHA):
    """Two-sided Grubbs test; return the outlier values found in ``data``."""
    rejected = _two_sided_test(data, alpha, OutputType.OUTLIERS)
    return rejected


def min_test(data, alpha=DEFAULT_ALPHA):
    """Minimum-value Grubbs test; return ``data`` with the outliers removed."""
    cleaned = _min_test(data, alpha, OutputType.DATA)
    return cleaned


def min_test_indices(data, alpha=DEFAULT_ALPHA):
    """Minimum-value Grubbs test; return the indices of the outliers in ``data``."""
    positions = _min_test(data, alpha, OutputType.INDICES)
    return positions


def min_test_outliers(data, alpha=DEFAULT_ALPHA):
    """Minimum-value Grubbs test; return the outlier values found in ``data``."""
    rejected = _min_test(data, alpha, OutputType.OUTLIERS)
    return rejected


def max_test(data, alpha=DEFAULT_ALPHA):
    """Maximum-value Grubbs test; return ``data`` with the outliers removed."""
    cleaned = _max_test(data, alpha, OutputType.DATA)
    return cleaned


def max_test_indices(data, alpha=DEFAULT_ALPHA):
    """Maximum-value Grubbs test; return the indices of the outliers in ``data``."""
    positions = _max_test(data, alpha, OutputType.INDICES)
    return positions


def max_test_outliers(data, alpha=DEFAULT_ALPHA):
    """Maximum-value Grubbs test; return the outlier values found in ``data``."""
    rejected = _max_test(data, alpha, OutputType.OUTLIERS)
    return rejected


def test(data, alpha=DEFAULT_ALPHA):
    """Default entry point: delegate to ``two_sided_test`` and return its result."""
    cleaned = two_sided_test(data, alpha)
    return cleaned
Binary file added WORK1/编程作业1.pptx
Binary file not shown.

0 comments on commit bf0d1f6

Please sign in to comment.