forked from chrisleaman/py-wave-runup
-
Notifications
You must be signed in to change notification settings - Fork 0
/
datasets.py
153 lines (125 loc) · 5.37 KB
/
datasets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
"""
This module is an interface for loading existing wave runup datasets. These datasets
can be used to evaluate existing models or train new models. Datasets are returned as
:obj:`pandas.DataFrame`.
"""
import io
import pkgutil
import random
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
from sklearn.preprocessing import minmax_scale
from py_wave_runup import models
from py_wave_runup.utils import PerlinNoise
def load_power18():
"""
Loads wave runup data included with Power et al (2018)
This function loads the supplementary data from:
Power, H.E., Gharabaghi, B., Bonakdari, H., Robertson, B., Atkinson, A.L.,
Baldock, T.E., 2018. Prediction of wave runup on beaches using
Gene-Expression Programming and empirical relationships. Coastal Engineering.
https://doi.org/10.1016/j.coastaleng.2018.10.006
Examples:
>>> from py_wave_runup import datasets
>>> df = datasets.load_power18()
>>> df.describe()
hs tp beta roughness r2
count 1390.000000 1390.000000 1390.000000 1390.000000 1390.000000
mean 1.893131 9.227035 0.120612 0.024779 2.318814
std 1.309243 3.589004 0.062236 0.043617 1.776918
min 0.018576 0.805805 0.009000 0.000003 0.027336
25% 0.895942 7.517556 0.088228 0.001000 1.103500
50% 1.848050 9.963089 0.108422 0.003750 1.923500
75% 2.391756 10.995500 0.129220 0.007500 3.406660
max 7.174100 23.680333 0.286551 0.125000 12.669592
"""
# To load a resource in a package, need to use pkgutil.
# Refer https://stackoverflow.com/a/6028106
data = pkgutil.get_data(__name__, "datasets/power18.csv")
# Manually define names for each column
names = [
"dataset",
"beach",
"case",
"lab_field",
"hs",
"tp",
"beta",
"d50",
"roughness",
"r2",
]
# Need to use the io package to read in the .csv
# Refer to https://stackoverflow.com/a/20697069
df = pd.read_csv(io.BytesIO(data), encoding="utf8", names=names, skiprows=1)
return df
def load_random_sto06(
seed=12345,
t_start=datetime(2000, 1, 1),
t_end=datetime(2000, 1, 2),
dt=timedelta(hours=1),
hs_range=(1, 3),
tp_range=(6, 10),
beta_range=(0.08, 0.1),
noise_std=0.3,
):
"""
Loads a randomly generated wave runup dataframe.
This function returns a randomly generated :obj:`pandas.DataFrame` containing wave
parameters and noisey runup values calculated using the Stockdon et al (2006) runup
model. This random data is intended to be used to demonstrate runup analysis
without having to use actual data
Args:
seed (:obj:`int`): Seed the random number generator
t_start (:obj:`datetime.datetime`): Start time of the dataframe
t_end (:obj:`datetime.datetime`): End time of the dataframe
dt (:obj:`datetime.timedelta`): Time interval of the dataframe
hs_range (:obj:`tuple`): Range (`min`, `max`) of the significant wave height
tp_range (:obj:`tuple`): Range (`min`, `max`) of the peak wave period
beta_range (:obj:`tuple`): Range (`min`, `max`) of the nearshore slope
noise_std (:obj:`float`): Standard deviation (in meters) of the wave runup
statistics.
Returns:
A :obj:`pandas.DataFrame`
Examples:
Get
>>> from py_wave_runup import datasets
>>> df = datasets.load_random_sto06()
>>> df.head()
hs tp beta ... swash sig sinc
2000-01-01 00:00:00 3.000000 6.000000 0.098110 ... 1.171474 0.717714 0.894084
2000-01-01 01:00:00 2.850109 6.128432 0.099052 ... 1.378190 0.919351 1.104072
2000-01-01 02:00:00 2.865462 6.461471 0.099766 ... 1.154970 0.664189 0.866796
2000-01-01 03:00:00 2.942888 6.750824 0.100000 ... 1.223142 0.701520 0.918580
2000-01-01 04:00:00 2.906808 6.937742 0.099069 ... 2.001251 1.476527 1.687905
<BLANKLINE>
[5 rows x 8 columns]
"""
# Create the time index for our dataframe
index = pd.date_range(start=t_start, end=t_end, freq=dt)
df = pd.DataFrame(index=index)
# Create some random noise
random.seed(seed)
np.random.seed(seed)
pn = PerlinNoise(num_octaves=7, persistence=0.5)
# Generate input parameters. Note that we offset the noise value by the length of
# the index to avoid having the same timeseries shape for each parameter.
l = len(index)
hs = minmax_scale([pn.noise(i) for i, _ in enumerate(index)], hs_range)
tp = minmax_scale([pn.noise(i + l) for i, _ in enumerate(index)], tp_range)
beta = minmax_scale([pn.noise(i + 2 * l) for i, _ in enumerate(index)], beta_range)
# Generate wave parameters based on Stockdon
model = models.Stockdon2006(Hs=hs, Tp=tp, beta=beta)
# Generate additional noise
noise = np.random.normal(0, noise_std, l)
# Combine into a dataframe
df["hs"] = hs
df["tp"] = tp
df["beta"] = beta
df["r2"] = model.R2 + noise
df["setup"] = model.setup + noise
df["swash"] = model.swash + noise
df["sig"] = model.sig + noise
df["sinc"] = model.sinc + noise
return df