Tutorial release, new features, and bug fix #75

Merged (23 commits, Apr 28, 2023)

Changes from 1 commit

Commits
5325d25
fix: add missing https:// in the issue-template config file;
WenjieDu Apr 22, 2023
44335be
Add unit-test cases for `pypots-cli` (#72)
WenjieDu Apr 23, 2023
d84e595
fix: only report coverage if file .coverage exists;
WenjieDu Apr 23, 2023
a54cea3
Merge branch 'main' into dev
WenjieDu Apr 23, 2023
07128b4
fix: remove cli-testing case of show-coverage to avoid mis-calculation;
WenjieDu Apr 23, 2023
dd6b793
fix: must not delete .coverage file after testing;
WenjieDu Apr 23, 2023
568b3c5
Fix bugs in the code-coverage report (#73)
WenjieDu Apr 23, 2023
f330f85
feat: default disabling early-stopping mechanism during model training;
WenjieDu Apr 24, 2023
c27f22c
fix: return correct val_X and test_X in gene_physionet2012() when art…
WenjieDu Apr 24, 2023
ea04dd6
feat: add pypots.random.set_random_seed();
WenjieDu Apr 24, 2023
0787260
feat: enable `return_labels` in Dataset classes;
WenjieDu Apr 24, 2023
895f9bc
refactor: remove autoflake that is not quite useful;
WenjieDu Apr 25, 2023
504bdd0
feat: enable automatically saving model into file if necessary;
WenjieDu Apr 25, 2023
e2485de
fix: remove typing.Literal which is not supported in python 3.7;
WenjieDu Apr 25, 2023
922bbfb
fix: the disordered labels in the returned data;
WenjieDu Apr 25, 2023
c7b6e26
fix: mistaken logical code in auto_save_model_if_necessary;
WenjieDu Apr 25, 2023
ea560d4
Add devcontainer config (#76)
WenjieDu Apr 27, 2023
4df32de
fix: set return_labels=False for training Dataset for CRLI and VaDER;
WenjieDu Apr 27, 2023
baab39e
feat: add git stale config file;
WenjieDu Apr 27, 2023
cce28bd
doc: remove tutorials dir, will create a new repo to put all tutorials;
WenjieDu Apr 27, 2023
4b25fb6
fix: remove tutorials from checking;
WenjieDu Apr 27, 2023
1f42c77
feat: add jupyterlab as a dev dependency, update README;
WenjieDu Apr 27, 2023
39b2bbe
doc: update README to add the link of BrewedPOTS;
WenjieDu Apr 28, 2023
feat: default disabling early-stopping mechanism during model training;
WenjieDu committed Apr 24, 2023
commit f330f8566061e5b7df3c3b85d267be4082e451ac
8 changes: 8 additions & 0 deletions pypots/base.py
@@ -203,6 +203,7 @@ class BaseNNModel(BaseModel):
     patience : int,
         Number of epochs the training procedure will keep if loss doesn't decrease.
         Once exceeding the number, the training will stop.
+        Must be smaller than or equal to the value of `epochs`.

     learning_rate : float,
         The learning rate of the optimizer.
@@ -252,6 +253,13 @@ def __init__(
     ):
         super().__init__(device, tb_file_saving_path)

+        if patience is None:
+            patience = -1  # early stopping on patience won't work if it is set as < 0
+        else:
+            assert (
+                patience <= epochs
+            ), f"patience must be smaller than epochs which is {epochs}, but got patience={patience}"
+
         # training hyper-parameters
         self.batch_size = batch_size
         self.epochs = epochs
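To make the new behavior concrete, here is a minimal standalone sketch that mirrors the patience handling added above; resolve_patience is a hypothetical name used only for illustration and is not part of pypots.

# Minimal sketch mirroring the new patience logic in BaseNNModel.__init__;
# resolve_patience is a hypothetical helper, not a pypots function.
def resolve_patience(patience, epochs):
    if patience is None:
        return -1  # a negative patience disables early stopping entirely
    # opting in: patience must not exceed the number of training epochs
    assert patience <= epochs, (
        f"patience must be smaller than epochs which is {epochs}, "
        f"but got patience={patience}"
    )
    return patience

print(resolve_patience(None, 100))  # -1: train the full 100 epochs
print(resolve_patience(10, 100))    # 10: stop after 10 epochs without improvement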
2 changes: 1 addition & 1 deletion pypots/classification/brits.py
@@ -153,7 +153,7 @@ def __init__(
         reconstruction_weight: float = 1,
         batch_size: int = 32,
         epochs: int = 100,
-        patience: int = 10,
+        patience: int = None,
         learning_rate: float = 1e-3,
         weight_decay: float = 1e-5,
         num_workers: int = 0,
2 changes: 1 addition & 1 deletion pypots/classification/grud.py
@@ -135,7 +135,7 @@ def __init__(
         n_classes: int,
         batch_size: int = 32,
         epochs: int = 100,
-        patience: int = 10,
+        patience: int = None,
         learning_rate: float = 1e-3,
         weight_decay: float = 1e-5,
         num_workers: int = 0,
2 changes: 1 addition & 1 deletion pypots/classification/raindrop.py
@@ -637,7 +637,7 @@ def __init__(
         static,
         batch_size=32,
         epochs=100,
-        patience=10,
+        patience: int = None,
         learning_rate=1e-3,
         weight_decay=1e-5,
         num_workers: int = 0,
2 changes: 1 addition & 1 deletion pypots/clustering/crli.py
@@ -342,7 +342,7 @@ def __init__(
         D_steps: int = 1,
         batch_size: int = 32,
         epochs: int = 100,
-        patience: int = 10,
+        patience: int = None,
         learning_rate: float = 1e-3,
         weight_decay: float = 1e-5,
         num_workers: int = 0,
2 changes: 1 addition & 1 deletion pypots/clustering/vader.py
@@ -387,7 +387,7 @@ def __init__(
         batch_size: int = 32,
         epochs: int = 100,
         pretrain_epochs: int = 10,
-        patience: int = 10,
+        patience: int = None,
         learning_rate: float = 1e-3,
         weight_decay: float = 1e-5,
         num_workers: int = 0,
45 changes: 29 additions & 16 deletions pypots/data/generating.py
@@ -268,8 +268,14 @@ def gene_incomplete_random_walk_dataset(
     return data


-def gene_physionet2012():
-    """Generate PhysioNet2012."""
+def gene_physionet2012(artificially_missing: bool = True):
+    """Generate a fully prepared PhysioNet-2012 dataset for model testing.
+
+    Parameters
+    ----------
+    artificially_missing : bool, default = True,
+        Whether to artificially mask out 10% of the observed values and hold them out as ground truth for evaluating imputation performance.
+    """
     # generate samples
     df = load_specific_dataset("physionet_2012")
     X = df["X"]
@@ -288,11 +294,13 @@ def gene_physionet2012():
         val_set.to_numpy(),
         test_set.to_numpy(),
     )
+
     # normalization
     scaler = StandardScaler()
     train_X = scaler.fit_transform(train_X)
     val_X = scaler.transform(val_X)
     test_X = scaler.transform(test_X)
+
     # reshape into time series samples
     train_X = train_X.reshape(len(train_set_ids), 48, -1)
     val_X = val_X.reshape(len(val_set_ids), 48, -1)
@@ -303,16 +311,6 @@
     test_y = y[y.index.isin(test_set_ids)]
     train_y, val_y, test_y = train_y.to_numpy(), val_y.to_numpy(), test_y.to_numpy()

-    # mask values in the validation set as ground truth
-    val_X_intact, val_X, val_X_missing_mask, val_X_indicating_mask = mcar(val_X, 0.1)
-    val_X = masked_fill(val_X, 1 - val_X_missing_mask, torch.nan)
-
-    # mask values in the test set as ground truth
-    test_X_intact, test_X, test_X_missing_mask, test_X_indicating_mask = mcar(
-        test_X, 0.1
-    )
-    test_X = masked_fill(test_X, 1 - test_X_missing_mask, torch.nan)
-
     data = {
         "n_classes": 2,
         "n_steps": 48,
@@ -321,11 +319,26 @@
         "train_y": train_y.flatten(),
         "val_X": val_X,
         "val_y": val_y.flatten(),
-        "val_X_intact": val_X_intact,
-        "val_X_indicating_mask": val_X_indicating_mask,
         "test_X": test_X,
         "test_y": test_y.flatten(),
-        "test_X_intact": test_X_intact,
-        "test_X_indicating_mask": test_X_indicating_mask,
     }

+    if artificially_missing:
+        # mask values in the validation set as ground truth
+        val_X_intact, val_X, val_X_missing_mask, val_X_indicating_mask = mcar(
+            val_X, 0.1
+        )
+        val_X = masked_fill(val_X, 1 - val_X_missing_mask, torch.nan)
+
+        # mask values in the test set as ground truth
+        test_X_intact, test_X, test_X_missing_mask, test_X_indicating_mask = mcar(
+            test_X, 0.1
+        )
+        test_X = masked_fill(test_X, 1 - test_X_missing_mask, torch.nan)
+
+        data["test_X_intact"] = test_X_intact
+        data["test_X_indicating_mask"] = test_X_indicating_mask
+        data["val_X_intact"] = val_X_intact
+        data["val_X_indicating_mask"] = val_X_indicating_mask
+
     return data
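A brief usage sketch of the refactored generator follows; it only checks which keys the returned dictionary carries under each setting, with the key names taken from the diff above (the first call is assumed to succeed in downloading PhysioNet-2012).

# Hedged usage sketch for the new `artificially_missing` flag;
# key names come from the diff above.
from pypots.data.generating import gene_physionet2012

data = gene_physionet2012(artificially_missing=True)
# with masking, held-out ground truth is included for imputation evaluation
assert "val_X_intact" in data and "test_X_indicating_mask" in data

plain = gene_physionet2012(artificially_missing=False)
# without masking, only the raw train/val/test splits and labels are returned
assert "val_X_intact" not in plain and "test_X_intact" not in plain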
2 changes: 1 addition & 1 deletion pypots/imputation/brits.py
@@ -517,7 +517,7 @@ def __init__(
         rnn_hidden_size: int,
         batch_size: int = 32,
         epochs: int = 100,
-        patience: int = 10,
+        patience: int = None,
         learning_rate: float = 1e-3,
         weight_decay: float = 1e-5,
         num_workers: int = 0,
2 changes: 1 addition & 1 deletion pypots/imputation/saits.py
@@ -183,7 +183,7 @@ def __init__(
         MIT_weight: int = 1,
         batch_size: int = 32,
         epochs: int = 100,
-        patience: int = 10,
+        patience: int = None,
         learning_rate: float = 1e-3,
         weight_decay: float = 1e-5,
         num_workers: int = 0,
2 changes: 1 addition & 1 deletion pypots/imputation/transformer.py
@@ -295,7 +295,7 @@ def __init__(
         MIT_weight: int = 1,
         batch_size: int = 32,
         epochs: int = 100,
-        patience: int = 10,
+        patience: int = None,
         learning_rate: float = 1e-3,
         weight_decay: float = 1e-5,
         num_workers: int = 0,
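The practical effect of these one-line default changes is sketched below with pypots.imputation.SAITS; the architecture arguments are illustrative assumptions, not values taken from this PR.

# Hedged sketch: with the new default (patience=None), early stopping is
# disabled and training runs for the full `epochs`. Architecture arguments
# here are illustrative assumptions, not values from this PR.
from pypots.imputation import SAITS

model = SAITS(
    n_steps=48, n_features=37, n_layers=2, d_model=256, d_inner=128,
    n_heads=4, d_k=64, d_v=64, dropout=0.1,
    epochs=100,  # trains all 100 epochs; no early stopping by default
)

model_es = SAITS(
    n_steps=48, n_features=37, n_layers=2, d_model=256, d_inner=128,
    n_heads=4, d_k=64, d_v=64, dropout=0.1,
    epochs=100, patience=10,  # opt back in; must satisfy patience <= epochs
)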