Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] SamTov measure memory scaling #476

Open
wants to merge 52 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
7909e35
remove GPU keyword
PythonFZ Jan 21, 2022
99ff3d4
Add GPU check and include it in the memory_manager
PythonFZ Jan 21, 2022
5abee31
Merge branch 'main' into gpu_batching
SamTov Jan 21, 2022
d1add32
Merge branch 'main' into gpu_batching
PythonFZ Jan 24, 2022
ab1a4ce
Merge branch 'main' into gpu_batching
PythonFZ Jan 24, 2022
2d85c89
start memory measurement modules.
SamTov Jan 24, 2022
4cd6d00
Initial commit to scaling function updates.
SamTov Jan 25, 2022
eb6283b
Merge branch 'main' into SamTov_Measure_Memory_Scaling
SamTov Jan 25, 2022
bee4387
run black and isort
SamTov Jan 25, 2022
00f5cd2
Merge remote-tracking branch 'origin/SamTov_Measure_Memory_Scaling' i…
SamTov Jan 25, 2022
83d24f6
remove file call in CI.
SamTov Jan 25, 2022
81c867c
Fix additional flake8 import complaint
SamTov Jan 25, 2022
505acff
add config memory testing and include an override for batching.
SamTov Jan 25, 2022
e320755
remove config argument.
SamTov Jan 25, 2022
d898394
resolve flake8 complaint.
SamTov Jan 25, 2022
c974a78
CI profiling
PythonFZ Feb 1, 2022
25d0fe3
CI profiling
PythonFZ Feb 1, 2022
2b97375
update sqlite
PythonFZ Feb 1, 2022
7f51983
typo
PythonFZ Feb 1, 2022
4b23a73
patch ubuntu version
PythonFZ Feb 1, 2022
4729e28
try conda for newer sqlite version
PythonFZ Feb 1, 2022
43ed753
try conda for newer sqlite version
PythonFZ Feb 1, 2022
a83e48e
update sqlite version
PythonFZ Feb 1, 2022
a8c8af8
bugfix
PythonFZ Feb 1, 2022
883474d
add a plot
PythonFZ Feb 1, 2022
8d77a09
plot everything
PythonFZ Feb 1, 2022
0601c84
plot everything
PythonFZ Feb 1, 2022
aedc169
run ADF memory test
PythonFZ Feb 1, 2022
b9ca27f
run ADF memory test
PythonFZ Feb 1, 2022
492370f
Update test_memory.py
PythonFZ Feb 1, 2022
719de34
Update test_memory.py
PythonFZ Feb 1, 2022
3542778
reduce size even further
PythonFZ Feb 1, 2022
1ebdc2f
Update test_memory.py
PythonFZ Feb 1, 2022
e12368c
Update test_memory.py
PythonFZ Feb 1, 2022
869c047
remove print
PythonFZ Feb 2, 2022
4bf249c
Merge branch 'main' into SamTov_Measure_Memory_Scaling
PythonFZ Feb 2, 2022
1713f8a
clean up a bit
PythonFZ Feb 2, 2022
7898a6b
fix black / flake8
PythonFZ Feb 2, 2022
8743d23
add plot function
PythonFZ Feb 2, 2022
327e538
add update to not spam to PR
PythonFZ Feb 2, 2022
427fd13
only run on push
PythonFZ Feb 2, 2022
f49303e
add package
PythonFZ Feb 2, 2022
3b8ad22
small code cleanup + update
PythonFZ Feb 2, 2022
8f14b22
Update lint.yaml
PythonFZ Feb 2, 2022
963580e
add diffusion + fix plots
PythonFZ Feb 2, 2022
de1ef0d
Merge remote-tracking branch 'origin/SamTov_Measure_Memory_Scaling' i…
PythonFZ Feb 2, 2022
b5cf14c
add continue-on-error to still gather the plot at the end.
PythonFZ Feb 2, 2022
d66ca6e
add GK diffusion
PythonFZ Feb 2, 2022
91e06b9
deselect memory by default
PythonFZ Feb 2, 2022
1eddad1
enable memory management
PythonFZ Feb 2, 2022
a262669
add einstein data range test
PythonFZ Feb 2, 2022
a756c6b
run with / without fixture
PythonFZ Feb 2, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Initial commit to scaling function updates.
  • Loading branch information
SamTov committed Jan 25, 2022
commit 4cd6d007205d7d6e7f0804e1bb5f1d2c0c783956
105 changes: 105 additions & 0 deletions CI/memory_scaling/test_scaling_coefficients.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,117 @@
Module to test scaling coefficients.
"""
import pytest
from zinchub import DataHub
import pandas as pd

import mdsuite
import mdsuite.transformations

import sqlite3

import numpy as np


def _build_atomwise(data_scaling: int, system: bool = False):
"""
Build a numpy array of atom-wise data in steps of MBs.

Parameters
----------
data_scaling : int
Number of atoms in the data e.g. zeroth array of the data. 1 atom is 1/10
of a MB of data.
system : bool
If true, the returned array should be (n_confs, 3)

Returns
-------
data_array : np.ones
A numpy array of ones that matches close to 1/10 * data_scaling MBs in
size (~98%).
Notes
-----
TODO: When moved to (confs, n_atoms, dim), this will need to be updated to take the
first column as atoms otherwise the memory scaling will be wrong.

"""
if system:
return np.ones((data_scaling * 4096, 3))
else:
return np.ones((data_scaling, 4096, 3))


@pytest.fixture(scope="session")
def traj_file(tmp_path_factory) -> str:
    """
    Fetch the NaCl trajectory once per test session.

    The file is downloaded into pytest's base temporary directory so that it is
    shared by every test in the session.

    Returns
    -------
    path : str
            POSIX-style path to the downloaded trajectory file.
    """
    download_dir = tmp_path_factory.getbasetemp()

    dataset = DataHub(url="https://github.com/zincware/DataHub/tree/main/NaCl_gk_i_q")
    dataset.get_file(path=download_dir)

    local_file = download_dir / dataset.file_raw
    return local_file.as_posix()


@pytest.fixture()
def mdsuite_project(tmp_path, traj_file) -> mdsuite.Project:
    """
    Build an MDSuite project with all data stored in a temp directory for easier
    cleanup after the test.

    Parameters
    ----------
    tmp_path : pathlib.Path
            Per-test temporary directory provided by pytest; used as the
            storage path of the project.
    traj_file : str
            Path to the trajectory file. It must be requested as a fixture
            argument: referring to the bare module-level name would hand the
            fixture function object to ``add_experiment`` instead of the path.

    Returns
    -------
    project : mdsuite.Project
            MDSuite project with a single "NaCl" experiment added.
    """
    project = mdsuite.Project(storage_path=tmp_path.as_posix())
    project.add_experiment("NaCl", simulation_data=traj_file)

    return project


def get_memory_usage(database: str, callable_name: str) -> float:
    """
    Get the memory used from the dumped sql database.

    Parameters
    ----------
    database : str
            Path to the sqlite database that will be read.
    callable_name : str
            Name of the function being measured and therefore, what memory
            value to return. Matched against the ``ITEM`` column of the
            ``TEST_METRICS`` table.

    Returns
    -------
    memory : float
            Value of the ``MEM_USAGE`` column for ``callable_name``. If several
            rows match, the last one in the order returned by the query is
            used (presumably the most recent measurement -- TODO confirm).

    Raises
    ------
    ValueError
            If no row in ``TEST_METRICS`` has ``ITEM == callable_name``.
    """
    db = sqlite3.connect(database)
    try:
        data = pd.read_sql_query("SELECT * from TEST_METRICS", db)
    finally:
        # Using the sqlite3 connection as a context manager only commits or
        # rolls back the transaction; it does not close the connection.
        db.close()

    matches = data.loc[data["ITEM"] == callable_name, "MEM_USAGE"]
    if matches.empty:
        raise ValueError(
            f"No memory measurement found for '{callable_name}' in '{database}'."
        )

    # Return a plain float as annotated, rather than a pandas Series.
    return float(matches.iloc[-1])


def test_rdf_memory(mdsuite_project):
    """
    Run the RDF calculator and print the memory recorded for this test.

    Parameters
    ----------
    mdsuite_project : mdsuite.Project
            An mdsuite project with stored files in a tmp directory.

    Notes
    -----
    The measurement is read from 'pymon.db' in the current working directory,
    looked up under this test function's own name. Only the first of the two
    slots in the result array is filled.
    """
    measurements = np.zeros(2)

    mdsuite_project.run.RadialDistributionFunction(plot=False)

    measurements[0] = get_memory_usage("pymon.db", test_rdf_memory.__name__)

    print(measurements)
2 changes: 1 addition & 1 deletion mdsuite/experiment/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -607,7 +607,7 @@ def _store_metadata(self, metadata: TrajectoryMetadata, update_with_pubchempy=Fa
----------
metadata: TrajectoryMetadata
update_with_pubchempy: bool
Load data from pubchempy and add it to fill missing infomration
Load data from pubchempy and add it to fill missing information.
"""
# new trajectory: store all metadata and construct a new database
self.temperature = metadata.temperature
Expand Down