Skip to content

Commit

Permalink
Modify existing function in external data helper (onnx#3280)
Browse files Browse the repository at this point in the history
* Modify existing function and add a new function to convert raw data as external data to allow converting attributes and saving the model

Signed-off-by: Anna Jung (VMware) <[email protected]>

* Modify existing save model api to optionally convert and save as external data

Signed-off-by: Anna Jung (VMware) <[email protected]>

* Add a fix for a few nits in the comment

Signed-off-by: Anna Jung (VMware) <[email protected]>

Co-authored-by: Ashwini Khade <[email protected]>
  • Loading branch information
Anna and askhade authored Mar 24, 2021
1 parent 39df720 commit 522b000
Show file tree
Hide file tree
Showing 5 changed files with 167 additions and 25 deletions.
21 changes: 21 additions & 0 deletions docs/ExternalData.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,27 @@ load_external_data_for_model(onnx_model, 'data/directory/path/')
# Then the onnx_model has loaded the external data from the specific directory
```

## Converting an ONNX Model to External Data
```python
import onnx
from onnx.external_data_helper import convert_model_to_external_data

onnx_model = ... # Your model in memory as ModelProto
convert_model_to_external_data(onnx_model, all_tensors_to_one_file=True, location='filename', size_threshold=1024, convert_attribute=False)
# Must be followed by save_model to save the converted model to a specific path
onnx.save_model(onnx_model, 'path/to/save/the/model.onnx')
# The raw data of onnx_model has now been converted to external data and saved to the specified directory
```

## Converting and Saving an ONNX Model to External Data
```python
import onnx

onnx_model = ... # Your model in memory as ModelProto
onnx.save_model(onnx_model, 'path/to/save/the/model.onnx', save_as_external_data=True, all_tensors_to_one_file=True, location='filename', size_threshold=1024, convert_attribute=False)
# The raw data of onnx_model has now been converted to external data and saved to the specified directory
```

## onnx.checker for Models with External Data

### Models with External Data (<2GB)
Expand Down
21 changes: 21 additions & 0 deletions docs/PythonAPIOverview.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,16 @@ load_external_data_for_model(onnx_model, 'data/directory/path/')
# Then the onnx_model has loaded the external data from the specific directory
```

## Converting an ONNX Model to External Data
```python
from onnx.external_data_helper import convert_model_to_external_data

onnx_model = ... # Your model in memory as ModelProto
convert_model_to_external_data(onnx_model, all_tensors_to_one_file=True, location='filename', size_threshold=1024, convert_attribute=False)
# The tensors of onnx_model that hold raw data are now set as external data
# This call must be followed by onnx.save_model, which saves the tensor data as external data
```

## Saving an ONNX Model
```python
import onnx
Expand All @@ -45,6 +55,17 @@ onnx.save(onnx_model, 'path/to/the/model.onnx')
Runnable IPython notebooks:
- [save_model.ipynb](https://github.com/onnx/onnx/tree/master/onnx/examples/save_model.ipynb)


## Converting and Saving an ONNX Model to External Data
```python
import onnx

onnx_model = ... # Your model in memory as ModelProto
onnx.save_model(onnx_model, 'path/to/save/the/model.onnx', save_as_external_data=True, all_tensors_to_one_file=True, location='filename', size_threshold=1024, convert_attribute=False)
# The raw data of onnx_model has now been converted to external data and saved to the specified directory
```


## Manipulating TensorProto and Numpy Array
```python
import numpy
Expand Down
23 changes: 17 additions & 6 deletions onnx/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import os

from .onnx_cpp2py_export import ONNX_ML
from onnx.external_data_helper import load_external_data_for_model, write_external_data_tensors
from onnx.external_data_helper import load_external_data_for_model, write_external_data_tensors, convert_model_to_external_data
from .onnx_pb import * # noqa
from .onnx_operators_pb import * # noqa
from .onnx_data_pb import * # noqa
Expand Down Expand Up @@ -172,18 +172,29 @@ def load_tensor_from_string(s, format=None): # type: (bytes, Optional[Any]) ->
return _deserialize(s, TensorProto())


def save_model(proto, f, format=None): # type: (Union[ModelProto, bytes], Union[IO[bytes], Text], Optional[Any]) -> None
def save_model(proto, f, format=None, save_as_external_data=False, all_tensors_to_one_file=True, location=None, size_threshold=1024, convert_attribute=False):
# type: (Union[ModelProto, bytes], Union[IO[bytes], Text], Optional[Any], bool, bool, Optional[Text], int, bool) -> None
'''
Saves the ModelProto to the specified path.
Saves the ModelProto to the specified path and, optionally, serializes tensors with raw data as external data before saving.
@params
proto should be a in-memory ModelProto
f can be a file-like object (has "write" function) or a string containing a file name
format is for future use
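proto: should be an in-memory ModelProto
f: can be a file-like object (has "write" function) or a string containing a file name
format: for future use
save_as_external_data: If true, convert tensors with raw data to external data before saving the model.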
all_tensors_to_one_file: If true, save all tensors to one external file specified by location.
If false, save each tensor to a file named with the tensor name.
location: specify the external file that all tensors are saved to.
If not specified, a unique file name generated with uuid.uuid1() is used.
size_threshold: Threshold for size of data. Only when tensor's data is >= the size_threshold it will be converted
to external data. To convert every tensor with raw data to external data set size_threshold=0.
convert_attribute: If true, convert all tensors to external data
If false, convert only non-attribute tensors to external data
'''
if isinstance(proto, bytes):
proto = _deserialize(proto, ModelProto())

if save_as_external_data:
convert_model_to_external_data(proto, all_tensors_to_one_file, location, size_threshold, convert_attribute)

model_filepath = _get_file_path(f)
if model_filepath:
basepath = os.path.dirname(model_filepath)
Expand Down
15 changes: 11 additions & 4 deletions onnx/external_data_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import sys
from itertools import chain
from typing import Iterable, Text, Optional

from .onnx_pb import TensorProto, ModelProto


Expand Down Expand Up @@ -99,8 +100,8 @@ def set_external_data(tensor, # type: TensorProto
entry.value = str(v)


def convert_model_to_external_data(model, all_tensors_to_one_file=True, location=None, size_threshold=1024):
# type: (ModelProto, bool, Optional[Text], int) -> None
def convert_model_to_external_data(model, all_tensors_to_one_file=True, location=None, size_threshold=1024, convert_attribute=False):
# type: (ModelProto, bool, Optional[Text], int, bool) -> None
"""
Call to set all tensors with raw data as external data. This call should precede 'save_model'.
'save_model' saves all the tensors' data as external data after calling this function.
Expand All @@ -112,16 +113,22 @@ def convert_model_to_external_data(model, all_tensors_to_one_file=True, location
If not specified, a unique file name generated with uuid.uuid1() is used.
size_threshold: Threshold for size of data. Only when tensor's data is >= the size_threshold
it will be converted to external data. To convert every tensor with raw data to external data set size_threshold=0.
convert_attribute: If true, convert all tensors to external data
If false, convert only non-attribute tensors to external data
"""
tensors = _get_initializer_tensors(model)
if convert_attribute:
tensors = _get_all_tensors(model)

if all_tensors_to_one_file:
file_name = Text(uuid.uuid1())
if location:
file_name = location
for tensor in _get_all_tensors(model):
for tensor in tensors:
if tensor.HasField("raw_data") and sys.getsizeof(tensor.raw_data) >= size_threshold:
set_external_data(tensor, file_name)
else:
for tensor in _get_all_tensors(model):
for tensor in tensors:
if tensor.HasField("raw_data") and sys.getsizeof(tensor.raw_data) >= size_threshold:
tensor_location = tensor.name
if not _is_valid_filename(tensor_location):
Expand Down
112 changes: 97 additions & 15 deletions onnx/test/test_external_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,19 +203,36 @@ def create_test_model_proto(self): # type: () -> ModelProto
def test_check_model(self): # type: () -> None
checker.check_model(self.model)

def test_convert_model_to_from_one_file(self): # type: () -> None
def test_convert_model_to_external_data_with_size_threshold(self): # type: () -> None
model_file_path = self.get_temp_model_filename()
external_data_file = str(uuid.uuid4())
convert_model_to_external_data(self.model, location=external_data_file, size_threshold=0)

convert_model_to_external_data(self.model, size_threshold=1024)
onnx.save_model(self.model, model_file_path)
self.assertTrue(Path.isfile(model_file_path))
self.assertTrue(Path.isfile(os.path.join(self.temp_dir, external_data_file)))

model = onnx.load_model(model_file_path)
initializer_tensor = model.graph.initializer[0]
self.assertFalse(initializer_tensor.HasField("data_location"))

def test_convert_model_to_external_data_without_size_threshold(self): # type: () -> None
model_file_path = self.get_temp_model_filename()
convert_model_to_external_data(self.model, size_threshold=0)
onnx.save_model(self.model, model_file_path)

model = onnx.load_model(model_file_path)
initializer_tensor = model.graph.initializer[0]
self.assertTrue(initializer_tensor.HasField("data_location"))
self.assertTrue(np.allclose(to_array(initializer_tensor), self.initializer_value))

attribute_tensor = model.graph.node[0].attribute[0].t
self.assertTrue(np.allclose(to_array(attribute_tensor), self.attribute_value))
def test_convert_model_to_external_data_from_one_file_with_location(self): # type: () -> None
model_file_path = self.get_temp_model_filename()
external_data_file = str(uuid.uuid4())

convert_model_to_external_data(self.model, size_threshold=0, all_tensors_to_one_file=True, location=external_data_file)
onnx.save_model(self.model, model_file_path)

self.assertTrue(Path.isfile(os.path.join(self.temp_dir, external_data_file)))

model = onnx.load_model(model_file_path)

# test convert model from external data
convert_model_from_external_data(model)
Expand All @@ -228,36 +245,101 @@ def test_convert_model_to_from_one_file(self): # type: () -> None
self.assertTrue(np.allclose(to_array(initializer_tensor), self.initializer_value))

attribute_tensor = model.graph.node[0].attribute[0].t
self.assertFalse(len(initializer_tensor.external_data))
self.assertFalse(len(attribute_tensor.external_data))
self.assertEqual(attribute_tensor.data_location, TensorProto.DEFAULT)
self.assertTrue(np.allclose(to_array(attribute_tensor), self.attribute_value))

def test_convert_model_to_external_data_one_file_per_tensor(self): # type: () -> None
def test_convert_model_to_external_data_from_one_file_without_location_uses_model_name(self): # type: () -> None
model_file_path = self.get_temp_model_filename()
convert_model_to_external_data(self.model, all_tensors_to_one_file=False, size_threshold=0)

convert_model_to_external_data(self.model, size_threshold=0, all_tensors_to_one_file=True)
onnx.save_model(self.model, model_file_path)

self.assertTrue(Path.isfile(model_file_path))
self.assertTrue(Path.isfile(os.path.join(self.temp_dir, model_file_path)))

def test_convert_model_to_external_data_one_file_per_tensor_without_attribute(self): # type: () -> None
model_file_path = self.get_temp_model_filename()

convert_model_to_external_data(self.model, size_threshold=0, all_tensors_to_one_file=False, convert_attribute=False)
onnx.save_model(self.model, model_file_path)

self.assertTrue(Path.isfile(model_file_path))
self.assertTrue(Path.isfile(os.path.join(self.temp_dir, "input_value")))
self.assertFalse(Path.isfile(os.path.join(self.temp_dir, "attribute_value")))

def test_convert_model_to_external_data_one_file_per_tensor_with_attribute(self): # type: () -> None
model_file_path = self.get_temp_model_filename()

convert_model_to_external_data(self.model, size_threshold=0, all_tensors_to_one_file=False, convert_attribute=True)
onnx.save_model(self.model, model_file_path)

self.assertTrue(Path.isfile(model_file_path))
self.assertTrue(Path.isfile(os.path.join(self.temp_dir, "input_value")))
self.assertTrue(Path.isfile(os.path.join(self.temp_dir, "attribute_value")))

def test_convert_model_to_external_data_does_not_convert_attribute_values(self): # type: () -> None
model_file_path = self.get_temp_model_filename()

convert_model_to_external_data(self.model, size_threshold=0, convert_attribute=False, all_tensors_to_one_file=False)
onnx.save_model(self.model, model_file_path)

self.assertTrue(Path.isfile(os.path.join(self.temp_dir, "input_value")))
self.assertFalse(Path.isfile(os.path.join(self.temp_dir, "attribute_value")))

model = onnx.load_model(model_file_path)
initializer_tensor = model.graph.initializer[0]
self.assertTrue(initializer_tensor.HasField("data_location"))

attribute_tensor = model.graph.node[0].attribute[0].t
self.assertFalse(attribute_tensor.HasField("data_location"))

def test_convert_model_to_external_data_converts_attribute_values(self): # type: () -> None
model_file_path = self.get_temp_model_filename()

convert_model_to_external_data(self.model, size_threshold=0, convert_attribute=True)
onnx.save_model(self.model, model_file_path)

model = onnx.load_model(model_file_path)

initializer_tensor = model.graph.initializer[0]
self.assertTrue(np.allclose(to_array(initializer_tensor), self.initializer_value))
self.assertTrue(initializer_tensor.HasField("data_location"))

attribute_tensor = model.graph.node[0].attribute[0].t
self.assertTrue(np.allclose(to_array(attribute_tensor), self.attribute_value))
self.assertTrue(attribute_tensor.HasField("data_location"))

def test_convert_model_to_external_data_with_size_threshold(self): # type: () -> None
def test_save_model_does_not_convert_to_external_data_and_saves_the_model(self): # type: () -> None
model_file_path = self.get_temp_model_filename()
convert_model_to_external_data(self.model, all_tensors_to_one_file=False, size_threshold=1024)
onnx.save_model(self.model, model_file_path)
onnx.save_model(self.model, model_file_path, save_as_external_data=False)
self.assertTrue(Path.isfile(model_file_path))
self.assertFalse(Path.isfile(os.path.join(self.temp_dir, "input_value")))
self.assertFalse(Path.isfile(os.path.join(self.temp_dir, "attribute_value")))

model = onnx.load_model(model_file_path)
initializer_tensor = model.graph.initializer[0]
self.assertFalse(initializer_tensor.HasField("data_location"))

attribute_tensor = model.graph.node[0].attribute[0].t
self.assertFalse(attribute_tensor.HasField("data_location"))

def test_save_model_does_convert_and_saves_the_model(self): # type: () -> None
model_file_path = self.get_temp_model_filename()
onnx.save_model(self.model,
model_file_path,
save_as_external_data=True,
all_tensors_to_one_file=True,
location=None,
size_threshold=0,
convert_attribute=False)

model = onnx.load_model(model_file_path)

initializer_tensor = model.graph.initializer[0]
self.assertTrue(initializer_tensor.HasField("data_location"))
self.assertTrue(np.allclose(to_array(initializer_tensor), self.initializer_value))

attribute_tensor = model.graph.node[0].attribute[0].t
self.assertFalse(attribute_tensor.HasField("data_location"))
self.assertTrue(np.allclose(to_array(attribute_tensor), self.attribute_value))


Expand Down

0 comments on commit 522b000

Please sign in to comment.