-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* MidiDataset can initialize with an iterator and only expand when necessary.
* Reduce some memory overhead (we are starting to have >100k MidiDict and may get more in the future).
* classmethod+property is better...
* Remove functools import.
* Use separate workers to build the dataset instead of a process pool.
* Add jsonl.zst support; unit test; fix bug.
* Receive context length via the command line. It's more convenient than digging into the config file every time.
* Fix a minor output format mismatch when grad_checkpoint is true.
* Format and small changes.

---------

Co-authored-by: Louis Bradshaw <[email protected]>
- Loading branch information
1 parent
e9d82c3
commit 4cd90fc
Showing
7 changed files
with
152 additions
and
29 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
import builtins | ||
import contextlib | ||
import io | ||
import zstandard | ||
import jsonlines | ||
import json | ||
|
||
|
||
class Reader:
    """Reader for the jsonl.zst format.

    The file is not touched until iteration starts; every call to
    ``__iter__`` reopens it, so one instance can be iterated repeatedly.
    """

    def __init__(self, path: str):
        """Initializes the reader.

        Args:
            path (str): Path to the file.
        """
        self.path = path

    def __iter__(self):
        """Yield the objects decoded from each line of the file, in order.

        Raises:
            OSError: If the file at ``self.path`` cannot be opened
                (e.g. ``FileNotFoundError``).
        """
        with builtins.open(self.path, "rb") as fh:
            dctx = zstandard.ZstdDecompressor()
            # By default stream_reader stops at the end of the first zstd
            # frame, silently dropping the rest of a multi-frame file
            # (concatenated archives, pzstd output). Read every frame.
            raw = dctx.stream_reader(fh, read_across_frames=True)
            # Closing the buffered wrapper also closes the decompression
            # stream, so it is released deterministically even when the
            # consumer abandons the iteration early.
            with io.BufferedReader(raw) as buffered:
                yield from jsonlines.Reader(buffered)
|
||
|
||
class Writer:
    """Writer for the jsonl.zst format.

    Intended to be used as a context manager: the output file and the
    compression stream are created in ``__enter__`` and released in
    ``__exit__``. ``write`` is only valid in between.
    """

    def __init__(self, path: str):
        """Initializes the writer.

        Args:
            path (str): Path to the file.
        """
        self.path = path

    def __enter__(self):
        """Open the output file and set up the zstd compression stream.

        Returns:
            Writer: This instance, ready for ``write`` calls.
        """
        self.fh = builtins.open(self.path, "wb")
        try:
            self.cctx = zstandard.ZstdCompressor()
            self.compressor = self.cctx.stream_writer(self.fh)
        except BaseException:
            # Do not leak the freshly opened file if the compressor cannot
            # be set up; the original error is re-raised unchanged.
            self.fh.close()
            raise
        return self

    def write(self, obj):
        """Serialize ``obj`` as one JSON line and append it, compressed.

        Args:
            obj: Any value accepted by ``json.dumps``.
        """
        self.compressor.write(json.dumps(obj).encode("UTF-8") + b"\n")

    def __exit__(self, exc_type, exc_value, traceback):
        """Finish the zstd frame and close the stream and the file.

        Exceptions from the ``with`` body are not suppressed. The file
        handle is closed even when flushing or closing the compressor fails.
        """
        try:
            try:
                self.compressor.flush(zstandard.FLUSH_FRAME)
                self.fh.flush()
            finally:
                self.compressor.close()
        finally:
            # Closing an already-closed file is a no-op, so this is safe
            # whether or not the compressor closed the file itself.
            self.fh.close()
|
||
|
||
@contextlib.contextmanager
def open(path: str, mode: str = "r"):
    """Context manager for reading or writing a jsonl.zst file.

    Args:
        path (str): Path to the file.
        mode (str): Either 'r' (read) or 'w' (write); nothing else is
            supported.

    Yields:
        Reader or Writer: A ``Reader`` for mode 'r'; for mode 'w', a
        ``Writer`` that has been entered and is exited when the
        ``with`` block ends.

    Raises:
        ValueError: If ``mode`` is neither 'r' nor 'w'. Raised when the
            context is entered.
    """
    if mode not in ("r", "w"):
        raise ValueError(f"Unsupported mode '{mode}'")

    if mode == "w":
        with Writer(path) as sink:
            yield sink
        return

    yield Reader(path)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters