Skip to content

Commit

Permalink
Receive context length via the command line. It's more convenient than dig…
Browse files Browse the repository at this point in the history
…ging into the config file every time.
  • Loading branch information
honglu2875 committed Nov 10, 2023
1 parent 8bbc14b commit 95f492d
Showing 1 changed file with 2 additions and 3 deletions.
5 changes: 2 additions & 3 deletions aria/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,22 +124,21 @@ def _parse_tokenized_dataset_args():
argp.add_argument("load_path", help="path midi_dict dataset")
argp.add_argument("save_path", help="path to save dataset")
argp.add_argument("-s", help="also produce shuffled", action="store_true")
argp.add_argument("-l", help="max sequence length", type=int, default=2048)

return argp.parse_args(sys.argv[2:])


def build_tokenized_dataset(args):
from aria.tokenizer import TokenizerLazy
from aria.data.datasets import TokenizedDataset
from aria.config import load_config

config = load_config()["data"]["dataset_gen_args"]
tokenizer = TokenizerLazy()
dataset = TokenizedDataset.build(
tokenizer=tokenizer,
save_path=args.save_path,
midi_dataset_path=args.load_path,
max_seq_len=config["max_seq_len"],
max_seq_len=args.l,
overwrite=True,
)
if args.s:
Expand Down

0 comments on commit 95f492d

Please sign in to comment.