Skip to content

Commit

Permalink
Upgrade transformers version to 4.18.0 (#2514)
Browse files: browse the repository at this point in the history
* Upgrade transformers version to 4.18.0

* Adapt tokenization test to upgrade

* Adapt tokenization test to upgrade
  • Loading branch information
bogdankostic authored May 6, 2022
1 parent caf1336 commit bce8457
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 4 deletions.
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ install_requires =
torch>1.9,<1.11
requests
pydantic
-transformers==4.13.0
+transformers==4.18.0
nltk
pandas

Expand Down
4 changes: 1 addition & 3 deletions test/test_tokenization.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ def test_all_tokenizer_on_special_cases(caplog):
"This is a sentence with multiple tabs",
]

-expected_to_fail = [(1, 1), (1, 3), (1, 4), (1, 5), (1, 6), (1, 7), (2, 1), (2, 5)]
+expected_to_fail = {(2, 1), (2, 5)}

for i_tok, tokenizer in enumerate(tokenizers):
for i_text, text in enumerate(texts):
Expand Down
Expand Up @@ -299,8 +299,6 @@ def test_all_tokenizer_on_special_cases(caplog):
for ((start, end), w_index) in zip(encoded.offsets, encoded.words):
word_start_ch = word_spans[w_index][0]
token_offsets.append((start + word_start_ch, end + word_start_ch))
-if getattr(tokenizer, "add_prefix_space", None):
-token_offsets = [(start - 1, end) for start, end in token_offsets]

# verify that offsets align back to original text
if text == "力加勝北区ᴵᴺᵀᵃছজটডণত":
Expand Down

0 comments on commit bce8457

Please sign in to comment.