Skip to content

Commit

Permalink
Merge pull request #294 from EleutherAI/multiprocessing-duplication
Browse files Browse the repository at this point in the history
inference server duplication check
  • Loading branch information
AlexTMallen committed Nov 2, 2023
2 parents 937e71d + 6b2798c commit 84e99a3
Showing 1 changed file with 9 additions and 4 deletions.
13 changes: 9 additions & 4 deletions elk/extraction/inference_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,13 +216,18 @@ def imap(
q.put((closure_pkl, model_kwargs_pkl, shard))

         generator = round_robin(self._result_queues)  # type: ignore[arg-type]
-        seen_dummy = False
+        seen_ids = set()
         for out in tqdm(generator, total=len(dataset), disable=not use_tqdm):
             if out[0] == dummy_id:
-                if seen_dummy:
+                if dummy_id in seen_ids:
                     continue  # ignore any extra dummy rows
-                else:
-                    seen_dummy = True
+            elif out[0] in seen_ids:
+                raise RuntimeError(
+                    "Round robin yielded duplicate items. "
+                    "This may be due to multiprocessing queues returning "
+                    "items repeatedly."
+                )
+            seen_ids.add(out[0])
             yield out


Expand Down

0 comments on commit 84e99a3

Please sign in to comment.