Remove the amazon_polarity and lichess (chess) datasets; fix ethics prompts so they no longer include the answer

EleutherAI · May 13, 2024 · dc4d8ef
1 parent caaa608
commit dc4d8ef
Showing 1 changed file with 8 additions and 36 deletions.
diff --git a/w2s/ds_registry.py b/w2s/ds_registry.py
@@ -103,7 +103,10 @@ def load_and_process_dataset(
  ds = ds.map(
  lambda ex: {
  "id": hashlib.sha1(ex["txt"].encode()).hexdigest()[:8],
- "soft_label": [1 - float(ex["hard_label"]), float(ex["hard_label"])],
+ "soft_label": [
+ 1 - float(ex["hard_label"]),
+ float(ex["hard_label"]),
+ ],
  }
  )
  results[split] = ds
@@ -264,10 +267,10 @@ def format_dream(ex, rng):
 
 
 def format_ethics_deontology(ex, rng):
- ans = "yes" if ex["label"] else "no"
+ "yes" if ex["label"] else "no"
  txt = (
  "Q: Is the following a morally valid"
- f" excuse?\n\n{ex['scenario']}\n\n{ex['excuse']}\n\nA: {ans}"
+ f" excuse?\n\n{ex['scenario']}\n\n{ex['excuse']}\n\nA:"
  )
  return dict(txt=txt, hard_label=ex["label"])
 
@@ -282,8 +285,7 @@ def format_ethics_deontology(ex, rng):
 
 
 def format_ethics_justice(ex, rng):
- ans = "yes" if ex["label"] else "no"
- txt = f"Q: Does this statement exemplify justice?\n\n{ex['scenario']}\n\nA: {ans}"
+ txt = f"Q: Does this statement exemplify justice?\n\n{ex['scenario']}\n\nA:"
  return dict(txt=txt, hard_label=ex["label"])
 
 
@@ -297,10 +299,9 @@ def format_ethics_justice(ex, rng):
 
 
 def format_ethics_virtue(ex, rng):
- ans = "yes" if ex["label"] else "no"
  txt = (
  "Q: Does this behavior match the adjective that"
- f" follows?\n\n{ex['scenario']}\n\nA: {ans}"
+ f" follows?\n\n{ex['scenario']}\n\nA:"
  )
  return dict(txt=txt, hard_label=ex["label"])
 
@@ -336,22 +337,6 @@ def format_ethics_utilitarianism(ex, rng):
 )
 
 
-LICHESS_N_TEST = 5000
-
-def format_lichess(ex, rng):
- return dict(ctx=ex['ctx'], target=ex['target'])
-
-
-register_dataset(
- "lichess",
- DatasetConfig(
- loader=hf_loader("EleutherAI/lichess-puzzles", n_test=LICHESS_N_TEST), # type: ignore
- formatter=format_lichess, # type: ignore
- task="generate",
- ),
-)
-
-
 def format_mc_taco(ex, rng):
  template = "{sentence}\n\nGiven the above, {question} Is the answer {answer}?"
  return dict(txt=template.format(**ex), hard_label=ex["label"])
@@ -368,19 +353,6 @@ def format_mc_taco(ex, rng):
 )
 
 
-def format_amazon_polarity(ex, rng):
- return dict(txt=f"{ex['title']} {ex['content']}", hard_label=ex["label"])
-
-
-register_dataset(
- "amazon_polarity",
- DatasetConfig(
- loader=hf_loader("amazon_polarity"), # type: ignore
- formatter=format_amazon_polarity, # type: ignore
- ),
-)
-
-
 def format_hellaswag(ex, rng):
  hard_label = int(rng.random() < 0.5)
  if hard_label: