Skip to content

Commit

Permalink
Rename generation -> extraction everywhere
Browse files Browse the repository at this point in the history
  • Loading branch information
norabelrose committed Feb 9, 2023
1 parent e87da87 commit 334ee45
Show file tree
Hide file tree
Showing 13 changed files with 17 additions and 22 deletions.
2 changes: 1 addition & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"name": "Python: Generation Cuda",
"type": "python",
"request": "launch",
"module": "elk.generation_main",
"module": "elk.extraction_main",
"console": "integratedTerminal",
"justMyCode": true,
"args" : [
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ Furthermore:
1. To generate the hidden states for one model `mdl` and all datasets, `cd elk` and then run

```bash
python generation_main.py --model deberta-v2-xxlarge-mnli --datasets imdb --prefix normal --device cuda --num-data 1000
python extraction_main.py --model deberta-v2-xxlarge-mnli --datasets imdb --prefix normal --device cuda --num-data 1000
```

To test `deberta-v2-xxlarge-mnli` with the misleading prefix, and only the `imdb` and `amazon-polarity` datasets, while printing extra information, run:

The name of prefix can be found in `./utils_generation/construct_prompts.py`. This command will save hidden states to `generation_results` and will save zero-shot accuracy to `generation_results/generation_results.csv`.
The available prefix names can be found in `./extraction/construct_prompts.py`. This command saves the hidden states to `extraction_results` and the zero-shot accuracy to `extraction_results/extraction_results.csv`.

1. To train a ccs model and a logistic regression model

Expand Down
2 changes: 1 addition & 1 deletion elk/default_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
"dadisincorrect",
"teachernoimitate"
],
"models-layer-num": {
"models_layer_num": {
"t5-11b": 25,
"unifiedqa-t5-11b": 25,
"T0pp": 25,
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -312,12 +312,12 @@ def __init__(self, set_name):
self.module = None
else:
self.nomodule = False
from utils_generation.load_utils import getLoadName
from extraction.load_utils import getLoadName

self.module = DatasetTemplates(*getLoadName(set_name))

def getGlobalPromptsNum(set_name_list):
from utils_generation.load_utils import getLoadName
from extraction.load_utils import getLoadName

res = []
for set_name in set_name_list:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def create_hiddenstates(model, tokenizer, name_to_dataframe, args):
"""
with torch.no_grad():
for name, dataframe in name_to_dataframe.items():
# This part corresponds to hidden states generation
# This part corresponds to hidden states extraction
hidden_states = calculate_hidden_state(
args, model, tokenizer, dataframe, args.model
)
Expand Down
File renamed without changes.
10 changes: 5 additions & 5 deletions elk/utils_generation/parser.py → elk/extraction/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,14 @@ def get_args():

assert args.model in models, NotImplementedError(
"You use model {}, but it's not . For any new model, please make sure you"
" implement the code in `load_utils` and `generation`, and then it in"
" implement the code in `load_utils` and `extraction`, and then it in"
" `parser.py`".format(args.model)
)

for prefix in args.prefix:
assert prefix in prefix, NotImplementedError(
"Invalid prefix name {}. Please check your prefix name. To add new prefix,"
" please mofidy `utils_generation/prompts.json` \
" please mofidy `extraction/prompts.json` \
and new prefix in {}.json.".format(
prefix, default_config_path
)
Expand Down Expand Up @@ -157,7 +157,7 @@ def get_parser():
default=["normal"],
help=(
"The name of prefix added before the question. normal means no index. You"
" can go to `utils_generation/prompts.json` to add new prompt."
" can go to `extraction/prompts.json` to add new prompt."
),
)
parser.add_argument(
Expand Down Expand Up @@ -187,7 +187,7 @@ def get_parser():
help="The indices of prompt you want to use.",
)

# generation & zero-shot accuracy calculation
# extraction & zero-shot accuracy calculation
parser.add_argument(
"--cal-zeroshot",
type=int,
Expand Down Expand Up @@ -252,7 +252,7 @@ def get_parser():
parser.add_argument(
"--save-base-dir",
type=Path,
default="generation_results",
default="extraction_results",
help="The base dir where you want to save the directories of hidden states.",
)
parser.add_argument(
Expand Down
File renamed without changes.
6 changes: 3 additions & 3 deletions elk/generation_main.py → elk/extraction_main.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import time
from elk.utils_generation.parser import get_args
from elk.utils_generation.load_utils import (
from elk.extraction.parser import get_args
from elk.extraction.load_utils import (
load_model,
put_model_on_device,
load_tokenizer,
load_datasets,
)
from elk.utils_generation.generation import create_records, create_hiddenstates
from elk.extraction.extraction import create_records, create_hiddenstates
from tqdm import tqdm

if __name__ == "__main__":
Expand Down
5 changes: 0 additions & 5 deletions elk/generate.sh

This file was deleted.

2 changes: 1 addition & 1 deletion elk/tests/utils_generation/test_gen_parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from elk.utils_generation.parser import get_parser
from elk.extraction.parser import get_parser


def test_args_no_underscores():
Expand Down
2 changes: 1 addition & 1 deletion elk/utils_evaluation/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def get_parser(datasets, models, prefix):
parser.add_argument(
"--hidden-states-directory",
type=Path,
default="generation_results",
default="extraction_results",
help="Where the hidden states and zero-shot accuracy are loaded.",
)
parser.add_argument("--language-model-type", type=str, default="encoder")
Expand Down

0 comments on commit 334ee45

Please sign in to comment.