Created using Colaboratory

pliang279 · Mar 10, 2024 · 93611dd · 93611dd
1 parent a93f8d1
commit 93611dd
Showing 1 changed file with 45 additions and 8 deletions.
diff --git a/run_pretraining.ipynb b/run_pretraining.ipynb
@@ -14,7 +14,9 @@
  "cell_type": "code",
  "source": [
  "import os\n",
- "\n"
+ "\n",
+ "github_token = os.environ[\"GITHUB_TOKEN\"]  # supply the PAT via the environment; never commit credentials\n",
+ "!git clone 'https://{github_token}:x-oauth-basic@github.com/conniec14/sent_debias.git'"
  ],
  "metadata": {
  "colab": {
@@ -237,7 +239,7 @@
  "run = f\"\"\"python {dir}/run_classifier.py\\\n",
  " --data_dir {dir}/glue_data/{task_name} --task_name {task_name}\\\n",
  " --output_dir /content/results/reproduce_debias\\\n",
- " --debias --do_eval\\\n",
+ " --debias --do_train --do_eval\\\n",
  " --do_lower_case\\\n",
  " --normalize --tune_bert\"\"\"\n",
  "\n",
@@ -248,22 +250,57 @@
  "base_uri": "https://localhost:8080/"
  },
  "id": "gkZBKC2-1Gzu",
- "outputId": "02ef9567-a55d-4f00-d5e7-9c8c11daac5a"
+ "outputId": "6650a5fe-10ec-4686-889d-51ceb4f378ec"
  },
- "execution_count": 31,
+ "execution_count": 33,
  "outputs": [
  {
  "output_type": "stream",
  "name": "stdout",
  "text": [
  "output_dir=/content/results/reproduce_debias\n",
- "03/10/2024 20:10:34 - INFO - __main__ - device: cpu n_gpu: 0, distributed training: False, 16-bits training: False\n",
+ "03/10/2024 20:12:36 - INFO - __main__ - device: cpu n_gpu: 0, distributed training: False, 16-bits training: False\n",
+ "03/10/2024 20:12:37 - INFO - pytorch_pretrained_bert.tokenization - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084\n",
+ "03/10/2024 20:12:37 - INFO - __main__ - Initialize model with bert-base-uncased\n",
+ "Calling from_pretrained.\n",
+ "03/10/2024 20:12:37 - INFO - pytorch_pretrained_bert.modeling - loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at /root/.pytorch_pretrained_bert/distributed_-1/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba\n",
+ "03/10/2024 20:12:37 - INFO - pytorch_pretrained_bert.modeling - extracting archive file /root/.pytorch_pretrained_bert/distributed_-1/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba to temp dir /tmp/tmpvjxcba48\n",
+ "03/10/2024 20:12:44 - INFO - pytorch_pretrained_bert.modeling - Model config {\n",
+ " \"attention_probs_dropout_prob\": 0.1,\n",
+ " \"hidden_act\": \"gelu\",\n",
+ " \"hidden_dropout_prob\": 0.1,\n",
+ " \"hidden_size\": 768,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 3072,\n",
+ " \"max_position_embeddings\": 512,\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 12,\n",
+ " \"type_vocab_size\": 2,\n",
+ " \"vocab_size\": 30522\n",
+ "}\n",
+ "\n",
+ "03/10/2024 20:12:48 - INFO - pytorch_pretrained_bert.modeling - Normalize=True\n",
+ "03/10/2024 20:12:48 - INFO - pytorch_pretrained_bert.modeling - Fine-tune bert=True\n",
+ "03/10/2024 20:12:48 - INFO - pytorch_pretrained_bert.modeling - dp=0.1 hidden size=768\n",
+ "03/10/2024 20:12:49 - INFO - pytorch_pretrained_bert.modeling - Weights of BertForSequenceClassification not initialized from pretrained model: ['classifier.weight', 'classifier.bias']\n",
+ "03/10/2024 20:12:49 - INFO - pytorch_pretrained_bert.modeling - Weights from pretrained model not used in BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']\n",
+ "Get data from ['reddit', 'sst', 'wikitext', 'pom', 'meld', 'news_200']\n",
  "Traceback (most recent call last):\n",
  " File \"/content/sent_debias/debias-BERT/experiments/run_classifier.py\", line 1063, in <module>\n",
  " main()\n",
- " File \"/content/sent_debias/debias-BERT/experiments/run_classifier.py\", line 860, in main\n",
- " raise ValueError(\"At least one of `do_train` or `do_eval` must be True.\")\n",
- "ValueError: At least one of `do_train` or `do_eval` must be True.\n"
+ " File \"/content/sent_debias/debias-BERT/experiments/run_classifier.py\", line 891, in main\n",
+ " model, tokenizer, gender_dir_pretrained = prepare_model_and_bias(args, device, num_labels, cache_dir)\t\n",
+ " File \"/content/sent_debias/debias-BERT/experiments/run_classifier.py\", line 741, in prepare_model_and_bias\n",
+ " def_pairs = get_def_pairs(args.def_pairs_name)\n",
+ " File \"/content/sent_debias/debias-BERT/experiments/def_sent_utils.py\", line 299, in get_def_pairs\n",
+ " return get_all()\n",
+ " File \"/content/sent_debias/debias-BERT/experiments/def_sent_utils.py\", line 285, in get_all\n",
+ " bucket = get_single_domain(domain)\n",
+ " File \"/content/sent_debias/debias-BERT/experiments/def_sent_utils.py\", line 277, in get_single_domain\n",
+ " gender, race = get_rest(\"{}.txt\".format(domain))\n",
+ " File \"/content/sent_debias/debias-BERT/experiments/def_sent_utils.py\", line 104, in get_rest\n",
+ " f = open(os.path.join(DIRECTORY, filename), 'r')\n",
+ "FileNotFoundError: [Errno 2] No such file or directory: '../text_corpus/reddit.txt'\n"
  ]
  }
  ]