From 0201e9515a6fd39b4de72823330f09006d001426 Mon Sep 17 00:00:00 2001 From: Connie Date: Mon, 18 Mar 2024 14:54:38 -0400 Subject: [PATCH] Created a script for running sent_debias --- run_pca.ipynb | 1072 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1072 insertions(+) create mode 100644 run_pca.ipynb diff --git a/run_pca.ipynb b/run_pca.ipynb new file mode 100644 index 0000000..4b11fea --- /dev/null +++ b/run_pca.ipynb @@ -0,0 +1,1072 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4", + "authorship_tag": "ABX9TyMNbcpoAw08u8O3CGXAUYiP", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mIG6T67yZDjA", + "outputId": "f0d6589e-8052-4ad1-8c24-178587419dfe" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "fatal: destination path 'sent_debias' already exists and is not an empty directory.\n" + ] + } + ], + "source": [ + "import os\n", + "\n", + "github_token = os.environ.get(\"GITHUB_TOKEN\", \"\")  # supply via the runtime environment; never commit a token\n", + "!git clone 'https://{github_token}:x-oauth-basic@github.com/conniec14/sent_debias.git'" + ] + }, + { + "cell_type": "code", + "source": [ + "!apt-get install git -y\n", + "%pip install torch\n", + "%pip install transformers\n", + "%pip install numpy==1.25.2\n", + "%pip install scikit-learn\n", + "%pip install matplotlib==3.1.2\n", + "# gensim==3.1.2 is not published on PyPI, so it stays disabled; numpy and regex are pinned to versions pip resolves on the Python 3.10 runtime.\n", + "%pip install tqdm==4.45.0\n", + "%pip install regex==2023.12.25\n", + "%pip install pattern3\n", + "%pip install boto3" + ], + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/", + "height": 1000 + }, + "id": "k0ExRvRAZMSq", + "outputId": "f0233564-6113-4b54-b042-7db464fb5132" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Reading package lists... Done\n", + "Building dependency tree... Done\n", + "Reading state information... Done\n", + "git is already the newest version (1:2.34.1-1ubuntu1.10).\n", + "0 upgraded, 0 newly installed, 0 to remove and 38 not upgraded.\n", + "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.2.1+cu121)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.13.1)\n", + "Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch) (4.10.0)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.12)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.2.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.3)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch) (2023.6.0)\n", + "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)\n", + " Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m23.7/23.7 MB\u001b[0m \u001b[31m45.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)\n", + " Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m823.6/823.6 kB\u001b[0m \u001b[31m60.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from 
torch)\n", + " Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.1/14.1 MB\u001b[0m \u001b[31m52.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cudnn-cu12==8.9.2.26 (from torch)\n", + " Downloading nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m731.7/731.7 MB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cublas-cu12==12.1.3.1 (from torch)\n", + " Downloading nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m410.6/410.6 MB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cufft-cu12==11.0.2.54 (from torch)\n", + " Downloading nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.6/121.6 MB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-curand-cu12==10.3.2.106 (from torch)\n", + " Downloading nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.5/56.5 MB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cusolver-cu12==11.4.5.107 (from torch)\n", + " Downloading nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m124.2/124.2 MB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cusparse-cu12==12.1.0.106 (from 
torch)\n", + " Downloading nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m196.0/196.0 MB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-nccl-cu12==2.19.3 (from torch)\n", + " Downloading nvidia_nccl_cu12-2.19.3-py3-none-manylinux1_x86_64.whl (166.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m166.0/166.0 MB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-nvtx-cu12==12.1.105 (from torch)\n", + " Downloading nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m99.1/99.1 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: triton==2.2.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.2.0)\n", + "Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch)\n", + " Downloading nvidia_nvjitlink_cu12-12.4.99-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.5)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n", + "Installing collected packages: nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12\n", + "Successfully installed 
nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.19.3 nvidia-nvjitlink-cu12-12.4.99 nvidia-nvtx-cu12-12.1.105\n", + "Collecting huggingface\n", + " Downloading huggingface-0.0.1-py3-none-any.whl (2.5 kB)\n", + "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.38.2)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.13.1)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.20.3)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.25.2)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (24.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2023.12.25)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.31.0)\n", + "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.15.2)\n", + "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.2)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.2)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers) (2023.6.0)\n", + "Requirement already 
satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers) (4.10.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2024.2.2)\n", + "Installing collected packages: huggingface\n", + "Successfully installed huggingface-0.0.1\n", + "Collecting numpy==1.18.1\n", + " Downloading numpy-1.18.1.zip (5.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.4/5.4 MB\u001b[0m \u001b[31m17.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " \u001b[1;31merror\u001b[0m: \u001b[1msubprocess-exited-with-error\u001b[0m\n", + " \n", + " \u001b[31m×\u001b[0m \u001b[32mPreparing metadata \u001b[0m\u001b[1;32m(\u001b[0m\u001b[32mpyproject.toml\u001b[0m\u001b[1;32m)\u001b[0m did not run successfully.\n", + " \u001b[31m│\u001b[0m exit code: \u001b[1;36m1\u001b[0m\n", + " \u001b[31m╰─>\u001b[0m See above for output.\n", + " \n", + " \u001b[1;35mnote\u001b[0m: This error originates from a subprocess, and is likely not a problem with pip.\n", + " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25herror\n", + "\u001b[1;31merror\u001b[0m: \u001b[1mmetadata-generation-failed\u001b[0m\n", + "\n", + "\u001b[31m×\u001b[0m Encountered error while generating package metadata.\n", + "\u001b[31m╰─>\u001b[0m See above for output.\n", + "\n", + "\u001b[1;35mnote\u001b[0m: This is an issue with the package mentioned above, not pip.\n", + "\u001b[1;36mhint\u001b[0m: See above for details.\n", + "\u001b[31mERROR: Could not find a version that satisfies the requirement sklearn==0.20.0 (from versions: 0.0, 0.0.post1, 0.0.post2, 0.0.post4, 0.0.post5, 0.0.post7, 0.0.post9, 0.0.post10, 0.0.post11, 0.0.post12)\u001b[0m\u001b[31m\n", + "\u001b[0m\u001b[31mERROR: No matching distribution found for sklearn==0.20.0\u001b[0m\u001b[31m\n", + "\u001b[0mCollecting matplotlib==3.1.2\n", + " Downloading matplotlib-3.1.2.tar.gz (40.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.9/40.9 MB\u001b[0m \u001b[31m15.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib==3.1.2) (0.12.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib==3.1.2) (1.4.5)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib==3.1.2) (3.1.2)\n", + "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib==3.1.2) (2.8.2)\n", + "Requirement already satisfied: numpy>=1.11 in /usr/local/lib/python3.10/dist-packages (from matplotlib==3.1.2) (1.25.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.1->matplotlib==3.1.2) (1.16.0)\n", + "Building wheels for collected packages: matplotlib\n", + " Building wheel for matplotlib (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for matplotlib: filename=matplotlib-3.1.2-cp310-cp310-linux_x86_64.whl size=11756951 sha256=aa15e760cf1213d3a6d6013d0fd6c02db9fa3e0f8af19199e36a346ff1916d07\n", + " Stored in directory: /root/.cache/pip/wheels/63/e2/4a/2c345f62f51199d32ca891f2ae2d76cd305dc4f3e9a2c96b3c\n", + "Successfully built matplotlib\n", + "Installing collected packages: matplotlib\n", + " Attempting uninstall: matplotlib\n", + " Found existing installation: matplotlib 3.7.1\n", + " Uninstalling matplotlib-3.7.1:\n", + " Successfully uninstalled matplotlib-3.7.1\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "arviz 0.15.1 requires matplotlib>=3.2, but you have matplotlib 3.1.2 which is incompatible.\n", + "bigframes 0.24.0 requires matplotlib>=3.7.1, but you have matplotlib 3.1.2 which is incompatible.\n", + "mizani 0.9.3 requires matplotlib>=3.5.0, but you have matplotlib 3.1.2 which is incompatible.\n", + "plotnine 0.12.4 requires matplotlib>=3.6.0, but you have matplotlib 3.1.2 which is incompatible.\n", + "seaborn 0.13.1 requires matplotlib!=3.6.1,>=3.4, but you have matplotlib 3.1.2 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed matplotlib-3.1.2\n" + ] + }, + { + "output_type": "display_data", + "data": { + "application/vnd.colab-display-data+json": { + "pip_warning": { + "packages": [ + "matplotlib", + "mpl_toolkits" + ] + }, + "id": "6c05a385726d4e40aea4fb51a98642dc" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[31mERROR: Could not find a version that satisfies the requirement gensim==3.1.2 (from versions: 0.2, 0.3.0, 0.4, 0.4.1, 0.4.2, 0.4.3, 0.4.4, 0.4.5, 0.4.6, 0.4.7, 0.5.0, 0.6.0, 0.7.0, 0.7.1, 0.7.2, 0.7.3, 0.7.4, 0.7.5, 0.7.6, 0.7.7, 0.7.8, 0.8.0, 0.8.1, 0.8.2, 0.8.3, 0.8.4, 0.8.5, 0.8.6, 0.8.7, 0.8.8, 0.8.9, 0.9.0, 0.9.1, 0.10.0, 0.10.1, 0.10.2, 0.10.3, 0.11.1, 0.12.0, 0.12.1, 0.12.2, 0.12.3, 0.12.4, 0.13.0, 0.13.1, 0.13.2, 0.13.3, 0.13.4, 1.0.0, 1.0.1, 2.0.0, 2.1.0, 2.2.0, 2.3.0, 3.0.0, 3.1.0, 3.2.0, 3.3.0, 3.4.0, 3.5.0, 3.6.0, 3.7.0, 3.7.1, 3.7.2, 3.7.3, 3.8.0, 3.8.1, 3.8.2, 3.8.3, 4.0.0, 4.0.1, 4.1.0, 4.1.1, 4.1.2, 4.2.0, 4.3.0, 4.3.1, 4.3.2)\u001b[0m\u001b[31m\n", + "\u001b[0m\u001b[31mERROR: No matching distribution found for gensim==3.1.2\u001b[0m\u001b[31m\n", + "\u001b[0mCollecting tqdm==4.45.0\n", + " Downloading tqdm-4.45.0-py2.py3-none-any.whl (60 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.9/60.9 kB\u001b[0m \u001b[31m972.4 
kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: tqdm\n", + " Attempting uninstall: tqdm\n", + " Found existing installation: tqdm 4.66.2\n", + " Uninstalling tqdm-4.66.2:\n", + " Successfully uninstalled tqdm-4.66.2\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "panel 1.3.8 requires tqdm>=4.48.0, but you have tqdm 4.45.0 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed tqdm-4.45.0\n", + "\u001b[31mERROR: Could not find a version that satisfies the requirement regex==2.5.77 (from versions: 2013-02-16, 2013-02-23, 2013-03-11, 2013-05-21, 2013-06-05, 2013-06-26, 2013-08-04, 2013-10-04, 2013-10-12, 2013-10-21, 2013-10-22, 2013-10-23, 2013-10-24, 2013-10-25, 2013-10-26, 2013-11-29, 2013-12-31, 0.1.20100217, 0.1.20100226, 0.1.20100305, 0.1.20100323, 0.1.20100331, 0.1.20100706, 0.1.20100706.1, 0.1.20100709, 0.1.20100709.1, 0.1.20100719, 0.1.20100725, 0.1.20100814, 0.1.20100816, 0.1.20100824, 0.1.20100912, 0.1.20100913, 0.1.20100918, 0.1.20101009, 0.1.20101029, 0.1.20101030b0, 0.1.20101030, 0.1.20101101, 0.1.20101102a0, 0.1.20101102, 0.1.20101106, 0.1.20101113, 0.1.20101120, 0.1.20101121, 0.1.20101123, 0.1.20101130, 0.1.20101207, 0.1.20101210, 0.1.20101224, 0.1.20101228a0, 0.1.20101228, 0.1.20101229, 0.1.20101230, 0.1.20101231, 0.1.20110104, 0.1.20110106, 0.1.20110124, 0.1.20110313, 0.1.20110314, 0.1.20110315, 0.1.20110429, 0.1.20110502, 0.1.20110504, 0.1.20110510, 0.1.20110514, 0.1.20110524, 0.1.20110608a0, 0.1.20110608, 0.1.20110609, 0.1.20110610, 0.1.20110616, 0.1.20110623a0, 0.1.20110623, 0.1.20110627, 0.1.20110702, 0.1.20110717, 0.1.20110917a0, 0.1.20110917, 0.1.20110922a0, 0.1.20110922, 0.1.20110927, 0.1.20110929, 0.1.20111004, 0.1.20111005, 0.1.20111006, 0.1.20111014, 0.1.20111103, 0.1.20111223, 0.1.20120103, 0.1.20120105, 0.1.20120112, 
0.1.20120114, 0.1.20120115, 0.1.20120119, 0.1.20120122, 0.1.20120123, 0.1.20120126, 0.1.20120128, 0.1.20120129, 0.1.20120208, 0.1.20120209, 0.1.20120301, 0.1.20120303, 0.1.20120316, 0.1.20120317, 0.1.20120323, 0.1.20120416, 0.1.20120502, 0.1.20120503, 0.1.20120504, 0.1.20120506, 0.1.20120611, 0.1.20120613, 0.1.20120705, 0.1.20120708, 0.1.20120709, 0.1.20120710, 0.1.20120803, 0.1.20120825, 0.1.20120904, 0.1.20121008, 0.1.20121017, 0.1.20121031, 0.1.20121105, 0.1.20121113, 0.1.20121120, 0.1.20121216, 0.1.20130120, 0.1.20130124, 0.1.20130125, 2014.1.10, 2014.1.20, 2014.1.30, 2014.2.16, 2014.2.19, 2014.4.10, 2014.5.17, 2014.5.23, 2014.6.28, 2014.8.15, 2014.8.28, 2014.9.18, 2014.9.22, 2014.10.1, 2014.10.2, 2014.10.7, 2014.10.9, 2014.10.23, 2014.10.24, 2014.11.3, 2014.11.13, 2014.11.14, 2014.12.15, 2014.12.24, 2015.3.18, 2015.5.7, 2015.5.10, 2015.5.28, 2015.6.2, 2015.6.4, 2015.6.9, 2015.6.10, 2015.6.14, 2015.6.15, 2015.6.19, 2015.6.21, 2015.6.24, 2015.7.12, 2015.7.19, 2015.9.14, 2015.9.15, 2015.9.23, 2015.9.28, 2015.10.1, 2015.10.5, 2015.10.22, 2015.10.29, 2015.11.5b0, 2015.11.7, 2015.11.8, 2015.11.9, 2015.11.12, 2015.11.14, 2015.11.22, 2016.1.10, 2016.2.23, 2016.2.24, 2016.2.25, 2016.3.2, 2016.3.24, 2016.3.26, 2016.3.31, 2016.4.1, 2016.4.2, 2016.4.3, 2016.4.8, 2016.4.15, 2016.4.25, 2016.5.13, 2016.5.14, 2016.5.15, 2016.5.23, 2016.6.2, 2016.6.5, 2016.6.14, 2016.6.19, 2016.6.24, 2016.7.14, 2016.7.21, 2016.8.27, 2016.9.22, 2016.10.22, 2016.11.18, 2016.11.21, 2016.12.27, 2017.1.12, 2017.1.14, 2017.1.17, 2017.2.8, 2017.4.5, 2017.4.23, 2017.4.29, 2017.5.26, 2017.6.7, 2017.6.20, 2017.6.23, 2017.7.11, 2017.7.26, 2017.7.28, 2017.9.23, 2017.11.8, 2017.11.9, 2017.12.5, 2017.12.9, 2017.12.12, 2018.1.10, 2018.2.3, 2018.2.8, 2018.2.21, 2018.6.6, 2018.6.9, 2018.6.20, 2018.6.21, 2018.7.11, 2018.8.17, 2018.8.29, 2018.11.2, 2018.11.3, 2018.11.6, 2018.11.7, 2018.11.22, 2019.1.23, 2019.1.24, 2019.2.3, 2019.2.5, 2019.2.6, 2019.2.7, 2019.2.18, 2019.2.19, 2019.2.20, 2019.2.21, 2019.3.8, 
2019.3.9, 2019.3.12, 2019.4.9, 2019.4.10, 2019.4.12, 2019.4.14, 2019.5.25, 2019.6.2, 2019.6.5, 2019.6.8, 2019.8.19, 2019.11.1, 2019.12.9, 2019.12.17, 2019.12.18, 2019.12.19, 2019.12.20, 2020.1.7, 2020.1.8, 2020.2.18, 2020.2.20, 2020.4.4, 2020.5.7, 2020.5.13, 2020.5.14, 2020.6.7, 2020.6.8, 2020.7.14, 2020.9.27, 2020.10.11, 2020.10.15, 2020.10.22, 2020.10.23, 2020.10.28, 2020.11.11, 2020.11.13, 2021.3.17, 2021.4.4, 2021.7.1, 2021.7.5, 2021.7.6, 2021.8.3, 2021.8.21, 2021.8.27, 2021.8.28, 2021.9.24, 2021.9.30, 2021.10.8, 2021.10.21, 2021.10.23, 2021.11.1, 2021.11.2, 2021.11.9, 2021.11.10, 2022.1.18, 2022.3.2, 2022.3.15, 2022.4.24, 2022.6.2, 2022.7.9, 2022.7.24, 2022.7.25, 2022.8.17, 2022.9.11, 2022.9.13, 2022.10.31, 2023.3.22, 2023.3.23, 2023.5.2, 2023.5.4, 2023.5.5, 2023.6.3, 2023.8.8, 2023.10.3, 2023.12.25)\u001b[0m\u001b[31m\n", + "\u001b[0m\u001b[31mERROR: No matching distribution found for regex==2.5.77\u001b[0m\u001b[31m\n", + "\u001b[0mCollecting pattern3\n", + " Downloading pattern3-3.0.0.tar.gz (23.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m23.7/23.7 MB\u001b[0m \u001b[31m44.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from pattern3) (4.12.3)\n", + "Collecting cherrypy (from pattern3)\n", + " Downloading CherryPy-18.9.0-py3-none-any.whl (348 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m348.8/348.8 kB\u001b[0m \u001b[31m40.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting docx (from pattern3)\n", + " Downloading docx-0.2.4.tar.gz (54 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.9/54.9 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Collecting feedparser (from pattern3)\n", + " Downloading feedparser-6.0.11-py3-none-any.whl (81 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m81.3/81.3 kB\u001b[0m \u001b[31m12.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting pdfminer3k (from pattern3)\n", + " Downloading pdfminer3k-1.3.4-py3-none-any.whl (100 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m100.8/100.8 kB\u001b[0m \u001b[31m16.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting simplejson (from pattern3)\n", + " Downloading simplejson-3.19.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (137 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m137.9/137.9 kB\u001b[0m \u001b[31m20.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting pdfminer.six (from pattern3)\n", + " Downloading pdfminer.six-20231228-py3-none-any.whl (5.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m14.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->pattern3) (2.5)\n", + "Collecting cheroot>=8.2.1 (from cherrypy->pattern3)\n", + " Downloading cheroot-10.0.0-py3-none-any.whl (101 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m101.6/101.6 kB\u001b[0m \u001b[31m16.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting portend>=2.1.1 (from cherrypy->pattern3)\n", + " Downloading portend-3.2.0-py3-none-any.whl (5.3 kB)\n", + "Requirement already satisfied: more-itertools in /usr/local/lib/python3.10/dist-packages (from cherrypy->pattern3) (10.1.0)\n", + "Collecting zc.lockfile (from 
cherrypy->pattern3)\n", + " Downloading zc.lockfile-3.0.post1-py3-none-any.whl (9.8 kB)\n", + "Collecting jaraco.collections (from cherrypy->pattern3)\n", + " Downloading jaraco.collections-5.0.0-py3-none-any.whl (10 kB)\n", + "Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from docx->pattern3) (4.9.4)\n", + "Requirement already satisfied: Pillow>=2.0 in /usr/local/lib/python3.10/dist-packages (from docx->pattern3) (9.4.0)\n", + "Collecting sgmllib3k (from feedparser->pattern3)\n", + " Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)\n", + " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: charset-normalizer>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from pdfminer.six->pattern3) (3.3.2)\n", + "Requirement already satisfied: cryptography>=36.0.0 in /usr/local/lib/python3.10/dist-packages (from pdfminer.six->pattern3) (42.0.5)\n", + "Collecting ply (from pdfminer3k->pattern3)\n", + " Downloading ply-3.11-py2.py3-none-any.whl (49 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.6/49.6 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting jaraco.functools (from cheroot>=8.2.1->cherrypy->pattern3)\n", + " Downloading jaraco.functools-4.0.0-py3-none-any.whl (9.8 kB)\n", + "Requirement already satisfied: cffi>=1.12 in /usr/local/lib/python3.10/dist-packages (from cryptography>=36.0.0->pdfminer.six->pattern3) (1.16.0)\n", + "Collecting tempora>=1.8 (from portend>=2.1.1->cherrypy->pattern3)\n", + " Downloading tempora-5.5.1-py3-none-any.whl (13 kB)\n", + "Collecting jaraco.text (from jaraco.collections->cherrypy->pattern3)\n", + " Downloading jaraco.text-3.12.0-py3-none-any.whl (11 kB)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from zc.lockfile->cherrypy->pattern3) (67.7.2)\n", + "Requirement already satisfied: pycparser in 
/usr/local/lib/python3.10/dist-packages (from cffi>=1.12->cryptography>=36.0.0->pdfminer.six->pattern3) (2.21)\n", + "Requirement already satisfied: pytz in /usr/local/lib/python3.10/dist-packages (from tempora>=1.8->portend>=2.1.1->cherrypy->pattern3) (2023.4)\n", + "Collecting jaraco.context>=4.1 (from jaraco.text->jaraco.collections->cherrypy->pattern3)\n", + " Downloading jaraco.context-4.3.0-py3-none-any.whl (5.3 kB)\n", + "Collecting autocommand (from jaraco.text->jaraco.collections->cherrypy->pattern3)\n", + " Downloading autocommand-2.2.2-py3-none-any.whl (19 kB)\n", + "Requirement already satisfied: inflect in /usr/local/lib/python3.10/dist-packages (from jaraco.text->jaraco.collections->cherrypy->pattern3) (7.0.0)\n", + "Requirement already satisfied: pydantic>=1.9.1 in /usr/local/lib/python3.10/dist-packages (from inflect->jaraco.text->jaraco.collections->cherrypy->pattern3) (2.6.4)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from inflect->jaraco.text->jaraco.collections->cherrypy->pattern3) (4.10.0)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.9.1->inflect->jaraco.text->jaraco.collections->cherrypy->pattern3) (0.6.0)\n", + "Requirement already satisfied: pydantic-core==2.16.3 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.9.1->inflect->jaraco.text->jaraco.collections->cherrypy->pattern3) (2.16.3)\n", + "Building wheels for collected packages: pattern3, docx, sgmllib3k\n", + " Building wheel for pattern3 (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for pattern3: filename=pattern3-3.0.0-py2.py3-none-any.whl size=18554334 sha256=76e184fd29e6d304db30fa9b7da4c64c2858bd7c2d3d65996c9801fc6c3b0737\n", + " Stored in directory: /root/.cache/pip/wheels/c2/e6/76/468d5343ebe1d64778175e1d6147a2ee1244c58090410f0c71\n", + " Building wheel for docx (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for docx: filename=docx-0.2.4-py3-none-any.whl size=53895 sha256=65607672136d651ffd4b9589eb4e413c442e8138d37ced639e6b3ce3d92d8038\n", + " Stored in directory: /root/.cache/pip/wheels/81/f5/1d/e09ba2c1907a43a4146d1189ae4733ca1a3bfe27ee39507767\n", + " Building wheel for sgmllib3k (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for sgmllib3k: filename=sgmllib3k-1.0.0-py3-none-any.whl size=6049 sha256=9cb14f057e95e09cd37e4b10efca1fa9a8c6f5fad78bd10ebd4ea81849aa94b5\n", + " Stored in directory: /root/.cache/pip/wheels/f0/69/93/a47e9d621be168e9e33c7ce60524393c0b92ae83cf6c6e89c5\n", + "Successfully built pattern3 docx sgmllib3k\n", + "Installing collected packages: sgmllib3k, ply, zc.lockfile, simplejson, pdfminer3k, jaraco.functools, jaraco.context, feedparser, docx, autocommand, tempora, cheroot, portend, pdfminer.six, jaraco.text, jaraco.collections, cherrypy, pattern3\n", + "Successfully installed autocommand-2.2.2 cheroot-10.0.0 cherrypy-18.9.0 docx-0.2.4 feedparser-6.0.11 jaraco.collections-5.0.0 jaraco.context-4.3.0 jaraco.functools-4.0.0 jaraco.text-3.12.0 pattern3-3.0.0 pdfminer.six-20231228 pdfminer3k-1.3.4 ply-3.11 portend-3.2.0 sgmllib3k-1.0.0 simplejson-3.19.2 tempora-5.5.1 zc.lockfile-3.0.post1\n", + "Collecting boto3\n", + " Downloading boto3-1.34.64-py3-none-any.whl (139 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m139.3/139.3 kB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting botocore<1.35.0,>=1.34.64 (from boto3)\n", + " Downloading botocore-1.34.64-py3-none-any.whl (12.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.0/12.0 MB\u001b[0m \u001b[31m28.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting jmespath<2.0.0,>=0.7.1 (from boto3)\n", + " Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)\n", + "Collecting 
s3transfer<0.11.0,>=0.10.0 (from boto3)\n", + " Downloading s3transfer-0.10.1-py3-none-any.whl (82 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m82.2/82.2 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: python-dateutil<3.0.0,>=2.1 in /usr/local/lib/python3.10/dist-packages (from botocore<1.35.0,>=1.34.64->boto3) (2.8.2)\n", + "Requirement already satisfied: urllib3!=2.2.0,<3,>=1.25.4 in /usr/local/lib/python3.10/dist-packages (from botocore<1.35.0,>=1.34.64->boto3) (2.0.7)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil<3.0.0,>=2.1->botocore<1.35.0,>=1.34.64->boto3) (1.16.0)\n", + "Installing collected packages: jmespath, botocore, s3transfer, boto3\n", + "Successfully installed boto3-1.34.64 botocore-1.34.64 jmespath-1.0.1 s3transfer-0.10.1\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "%pip install pyflame==0.3.1\n", + "%load_ext pyflame\n", + "%config PyFlameMagic.flamegraph_script_path = '/content/sent_debias/debias-BERT/experiments/flamegraph.pl'" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5SUSTo8zmHy1", + "outputId": "9afaf01f-b03a-4e34-c0d5-3ef70cd68e93" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting pyflame\n", + " Downloading pyflame-0.3.1-py3-none-any.whl (11 kB)\n", + "Installing collected packages: pyflame\n", + "Successfully installed pyflame-0.3.1\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "%config PyFlameMagic.flamegraph_script_path = '/content/sent_debias/debias-BERT/experiments/flamegraph.pl'" + ], + "metadata": { + "id": "ZLmBQkz2x0BK" + }, + "execution_count": 24, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "! 
sed -i '36s/.*/ pass/' /usr/local/lib/python3.10/dist-packages/pattern3/text/tree.py\n", + "# !unzip /content/sent_debias/debias-BERT/experiments/glue_data.zip /content/sent_debias/debias-BERT/experiments/" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7N4_hh4LZP1y", + "outputId": "d90d00e1-9bb1-4d5f-b5ef-b93e0b41fd1e" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "sed: can't read /usr/local/lib/python3.10/dist-packages/pattern3/text/tree.pyn: No such file or directory\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "task_name = \"SST-2\"\n", + "#\"cola\"\n", + "#\"sst-2\"\n", + "#\"qnli\"\n", + "\n", + "glue_dir = \"\"\n", + "dir = \"/content/sent_debias/debias-BERT/experiments\"" + ], + "metadata": { + "id": "PQP4Jz47ZoDK" + }, + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "%%pyflame\n", + "run = f\"\"\"python {dir}/run_classifier.py\\\n", + " --data_dir {dir}/glue_data/{task_name} --task_name {task_name}\\\n", + " --output_dir bert-base-uncased\\\n", + " --debias --do_eval\\\n", + " --do_lower_case\\\n", + " --normalize\"\"\"\n", + "\n", + "!{run}" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "TveeAWOMZswp", + "outputId": "5dafd9d3-049e-4028-b918-54236b1e5856" + }, + "execution_count": 40, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "output_dir=bert-base-uncased\n", + "03/18/2024 18:49:15 - INFO - __main__ - device: cuda n_gpu: 1, distributed training: False, 16-bits training: False\n", + "Calling from_pretrained.\n", + "03/18/2024 18:49:15 - INFO - pytorch_pretrained_bert.modeling - loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at 
/root/.pytorch_pretrained_bert/distributed_-1/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba\n", + "03/18/2024 18:49:15 - INFO - pytorch_pretrained_bert.modeling - extracting archive file /root/.pytorch_pretrained_bert/distributed_-1/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba to temp dir /tmp/tmprufqf73a\n", + "03/18/2024 18:49:19 - INFO - pytorch_pretrained_bert.modeling - Model config {\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"max_position_embeddings\": 512,\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"type_vocab_size\": 2,\n", + " \"vocab_size\": 30522\n", + "}\n", + "\n", + "03/18/2024 18:49:21 - INFO - pytorch_pretrained_bert.modeling - Normalize=True\n", + "03/18/2024 18:49:21 - INFO - pytorch_pretrained_bert.modeling - Fine-tune bert=False\n", + "03/18/2024 18:49:21 - INFO - pytorch_pretrained_bert.modeling - dp=0.1 hidden size=768\n", + "03/18/2024 18:49:22 - INFO - pytorch_pretrained_bert.modeling - Weights of BertForSequenceClassification not initialized from pretrained model: ['classifier.weight', 'classifier.bias']\n", + "03/18/2024 18:49:22 - INFO - pytorch_pretrained_bert.modeling - Weights from pretrained model not used in BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']\n", + "03/18/2024 18:49:22 - INFO - pytorch_pretrained_bert.tokenization - loading vocabulary file 
https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084\n", + "Get data from ['reddit', 'sst', 'wikitext', 'pom', 'meld', 'news_200']\n", + "reddit.txt 9\n", + "reddit has 1167 pairs of templates\n", + "sst has 1511 pairs of templates\n", + "wikitext.txt 9\n", + "wikitext has 13750 pairs of templates\n", + "pom has 2808 pairs of templates\n", + "meld.txt 9\n", + "meld has 1202 pairs of templates\n", + "news_200.txt 9\n", + "news_200 has 162622 pairs of templates\n", + "183060 pairs of templates in total\n", + " 0% 0/183060 [00:00\n", + " main()\n", + " File \"/content/sent_debias/debias-BERT/experiments/run_classifier.py\", line 990, in main\n", + " gender_dir_tuned = compute_gender_dir(device, tokenizer, bert_encoder, def_pairs, args.max_seq_length, k=args.num_dimension, load=False, task=args.task_name)\n", + " File \"/content/sent_debias/debias-BERT/experiments/run_classifier.py\", line 349, in compute_gender_dir\n", + " all_embeddings = extract_embeddings_pair(bert_encoder, tokenizer, def_examples, max_seq_length, device, load, task, \n", + " File \"/content/sent_debias/debias-BERT/experiments/run_classifier.py\", line 294, in extract_embeddings_pair\n", + " embeddings_b = bert_encoder.encode(input_ids=inputs_b, token_type_ids=segments_b, attention_mask=mask_b, word_level=False)\n", + " File \"/content/sent_debias/debias-BERT/experiments/run_classifier.py\", line 225, in encode\n", + " embeddings = self.bert(input_ids, token_type_ids=token_type_ids, \n", + " File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1511, in _wrapped_call_impl\n", + " return self._call_impl(*args, **kwargs)\n", + " File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1520, in _call_impl\n", + " return forward_call(*args, 
**kwargs)\n", + " File \"/content/sent_debias/debias-BERT/experiments/../pytorch_pretrained_bert/modeling.py\", line 1004, in forward\n", + " encoded_layers, pooled_output = self.bert(input_ids, token_type_ids=token_type_ids, \n", + " File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1511, in _wrapped_call_impl\n", + " return self._call_impl(*args, **kwargs)\n", + " File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1520, in _call_impl\n", + " return forward_call(*args, **kwargs)\n", + " File \"/content/sent_debias/debias-BERT/experiments/../pytorch_pretrained_bert/modeling.py\", line 732, in forward\n", + " encoded_layers = self.encoder(embedding_output,\n", + " File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1511, in _wrapped_call_impl\n", + " return self._call_impl(*args, **kwargs)\n", + " File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1520, in _call_impl\n", + " return forward_call(*args, **kwargs)\n", + " File \"/content/sent_debias/debias-BERT/experiments/../pytorch_pretrained_bert/modeling.py\", line 406, in forward\n", + " hidden_states = layer_module(hidden_states, attention_mask)\n", + " File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1511, in _wrapped_call_impl\n", + " return self._call_impl(*args, **kwargs)\n", + " File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1520, in _call_impl\n", + " return forward_call(*args, **kwargs)\n", + " File \"/content/sent_debias/debias-BERT/experiments/../pytorch_pretrained_bert/modeling.py\", line 392, in forward\n", + " intermediate_output = self.intermediate(attention_output)\n", + " File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1511, in _wrapped_call_impl\n", + " return self._call_impl(*args, **kwargs)\n", + " File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", 
line 1520, in _call_impl\n", + " return forward_call(*args, **kwargs)\n", + " File \"/content/sent_debias/debias-BERT/experiments/../pytorch_pretrained_bert/modeling.py\", line 365, in forward\n", + " hidden_states = self.intermediate_act_fn(hidden_states)\n", + " File \"/content/sent_debias/debias-BERT/experiments/../pytorch_pretrained_bert/modeling.py\", line 124, in gelu\n", + " return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))\n", + "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 48.00 MiB. GPU 0 has a total capacity of 14.75 GiB of which 27.06 MiB is free. Process 263168 has 14.72 GiB memory in use. Of the allocated memory 14.53 GiB is allocated by PyTorch, and 68.76 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "