This is an implementation of our arXiv preprint paper "KART: Privacy Leakage Framework of Language Models Pre-trained with Clinical Records" (https://arxiv.org/abs/2101.00036).
- Python 3.6.4
- Make sure that `$HOME` is included in the environment variable `$PYTHONPATH` (e.g. `export PYTHONPATH="$HOME${PYTHONPATH:+:$PYTHONPATH}"`).
curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py > ~/get-poetry.py
cd ~
python get-poetry.py --version 1.1.4
poetry config virtualenvs.in-project true
cd ~
git clone git@github.com:yutanakamura-tky/kart.git
cd ~/kart
poetry install
cd ~/kart/src
bash make_mimic_iii_dummy_phi.sh
cd ~/kart/src
bash get_google_bert_model.sh
cd ~/kart/src
bash make_pretraining_data.sh
cd ~/kart/src
bash pretrain_bert_from_scratch.sh
cd ~/kart/src
bash pretrain_bert_from_bert_base_uncased.sh
Please cite our arXiv paper:
@misc{kart,
Author = {Yuta Nakamura and Shouhei Hanaoka and Yukihiro Nomura and Naoto Hayashi and Osamu Abe and Shuntaro Yada and Shoko Wakamiya and Eiji Aramaki},
Title = {KART: Privacy Leakage Framework of Language Models Pre-trained with Clinical Records},
Year = {2020},
Eprint = {arXiv:2101.00036},
}