Skip to content

Commit

Permalink
Add initial implementation of training and generation
Browse files Browse the repository at this point in the history
  • Loading branch information
Elleo committed Apr 16, 2020
1 parent ab26b73 commit 52170f7
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 0 deletions.
10 changes: 10 additions & 0 deletions generate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/env python3
import gpt_2_simple as gpt2

# Delimiter tokens that bracket a petition; the same strings are passed to
# gpt2.generate as the prompt prefix and the truncation marker.
START_TOKEN = "<|startofpetition|>"
END_TOKEN = "<|endofpetition|>"

sess = gpt2.start_tf_sess()
gpt2.load_gpt2(sess)

# Ask for the samples as a list; only the first entry is used below.
samples = gpt2.generate(
    sess,
    prefix=START_TOKEN,
    truncate=END_TOKEN,
    include_prefix=False,
    return_as_list=True,
)

# The start token is sometimes duplicated in the generated text, so strip
# every remaining occurrence before printing.
petition = samples[0].replace(START_TOKEN, "")

print(petition)
49 changes: 49 additions & 0 deletions train.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/usr/bin/env python3
import gpt_2_simple as gpt2
import os
import requests
import json

# Fine-tuning configuration: number of training steps and the base model name.
TRAINING_STEPS = 1000
MODEL_NAME = "124M"

# The base model is looked up under models/<MODEL_NAME>; the download is only
# triggered when that directory does not exist yet.
model_dir = os.path.join("models", MODEL_NAME)
model_is_present = os.path.isdir(model_dir)
if not model_is_present:
    print(f"Downloading {MODEL_NAME} model...")
    gpt2.download_gpt2(model_name=MODEL_NAME)

output_filename = "petitions.txt"


def _petition_attributes(petition_json):
    """Return the attribute mapping of a parsed petition file, or None.

    Two layouts are recognised: attributes stored at the top level under
    'attributes', or nested under 'data' -> 'attributes'. Top-level
    'attributes' takes precedence when both keys are present. Documents with
    neither key yield None so the caller can skip them.
    """
    if 'attributes' in petition_json:
        return petition_json['attributes']
    if 'data' in petition_json:
        return petition_json['data']['attributes']
    return None


def _format_petition(petition):
    """Render one petition as a delimited text sample.

    The sample is the action line, a separator, the optional background and
    additional details (carriage returns removed), and, for rejected
    petitions, the rejection details unless they start with "https://".
    The whole sample is wrapped in start/end petition tokens.

    Raises:
        KeyError: if the petition has no 'action' field.
    """
    parts = [
        "<|startofpetition|>\n",
        petition['action'] + "\n",
        "=================\n\n",
    ]

    for key in ('background', 'additional_details'):
        text = petition.get(key)
        if text is not None:
            parts.append(text.replace("\r", "") + "\n")

    # 'rejection' may be missing or null, so fall back to an empty mapping
    # instead of indexing into None.
    rejection = petition.get('rejection') or {}
    details = rejection.get('details')
    if (petition.get('rejected_at') is not None
            and details is not None
            and not details.startswith("https://")):
        parts.append("\n\nReason for rejection:\n\n")
        parts.append(details.replace("\r", "") + "\n")

    parts.append("<|endofpetition|>\n\n")
    return "".join(parts)


# Walk the petitions directory tree and append every usable .json petition to
# the training file. Both files are opened with an explicit UTF-8 encoding and
# through context managers so handles are closed even if parsing fails.
with open(output_filename, 'w', encoding="utf-8") as output:
    for dirname, subdirs, files in os.walk(os.path.join('uk_petitions_data', 'petitions')):
        print(dirname)
        for filename in files:
            if not filename.endswith(".json"):
                continue
            print(filename)
            with open(os.path.join(dirname, filename), 'r', encoding="utf-8") as petition_file:
                petition_json = json.load(petition_file)
            petition = _petition_attributes(petition_json)
            if petition is None:
                continue
            output.write(_format_petition(petition))

# Start a TensorFlow session and fine-tune on the petitions file written
# above, using the model name and step count configured at the top of the
# script. save_every=100 is passed through to gpt2.finetune unchanged.
sess = gpt2.start_tf_sess()
finetune_options = {
    "model_name": MODEL_NAME,
    "steps": TRAINING_STEPS,
    "save_every": 100,
}
gpt2.finetune(sess, output_filename, **finetune_options)

0 comments on commit 52170f7

Please sign in to comment.