Skip to content

Commit

Permalink
Add functions and classes
Browse files Browse the repository at this point in the history
  • Loading branch information
mrspiggot committed Feb 19, 2023
1 parent 767e900 commit 09a5ca3
Show file tree
Hide file tree
Showing 10 changed files with 172 additions and 0 deletions.
4 changes: 4 additions & 0 deletions Split.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
prompt,completion
"OpenAI's GPT-3 can be fine-tuned for specialized purposes, opening up a new level of AI for industries.","Chatbots and assistants can be enhanced to better meet user needs and provide more personalized service. Fine-tuning also leads to more accurate and precise natural language processing (NLP), enabling complex human-like interactions."
"The implications for future AI technology are immense, with the potential to open up new markets and applications.","Fine-tuning also makes machine learning more accessible, democratizing the field and making it easier to adopt. All of this adds up to a technological milestone that has the potential to significantly impact how we interact with AI in the future."
"With GPT-3's ability to learn and adapt, the future looks bright for those who can harness the power of this impressive technology.","The process of fine-tuning could help revolutionize industries and create new opportunities for innovation. The potential of GPT-3's fine-tuning is limitless, and we are only beginning to scratch the surface of what is possible."
1 change: 1 addition & 0 deletions Split.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"prompt": "OpenAI's GPT-3 can be fine-tuned for specialized purposes, opening up a new level of AI for industries.", "completion": "Chatbots and assistants can be enhanced to better meet user needs and provide more personalized service. Fine-tuning also leads to more accurate and precise natural language processing (NLP), enabling complex human-like interactions."}, {"prompt": "The implications for future AI technology are immense, with the potential to open up new markets and applications.", "completion": "Fine-tuning also makes machine learning more accessible, democratizing the field and making it easier to adopt. All of this adds up to a technological milestone that has the potential to significantly impact how we interact with AI in the future."}, {"prompt": "With GPT-3's ability to learn and adapt, the future looks bright for those who can harness the power of this impressive technology.", "completion": "The process of fine-tuning could help revolutionize industries and create new opportunities for innovation. The potential of GPT-3's fine-tuning is limitless, and we are only beginning to scratch the surface of what is possible."}]
Binary file added Split.xlsx
Binary file not shown.
61 changes: 61 additions & 0 deletions fullClass.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import re
import pandas as pd
import json

class LucidateTextSplitter:
    """Split text into sentences and pair every n-th sentence (a "prompt")
    with the sentences that follow it (its "completion"), producing rows
    suitable for a fine-tuning dataset."""

    def __init__(self, text, n):
        # text: raw input paragraph(s); n: prompt stride — every n-th
        # sentence becomes a prompt.
        self.text = text
        self.n = n

    def split_into_sentences_with_prompts(self):
        """Return a DataFrame with 'prompt' and 'completion' columns.

        Raises:
            ValueError: if the text is empty, n is not positive, or the
                text contains fewer than n sentences.
        """
        if self.text == "":
            raise ValueError("Input text cannot be empty.")
        if self.n <= 0:
            raise ValueError("n must be a positive integer.")
        # Split on whitespace following '.', '!' or '?'; the lookbehind
        # keeps the terminator attached to its sentence.
        sentences = re.split(r"(?<=[.!?]) +", self.text)
        if len(sentences) < self.n:
            raise ValueError("Input text must have at least n sentences.")
        prompts = sentences[::self.n]
        completions = []
        for i in range(len(prompts) - 1):
            completions.append(" ".join(sentences[self.n * i + 1:self.n * (i + 1)]))
        # The last prompt's completion runs to the end of the text.
        completions.append(" ".join(sentences[self.n * (len(prompts) - 1) + 1:]))
        return pd.DataFrame({'prompt': prompts, 'completion': completions})

    def save_as_excel(self, filename):
        """Write the prompt/completion pairs to an Excel workbook."""
        self.split_into_sentences_with_prompts().to_excel(filename, index=False)

    def save_as_csv(self, filename):
        """Write the prompt/completion pairs to a CSV file."""
        self.split_into_sentences_with_prompts().to_csv(filename, index=False)

    def save_as_json(self, filename):
        """Write the prompt/completion pairs as a JSON list of records."""
        # to_dict(orient='records') replaces the manual row-by-row loop.
        records = self.split_into_sentences_with_prompts().to_dict(orient='records')
        with open(filename, 'w') as f:
            json.dump(records, f)



# Demo: build prompt/completion pairs from a paragraph about GPT-3
# fine-tuning (one prompt every 3 sentences) and export them to JSON,
# CSV and Excel.
text = ("OpenAI's GPT-3 can be fine-tuned for specialized purposes, opening up a new level of AI for industries. "
        "Chatbots and assistants can be enhanced to better meet user needs and provide more personalized service. "
        "Fine-tuning also leads to more accurate and precise natural language processing (NLP), enabling complex human-"
        "like interactions. The implications for future AI technology are immense, with the potential to open up new "
        "markets and applications. Fine-tuning also makes machine learning more accessible, democratizing the field "
        "and making it easier to adopt. All of this adds up to a technological milestone that has the potential to s"
        "ignificantly impact how we interact with AI in the future. With GPT-3's ability to learn and adapt, the "
        "future looks bright for those who can harness the power of this impressive technology. The process of "
        "fine-tuning could help revolutionize industries and create new opportunities for innovation. The potential of "
        "GPT-3's fine-tuning is limitless, and we are only beginning to scratch the surface of what is possible.")
n = 3
splitter = LucidateTextSplitter(text, n)
print(splitter.split_into_sentences_with_prompts())
splitter.save_as_json("Split.json")
splitter.save_as_csv("Split.csv")
splitter.save_as_excel("Split.xlsx")
22 changes: 22 additions & 0 deletions generatePrompts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import re
import pandas as pd

def split_into_sentences_with_prompts(text, n):
    """Pair every n-th sentence of *text* (a prompt) with the sentences
    that follow it (its completion); return a two-column DataFrame."""
    sentences = re.split("(?<=[.!?]) +", text)
    prompts = sentences[::n]
    # All but the last completion span exactly the sentences between
    # consecutive prompts.
    completions = [
        " ".join(sentences[n * i + 1:n * (i + 1)])
        for i in range(len(prompts) - 1)
    ]
    # The final completion runs from the last prompt to the end.
    completions.append(" ".join(sentences[n * (len(prompts) - 1) + 1:]))
    return pd.DataFrame({'prompt': prompts, 'completion': completions})

# Demo: 50 numbered sentences, one prompt every 5 sentences.
text = " ".join(f"This is sentence {k}." for k in range(1, 51))
n = 5
df = split_into_sentences_with_prompts(text, n)
print(df)
12 changes: 12 additions & 0 deletions para2sentence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import re

def split_into_sentences(text):
    """Break *text* into sentences, splitting on the spaces that follow
    '.', '!' or '?' (the lookbehind keeps the punctuation attached)."""
    return re.split("(?<=[.!?]) +", text)

# Demo: '.', '!' and '?' end sentences; the ';' deliberately does not.
text = ("This is the first sentence. And this, my friends, "
        "is the second one! Is this the third one? Finally; the end.")
print(split_into_sentences(text))
32 changes: 32 additions & 0 deletions promptsClass.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import re
import pandas as pd

class TextSplitter:
    """Turn a block of text into (prompt, completion) rows: every n-th
    sentence becomes a prompt and the sentences after it its completion."""

    def __init__(self, text, n):
        self.text = text
        self.n = n

    def split_into_sentences_with_prompts(self):
        """Build and return the prompt/completion DataFrame.

        Raises ValueError for empty text, non-positive n, or text with
        fewer than n sentences.
        """
        if self.text == "":
            raise ValueError("Input text cannot be empty.")
        if self.n <= 0:
            raise ValueError("n must be a positive integer.")
        # Split on the whitespace that follows a sentence terminator.
        sentences = re.split("(?<=[.!?]) +", self.text)
        if len(sentences) < self.n:
            raise ValueError("Input text must have at least n sentences.")
        step = self.n
        prompts = sentences[::step]
        completions = [
            " ".join(sentences[step * i + 1:step * (i + 1)])
            for i in range(len(prompts) - 1)
        ]
        # Final completion consumes everything after the last prompt.
        completions.append(" ".join(sentences[step * (len(prompts) - 1) + 1:]))
        return pd.DataFrame({'prompt': prompts, 'completion': completions})


# Demo: exercise TextSplitter on 50 numbered sentences with n = 5.
text = " ".join(f"This is sentence {k}." for k in range(1, 51))
n = 5
splitter = TextSplitter(text, n)
df = splitter.split_into_sentences_with_prompts()
print(df)
14 changes: 14 additions & 0 deletions scrape_5quotes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import requests
from bs4 import BeautifulSoup

# Scrape the first five quotes (text + author) from quotes.toscrape.com.
url = 'https://quotes.toscrape.com/'  # was the malformed 'http:https://...'
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

# Query the DOM once, instead of re-running find_all() twice per iteration.
quotes = soup.find_all('span', class_='text')
authors = soup.find_all('small', class_='author')

for i, (quote, author) in enumerate(zip(quotes[:5], authors[:5]), start=1):
    print(f'Quote {i}:')
    print(quote.get_text())
    print(author.get_text())
    print('-' * 40)
12 changes: 12 additions & 0 deletions scrape_quotes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import requests
from bs4 import BeautifulSoup

# Scrape the first quote and its author from quotes.toscrape.com.
url = 'https://quotes.toscrape.com/'  # was the malformed 'http:https://...'
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

# find() returns the first matching element on the page.
quote = soup.find('span', class_='text').get_text()
author = soup.find('small', class_='author').get_text()

print(quote)
print(author)
14 changes: 14 additions & 0 deletions soup_wiki.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import requests
from bs4 import BeautifulSoup
import textwrap

# Fetch the Wikipedia article on Beautiful Soup and print its first
# paragraph wrapped to 80 columns.
url = 'https://en.wikipedia.org/wiki/Beautiful_Soup_(HTML_parser)'
page = requests.get(url)
soup = BeautifulSoup(page.content, 'html.parser')

# The article body lives in the 'mw-parser-output' div; .p is its first <p>.
intro = soup.find('div', class_='mw-parser-output').p.get_text()

print(textwrap.fill(intro, width=80))

0 comments on commit 09a5ca3

Please sign in to comment.