Skip to content

Commit

Permalink
Add functions and classes
Browse files Browse the repository at this point in the history
  • Loading branch information
mrspiggot committed Feb 19, 2023
1 parent 767e900 commit 09a5ca3
Show file tree
Hide file tree
Showing 10 changed files with 172 additions and 0 deletions.
4 changes: 4 additions & 0 deletions Split.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
prompt,completion
"OpenAI's GPT-3 can be fine-tuned for specialized purposes, opening up a new level of AI for industries.","Chatbots and assistants can be enhanced to better meet user needs and provide more personalized service. Fine-tuning also leads to more accurate and precise natural language processing (NLP), enabling complex human-like interactions."
"The implications for future AI technology are immense, with the potential to open up new markets and applications.","Fine-tuning also makes machine learning more accessible, democratizing the field and making it easier to adopt. All of this adds up to a technological milestone that has the potential to significantly impact how we interact with AI in the future."
"With GPT-3's ability to learn and adapt, the future looks bright for those who can harness the power of this impressive technology.","The process of fine-tuning could help revolutionize industries and create new opportunities for innovation. The potential of GPT-3's fine-tuning is limitless, and we are only beginning to scratch the surface of what is possible."
1 change: 1 addition & 0 deletions Split.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"prompt": "OpenAI's GPT-3 can be fine-tuned for specialized purposes, opening up a new level of AI for industries.", "completion": "Chatbots and assistants can be enhanced to better meet user needs and provide more personalized service. Fine-tuning also leads to more accurate and precise natural language processing (NLP), enabling complex human-like interactions."}, {"prompt": "The implications for future AI technology are immense, with the potential to open up new markets and applications.", "completion": "Fine-tuning also makes machine learning more accessible, democratizing the field and making it easier to adopt. All of this adds up to a technological milestone that has the potential to significantly impact how we interact with AI in the future."}, {"prompt": "With GPT-3's ability to learn and adapt, the future looks bright for those who can harness the power of this impressive technology.", "completion": "The process of fine-tuning could help revolutionize industries and create new opportunities for innovation. The potential of GPT-3's fine-tuning is limitless, and we are only beginning to scratch the surface of what is possible."}]
Binary file added Split.xlsx
Binary file not shown.
61 changes: 61 additions & 0 deletions fullClass.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import re
import pandas as pd
import json

class LucidateTextSplitter:
    """Split text into sentences and pair every n-th sentence (a "prompt")
    with the sentences that follow it (its "completion"), producing rows
    suitable for a fine-tuning dataset."""

    def __init__(self, text, n):
        # text: raw input paragraph(s); n: prompt stride — every n-th
        # sentence becomes a prompt.
        self.text = text
        self.n = n

    def split_into_sentences_with_prompts(self):
        """Return a DataFrame with 'prompt' and 'completion' columns.

        Raises:
            ValueError: if the text is empty, n is not positive, or the
                text contains fewer than n sentences.
        """
        if self.text == "":
            raise ValueError("Input text cannot be empty.")
        if self.n <= 0:
            raise ValueError("n must be a positive integer.")
        # Split on whitespace following '.', '!' or '?'; the lookbehind
        # keeps the terminator attached to its sentence.
        sentences = re.split(r"(?<=[.!?]) +", self.text)
        if len(sentences) < self.n:
            raise ValueError("Input text must have at least n sentences.")
        prompts = sentences[::self.n]
        completions = []
        for i in range(len(prompts) - 1):
            completions.append(" ".join(sentences[self.n * i + 1:self.n * (i + 1)]))
        # The last prompt's completion runs to the end of the text.
        completions.append(" ".join(sentences[self.n * (len(prompts) - 1) + 1:]))
        return pd.DataFrame({'prompt': prompts, 'completion': completions})

    def save_as_excel(self, filename):
        """Write the prompt/completion pairs to an Excel workbook."""
        self.split_into_sentences_with_prompts().to_excel(filename, index=False)

    def save_as_csv(self, filename):
        """Write the prompt/completion pairs to a CSV file."""
        self.split_into_sentences_with_prompts().to_csv(filename, index=False)

    def save_as_json(self, filename):
        """Write the prompt/completion pairs as a JSON list of records."""
        # to_dict(orient='records') replaces the manual row-by-row loop.
        records = self.split_into_sentences_with_prompts().to_dict(orient='records')
        with open(filename, 'w') as f:
            json.dump(records, f)



# Demo: build prompt/completion pairs from a paragraph about GPT-3
# fine-tuning (one prompt every 3 sentences) and export them to JSON,
# CSV and Excel.
text = ("OpenAI's GPT-3 can be fine-tuned for specialized purposes, opening up a new level of AI for industries. "
        "Chatbots and assistants can be enhanced to better meet user needs and provide more personalized service. "
        "Fine-tuning also leads to more accurate and precise natural language processing (NLP), enabling complex human-"
        "like interactions. The implications for future AI technology are immense, with the potential to open up new "
        "markets and applications. Fine-tuning also makes machine learning more accessible, democratizing the field "
        "and making it easier to adopt. All of this adds up to a technological milestone that has the potential to s"
        "ignificantly impact how we interact with AI in the future. With GPT-3's ability to learn and adapt, the "
        "future looks bright for those who can harness the power of this impressive technology. The process of "
        "fine-tuning could help revolutionize industries and create new opportunities for innovation. The potential of "
        "GPT-3's fine-tuning is limitless, and we are only beginning to scratch the surface of what is possible.")
n = 3
splitter = LucidateTextSplitter(text, n)
print(splitter.split_into_sentences_with_prompts())
splitter.save_as_json("Split.json")
splitter.save_as_csv("Split.csv")
splitter.save_as_excel("Split.xlsx")
22 changes: 22 additions & 0 deletions generatePrompts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import re
import pandas as pd

def split_into_sentences_with_prompts(text, n):
    """Pair every n-th sentence of *text* (a prompt) with the sentences
    that follow it (its completion); return a two-column DataFrame."""
    sentences = re.split("(?<=[.!?]) +", text)
    prompts = sentences[::n]
    # All but the last completion span exactly the sentences between
    # consecutive prompts.
    completions = [
        " ".join(sentences[n * i + 1:n * (i + 1)])
        for i in range(len(prompts) - 1)
    ]
    # The final completion runs from the last prompt to the end.
    completions.append(" ".join(sentences[n * (len(prompts) - 1) + 1:]))
    return pd.DataFrame({'prompt': prompts, 'completion': completions})

# Demo: 50 numbered sentences, one prompt every 5 sentences.
text = " ".join(f"This is sentence {k}." for k in range(1, 51))
n = 5
df = split_into_sentences_with_prompts(text, n)
print(df)
12 changes: 12 additions & 0 deletions para2sentence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import re

def split_into_sentences(text):
    """Break *text* into sentences, splitting on the spaces that follow
    '.', '!' or '?' (the lookbehind keeps the punctuation attached)."""
    return re.split("(?<=[.!?]) +", text)

# Demo: '.', '!' and '?' end sentences; the ';' deliberately does not.
text = ("This is the first sentence. And this, my friends, "
        "is the second one! Is this the third one? Finally; the end.")
print(split_into_sentences(text))
32 changes: 32 additions & 0 deletions promptsClass.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import re
import pandas as pd

class TextSplitter:
    """Turn a block of text into (prompt, completion) rows: every n-th
    sentence becomes a prompt and the sentences after it its completion."""

    def __init__(self, text, n):
        self.text = text
        self.n = n

    def split_into_sentences_with_prompts(self):
        """Build and return the prompt/completion DataFrame.

        Raises ValueError for empty text, non-positive n, or text with
        fewer than n sentences.
        """
        if self.text == "":
            raise ValueError("Input text cannot be empty.")
        if self.n <= 0:
            raise ValueError("n must be a positive integer.")
        # Split on the whitespace that follows a sentence terminator.
        sentences = re.split("(?<=[.!?]) +", self.text)
        if len(sentences) < self.n:
            raise ValueError("Input text must have at least n sentences.")
        step = self.n
        prompts = sentences[::step]
        completions = [
            " ".join(sentences[step * i + 1:step * (i + 1)])
            for i in range(len(prompts) - 1)
        ]
        # Final completion consumes everything after the last prompt.
        completions.append(" ".join(sentences[step * (len(prompts) - 1) + 1:]))
        return pd.DataFrame({'prompt': prompts, 'completion': completions})


# Demo: exercise TextSplitter on 50 numbered sentences with n = 5.
text = " ".join(f"This is sentence {k}." for k in range(1, 51))
n = 5
splitter = TextSplitter(text, n)
df = splitter.split_into_sentences_with_prompts()
print(df)
14 changes: 14 additions & 0 deletions scrape_5quotes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import requests
from bs4 import BeautifulSoup

# Scrape the first five quotes (text + author) from quotes.toscrape.com.
url = 'https://quotes.toscrape.com/'  # was the malformed 'http:https://...'
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

# Query the DOM once, instead of re-running find_all() twice per iteration.
quotes = soup.find_all('span', class_='text')
authors = soup.find_all('small', class_='author')

for i, (quote, author) in enumerate(zip(quotes[:5], authors[:5]), start=1):
    print(f'Quote {i}:')
    print(quote.get_text())
    print(author.get_text())
    print('-' * 40)
12 changes: 12 additions & 0 deletions scrape_quotes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import requests
from bs4 import BeautifulSoup

# Scrape the first quote and its author from quotes.toscrape.com.
url = 'https://quotes.toscrape.com/'  # was the malformed 'http:https://...'
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

# find() returns the first matching element on the page.
quote = soup.find('span', class_='text').get_text()
author = soup.find('small', class_='author').get_text()

print(quote)
print(author)
14 changes: 14 additions & 0 deletions soup_wiki.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import requests
from bs4 import BeautifulSoup
import textwrap

# Fetch the Wikipedia article on Beautiful Soup and print its first
# paragraph wrapped to 80 columns.
url = 'https://en.wikipedia.org/wiki/Beautiful_Soup_(HTML_parser)'
page = requests.get(url)
soup = BeautifulSoup(page.content, 'html.parser')

# The article body lives in the 'mw-parser-output' div; .p is its first <p>.
intro = soup.find('div', class_='mw-parser-output').p.get_text()

print(textwrap.fill(intro, width=80))

0 comments on commit 09a5ca3

Please sign in to comment.