forked from mrspiggot/prompts_and_completions
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
61 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
import youtube_dl | ||
import openai | ||
import os | ||
|
||
# Set up OpenAI credentials | ||
openai.api_key = "YOUR_API_KEY" | ||
|
||
# Define function to transcribe audio using OpenAI | ||
def transcribe_audio(audio_file): | ||
response = openai.Completion.create( | ||
engine="davinci", | ||
prompt="Transcribe the following audio:\n" + audio_file, | ||
max_tokens=2048, | ||
n=1, | ||
stop=None, | ||
temperature=0.5, | ||
) | ||
return response.choices[0].text.strip() | ||
|
||
# Download YouTube video and extract audio using youtube_dl | ||
url = "https://www.youtube.com/watch?v=YOUR_VIDEO_ID" | ||
ydl_opts = { | ||
'format': 'bestaudio/best', | ||
'outtmpl': '%(id)s.%(ext)s', | ||
'postprocessors': [{ | ||
'key': 'FFmpegExtractAudio', | ||
'preferredcodec': 'wav', | ||
'preferredquality': '192', | ||
}], | ||
} | ||
with youtube_dl.YoutubeDL(ydl_opts) as ydl: | ||
ydl.download([url]) | ||
|
||
# Transcribe audio and chop up text into prompts and completions | ||
audio_file = "YOUR_VIDEO_ID.wav" | ||
transcribed_text = transcribe_audio(audio_file) | ||
prompt_length = 50 | ||
completion_length = 20 | ||
prompts = [transcribed_text[i:i+prompt_length] for i in range(0, len(transcribed_text), prompt_length)] | ||
completions = [transcribed_text[i:i+completion_length] for i in range(prompt_length, len(transcribed_text), completion_length)] | ||
|
||
# Fine-tune GPT-3 using prompts and completions | ||
model_engine = "davinci" | ||
model_name = "YOUR_MODEL_NAME" | ||
model_prompt = "\n".join(prompts) | ||
model_completion = "\n".join(completions) | ||
fine_tuned_model = openai.FineTune.create( | ||
model=model_name, | ||
prompt=model_prompt, | ||
examples=[{"text": model_completion}], | ||
temperature=0.7, | ||
max_tokens=1024, | ||
n_epochs=5, | ||
batch_size=4, | ||
learning_rate=1e-5, | ||
labels=["transcription"], | ||
create= True | ||
) | ||
|
||
# Print the fine-tuned model's ID | ||
print(f"Fine-tuned model ID: {fine_tuned_model.id}") |