Skip to content

Commit

Permalink
public push
Browse files Browse the repository at this point in the history
  • Loading branch information
scotthavird committed Nov 28, 2022
1 parent 15c039f commit 57cc589
Show file tree
Hide file tree
Showing 10 changed files with 45 additions and 2,389 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,4 @@ dmypy.json

# Pyre type checker
.pyre/
data*
27 changes: 26 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,34 @@ CONTENT_URL=https://augie-public-test.s3.amazonaws.com/89e5915c-cf8b-4f18-9b22-3
docker-compose up --build
```

## settings

```python
def request_transcript():
transcript_request = {
'audio_url': content_url,
#'filter_profanity': True, # Profanity Filtering https://www.assemblyai.com/docs/core-transcription#profanity-filtering
#'punctuate': True, # Automatic Punctuation and Casing https://www.assemblyai.com/docs/core-transcription#automatic-punctuation-and-casing
#'language_detection': True, # Automatic Language Detection https://www.assemblyai.com/docs/core-transcription#automatic-language-detection
#'auto_highlights': True, # Detect Important Phrases and Words https://www.assemblyai.com/docs/audio-intelligence#detect-important-phrases-and-words
#'content_safety': True, # Content Moderation https://www.assemblyai.com/docs/audio-intelligence#content-moderation
#'iab_categories': True, # Topic Detection(IAB Categories) https://www.assemblyai.com/docs/audio-intelligence#topic-detection-iab-classification
#'sentiment_analysis': True, # Sentiment Analysis https://www.assemblyai.com/docs/audio-intelligence#sentiment-analysis
#'summary_type': 'bullets', # Summary bullets https://www.assemblyai.com/docs/audio-intelligence#summarization
#'summary_type': 'gist', # Summary gist
#'summary_type': 'headline', # Summary headline
#'summary_type': 'paragraph', # Summary paragraph
#'auto_chapters': True, # Automatic Chapters https://www.assemblyai.com/docs/audio-intelligence#auto-chapters
#'entity_detection': True, # Entity Detection https://www.assemblyai.com/docs/audio-intelligence#entity-detection
'dual_channel': False
}
```

## results

### paragraph
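Output is saved to `data.*.*` files in the working directory (for example `data.request.json`, `data.transcription.json`, `data.paragraphs.json`, `data.sentences.json`, `data.srt.txt`, and `data.vtt.txt`).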

### paragraph example

```json
[
Expand Down
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ version: "3.7"
services:
operator:
build: .
image: assemgly_ai:0.1.0
image: assembly_ai:0.1.0
env_file:
- default.env
volumes:
Expand Down
33 changes: 17 additions & 16 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,20 @@
def request_transcript():
transcript_request = {
'audio_url': content_url,
'filter_profanity': True, # Profanity Filtering https://www.assemblyai.com/docs/core-transcription#profanity-filtering
'punctuate': True, # Automate Punctuation and Casing https://www.assemblyai.com/docs/core-transcription#automatic-punctuation-and-casing
'language_detection': True, # Automatic Language Detection https://www.assemblyai.com/docs/core-transcription#automatic-language-detection
'auto_highlights': True, # Detect Important Phrases and Words https://www.assemblyai.com/docs/audio-intelligence#detect-important-phrases-and-words
'content_safety': True, # Content Moderation https://www.assemblyai.com/docs/audio-intelligence#content-moderation
'iab_categories': True, # Topic Detection(IAB Categories) https://www.assemblyai.com/docs/audio-intelligence#topic-detection-iab-classification
'sentiment_analysis': True, # Sentiment Analysis https://www.assemblyai.com/docs/audio-intelligence#sentiment-analysis
'summary_type': 'bullets', # Summary bullets https://www.assemblyai.com/docs/audio-intelligence#summarization
#'filter_profanity': True, # Profanity Filtering https://www.assemblyai.com/docs/core-transcription#profanity-filtering
#'punctuate': True, # Automate Punctuation and Casing https://www.assemblyai.com/docs/core-transcription#automatic-punctuation-and-casing
#'language_detection': True, # Automatic Language Detection https://www.assemblyai.com/docs/core-transcription#automatic-language-detection
#'auto_highlights': True, # Detect Important Phrases and Words https://www.assemblyai.com/docs/audio-intelligence#detect-important-phrases-and-words
#'content_safety': True, # Content Moderation https://www.assemblyai.com/docs/audio-intelligence#content-moderation
#'iab_categories': True, # Topic Detection(IAB Categories) https://www.assemblyai.com/docs/audio-intelligence#topic-detection-iab-classification
#'sentiment_analysis': True, # Sentiment Analysis https://www.assemblyai.com/docs/audio-intelligence#sentiment-analysis
#'summary_type': 'bullets', # Summary bullets https://www.assemblyai.com/docs/audio-intelligence#summarization
#'summary_type': 'gist', # Summary gist
#'summary_type': 'headline', # Summary headline
#'summary_type': 'paragraph', # Summary paragraph
'auto_chapters': True, # Automatic Chapters https://www.assemblyai.com/docs/audio-intelligence#auto-chapters
'entity_detection': True, # Entity Detection https://www.assemblyai.com/docs/audio-intelligence#entity-detection
#'auto_chapters': True, # Automatic Chapters https://www.assemblyai.com/docs/audio-intelligence#auto-chapters
#'entity_detection': True, # Entity Detection https://www.assemblyai.com/docs/audio-intelligence#entity-detection
'dual_channel': False
}
transcript_response = requests.post(
transcript_endpoint,
Expand Down Expand Up @@ -80,7 +81,7 @@ def main():
# save request transcript
transcript_response = request_transcript()
json_object = json.dumps(transcript_response, indent=4)
with open('request.json', 'w') as outfile:
with open('data.request.json', 'w') as outfile:
outfile.write(json_object)

# poll and wait
Expand All @@ -89,31 +90,31 @@ def main():

# save transcription
json_object = json.dumps(transcription, indent=4)
with open('transcription.json', 'w') as outfile:
with open('data.transcription.json', 'w') as outfile:
outfile.write(json_object)

# save paragraphs
paragraphs = get_paragraphs(polling_endpoint)
json_object = json.dumps(paragraphs, indent=4)
with open('paragraphs.json', 'w') as outfile:
with open('data.paragraphs.json', 'w') as outfile:
outfile.write(json_object)

# save sentences
sentences = get_sentences(polling_endpoint)
json_object = json.dumps(sentences, indent=4)
with open('sentences.json', 'w') as outfile:
with open('data.sentences.json', 'w') as outfile:
outfile.write(json_object)

# save srt
response = requests.get(polling_endpoint + '/srt', headers=headers)
response = response.text
with open('srt.txt', 'w') as outfile:
with open('data.srt.txt', 'w') as outfile:
outfile.write(response)

# save vtt
response = requests.get(polling_endpoint + '/vtt', headers=headers)
response = response.text
with open('vtt.txt', 'w') as outfile:
with open('data.vtt.txt', 'w') as outfile:
outfile.write(response)

if __name__ == '__main__':
Expand Down
Loading

0 comments on commit 57cc589

Please sign in to comment.