Skip to content

Commit

Permalink
OpenAI-DotNet 5.1.0 (RageAgainstThePixel#37)
Browse files Browse the repository at this point in the history
- Closes RageAgainstThePixel#34: adds Audio endpoint to support
  - audio speech to text
  - audio translation
- updated unit tests
  • Loading branch information
StephenHodgson committed Mar 8, 2023
1 parent 3bf166f commit ee6cac0
Show file tree
Hide file tree
Showing 17 changed files with 540 additions and 13 deletions.
Binary file added OpenAI-DotNet-Tests/Assets/Ja-botchan_1-1_1-2.mp3
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

namespace OpenAI.Tests
{
internal class TestFixture_03_Edits
internal class TestFixture_04_Edits
{
[Test]
public async Task Test_1_GetBasicEdit()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

namespace OpenAI.Tests
{
internal class TestFixture_04_Images
internal class TestFixture_05_Images
{
[Test]
public async Task Test_1_GenerateImages()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

namespace OpenAI.Tests
{
internal class TestFixture_05_Embeddings
internal class TestFixture_06_Embeddings
{
[Test]
public async Task Test_1_CreateEmbedding()
Expand Down
35 changes: 35 additions & 0 deletions OpenAI-DotNet-Tests/TestFixture_07_Audio.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
using NUnit.Framework;
using OpenAI.Audio;
using System;
using System.IO;
using System.Threading.Tasks;

namespace OpenAI.Tests
{
    /// <summary>
    /// Integration tests for <see cref="OpenAI.Audio.AudioEndpoint"/>:
    /// transcription (speech to text) and translation to English.
    /// Requires API credentials from the environment and performs live network calls.
    /// </summary>
    internal class TestFixture_07_Audio
    {
        [Test]
        public async Task Test_1_Transcription()
        {
            var api = new OpenAIClient(OpenAIAuthentication.LoadFromEnv());
            Assert.IsNotNull(api.AudioEndpoint);
            // Path.Combine keeps the asset path portable across Windows and Unix test runners
            // (the original "..\\..\\.." literal only worked on Windows).
            var transcriptionAudio = Path.GetFullPath(Path.Combine("..", "..", "..", "Assets", "T3mt39YrlyLoq8laHSdf.mp3"));
            var request = new AudioTranscriptionRequest(transcriptionAudio, language: "en");
            var result = await api.AudioEndpoint.CreateTranscriptionAsync(request);
            Assert.IsNotNull(result);
            Console.WriteLine(result);
        }

        [Test]
        public async Task Test_2_Translation()
        {
            var api = new OpenAIClient(OpenAIAuthentication.LoadFromEnv());
            Assert.IsNotNull(api.AudioEndpoint);
            var translationAudio = Path.GetFullPath(Path.Combine("..", "..", "..", "Assets", "Ja-botchan_1-1_1-2.mp3"));
            // translationAudio is already absolute; the original wrapped it in a
            // redundant second Path.GetFullPath call.
            var request = new AudioTranslationRequest(translationAudio);
            var result = await api.AudioEndpoint.CreateTranslationAsync(request);
            Assert.IsNotNull(result);
            Console.WriteLine(result);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

namespace OpenAI.Tests
{
internal class TestFixture_06_Files
internal class TestFixture_08_Files
{
[Test]
public async Task Test_01_UploadFile()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

namespace OpenAI.Tests
{
internal class TestFixture_07_FineTuning
internal class TestFixture_09_FineTuning
{
private async Task<FileData> CreateTestTrainingDataAsync(OpenAIClient api)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

namespace OpenAI.Tests
{
internal class TestFixture_08_Moderations
internal class TestFixture_10_Moderations
{
[Test]
public async Task Test_1_Moderate()
Expand Down
112 changes: 112 additions & 0 deletions OpenAI-DotNet/Audio/AudioEndpoint.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
using System.Globalization;
using System.IO;
using System.Net.Http;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading;
using System.Threading.Tasks;

namespace OpenAI.Audio
{
    /// <summary>
    /// Speech to text: transcription into the source language and translation into English.
    /// </summary>
    public sealed class AudioEndpoint : BaseEndPoint
    {
        // Shape of the API's JSON response when response_format is "json":
        // a single object with a "text" property.
        private class AudioResponse
        {
            public AudioResponse(string text)
            {
                Text = text;
            }

            [JsonPropertyName("text")]
            public string Text { get; }
        }

        /// <inheritdoc />
        public AudioEndpoint(OpenAIClient api) : base(api) { }

        /// <inheritdoc />
        protected override string GetEndpoint()
            => $"{Api.BaseUrl}audio";

        /// <summary>
        /// Transcribes audio into the input language.
        /// </summary>
        /// <param name="request"><see cref="AudioTranscriptionRequest"/>. Disposed by this call.</param>
        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
        /// <returns>The transcribed text.</returns>
        public async Task<string> CreateTranscriptionAsync(AudioTranscriptionRequest request, CancellationToken cancellationToken = default)
        {
            using var content = new MultipartFormDataContent();
            AudioResponseFormat responseFormat;

            try
            {
                // Buffer the audio stream so it can be posted as a single byte payload.
                using var audioData = new MemoryStream();
                await request.Audio.CopyToAsync(audioData, cancellationToken);
                content.Add(new ByteArrayContent(audioData.ToArray()), "file", request.AudioName);
                content.Add(new StringContent(request.Model), "model");

                if (!string.IsNullOrWhiteSpace(request.Prompt))
                {
                    content.Add(new StringContent(request.Prompt), "prompt");
                }

                responseFormat = request.ResponseFormat;
                // Invariant lower-casing: the API expects e.g. "verbose_json" regardless of
                // the host's current culture (ToLower() is culture-sensitive).
                content.Add(new StringContent(responseFormat.ToString().ToLowerInvariant()), "response_format");

                if (request.Temperature.HasValue)
                {
                    // InvariantCulture: locales such as de-DE format 0.5 as "0,5",
                    // which the API would reject.
                    content.Add(new StringContent(request.Temperature.Value.ToString(CultureInfo.InvariantCulture)), "temperature");
                }

                if (!string.IsNullOrWhiteSpace(request.Language))
                {
                    content.Add(new StringContent(request.Language), "language");
                }
            }
            finally
            {
                // Always release the request's audio stream, even if buffering fails.
                request.Dispose();
            }

            var response = await Api.Client.PostAsync($"{GetEndpoint()}/transcriptions", content, cancellationToken);
            // NOTE(review): ReadAsStringAsync here is a project extension on
            // HttpResponseMessage — presumably it validates the status code; confirm.
            var responseAsString = await response.ReadAsStringAsync(cancellationToken);

            // Only the "json" format wraps the text in a JSON envelope; all other
            // formats (text, srt, vtt, verbose_json) are returned verbatim.
            return responseFormat == AudioResponseFormat.Json
                ? JsonSerializer.Deserialize<AudioResponse>(responseAsString)?.Text
                : responseAsString;
        }

        /// <summary>
        /// Translates audio into English.
        /// </summary>
        /// <param name="request"><see cref="AudioTranslationRequest"/>. Disposed by this call.</param>
        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
        /// <returns>The translated text.</returns>
        public async Task<string> CreateTranslationAsync(AudioTranslationRequest request, CancellationToken cancellationToken = default)
        {
            using var content = new MultipartFormDataContent();
            AudioResponseFormat responseFormat;

            try
            {
                using var audioData = new MemoryStream();
                await request.Audio.CopyToAsync(audioData, cancellationToken);
                content.Add(new ByteArrayContent(audioData.ToArray()), "file", request.AudioName);
                content.Add(new StringContent(request.Model), "model");

                if (!string.IsNullOrWhiteSpace(request.Prompt))
                {
                    content.Add(new StringContent(request.Prompt), "prompt");
                }

                responseFormat = request.ResponseFormat;
                // Invariant lower-casing, matching CreateTranscriptionAsync.
                content.Add(new StringContent(responseFormat.ToString().ToLowerInvariant()), "response_format");

                if (request.Temperature.HasValue)
                {
                    content.Add(new StringContent(request.Temperature.Value.ToString(CultureInfo.InvariantCulture)), "temperature");
                }
            }
            finally
            {
                request.Dispose();
            }

            var response = await Api.Client.PostAsync($"{GetEndpoint()}/translations", content, cancellationToken);
            var responseAsString = await response.ReadAsStringAsync(cancellationToken);

            return responseFormat == AudioResponseFormat.Json
                ? JsonSerializer.Deserialize<AudioResponse>(responseAsString)?.Text
                : responseAsString;
        }
    }
}
11 changes: 11 additions & 0 deletions OpenAI-DotNet/Audio/AudioResponseFormat.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
namespace OpenAI.Audio
{
    /// <summary>
    /// Output format for audio transcription/translation responses.
    /// Values are lower-cased (e.g. "verbose_json") when sent as the
    /// API's "response_format" multipart field.
    /// </summary>
    public enum AudioResponseFormat
    {
        // Json is the default (0); it is the only format the endpoint
        // unwraps — all others are returned to the caller verbatim.
        Json = 0,
        Verbose_Json,
        Text,
        Srt,
        Vtt
    }
}
Loading

0 comments on commit ee6cac0

Please sign in to comment.