diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..4d1a7db --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +assets/banner.png filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index 79d2a9a..699b301 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ Demo on [Google Colab](https://colab.research.google.com/drive/1JVtJ6CDwxtKfFmEd Here is a typical run using `bark.cpp`: ```java -make -j && ./main -p "This is an audio generated by bark.cpp" +./main -p "This is an audio generated by bark.cpp" __ __ / /_ ____ ______/ /__ _________ ____ @@ -50,19 +50,19 @@ make -j && ./main -p "This is an audio generated by bark.cpp" bark_tokenize_input: prompt: 'This is an audio generated by bark.cpp' bark_tokenize_input: number of tokens in prompt = 513, first 8 tokens: 20795 20172 20199 33733 58966 20203 28169 20222 -Generating semantic tokens: [========> ] (17%) +Generating semantic tokens: 17% bark_print_statistics: sample time = 10.98 ms / 138 tokens bark_print_statistics: predict time = 614.96 ms / 4.46 ms per token bark_print_statistics: total time = 633.54 ms -Generating coarse tokens: [==================================================>] (100%) +Generating coarse tokens: 100% bark_print_statistics: sample time = 3.75 ms / 410 tokens bark_print_statistics: predict time = 3263.17 ms / 7.96 ms per token bark_print_statistics: total time = 3274.00 ms -Generating fine tokens: [==================================================>] (100%) +Generating fine tokens: 100% bark_print_statistics: sample time = 38.82 ms / 6144 tokens bark_print_statistics: predict time = 4729.86 ms / 0.77 ms per token @@ -75,11 +75,10 @@ main: eval time = 8806.57 ms main: total time = 9131.68 ms ``` -Here are typical audio pieces generated by `bark.cpp`: +Here is a video of Bark running on the iPhone: -https://github.com/PABannier/bark.cpp/assets/12958149/f9f240fd-975f-4d69-9bb3-b295a61daaff +https://github.com/PABannier/bark.cpp/assets/12958149/bc807c0b-adfa-4c47-a05b-a2d8ba157dd8 -https://github.com/PABannier/bark.cpp/assets/12958149/c0caadfd-bed9-4a48-8c17-3215963facc1 ## Usage diff --git a/examples/bark.swiftui/README.md b/examples/bark.swiftui/README.md index f64487a..760f783 100644 --- a/examples/bark.swiftui/README.md +++ b/examples/bark.swiftui/README.md @@ -8,6 +8,10 @@ A sample SwiftUI app using [bark.cpp](https://github.com/PABannier/bark.cpp/) to **Note:** Pay attention to the folder path: `whisper.swiftui.demo/Resources/models` is the appropriate directory to place resources whilst `whisper.swiftui.demo/Models` is related to actual code. +https://github.com/PABannier/bark.cpp/assets/12958149/bc807c0b-adfa-4c47-a05b-a2d8ba157dd8 + + [^1]: I recommend the Bark small model for running on an iOS device. [^2]: The `Release` build can boost performance of audio generation. In this project, it also added `-O3 -DNDEBUG` to `Other C Flags`, but adding flags to app proj is not ideal in real world (applies to all C/C++ files), consider splitting xcodeproj in workspace in your own project. + diff --git a/examples/bark.swiftui/bark.swiftui/Bindings/ProgressData.swift b/examples/bark.swiftui/bark.swiftui/Bindings/ProgressData.swift index 1343cdd..841e63f 100644 --- a/examples/bark.swiftui/bark.swiftui/Bindings/ProgressData.swift +++ b/examples/bark.swiftui/bark.swiftui/Bindings/ProgressData.swift @@ -13,7 +13,7 @@ class ProgressData: ObservableObject { static let shared = ProgressData() @Published var progress: Float = 0.0 - @Published var stepTitle: String = "Progress..." + @Published var stepTitle: String = "Progress (0%)" private init() {} } diff --git a/examples/bark.swiftui/bark.swiftui/Models/AudioPlayer.swift b/examples/bark.swiftui/bark.swiftui/Models/AudioPlayer.swift index 8d9a633..fc7859a 100644 --- a/examples/bark.swiftui/bark.swiftui/Models/AudioPlayer.swift +++ b/examples/bark.swiftui/bark.swiftui/Models/AudioPlayer.swift @@ -12,15 +12,24 @@ class AudioPlayer { private var audioEngine: AVAudioEngine private var playerNode: AVAudioPlayerNode private var audioFormat: AVAudioFormat - private var buffer: AVAudioPCMBuffer init(samples: [Float], sampleRate: Double = 24000.0) { + // Initialize the AVFoundation objects audioEngine = AVAudioEngine() playerNode = AVAudioPlayerNode() audioFormat = AVAudioFormat(standardFormatWithSampleRate: sampleRate, channels: 1)! - // Setup the audio engine + // Configure the audio session for playback + let audioSession = AVAudioSession.sharedInstance() + do { + try audioSession.setCategory(.playback, mode: .default) + try audioSession.setActive(true) + } catch { + print("Failed to configure audio session: \(error)") + } + + // Set up the audio engine audioEngine.attach(playerNode) audioEngine.connect(playerNode, to: audioEngine.mainMixerNode, format: audioFormat) @@ -30,11 +39,12 @@ class AudioPlayer { print("Error starting audio engine: \(error)") } - // Copy samples to the buffer + // Prepare the buffer buffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: UInt32(samples.count))! buffer.frameLength = buffer.frameCapacity let channelData = buffer.floatChannelData![0] + // Copy samples to buffer for i in 0..