Skip to content

Commit

Permalink
Merge pull request #16 from occ-ai/roy.overhaul_detection_vad_models
Browse files Browse the repository at this point in the history
Update whisper-utils.h and whisper-utils.cpp to use update_whisper_mo…
  • Loading branch information
royshil committed May 5, 2024
2 parents c5257f9 + 447127f commit 7aa24ff
Show file tree
Hide file tree
Showing 25 changed files with 1,585 additions and 538 deletions.
1 change: 1 addition & 0 deletions .github/scripts/.Aptfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ package 'jq'
package 'ninja-build', bin: 'ninja'
package 'pkg-config'
package 'libopenblas-dev'
package 'libavformat-dev'
81 changes: 80 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,55 @@ if(ENABLE_QT)
AUTORCC ON)
endif()

# find_libav()
#
# Links the FFmpeg libraries (avformat, avcodec, avutil, swresample) into the
# ${CMAKE_PROJECT_NAME} target. Takes no arguments.
#
# * UNIX other than Apple: the libraries are resolved through pkg-config and
#   linked via the PkgConfig::FFMPEG imported target.
# * MSVC and Apple: headers and static libraries are taken from the pre-built
#   dependency directory ".deps/obs-deps-<version>-<arch>", where <version> is
#   read from "dependencies.prebuilt.version" in buildspec.json.
function(find_libav)
  if(UNIX AND NOT APPLE)
    find_package(PkgConfig REQUIRED)
    # REQUIRED makes pkg_check_modules stop the configure step by itself when
    # any module is missing, so no separate FFMPEG_FOUND check is needed.
    pkg_check_modules(
      FFMPEG
      REQUIRED
      IMPORTED_TARGET
      libavformat
      libavcodec
      libavutil
      libswresample)
    target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE PkgConfig::FFMPEG)
    return()
  endif()

  # Reuse an already loaded buildspec if the caller's scope provides one.
  if(NOT buildspec)
    file(READ "${CMAKE_CURRENT_SOURCE_DIR}/buildspec.json" buildspec)
  endif()
  # The JSON text is quoted so that a ';' anywhere in the file cannot split it
  # into several arguments.
  string(
    JSON
    version
    GET
    "${buildspec}"
    dependencies
    prebuilt
    version)

  if(MSVC)
    set(arch "${CMAKE_GENERATOR_PLATFORM}")
  elseif(APPLE)
    set(arch universal)
  else()
    # Without this branch 'arch' stayed undefined and the paths below silently
    # pointed at a non-existent "obs-deps-<version>-" directory.
    message(FATAL_ERROR "find_libav: no pre-built FFmpeg dependency layout is known for this platform/toolchain")
  endif()
  set(deps_root "${CMAKE_CURRENT_SOURCE_DIR}/.deps/obs-deps-${version}-${arch}")

  target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE "${deps_root}/include")
  target_link_libraries(
    ${CMAKE_PROJECT_NAME}
    PRIVATE "${deps_root}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}avcodec${CMAKE_STATIC_LIBRARY_SUFFIX}"
            "${deps_root}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}avformat${CMAKE_STATIC_LIBRARY_SUFFIX}"
            "${deps_root}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}avutil${CMAKE_STATIC_LIBRARY_SUFFIX}"
            "${deps_root}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}swresample${CMAKE_STATIC_LIBRARY_SUFFIX}")
endfunction()

find_libav()

# Cache entry USE_SYSTEM_CURL (type STRING, default OFF): "Use system cURL".
# A value already present in the cache (e.g. from -DUSE_SYSTEM_CURL=ON) is kept.
set(USE_SYSTEM_CURL
OFF
CACHE STRING "Use system cURL")
Expand All @@ -50,14 +99,44 @@ endif()
# Whisper.cpp: pull in the build script and link the resulting target.
include(cmake/BuildWhispercpp.cmake)
target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE Whispercpp)

# User-facing switches for ONNX Runtime (both default to OFF).
set(USE_SYSTEM_ONNXRUNTIME
    OFF
    CACHE STRING "Use system ONNX Runtime")
set(DISABLE_ONNXRUNTIME_GPU
    OFF
    CACHE STRING "Disables GPU support of ONNX Runtime (Only valid on Linux)")

# Expose the GPU switch to the sources as a preprocessor definition.
if(DISABLE_ONNXRUNTIME_GPU)
  target_compile_definitions(${CMAKE_PROJECT_NAME} PRIVATE DISABLE_ONNXRUNTIME_GPU)
endif()

# Choose where ONNX Runtime comes from: the bundled download by default, or a
# system-wide installation, which is accepted on Linux only.
if(NOT USE_SYSTEM_ONNXRUNTIME)
  include(cmake/FetchOnnxruntime.cmake)
elseif(NOT OS_LINUX)
  message(FATAL_ERROR "System ONNX Runtime is only supported on Linux!")
else()
  find_package(Onnxruntime 1.16.3 REQUIRED)
  # The include root plus the nested directories below it are all added to the
  # header search path.
  set(Onnxruntime_INCLUDE_PATH
      ${Onnxruntime_INCLUDE_DIR}
      ${Onnxruntime_INCLUDE_DIR}/onnxruntime
      ${Onnxruntime_INCLUDE_DIR}/onnxruntime/core/session
      ${Onnxruntime_INCLUDE_DIR}/onnxruntime/core/providers/cpu)
  target_include_directories(${CMAKE_PROJECT_NAME} SYSTEM PUBLIC "${Onnxruntime_INCLUDE_PATH}")
  target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE "${Onnxruntime_LIBRARIES}")
endif()

# Source files compiled into the plugin target. Each file is listed exactly
# once and the argument list is closed only after the last entry.
target_sources(
  ${CMAKE_PROJECT_NAME}
  PRIVATE src/plugin-main.c
          src/cleanstream-filter.cpp
          src/cleanstream-filter.c
          src/audio-utils/read-audio-file.cpp
          src/model-utils/model-downloader.cpp
          src/model-utils/model-downloader-ui.cpp
          src/model-utils/model-infos.cpp
          src/whisper-utils/whisper-utils.cpp
          src/whisper-utils/whisper-processing.cpp
          src/whisper-utils/silero-vad-onnx.cpp)

set_target_properties_plugin(${CMAKE_PROJECT_NAME} PROPERTIES OUTPUT_NAME ${_name})
2 changes: 1 addition & 1 deletion buildspec.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
}
},
"name": "obs-cleanstream",
"version": "0.0.5",
"version": "0.0.6",
"author": "Roy Shilkrot",
"website": "https://github.com/occ-ai/obs-cleanstream/",
"email": "[email protected]",
Expand Down
97 changes: 97 additions & 0 deletions cmake/FetchOnnxruntime.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
include(FetchContent)

# Optional override: a user-supplied ONNX Runtime archive. When a URL is given,
# a matching hash must be given as well.
set(CUSTOM_ONNXRUNTIME_URL
    ""
    CACHE STRING "URL of a downloaded ONNX Runtime tarball")
set(CUSTOM_ONNXRUNTIME_HASH
    ""
    CACHE STRING "Hash of a downloaded ONNX Runtime tarball")

# Release used for the predefined downloads and for versioned library names.
set(Onnxruntime_VERSION "1.17.1")

# USE_PREDEFINED_ONNXRUNTIME is ON when no custom URL was supplied, OFF when a
# custom URL and hash were both supplied; a URL without a hash is an error.
if(NOT CUSTOM_ONNXRUNTIME_URL STREQUAL "")
  if(CUSTOM_ONNXRUNTIME_HASH STREQUAL "")
    message(FATAL_ERROR "Both of CUSTOM_ONNXRUNTIME_URL and CUSTOM_ONNXRUNTIME_HASH must be present!")
  endif()
  set(USE_PREDEFINED_ONNXRUNTIME OFF)
else()
  set(USE_PREDEFINED_ONNXRUNTIME ON)
endif()

# Resolve the archive URL and its hash, then download and unpack it.
#
# With USE_PREDEFINED_ONNXRUNTIME the official release archive matching
# Onnxruntime_VERSION is selected per platform; otherwise the user-supplied
# CUSTOM_ONNXRUNTIME_URL / CUSTOM_ONNXRUNTIME_HASH pair is used.
if(USE_PREDEFINED_ONNXRUNTIME)
  set(Onnxruntime_BASEURL "https://github.com/microsoft/onnxruntime/releases/download/v${Onnxruntime_VERSION}")

  if(APPLE)
    set(Onnxruntime_URL "${Onnxruntime_BASEURL}/onnxruntime-osx-universal2-${Onnxruntime_VERSION}.tgz")
    set(Onnxruntime_HASH SHA256=9FA57FA6F202A373599377EF75064AE568FDA8DA838632B26A86024C7378D306)
  elseif(MSVC)
    set(Onnxruntime_URL "${Onnxruntime_BASEURL}/onnxruntime-win-x64-${Onnxruntime_VERSION}.zip")
    # Must be Onnxruntime_HASH: the previous misspelling ("OOnnxruntime_HASH")
    # left the hash undefined on Windows, so URL_HASH below received no value.
    set(Onnxruntime_HASH SHA256=4802AF9598DB02153D7DA39432A48823FF69B2FB4B59155461937F20782AA91C)
  else()
    if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
      set(Onnxruntime_URL "${Onnxruntime_BASEURL}/onnxruntime-linux-aarch64-${Onnxruntime_VERSION}.tgz")
      set(Onnxruntime_HASH SHA256=70B6F536BB7AB5961D128E9DBD192368AC1513BFFB74FE92F97AAC342FBD0AC1)
    else()
      set(Onnxruntime_URL "${Onnxruntime_BASEURL}/onnxruntime-linux-x64-gpu-${Onnxruntime_VERSION}.tgz")
      set(Onnxruntime_HASH SHA256=613C53745EA4960ED368F6B3AB673558BB8561C84A8FA781B4EA7FB4A4340BE4)
    endif()
  endif()
else()
  set(Onnxruntime_URL "${CUSTOM_ONNXRUNTIME_URL}")
  set(Onnxruntime_HASH "${CUSTOM_ONNXRUNTIME_HASH}")
endif()

# After this call the unpacked archive is available at ${onnxruntime_SOURCE_DIR}.
FetchContent_Declare(
  onnxruntime
  URL ${Onnxruntime_URL}
  URL_HASH ${Onnxruntime_HASH})
FetchContent_MakeAvailable(onnxruntime)

# Wire the unpacked ONNX Runtime into the plugin target, per platform.
if(APPLE)
  # macOS: link the versioned dylib by path and add it to the target's sources
  # with a "Frameworks" package location.
  set(Onnxruntime_LIB "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime.${Onnxruntime_VERSION}.dylib")
  target_include_directories(${CMAKE_PROJECT_NAME} SYSTEM PUBLIC "${onnxruntime_SOURCE_DIR}/include")
  target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE "${Onnxruntime_LIB}")
  target_sources(${CMAKE_PROJECT_NAME} PRIVATE "${Onnxruntime_LIB}")
  set_source_files_properties("${Onnxruntime_LIB}" PROPERTIES MACOSX_PACKAGE_LOCATION Frameworks)
  source_group("Frameworks" FILES "${Onnxruntime_LIB}")

  # Before the build: sign the downloaded dylib with CODESIGN_IDENTITY.
  add_custom_command(
    TARGET "${CMAKE_PROJECT_NAME}"
    PRE_BUILD
    COMMAND /usr/bin/codesign --force --verify --verbose --sign "${CODESIGN_IDENTITY}" "${Onnxruntime_LIB}"
    VERBATIM)

  # After the build: rewrite the plugin's reference to the dylib from @rpath to
  # the Frameworks directory relative to the plugin binary.
  add_custom_command(
    TARGET "${CMAKE_PROJECT_NAME}"
    POST_BUILD
    COMMAND
      ${CMAKE_INSTALL_NAME_TOOL} -change "@rpath/libonnxruntime.${Onnxruntime_VERSION}.dylib"
      "@loader_path/../Frameworks/libonnxruntime.${Onnxruntime_VERSION}.dylib" $<TARGET_FILE:${CMAKE_PROJECT_NAME}>)
elseif(MSVC)
  # Windows: one imported shared library per DLL, collected behind the "Ort"
  # interface target; each DLL is also installed next to the plugin.
  add_library(Ort INTERFACE)
  set(Onnxruntime_LIB_NAMES onnxruntime onnxruntime_providers_shared)
  foreach(lib_name IN LISTS Onnxruntime_LIB_NAMES)
    add_library(Ort::${lib_name} SHARED IMPORTED)
    set_target_properties(
      Ort::${lib_name}
      PROPERTIES IMPORTED_IMPLIB "${onnxruntime_SOURCE_DIR}/lib/${lib_name}.lib"
                 IMPORTED_LOCATION "${onnxruntime_SOURCE_DIR}/lib/${lib_name}.dll"
                 INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include")
    target_link_libraries(Ort INTERFACE Ort::${lib_name})
    install(FILES "${onnxruntime_SOURCE_DIR}/lib/${lib_name}.dll" DESTINATION "obs-plugins/64bit")
  endforeach()

  target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE Ort)
else()
  # Other platforms: link the versioned shared object by path. On anything
  # other than aarch64 the providers_shared library is installed as well.
  set(Onnxruntime_LINK_LIBS "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime.so.${Onnxruntime_VERSION}")
  set(Onnxruntime_INSTALL_LIBS ${Onnxruntime_LINK_LIBS})
  if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
    list(APPEND Onnxruntime_INSTALL_LIBS "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime_providers_shared.so")
  endif()

  target_include_directories(${CMAKE_PROJECT_NAME} SYSTEM PUBLIC "${onnxruntime_SOURCE_DIR}/include")
  target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE ${Onnxruntime_LINK_LIBS})
  install(FILES ${Onnxruntime_INSTALL_LIBS} DESTINATION "${CMAKE_INSTALL_LIBDIR}/obs-plugins/${CMAKE_PROJECT_NAME}")
  # The libraries are installed into a sub-directory named after the project,
  # so the installed plugin's RPATH points at that sub-directory.
  set_target_properties(${CMAKE_PROJECT_NAME} PROPERTIES INSTALL_RPATH "$ORIGIN/${CMAKE_PROJECT_NAME}")
endif()
15 changes: 15 additions & 0 deletions data/locale/en-US.ini
Original file line number Diff line number Diff line change
@@ -1,2 +1,17 @@
CleanStreamAudioFilter="Clean stream audio filter"
CleanStreamFilterPlugin="Clean stream filter plugin"
detect_regex="Detect regex"
advanced_settings="Advanced settings"
filler_p_threshold="Filler p threshold"
do_silence="Do silence"
vad_enabled="VAD enabled"
log_level="Log level"
log_words="Log words"
whisper_model="Whisper model"
Whisper_Parameters="Whisper Parameters"
whisper_sampling_method="Whisper sampling method"
n_threads="Number of threads"
n_max_text_ctx="Number of max text context"
no_context="No context"
replace_sound_path="Replace Sound Path"
replace_sound="Replace Sound"
File renamed without changes.
Binary file added data/models/silero-vad/silero_vad.onnx
Binary file not shown.
Binary file added data/sounds/beep.wav
Binary file not shown.
Binary file added data/sounds/horn.wav
Binary file not shown.
137 changes: 137 additions & 0 deletions src/audio-utils/read-audio-file.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
#if defined(_WIN32) || defined(__APPLE__)

extern "C" {
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
#include <libavutil/frame.h>
#include <libavutil/mem.h>
#include <libavutil/opt.h>
#include <libswresample/swresample.h>
}

#include <iostream>
#include <vector>

#include "read-audio-file.h"
#include "obs.h"
#include "plugin-support.h"

/**
 * Decode the first audio stream of a media file into mono float samples.
 *
 * The file is opened with libavformat, decoded with libavcodec and converted
 * with libswresample to a mono, AV_SAMPLE_FMT_FLT stream at targetSampleRate.
 *
 * @param filename          Path of the file to read.
 * @param targetSampleRate  Sample rate of the returned audio.
 * @return The converted samples, or an empty AudioDataFloat when any step
 *         fails (the reason is logged with LOG_ERROR).
 *
 * Every FFmpeg object allocated here is released on all return paths, and the
 * decoder and resampler are flushed at end of file so trailing samples that
 * are still buffered inside them are not lost.
 */
AudioDataFloat read_audio_file(const char *filename, int targetSampleRate)
{
	// Number of samples the intermediate conversion buffer can hold.
	const int convertBufferSamples = 4096;

	AVFormatContext *formatContext = nullptr;
	AVCodecContext *codecContext = nullptr;
	AVFrame *frame = nullptr;
	AVPacket *packet = nullptr;
	SwrContext *swr = nullptr;
	AVChannelLayout ch_layout = {};
	float *convertBuffer[1] = {nullptr};

	// Releases everything acquired so far; every call below accepts
	// null / not-yet-allocated objects, so it is safe at any point.
	auto cleanup = [&]() {
		av_free(convertBuffer[0]);
		convertBuffer[0] = nullptr;
		swr_free(&swr);
		av_channel_layout_uninit(&ch_layout);
		av_packet_free(&packet);
		av_frame_free(&frame);
		avcodec_free_context(&codecContext);
		avformat_close_input(&formatContext);
	};

	if (avformat_open_input(&formatContext, filename, nullptr, nullptr) != 0) {
		obs_log(LOG_ERROR, "Error opening file");
		return {};
	}

	if (avformat_find_stream_info(formatContext, nullptr) < 0) {
		obs_log(LOG_ERROR, "Error finding stream information");
		cleanup();
		return {};
	}

	// Pick the first audio stream in the container.
	int audioStreamIndex = -1;
	for (unsigned int i = 0; i < formatContext->nb_streams; i++) {
		if (formatContext->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
			audioStreamIndex = (int)i;
			break;
		}
	}

	if (audioStreamIndex == -1) {
		obs_log(LOG_ERROR, "No audio stream found");
		cleanup();
		return {};
	}

	AVCodecParameters *codecParams = formatContext->streams[audioStreamIndex]->codecpar;
	const AVCodec *codec = avcodec_find_decoder(codecParams->codec_id);
	if (!codec) {
		obs_log(LOG_ERROR, "Decoder not found");
		cleanup();
		return {};
	}

	codecContext = avcodec_alloc_context3(codec);
	if (!codecContext) {
		obs_log(LOG_ERROR, "Failed to allocate codec context");
		cleanup();
		return {};
	}

	if (avcodec_parameters_to_context(codecContext, codecParams) < 0) {
		obs_log(LOG_ERROR, "Failed to copy codec parameters to codec context");
		cleanup();
		return {};
	}

	if (avcodec_open2(codecContext, codec, nullptr) < 0) {
		obs_log(LOG_ERROR, "Failed to open codec");
		cleanup();
		return {};
	}

	frame = av_frame_alloc();
	packet = av_packet_alloc();
	convertBuffer[0] = (float *)av_malloc(convertBufferSamples * sizeof(float));
	if (!frame || !packet || !convertBuffer[0]) {
		obs_log(LOG_ERROR, "Failed to allocate decoding buffers");
		cleanup();
		return {};
	}

	// set up swresample: anything in -> mono float at the target rate
	int ret = av_channel_layout_from_string(&ch_layout, "mono");
	if (ret >= 0) {
		ret = swr_alloc_set_opts2(&swr, &ch_layout, AV_SAMPLE_FMT_FLT, targetSampleRate,
					  &(codecContext->ch_layout), codecContext->sample_fmt,
					  codecContext->sample_rate, 0, nullptr);
	}
	if (ret < 0) {
		char errbuf[AV_ERROR_MAX_STRING_SIZE];
		av_strerror(ret, errbuf, AV_ERROR_MAX_STRING_SIZE);
		obs_log(LOG_ERROR, "Failed to set up swr context: %s", errbuf);
		cleanup();
		return {};
	}
	// init swr
	ret = swr_init(swr);
	if (ret < 0) {
		char errbuf[AV_ERROR_MAX_STRING_SIZE];
		av_strerror(ret, errbuf, AV_ERROR_MAX_STRING_SIZE);
		obs_log(LOG_ERROR, "Failed to initialize swr context: %s", errbuf);
		cleanup();
		return {};
	}

	AudioDataFloat audioFrames;

	// Runs one swr_convert call and appends its output to audioFrames.
	// Returns the number of samples produced, or a negative error code
	// (already logged). Passing (nullptr, 0) flushes the resampler.
	auto convertAndAppend = [&](const uint8_t **input, int inputSamples) -> int {
		int converted = swr_convert(swr, (uint8_t **)convertBuffer, convertBufferSamples,
					    input, inputSamples);
		if (converted < 0) {
			char errbuf[AV_ERROR_MAX_STRING_SIZE];
			av_strerror(converted, errbuf, AV_ERROR_MAX_STRING_SIZE);
			obs_log(LOG_ERROR, "Failed to convert audio frame: %s", errbuf);
			return converted;
		}
		audioFrames.insert(audioFrames.end(), convertBuffer[0],
				   convertBuffer[0] + converted);
		return converted;
	};

	// Pulls every frame the decoder currently has ready and converts it.
	auto receiveFrames = [&]() -> bool {
		while (avcodec_receive_frame(codecContext, frame) == 0) {
			if (convertAndAppend((const uint8_t **)frame->data, frame->nb_samples) < 0)
				return false;
		}
		return true;
	};

	while (av_read_frame(formatContext, packet) >= 0) {
		bool ok = true;
		if (packet->stream_index == audioStreamIndex &&
		    avcodec_send_packet(codecContext, packet) == 0) {
			ok = receiveFrames();
		}
		av_packet_unref(packet);
		if (!ok) {
			cleanup();
			return {};
		}
	}

	// End of file: a null packet puts the decoder into draining mode so
	// frames it still holds back are delivered.
	if (avcodec_send_packet(codecContext, nullptr) == 0 && !receiveFrames()) {
		cleanup();
		return {};
	}

	// Flush the resampler: input beyond what fits into the output buffer is
	// kept inside the swr context and has to be pulled out explicitly.
	int flushed = 0;
	while ((flushed = convertAndAppend(nullptr, 0)) > 0) {
	}
	if (flushed < 0) {
		cleanup();
		return {};
	}

	// %zu: audioFrames.size() is a size_t, which is not 'unsigned long' on
	// 64-bit Windows.
	obs_log(LOG_INFO,
		"Converted %zu frames of audio data (orig: %d, %s sample format, %d channels, %s)",
		audioFrames.size(), codecContext->sample_rate,
		av_get_sample_fmt_name(codecContext->sample_fmt),
		codecContext->ch_layout.nb_channels,
		av_sample_fmt_is_planar(codecContext->sample_fmt) ? "planar" : "packed");

	cleanup();

	return audioFrames;
}

#endif
Loading

0 comments on commit 7aa24ff

Please sign in to comment.