Skip to content

Commit

Permalink
Merge pull request #21 from occ-ai/roy.bump_whisper_ver
Browse files: browse the repository at this point in the history
chore: Update version to 0.0.9 and whispercpp to 1.6.2
  • Loading branch information
royshil committed Jun 3, 2024
2 parents bf972f4 + 74ebd0b commit 62a05a4
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 64 deletions.
2 changes: 1 addition & 1 deletion buildspec.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
}
},
"name": "obs-cleanstream",
"version": "0.0.8",
"version": "0.0.9",
"author": "Roy Shilkrot",
"website": "https://github.com/occ-ai/obs-cleanstream/",
"email": "[email protected]",
Expand Down
22 changes: 11 additions & 11 deletions cmake/BuildWhispercpp.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ if(APPLE)
endif(NOT DEFINED ENV{MACOS_ARCH})

set(WHISPER_CPP_URL
"https://github.com/occ-ai/occ-ai-dep-whispercpp/releases/download/0.0.2/whispercpp-macos-$ENV{MACOS_ARCH}-0.0.2.tar.gz"
"https://github.com/occ-ai/occ-ai-dep-whispercpp/releases/download/0.0.3/whispercpp-macos-$ENV{MACOS_ARCH}-0.0.3.tar.gz"
)
if($ENV{MACOS_ARCH} STREQUAL "x86_64")
set(WHISPER_CPP_HASH "00C308AF0BFFF7619934403A8080CC9AFC4EDAA328D7587E617150A2C6A33313")
set(WHISPER_CPP_HASH "94DB35C69E958C8A59F0F331734F4F4F45F4BB13D2F54D3C838457E8590874C4")
elseif($ENV{MACOS_ARCH} STREQUAL "arm64")
set(WHISPER_CPP_HASH "0478E2079E07FA81BEE77506101003F4A4C8F0DF9E23757BD7E1D25DCBD1DB30")
set(WHISPER_CPP_HASH "ACA1DF8F34F4946B56FEED89B7548C9AD56D1DD89615C96BDEB6E4734A946451")
else()
message(
FATAL_ERROR
Expand Down Expand Up @@ -59,16 +59,16 @@ elseif(WIN32)
endif()

set(WHISPER_CPP_URL
"https://github.com/occ-ai/occ-ai-dep-whispercpp/releases/download/0.0.2/whispercpp-windows-${CUDA_PREFIX}-0.0.2.zip"
"https://github.com/occ-ai/occ-ai-dep-whispercpp/releases/download/0.0.3/whispercpp-windows-${CUDA_PREFIX}-0.0.3.zip"
)
if($ENV{CPU_OR_CUDA} STREQUAL "cpu")
set(WHISPER_CPP_HASH "6DE628A51B9352624A1EC397231591FA3370E6BB42D9364F4F91F11DD18F77D2")
set(WHISPER_CPP_HASH "A7243E649E0B6D080AA6D2210DB0AC08C597FA11B88C3068B8A60083AD9E62EF")
elseif($ENV{CPU_OR_CUDA} STREQUAL "clblast")
set(WHISPER_CPP_HASH "97BF58520F1818B7C9F4E996197F3097934E5E0BBA92B0B016C6B28BE9FF1642")
set(WHISPER_CPP_HASH "7957AC76A0E6517C95951B3BECCB554CD992E30DAF8716681B40F375590F69F1")
elseif($ENV{CPU_OR_CUDA} STREQUAL "12.2.0")
set(WHISPER_CPP_HASH "48C059A3364E0AAD9FB0D4194BA554865928D22A27ECE5E3C116DC672D5D6EDE")
set(WHISPER_CPP_HASH "0F6BC1F91C573A867D6972554FC29C3D8EAFD7994FA0FEBBEAFCF945DC8A9F41")
elseif($ENV{CPU_OR_CUDA} STREQUAL "11.8.0")
set(WHISPER_CPP_HASH "29A5530E83896DE207F0199535CBBB24DF0D63B1373BA66139AD240BA67120EB")
set(WHISPER_CPP_HASH "51CB6750ADDF96F38106E4E88212FCC06500999E568E5A4EDC6D42CA6D7CA99D")
else()
message(
FATAL_ERROR
Expand Down Expand Up @@ -106,11 +106,11 @@ elseif(WIN32)
install(FILES ${WHISPER_DLLS} DESTINATION "obs-plugins/64bit")

else()
set(Whispercpp_Build_GIT_TAG "v1.5.5")
set(Whispercpp_Build_GIT_TAG "v1.6.2")
set(WHISPER_EXTRA_CXX_FLAGS "-fPIC")
set(WHISPER_ADDITIONAL_CMAKE_ARGS -DWHISPER_BLAS=OFF -DWHISPER_CUBLAS=OFF -DWHISPER_OPENBLAS=OFF)

# On Linux and MacOS build a static Whisper library
# On Linux build a static Whisper library
ExternalProject_Add(
Whispercpp_Build
DOWNLOAD_EXTRACT_TIMESTAMP true
Expand All @@ -131,7 +131,7 @@ else()

ExternalProject_Get_Property(Whispercpp_Build INSTALL_DIR)

# on Linux and MacOS add the static Whisper library to the link line
# add the static Whisper library to the link line
add_library(Whispercpp::Whisper STATIC IMPORTED)
set_target_properties(
Whispercpp::Whisper
Expand Down
106 changes: 54 additions & 52 deletions src/whisper-utils/whisper-processing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ struct whisper_context *init_whisper_context(const std::string &model_path_in,
cparams.use_gpu = false;
obs_log(LOG_INFO, "Using CPU for inference");
#endif
cparams.flash_attn = false;

struct whisper_context *ctx = nullptr;
try {
Expand Down Expand Up @@ -136,77 +137,78 @@ int run_whisper_inference(struct cleanstream_data *gf, const float *pcm32f_data,
int(pcm32f_size), float(pcm32f_size) / WHISPER_SAMPLE_RATE,
gf->whisper_params.n_threads);

std::string text_preproc;
float sentence_p = 0.0f;
int64_t t0;
int64_t t1;

// run the inference
int whisper_full_result = -1;
try {
gf->whisper_params.duration_ms =
(int)((float)pcm32f_size / WHISPER_SAMPLE_RATE * 1000.0f);
whisper_full_result = whisper_full(gf->whisper_context, gf->whisper_params,
pcm32f_data, (int)pcm32f_size);
} catch (const std::exception &e) {
obs_log(LOG_ERROR, "Whisper exception: %s. Filter restart is required", e.what());
whisper_free(gf->whisper_context);
gf->whisper_context = nullptr;
return DETECTION_RESULT_UNKNOWN;
}
int whisper_full_result = whisper_full(gf->whisper_context, gf->whisper_params,
pcm32f_data, (int)pcm32f_size);

if (whisper_full_result != 0) {
obs_log(LOG_WARNING, "failed to process audio, error %d", whisper_full_result);
return DETECTION_RESULT_UNKNOWN;
} else {
if (whisper_full_result != 0) {
obs_log(LOG_WARNING, "failed to process audio, error %d",
whisper_full_result);
return DETECTION_RESULT_UNKNOWN;
}
if (whisper_full_n_segments(gf->whisper_context) == 0) {
return DETECTION_RESULT_SILENCE;
}
const int n_segment = 0;
const char *text = whisper_full_get_segment_text(gf->whisper_context, n_segment);
const int64_t t0 = whisper_full_get_segment_t0(gf->whisper_context, n_segment);
const int64_t t1 = whisper_full_get_segment_t1(gf->whisper_context, n_segment);
text_preproc = whisper_full_get_segment_text(gf->whisper_context, n_segment);
t0 = whisper_full_get_segment_t0(gf->whisper_context, n_segment);
t1 = whisper_full_get_segment_t1(gf->whisper_context, n_segment);

float sentence_p = 0.0f;
const int n_tokens = whisper_full_n_tokens(gf->whisper_context, n_segment);
for (int j = 0; j < n_tokens; ++j) {
sentence_p += whisper_full_get_token_p(gf->whisper_context, n_segment, j);
}
sentence_p /= (float)n_tokens;
} catch (const std::exception &e) {
obs_log(LOG_ERROR, "Whisper exception: %s. Filter restart is required", e.what());
whisper_free(gf->whisper_context);
gf->whisper_context = nullptr;
return DETECTION_RESULT_UNKNOWN;
}

std::string text_preproc = text;

if (text_preproc.empty()) {
return DETECTION_RESULT_SILENCE;
}
if (text_preproc.empty()) {
return DETECTION_RESULT_SILENCE;
}

// if language is en convert text to lowercase
if (strcmp(gf->whisper_params.language, "en") == 0) {
std::string text_lower;
std::transform(text_preproc.begin(), text_preproc.end(), text_lower.begin(),
::tolower);
text_preproc = text_lower;
// remove leading and trailing non-alphanumeric characters
text_preproc = remove_leading_trailing_nonalpha(text_preproc);
} else {
// fix UTF8 encoding
std::string text_fixed = fix_utf8(text);
text_preproc = text_fixed;
}
// if language is en convert text to lowercase
if (strcmp(gf->whisper_params.language, "en") == 0) {
std::transform(text_preproc.begin(), text_preproc.end(), text_preproc.begin(),
::tolower);
// remove leading and trailing non-alphanumeric characters
text_preproc = remove_leading_trailing_nonalpha(text_preproc);
} else {
// fix UTF8 encoding
text_preproc = fix_utf8(text_preproc);
}

if (gf->log_words) {
obs_log(LOG_INFO, "[%s --> %s] (%.3f) %s", to_timestamp(t0).c_str(),
to_timestamp(t1).c_str(), sentence_p, text_preproc.c_str());
}
if (gf->log_words) {
obs_log(LOG_INFO, "[%s --> %s] (%.3f) %s", to_timestamp(t0).c_str(),
to_timestamp(t1).c_str(), sentence_p, text_preproc.c_str());
}

if (text_preproc.empty()) {
return DETECTION_RESULT_SILENCE;
}
if (text_preproc.empty()) {
return DETECTION_RESULT_SILENCE;
}

// use a regular expression to detect filler words with a word boundary
try {
if (gf->detect_regex != nullptr && strlen(gf->detect_regex) > 0) {
std::regex filler_regex(gf->detect_regex);
if (std::regex_search(text_preproc, filler_regex,
std::regex_constants::match_any)) {
return DETECTION_RESULT_BEEP;
}
// use a regular expression to detect filler words with a word boundary
try {
if (gf->detect_regex != nullptr && strlen(gf->detect_regex) > 0) {
std::regex filler_regex(gf->detect_regex);
if (std::regex_search(text_preproc, filler_regex,
std::regex_constants::match_any)) {
return DETECTION_RESULT_BEEP;
}
} catch (const std::regex_error &e) {
obs_log(LOG_ERROR, "Regex error: %s", e.what());
}
} catch (const std::regex_error &e) {
obs_log(LOG_ERROR, "Regex error: %s", e.what());
}

return DETECTION_RESULT_SPEECH;
Expand Down

0 comments on commit 62a05a4

Please sign in to comment.