Skip to content

Commit

Permalink
Bump whisper.cpp. Simple settings mode (#60)
Browse files Browse the repository at this point in the history
* bump whispercpp, simple settings mode

* lint
  • Loading branch information
royshil authored Dec 21, 2023
1 parent 8c02e0c commit b45b235
Show file tree
Hide file tree
Showing 7 changed files with 82 additions and 44 deletions.
4 changes: 2 additions & 2 deletions buildspec.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@
}
},
"name": "obs-localvocal",
"version": "0.0.7",
"version": "0.0.8",
"author": "Roy Shilkrot",
"website": "https://github.com/obs-ai/obs-localvocal",
"website": "https://github.com/occ-ai/obs-localvocal",
"email": "[email protected]",
"uuids": {
"macosPackage": "CB66E5DF-FF45-4BEA-B38B-7AD3705860C9",
Expand Down
2 changes: 1 addition & 1 deletion cmake/BuildWhispercpp.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ include(ExternalProject)

set(CMAKE_OSX_ARCHITECTURES_ "arm64$<SEMICOLON>x86_64")

set(Whispercpp_Build_GIT_TAG "ec7a6f04f9c32adec2e6b0995b8c728c5bf56f35")
set(Whispercpp_Build_GIT_TAG "8986690c2a7b81b2b5d79cdc186b5aa672311740")

if(${CMAKE_BUILD_TYPE} STREQUAL Release OR ${CMAKE_BUILD_TYPE} STREQUAL RelWithDebInfo)
set(Whispercpp_BUILD_TYPE Release)
Expand Down
11 changes: 6 additions & 5 deletions data/locale/en-US.ini
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
LocalVocalPlugin="LocalVocal Plugin"
transcription_filterAudioFilter="LocalVocal Transcription"
vad_enabled="VAD Enabled"
log_level="Log Level"
log_words="Log Output Words"
log_level="Internal Log Level"
log_words="Log Output to Console"
caption_to_stream="Stream Captions"
step_by_step_processing="Step-by-step processing (⚠️ processing will increase)"
step_by_step_processing="Step-by-step processing (⚠️ increased processing)"
step_size_msec="Step size (ms)"
subtitle_sources="Subtitles Output"
none_no_output="None / No output"
text_file_output="Text File output"
output_filename="Output filename"
whisper_model="Whisper Model"
external_model_file="External model file"
whisper_parameters="Whisper Parameters"
whisper_parameters="Advanced Settings"
language="Language"
whisper_sampling_method="Whisper Sampling Method"
n_threads="Number of threads"
Expand Down Expand Up @@ -41,4 +41,5 @@ save_srt="Save in SRT format (no file truncation)"
only_while_recording="Write output only while recording"
process_while_muted="Process speech while source is muted"
rename_file_to_match_recording="Rename file to match recording"
min_sub_duration="Minimal subtitle duration (msec)"
min_sub_duration="Min. sub duration (ms)"
advanced_settings="Advanced Settings"
42 changes: 31 additions & 11 deletions src/transcription-filter-data.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,11 @@ struct transcription_filter_data {
struct circlebuf input_buffers[MAX_PREPROC_CHANNELS];

/* Resampler */
audio_resampler_t *resampler = nullptr;
audio_resampler_t *resampler;

/* whisper */
char *whisper_model_path = nullptr;
struct whisper_context *whisper_context = nullptr;
char *whisper_model_path;
struct whisper_context *whisper_context;
whisper_full_params whisper_params;

float filler_p_threshold;
Expand All @@ -81,21 +81,41 @@ struct transcription_filter_data {
bool rename_file_to_match_recording = false;

// Text source to output the subtitles
obs_weak_source_t *text_source = nullptr;
char *text_source_name = nullptr;
std::mutex *text_source_mutex = nullptr;
obs_weak_source_t *text_source;
char *text_source_name;
std::mutex *text_source_mutex;
// Callback to set the text in the output text source (subtitles)
std::function<void(const DetectionResultWithText &result)> setTextCallback;
// Output file path to write the subtitles
std::string output_file_path = "";
std::string whisper_model_file_currently_loaded = "";
std::string output_file_path;
std::string whisper_model_file_currently_loaded;

// Use std for thread and mutex
std::thread whisper_thread;

std::mutex *whisper_buf_mutex = nullptr;
std::mutex *whisper_ctx_mutex = nullptr;
std::condition_variable *wshiper_thread_cv = nullptr;
std::mutex *whisper_buf_mutex;
std::mutex *whisper_ctx_mutex;
std::condition_variable *wshiper_thread_cv;

// ctor
transcription_filter_data()
{
// initialize all pointers to nullptr
for (size_t i = 0; i < MAX_PREPROC_CHANNELS; i++) {
copy_buffers[i] = nullptr;
}
context = nullptr;
resampler = nullptr;
whisper_model_path = nullptr;
whisper_context = nullptr;
text_source = nullptr;
text_source_mutex = nullptr;
whisper_buf_mutex = nullptr;
whisper_ctx_mutex = nullptr;
wshiper_thread_cv = nullptr;
output_file_path = "";
whisper_model_file_currently_loaded = "";
}
};

// Audio packet info
Expand Down
60 changes: 36 additions & 24 deletions src/transcription-filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include <Windows.h>
#endif

#include <QString>

inline enum speaker_layout convert_speaker_layout(uint8_t channels)
{
switch (channels) {
Expand Down Expand Up @@ -125,19 +127,7 @@ void transcription_filter_destroy(void *data)
static_cast<struct transcription_filter_data *>(data);

obs_log(gf->log_level, "transcription_filter_destroy");
{
std::lock_guard<std::mutex> lock(*gf->whisper_ctx_mutex);
if (gf->whisper_context != nullptr) {
whisper_free(gf->whisper_context);
gf->whisper_context = nullptr;
gf->wshiper_thread_cv->notify_all();
}
}

// join the thread
if (gf->whisper_thread.joinable()) {
gf->whisper_thread.join();
}
shutdown_whisper_thread(gf);

if (gf->text_source_name) {
bfree(gf->text_source_name);
Expand Down Expand Up @@ -448,14 +438,14 @@ void transcription_filter_update(void *data, obs_data_t *s)
obs_weak_source_release(old_weak_text_source);
}

obs_log(gf->log_level, "transcription_filter: update whisper model");
update_whsiper_model_path(gf, s);

if (!gf->whisper_ctx_mutex) {
if (gf->whisper_ctx_mutex == nullptr) {
obs_log(LOG_ERROR, "whisper_ctx_mutex is null");
return;
}

obs_log(gf->log_level, "transcription_filter: update whisper model");
update_whsiper_model_path(gf, s);

obs_log(gf->log_level, "transcription_filter: update whisper params");
std::lock_guard<std::mutex> lock(*gf->whisper_ctx_mutex);

Expand Down Expand Up @@ -492,7 +482,7 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)
{
obs_log(LOG_INFO, "transcription filter create");

struct transcription_filter_data *gf = new transcription_filter_data;
struct transcription_filter_data *gf = new transcription_filter_data();

// Get the number of channels for the input source
gf->channels = audio_output_get_channels(obs_get_audio());
Expand Down Expand Up @@ -648,6 +638,7 @@ void transcription_filter_defaults(obs_data_t *s)
obs_data_set_default_bool(s, "rename_file_to_match_recording", true);
obs_data_set_default_int(s, "step_size_msec", 1000);
obs_data_set_default_int(s, "min_sub_duration", 3000);
obs_data_set_default_bool(s, "advanced_settings", false);

// Whisper parameters
obs_data_set_default_int(s, "whisper_sampling_method", WHISPER_SAMPLING_BEAM_SEARCH);
Expand Down Expand Up @@ -684,12 +675,6 @@ obs_properties_t *transcription_filter_properties(void *data)

obs_properties_t *ppts = obs_properties_create();

obs_properties_add_bool(ppts, "vad_enabled", MT_("vad_enabled"));
obs_property_t *list = obs_properties_add_list(ppts, "log_level", MT_("log_level"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_INT);
obs_property_list_add_int(list, "DEBUG", LOG_DEBUG);
obs_property_list_add_int(list, "INFO", LOG_INFO);
obs_property_list_add_int(list, "WARNING", LOG_WARNING);
obs_properties_add_bool(ppts, "log_words", MT_("log_words"));
obs_properties_add_bool(ppts, "caption_to_stream", MT_("caption_to_stream"));
obs_property_t *step_by_step_processing = obs_properties_add_bool(
Expand Down Expand Up @@ -799,10 +784,31 @@ obs_properties_t *transcription_filter_properties(void *data)
return true;
});

obs_property_t *advanced_settings_prop =
obs_properties_add_bool(ppts, "advanced_settings", MT_("advanced_settings"));
obs_property_set_modified_callback(advanced_settings_prop, [](obs_properties_t *props,
obs_property_t *property,
obs_data_t *settings) {
UNUSED_PARAMETER(property);
// If advanced settings is enabled, show the advanced settings group
const bool show_hide = obs_data_get_bool(settings, "advanced_settings");
obs_property_set_visible(obs_properties_get(props, "whisper_params_group"),
show_hide);
return true;
});

obs_properties_t *whisper_params_group = obs_properties_create();
obs_properties_add_group(ppts, "whisper_params_group", MT_("whisper_parameters"),
OBS_GROUP_NORMAL, whisper_params_group);

obs_properties_add_bool(whisper_params_group, "vad_enabled", MT_("vad_enabled"));
obs_property_t *list = obs_properties_add_list(whisper_params_group, "log_level",
MT_("log_level"), OBS_COMBO_TYPE_LIST,
OBS_COMBO_FORMAT_INT);
obs_property_list_add_int(list, "DEBUG", LOG_DEBUG);
obs_property_list_add_int(list, "INFO", LOG_INFO);
obs_property_list_add_int(list, "WARNING", LOG_WARNING);

// Add language selector
obs_property_t *whisper_language_select_list = obs_properties_add_list(
whisper_params_group, "whisper_language_select", MT_("language"),
Expand Down Expand Up @@ -885,6 +891,12 @@ obs_properties_t *transcription_filter_properties(void *data)
obs_properties_add_float_slider(whisper_params_group, "length_penalty",
MT_("length_penalty"), -1.0f, 1.0f, 0.1f);

// Add informative text about the plugin
obs_properties_add_text(
ppts, "info",
QString(PLUGIN_INFO_TEMPLATE).arg(PLUGIN_VERSION).toStdString().c_str(),
OBS_TEXT_INFO);

UNUSED_PARAMETER(data);
return ppts;
}
5 changes: 5 additions & 0 deletions src/transcription-filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ void transcription_filter_deactivate(void *data);
void transcription_filter_defaults(obs_data_t *s);
obs_properties_t *transcription_filter_properties(void *data);

// HTML snippet with links to the project, its authors and the support page.
// "%1" is a QString::arg() placeholder: transcription_filter_properties()
// substitutes PLUGIN_VERSION for it and adds the result to the properties
// as an OBS_TEXT_INFO text property.
const char *const PLUGIN_INFO_TEMPLATE =
"<a href=\"https://github.com/occ-ai/obs-localvocal/\">LocalVocal</a> (%1) by "
"<a href=\"https://github.com/occ-ai\">OCC AI</a> ❤️ "
"<a href=\"https://www.patreon.com/RoyShilkrot\">Support & Follow</a>";

#ifdef __cplusplus
}
#endif
2 changes: 1 addition & 1 deletion src/whisper-utils/whisper-utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ void shutdown_whisper_thread(struct transcription_filter_data *gf)
void start_whisper_thread_with_path(struct transcription_filter_data *gf, const std::string &path)
{
obs_log(gf->log_level, "start_whisper_thread_with_path: %s", path.c_str());
if (!gf->whisper_ctx_mutex) {
if (gf->whisper_ctx_mutex == nullptr) {
obs_log(LOG_ERROR, "cannot init whisper: whisper_ctx_mutex is null");
return;
}
Expand Down

0 comments on commit b45b235

Please sign in to comment.