Skip to content

Commit

Permalink
refactor: Update buffer_output_type translations in locale files (#119)
Browse files Browse the repository at this point in the history
* refactor: Update buffer_output_type translations in locale files

* refactor: Update buffer_num_chars_per_line translation in locale files

* refactor: Remove unused code related to buffer output type selection

* refactor: Update TokenBufferThread to use TokenBufferString for caption building

* refactor: Update TokenBufferThread to use TokenBufferString for caption building
  • Loading branch information
royshil authored Jun 26, 2024
1 parent db13750 commit 958266f
Show file tree
Hide file tree
Showing 17 changed files with 211 additions and 66 deletions.
1 change: 1 addition & 0 deletions data/locale/ar-SA.ini
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ translate_add_context="الترجمة مع السياق"
whisper_translate="ترجمة إلى الإنجليزية (Whisper)"
buffer_size_msec="حجم الذاكرة المؤقتة (ملي ثانية)"
overlap_size_msec="حجم التداخل (ملي ثانية)"
buffer_output_type="نوع مخرجات الذاكرة المؤقتة"
1 change: 1 addition & 0 deletions data/locale/de-DE.ini
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ translate_add_context="Mit Kontext übersetzen"
whisper_translate="Ins Englische übersetzen (Flüstern)"
buffer_size_msec="Puffergröße (ms)"
overlap_size_msec="Überlappungsgröße (ms)"
buffer_output_type="Pufferausgabetyp"
3 changes: 2 additions & 1 deletion data/locale/en-US.ini
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,5 @@ translation_no_repeat_ngram_size="No-repeat ngram size"
translation_max_input_length="Max input length"
buffered_output_parameters="Buffered output parameters"
buffer_num_lines="Number of lines"
buffer_num_chars_per_line="Characters per line"
buffer_num_chars_per_line="Amount per line"
buffer_output_type="Output type"
1 change: 1 addition & 0 deletions data/locale/es-ES.ini
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ translate_add_context="Traducir con contexto"
whisper_translate="Traducir al inglés (Whisper)"
buffer_size_msec="Tamaño del búfer (ms)"
overlap_size_msec="Tamaño de superposición (ms)"
buffer_output_type="Tipo de salida de búfer"
1 change: 1 addition & 0 deletions data/locale/fr-FR.ini
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ translate_add_context="Traduire avec contexte"
whisper_translate="Traduire en anglais (Whisper)"
buffer_size_msec="Taille du tampon (ms)"
overlap_size_msec="Taille de chevauchement (ms)"
buffer_output_type="Type de sortie du tampon"
1 change: 1 addition & 0 deletions data/locale/hi-IN.ini
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ translate_add_context="संदर्भ के साथ अनुवाद
whisper_translate="अंग्रेजी में अनुवाद करें (व्हिस्पर)"
buffer_size_msec="बफ़र आकार (ms)"
overlap_size_msec="ओवरलैप आकार (ms)"
buffer_output_type="बफ़र आउटपुट प्रकार"
1 change: 1 addition & 0 deletions data/locale/ja-JP.ini
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ translate_add_context="コンテキスト付きで翻訳"
whisper_translate="英語に翻訳(ウィスパー)"
buffer_size_msec="バッファサイズ(ms)"
overlap_size_msec="オーバーラップサイズ(ms)"
buffer_output_type="バッファ出力タイプ"
1 change: 1 addition & 0 deletions data/locale/ko-KR.ini
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ translate_add_context="컨텍스트와 함께 번역"
whisper_translate="영어로 번역 (속삭임)"
buffer_size_msec="버퍼 크기 (ms)"
overlap_size_msec="오버랩 크기 (ms)"
buffer_output_type="버퍼 출력 유형"
1 change: 1 addition & 0 deletions data/locale/pl-PL.ini
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ translate_add_context="Tłumacz z kontekstem"
whisper_translate="Tłumacz na angielski (Whisper)"
buffer_size_msec="Rozmiar bufora (ms)"
overlap_size_msec="Rozmiar nakładki (ms)"
buffer_output_type="Typ wyjścia bufora"
1 change: 1 addition & 0 deletions data/locale/pt-BR.ini
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ translate_add_context="Traduzir com contexto"
whisper_translate="Traduzir para inglês (Whisper)"
buffer_size_msec="Tamanho do buffer (ms)"
overlap_size_msec="Tamanho da sobreposição (ms)"
buffer_output_type="Tipo de saída do buffer"
1 change: 1 addition & 0 deletions data/locale/ru-RU.ini
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,4 @@ translate_add_context="Перевести с контекстом"
whisper_translate="Перевести на английский (Whisper)"
buffer_size_msec="Размер буфера (мс)"
overlap_size_msec="Размер перекрытия (мс)"
buffer_output_type="Тип выходных данных буфера"
1 change: 1 addition & 0 deletions data/locale/zh-CN.ini
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ translate_add_context="带上下文翻译"
whisper_translate="翻译为英语(Whisper)"
buffer_size_msec="缓冲区大小(毫秒)"
overlap_size_msec="重叠大小(毫秒)"
buffer_output_type="缓冲区输出类型"
2 changes: 2 additions & 0 deletions src/transcription-filter-data.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ struct transcription_filter_data {
TokenBufferThread captions_monitor;
int buffered_output_num_lines = 2;
int buffered_output_num_chars = 30;
TokenBufferSegmentation buffered_output_output_type =
TokenBufferSegmentation::SEGMENTATION_TOKEN;

// ctor
transcription_filter_data() : whisper_buf_mutex(), whisper_ctx_mutex(), wshiper_thread_cv()
Expand Down
78 changes: 52 additions & 26 deletions src/transcription-filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,9 @@ void transcription_filter_destroy(void *data)

void transcription_filter_update(void *data, obs_data_t *s)
{
obs_log(LOG_INFO, "LocalVocal filter update");
struct transcription_filter_data *gf =
static_cast<struct transcription_filter_data *>(data);
obs_log(gf->log_level, "LocalVocal filter update");

gf->log_level = (int)obs_data_get_int(s, "log_level");
gf->vad_enabled = obs_data_get_bool(s, "vad_enabled");
Expand All @@ -188,11 +188,13 @@ void transcription_filter_update(void *data, obs_data_t *s)
bool new_buffered_output = obs_data_get_bool(s, "buffered_output");
int new_buffer_num_lines = (int)obs_data_get_int(s, "buffer_num_lines");
int new_buffer_num_chars_per_line = (int)obs_data_get_int(s, "buffer_num_chars_per_line");
TokenBufferSegmentation new_buffer_output_type =
(TokenBufferSegmentation)obs_data_get_int(s, "buffer_output_type");

if (new_buffered_output) {
obs_log(LOG_INFO, "buffered_output enable");
obs_log(gf->log_level, "buffered_output enable");
if (!gf->buffered_output || !gf->captions_monitor.isEnabled()) {
obs_log(LOG_INFO, "buffered_output currently disabled, enabling");
obs_log(gf->log_level, "buffered_output currently disabled, enabling");
gf->buffered_output = true;
gf->captions_monitor.initialize(
gf,
Expand All @@ -203,18 +205,23 @@ void transcription_filter_update(void *data, obs_data_t *s)
}
},
new_buffer_num_lines, new_buffer_num_chars_per_line,
std::chrono::seconds(10));
std::chrono::seconds(3), new_buffer_output_type);
} else {
if (new_buffer_num_lines != gf->buffered_output_num_lines ||
new_buffer_num_chars_per_line != gf->buffered_output_num_chars) {
obs_log(LOG_INFO, "buffered_output parameters changed, updating");
new_buffer_num_chars_per_line != gf->buffered_output_num_chars ||
new_buffer_output_type != gf->buffered_output_output_type) {
obs_log(gf->log_level,
"buffered_output parameters changed, updating");
gf->captions_monitor.clear();
gf->captions_monitor.setNumSentences(new_buffer_num_lines);
gf->captions_monitor.setNumPerSentence(
new_buffer_num_chars_per_line);
gf->buffered_output_num_lines = new_buffer_num_lines;
gf->buffered_output_num_chars = new_buffer_num_chars_per_line;
gf->captions_monitor.setSegmentation(new_buffer_output_type);
}
}
gf->buffered_output_num_lines = new_buffer_num_lines;
gf->buffered_output_num_chars = new_buffer_num_chars_per_line;
gf->buffered_output_output_type = new_buffer_output_type;
} else {
obs_log(gf->log_level, "buffered_output disable");
if (gf->buffered_output) {
Expand Down Expand Up @@ -349,13 +356,23 @@ void transcription_filter_update(void *data, obs_data_t *s)
}
}

if (gf->initial_creation && gf->context != nullptr && obs_source_enabled(gf->context)) {
obs_log(LOG_INFO, "Initial filter creation and source enabled");
if (gf->context != nullptr && obs_source_enabled(gf->context)) {
if (gf->initial_creation) {
obs_log(LOG_INFO, "Initial filter creation and source enabled");

// source was enabled on creation
update_whisper_model(gf);
gf->active = true;
gf->initial_creation = false;
// source was enabled on creation
update_whisper_model(gf);
gf->active = true;
gf->initial_creation = false;
} else {
// check if the whisper model selection has changed
const std::string new_model_path =
obs_data_get_string(s, "whisper_model_path");
if (gf->whisper_model_path != new_model_path) {
obs_log(LOG_INFO, "New model selected: %s", new_model_path.c_str());
update_whisper_model(gf);
}
}
}
}

Expand Down Expand Up @@ -506,9 +523,11 @@ void transcription_filter_defaults(obs_data_t *s)
obs_data_set_default_bool(s, "buffered_output", false);
obs_data_set_default_int(s, "buffer_num_lines", 2);
obs_data_set_default_int(s, "buffer_num_chars_per_line", 30);
obs_data_set_default_int(s, "buffer_output_type",
(int)TokenBufferSegmentation::SEGMENTATION_TOKEN);

obs_data_set_default_bool(s, "vad_enabled", true);
obs_data_set_default_double(s, "vad_threshold", 0.5);
obs_data_set_default_double(s, "vad_threshold", 0.65);
obs_data_set_default_int(s, "log_level", LOG_DEBUG);
obs_data_set_default_bool(s, "log_words", false);
obs_data_set_default_bool(s, "caption_to_stream", false);
Expand Down Expand Up @@ -669,6 +688,16 @@ obs_properties_t *transcription_filter_properties(void *data)
return true;
});

// Add language selector
obs_property_t *whisper_language_select_list =
obs_properties_add_list(ppts, "whisper_language_select", MT_("language"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
// iterate over all available languages and add them to the list
for (auto const &pair : whisper_available_lang_reverse) {
obs_property_list_add_string(whisper_language_select_list, pair.first.c_str(),
pair.second.c_str());
}

// add translation option group
obs_properties_t *translation_group = obs_properties_create();
obs_property_t *translation_group_prop = obs_properties_add_group(
Expand Down Expand Up @@ -806,7 +835,8 @@ obs_properties_t *transcription_filter_properties(void *data)
{"whisper_params_group", "log_words", "caption_to_stream", "buffer_size_msec",
"overlap_size_msec", "step_by_step_processing", "min_sub_duration",
"process_while_muted", "buffered_output", "vad_enabled", "log_level",
"suppress_sentences", "sentence_psum_accept_thresh", "vad_threshold"}) {
"suppress_sentences", "sentence_psum_accept_thresh", "vad_threshold",
"buffered_output_group"}) {
obs_property_set_visible(obs_properties_get(props, prop_name.c_str()),
show_hide);
}
Expand All @@ -820,6 +850,12 @@ obs_properties_t *transcription_filter_properties(void *data)
obs_properties_t *buffered_output_group = obs_properties_create();
obs_properties_add_group(ppts, "buffered_output_group", MT_("buffered_output_parameters"),
OBS_GROUP_NORMAL, buffered_output_group);
// add buffer output type selector: segment captions per character or per word
obs_property_t *buffer_type_list = obs_properties_add_list(
buffered_output_group, "buffer_output_type", MT_("buffer_output_type"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_INT);
obs_property_list_add_int(buffer_type_list, "Character", SEGMENTATION_TOKEN);
obs_property_list_add_int(buffer_type_list, "Word", SEGMENTATION_WORD);
// add buffer lines parameter
obs_properties_add_int_slider(buffered_output_group, "buffer_num_lines",
MT_("buffer_num_lines"), 1, 5, 1);
Expand Down Expand Up @@ -868,16 +904,6 @@ obs_properties_t *transcription_filter_properties(void *data)
obs_properties_add_group(ppts, "whisper_params_group", MT_("whisper_parameters"),
OBS_GROUP_NORMAL, whisper_params_group);

// Add language selector
obs_property_t *whisper_language_select_list = obs_properties_add_list(
whisper_params_group, "whisper_language_select", MT_("language"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
// iterate over all available languages and add them to the list
for (auto const &pair : whisper_available_lang_reverse) {
obs_property_list_add_string(whisper_language_select_list, pair.first.c_str(),
pair.second.c_str());
}

obs_property_t *whisper_sampling_method_list = obs_properties_add_list(
whisper_params_group, "whisper_sampling_method", MT_("whisper_sampling_method"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_INT);
Expand Down
16 changes: 16 additions & 0 deletions src/transcription-utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include <string>
#include <vector>
#include <chrono>
#include <algorithm>
#include <cctype>

// Fix UTF8 string for Windows
std::string fix_utf8(const std::string &str);
Expand All @@ -25,4 +27,18 @@ inline uint64_t now_ms()
// Split a string into words based on spaces
std::vector<std::string> split_words(const std::string &str_copy);

// Return a copy of `str` with leading and trailing whitespace removed.
// Whitespace is whatever std::isspace reports for each element (converted to
// unsigned char). The input is left untouched; an empty or all-whitespace
// input yields an empty result.
template<typename StringLike> StringLike trim(const StringLike &str)
{
	// predicate: true for the first element that should be kept
	const auto is_not_space = [](unsigned char c) { return std::isspace(c) == 0; };

	StringLike result = str;

	// cut the tail: base() of the last non-space (reverse) iterator points
	// one past that element, i.e. at the first trailing whitespace
	const auto tail_begin =
		std::find_if(result.rbegin(), result.rend(), is_not_space).base();
	result.erase(tail_begin, result.end());

	// cut the head: everything before the first non-space element
	const auto head_end = std::find_if(result.begin(), result.end(), is_not_space);
	result.erase(result.begin(), head_end);

	return result;
}

#endif // TRANSCRIPTION_UTILS_H
Loading

0 comments on commit 958266f

Please sign in to comment.