Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into fcitx
Browse files Browse the repository at this point in the history
  • Loading branch information
Fcitx Bot committed Jun 15, 2024
2 parents 8acc79f + 07ce456 commit e379550
Show file tree
Hide file tree
Showing 10 changed files with 200 additions and 234 deletions.
2 changes: 2 additions & 0 deletions src/converter/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ mozc_cc_library(
"//request:conversion_request",
"@com_google_absl//absl/log",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/strings",
],
)

Expand All @@ -203,6 +204,7 @@ mozc_cc_test(
":nbest_generator",
":node",
":segments",
":segments_matchers",
"//data_manager/testing:mock_data_manager",
"//dictionary:user_dictionary_stub",
"//engine:modules",
Expand Down
1 change: 1 addition & 0 deletions src/converter/immutable_converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ class ImmutableConverter : public ImmutableConverterInterface {
FRIEND_TEST(NBestGeneratorTest, InnerSegmentBoundary);
FRIEND_TEST(NBestGeneratorTest, MultiSegmentConnectionTest);
FRIEND_TEST(NBestGeneratorTest, SingleSegmentConnectionTest);
FRIEND_TEST(NBestGeneratorTest, NoPartialCandidateBetweenAlphabets);
friend class NBestGeneratorTest;

enum InsertCandidatesType {
Expand Down
12 changes: 12 additions & 0 deletions src/converter/nbest_generator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@

#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/strings/ascii.h"
#include "base/vlog.h"
#include "converter/candidate_filter.h"
#include "converter/connector.h"
Expand All @@ -60,6 +61,13 @@ using ::mozc::dictionary::SuppressionDictionary;
constexpr int kFreeListSize = 512;
constexpr int kCostDiff = 3453; // log prob of 1/1000

// Returns true when the boundary between `left` and `right` falls between two
// ASCII alphabetic characters, i.e. the last character of `left.value` and the
// first character of `right.value` are both ASCII letters.
//
// Both values are expected to be non-empty, which is asserted with DCHECK.
// An empty value is additionally reported as "not between alphabets" so that
// back()/front() are never called on an empty value in builds where DCHECK
// does not fire.
bool IsBetweenAlphabets(const Node &left, const Node &right) {
  DCHECK(!left.value.empty());
  DCHECK(!right.value.empty());
  // Explicit guard: calling back()/front() on an empty value is undefined
  // behavior, and the DCHECKs above are not a runtime guard in every build.
  if (left.value.empty() || right.value.empty()) {
    return false;
  }
  return absl::ascii_isalpha(left.value.back()) &&
         absl::ascii_isalpha(right.value.front());
}

} // namespace

const NBestGenerator::QueueElement *NBestGenerator::CreateNewElement(
Expand Down Expand Up @@ -274,6 +282,10 @@ CandidateFilter::ResultType NBestGenerator::MakeCandidateFromElement(
const QueueElement *elm = element->next;
for (; elm->next != nullptr; elm = elm->next) {
nodes.push_back(elm->node);
if (elm->next != nullptr &&
IsBetweenAlphabets(*elm->node, *elm->next->node)) {
continue;
}
if (segmenter_->IsBoundary(*elm->node, *elm->next->node, false)) {
break;
}
Expand Down
47 changes: 47 additions & 0 deletions src/converter/nbest_generator_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,12 @@
#include "converter/lattice.h"
#include "converter/node.h"
#include "converter/segments.h"
#include "converter/segments_matchers.h"
#include "data_manager/testing/mock_data_manager.h"
#include "dictionary/user_dictionary_stub.h"
#include "engine/modules.h"
#include "request/conversion_request.h"
#include "testing/gmock.h"
#include "testing/gunit.h"

namespace mozc {
Expand Down Expand Up @@ -287,4 +289,49 @@ TEST_F(NBestGeneratorTest, InnerSegmentBoundary) {
EXPECT_EQ(content_values[2], "行きたい");
}

// Checks that, for an all-alphabet input, NBestGenerator yields only the full
// input as a candidate and no partial candidate split between alphabets.
TEST_F(NBestGeneratorTest, NoPartialCandidateBetweenAlphabets) {
  auto mock = std::make_unique<MockDataAndImmutableConverter>();
  ImmutableConverter *immutable_converter = mock->GetConverter();

  const std::string kInput = "AAA";

  // One FREE segment whose key is the whole input.
  Segments segments;
  Segment *free_segment = segments.add_segment();
  free_segment->set_segment_type(Segment::FREE);
  free_segment->set_key(kInput);

  // Build the lattice for a PREDICTION request.
  Lattice lattice;
  lattice.SetKey(kInput);
  ConversionRequest request;
  request.set_request_type(ConversionRequest::PREDICTION);
  immutable_converter->MakeLattice(request, &segments, &lattice);

  std::vector<uint16_t> group;
  immutable_converter->MakeGroup(segments, &group);
  immutable_converter->Viterbi(segments, &lattice);

  std::unique_ptr<NBestGenerator> generator =
      mock->CreateNBestGenerator(&lattice);

  // Single-segment mode, as used for real time conversion.
  constexpr bool kRealtimeSingleSegment = true;
  const Node *first_node = lattice.bos_nodes();
  const Node *last_node =
      GetEndNode(request, *immutable_converter, segments, *first_node, group,
                 kRealtimeSingleSegment);

  // The test dictionary contains "A", so the partial candidates "A" and "AA"
  // could be generated. They must be suppressed because each of them would be
  // split between alphabets.
  const NBestGenerator::Options options = {
      .boundary_mode = NBestGenerator::ONLY_EDGE,
      .candidate_mode = NBestGenerator::BUILD_FROM_ONLY_FIRST_INNER_SEGMENT |
                        NBestGenerator::FILL_INNER_SEGMENT_INFO,
  };
  generator->Reset(first_node, last_node, options);

  Segment result_segment;
  generator->SetCandidates(request, "", 10, &result_segment);

  // Exactly one candidate is expected, and its value is the whole input.
  const auto has_full_value =
      ::testing::Field("value", &Segment::Candidate::value, "AAA");
  EXPECT_THAT(result_segment, HasSingleCandidate(has_full_value));
}

} // namespace mozc
6 changes: 0 additions & 6 deletions src/prediction/dictionary_predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1464,12 +1464,6 @@ DictionaryPredictor::GetTypingCorrectionMixingParams(
typing_corrected_results);
}

// Literal-at-least-second parameter is now defined as flag.
typing_correction_mixing_params.literal_at_least_second =
request.request()
.decoder_experiment_params()
.typing_correction_literal_at_least_second();

return typing_correction_mixing_params;
}

Expand Down
114 changes: 2 additions & 112 deletions src/protocol/commands.proto
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ message Capability {
[default = NO_TEXT_DELETION_CAPABILITY];
}

// Next ID: 80
// Next ID: 82
// Bundles together some Android experiment flags so that they can be easily
// retrieved throughout the native code. These flags are generally specific to
// the decoder, and are made available when the decoder is initialized.
Expand All @@ -580,13 +580,6 @@ message DecoderExperimentParams {

optional int32 mobile_history_prediction_size = 2;

reserved 3; // Deprecated enable_strict_candidate_filter

reserved 4; // Deprecated enable_new_spatial_scoring
reserved 5; // Deprecated spatial_cost_penalty
reserved 6; // Deprecated spatial_cost_penalty_min_char_length
reserved 7; // Deprecated enrich_partial_candidates

enum VariationCharacterType {
NO_VARIATION = 0;
// Standardized variation sequences for Japanese.
Expand All @@ -598,56 +591,6 @@ message DecoderExperimentParams {
// Bitmap of enabled variation character types.
optional uint32 variation_character_types = 8 [default = 0];

reserved 9; // Deprecated enable_number_decoder
reserved 10; // Deprecated cancel_segment_model_penalty_for_prediction
reserved 11; // Deprecated enable_environmental_filter_rewriter
reserved 12; // Deprecated undo_partial_commit
reserved 13; // Deprecated use_actual_converter_for_realtime_conversion
reserved 14; // Deprecated enable_single_kanji_prediction
reserved 15; // Deprecated single_kanji_prediction_cost_offset
reserved 17; // Deprecated typing_correction_max_count
reserved 18; // Deprecated typing_correction_max_rank

// Parameters for typing correction candidates
optional float typing_correction_identity_score_max_diff = 21 [default = 0.0];
optional float typing_correction_top_score_max_diff = 22 [default = 1.0];

reserved 38; // Deprecated typing_correction_conversion_cost_max_diff
reserved 46; // Deprecated enable_typing_correction_mixer_v2
reserved 47; // Deprecated
// typing_correction_literal_on_top_correction_score_max_diff
reserved 48; // Deprecated
// typing_correction_literal_on_top_conversion_cost_max_diff

// Trigger literal on top if
// correction_score <=
// typing_correction_literal_on_top_length_score_max_diff
// * (typing_correction_literal_on_top_length_decay^(input_length-3))
// Literal candidate is placed at least second position.
optional bool typing_correction_literal_at_least_second = 49
[default = false];
// Character-length based triggering.
optional float typing_correction_literal_on_top_length_score_max_diff = 56
[default = 0.0];
optional float typing_correction_literal_on_top_length_decay = 57
[default = 0.0];

// Parameters for completion
// The diff against the identity score.
optional float typing_completion_identity_score_max_diff = 39
[default = -1.0];
// The maximum number of characters to complete. If zero, completion is
// disabled.
optional int32 typing_completion_max_length = 40 [default = 0];
// Starts the completion when |query| >= start_length.
optional int32 typing_completion_start_length = 41 [default = 4];

reserved 16; // Deprecated use_typing_correction_diff_cost
reserved 19; // Deprecated typing_correction_cost_offset
reserved 20; // Deprecated cancel_content_word_suffix_penalty
reserved 23; // Deprecated typing_correction_score_offset
reserved 24; // Deprecated typing_correction_move_literal_candidate_to_top
reserved 30; // Deprecated enable_number_style_learning

optional bool disable_zero_query_suffix_prediction = 36 [default = false];

Expand Down Expand Up @@ -701,62 +644,9 @@ message DecoderExperimentParams {

optional int32 max_composition_event_to_process = 64 [default = 2];

// Fix the bug on the literal_on_top correction.
// Fix the bug on the literal_on_top correction.
optional bool fix_literal_on_top = 65 [default = false];

// Adds kana modifier insensitive corrections even when the score is smaller
// than identity score. This will increase the coverage. Adds only when
// top_score - correction_score < max_diff.
optional float kana_modifier_insensitive_corrections_top_score_max_diff = 66
[default = 0.0];
// Extra penalty added to the corrections.
optional float kana_modifier_insensitive_corrections_penalty = 67
[default = 0.0];

optional float typing_correction_reranker_max_score_diff_override = 68
[default = 0.0];
// When the top correction is kana_modifier_insensitive_correction, suppress
// other corrections.
optional bool typing_correction_promote_kana_modifier_insensitive_only = 69
[default = false];

// Penalty for the first character mismatch.
optional float typing_correction_first_char_mismatch_penalty = 70
[default = 0.0];

// Penalties for the modifier corrections when intended modifiers exist.
// `local` is for the same modifier type, global ignores the modifier type.
optional float typing_correction_intended_modifier_local_penalty = 71
[default = 0.0];
optional float typing_correction_intended_modifier_global_penalty = 72
[default = 0.0];
// exponential decay factor to assign larger penalty when
// intended and added modifiers are closely located.
// The actual penalty is computed as
// (global|local)_penalty * decay^(distance - 1);
optional float typing_correction_intended_modifier_decay = 73 [default = 0.0];

// New triggering model uses zero threshold and dynamic penalty to define the
// triggering condition. This flag is used to define all legacy thresholds as
// penalty values.
optional bool typing_correction_threshold_as_penalty = 74 [default = false];

// Post correction parameters.
// Maximum context length. Note this is not a character length, but the
// token length. Larger size would yield larger latency penalties.
// When zero, default setting is used.
optional int32 post_correction_max_context_size = 75 [default = 0];

// Maximum number of unique corrections. When zero, default setting is used.
optional int32 post_correction_max_correction_size = 76 [default = 0];

// Maximum number of corrections per query. When zero, default setting is
// used.
optional int32 post_correction_nbest_size = 79 [default = 0];

// Uses the surrounding context information fed from the client.
optional bool post_correction_use_surrounding_context = 77 [default = false];

// Remove user history prediction entry when its selected ratio is lower
// than the value.
optional float user_history_prediction_min_selected_ratio = 78
Expand Down
Loading

0 comments on commit e379550

Please sign in to comment.