Skip to content

Commit

Permalink
Apply inner segment boundary information with size 1, which may have …
Browse files Browse the repository at this point in the history
…better information for learning.

Example: Dictionary predictor may produce (all_value, content_value) = ("私の", "私の") with inner segment boundary { ("私の", "私") }.
PiperOrigin-RevId: 694010741
  • Loading branch information
Toshiyuki Hanaoka authored and hiroyuki-komatsu committed Nov 7, 2024
1 parent 1a0d98a commit a6ad2cf
Show file tree
Hide file tree
Showing 12 changed files with 90 additions and 2 deletions.
2 changes: 2 additions & 0 deletions src/data/test/renderer/win32/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@

# Renderer test images.

load("@com_google_protobuf//bazel:cc_proto_library.bzl", "cc_proto_library")

package(default_visibility = ["//renderer/win32:__subpackages__"])

proto_library(
Expand Down
1 change: 1 addition & 0 deletions src/data_manager/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

load("@com_google_protobuf//bazel:cc_proto_library.bzl", "cc_proto_library")
load(
"//:build_defs.bzl",
"mozc_cc_binary",
Expand Down
1 change: 1 addition & 0 deletions src/ipc/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
# Visibility: please choose a more appropriate default for the package,
# and update any rules that should be different.

load("@com_google_protobuf//bazel:cc_proto_library.bzl", "cc_proto_library")
load(
"//:build_defs.bzl",
"mozc_cc_binary",
Expand Down
1 change: 1 addition & 0 deletions src/prediction/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
# Visibility: please choose a more appropriate default for the package,
# and update any rules that should be different.

load("@com_google_protobuf//bazel:cc_proto_library.bzl", "cc_proto_library")
load(
"//:build_defs.bzl",
"mozc_cc_binary",
Expand Down
2 changes: 1 addition & 1 deletion src/prediction/dictionary_predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1328,7 +1328,7 @@ std::shared_ptr<Result> DictionaryPredictor::MaybeGetPreviousTopResult(
// 3. current key is shorter than previous key.
// 4. current key is the prefix of previous key.
// 5. current result is not a partial suggestion.
if (prev_top_result && cur_top_key_length > prev_top_key_length &&
if (prev_top_result && cur_top_key_length >= prev_top_key_length &&
std::abs(current_top_result.cost - prev_top_result->cost) < max_diff &&
current_top_result.key.size() < prev_top_result->key.size() &&
!(current_top_result.types & PREFIX) &&
Expand Down
1 change: 1 addition & 0 deletions src/protocol/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

load("@com_google_protobuf//bazel:cc_proto_library.bzl", "cc_proto_library")
load("//bazel:stubs.bzl", "jspb_proto_library")

package(default_visibility = ["//visibility:public"])
Expand Down
35 changes: 35 additions & 0 deletions src/rewriter/environmental_filter_rewriter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,11 @@ constexpr EmojiData kTestEmojiList[] = {
{"🪬", EmojiVersion::E14_0}, // 1FAAC
{"🫃", EmojiVersion::E14_0}, // 1FAC3
{"🫠", EmojiVersion::E14_0}, // 1FAE0

// Emoji 16.0 Example
{"🪏", EmojiVersion::E16_0}, // 1FA8F
{"🫆", EmojiVersion::E16_0}, // 1FAC6
{"🫟", EmojiVersion::E16_0}, // 1FADF
};

// This data manager overrides GetEmojiRewriterData() to return the above test
Expand Down Expand Up @@ -292,6 +297,36 @@ TEST_F(EnvironmentalFilterRewriterTest, EmojiFilterTest) {
}
}

TEST_F(EnvironmentalFilterRewriterTest, EmojiFilterE160Test) {
  // Case 1: with a default-constructed ConversionRequest, every Emoji 16.0
  // candidate is expected to be removed from the segment, and the rewriter is
  // expected to report that it modified the segments.
  {
    const ConversionRequest default_request;
    Segments filtered;

    filtered.Clear();
    AddSegment("えもじ", {"🪏", "🫆", "🫟"}, &filtered);

    EXPECT_TRUE(rewriter_->Rewrite(default_request, &filtered));
    EXPECT_EQ(filtered.conversion_segment(0).candidates_size(), 0);
  }

  // Case 2: once the client request lists EMOJI_16_0 as an additional
  // renderable character group, all three candidates are expected to survive
  // and the rewriter is expected to report no modification.
  {
    Segments kept;
    kept.Clear();
    AddSegment("えもじ", {"🪏", "🫆", "🫟"}, &kept);

    commands::Request client_request;
    client_request.add_additional_renderable_character_groups(
        commands::Request::EMOJI_16_0);

    ConversionRequest conversion_request;
    conversion_request.set_request(&client_request);

    EXPECT_FALSE(rewriter_->Rewrite(conversion_request, &kept));
    EXPECT_EQ(kept.conversion_segment(0).candidates_size(), 3);
  }
}

TEST_F(EnvironmentalFilterRewriterTest, RemoveTest) {
Segments segments;
const ConversionRequest request;
Expand Down
2 changes: 1 addition & 1 deletion src/rewriter/user_segment_history_rewriter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -724,7 +724,7 @@ Segments UserSegmentHistoryRewriter::MakeLearningSegmentsFromInnerSegments(
Segments ret;
for (const Segment &segment : segments) {
const Segment::Candidate &candidate = segment.candidate(0);
if (candidate.inner_segment_boundary.size() <= 1) {
if (candidate.inner_segment_boundary.empty()) {
// No inner segment info
Segment *seg = ret.add_segment();
*seg = segment;
Expand Down
44 changes: 44 additions & 0 deletions src/rewriter/user_segment_history_rewriter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1787,6 +1787,50 @@ TEST_F(UserSegmentHistoryRewriterTest, SupportInnerSegmentsOnLearning) {
rewriter->Finish(convreq, &segments);
}

{
  // Inner segment boundary with size 1 may have better information.
  // Here the candidate's own content key/value are identical to its full
  // key/value ("わたしの" / "私の"), while the single boundary entry records the
  // narrower content part ("わたし" / "私"). The learning segments built from
  // the inner segment info are expected to reflect the boundary entry.
  segments.Clear();
  InitSegments(&segments, 1, 2);
  constexpr absl::string_view kKey = "わたしの";
  constexpr absl::string_view kValue = "私の";
  segments.mutable_segment(0)->set_key(kKey);
  Segment::Candidate *candidate =
      segments.mutable_segment(0)->mutable_candidate(1);

  candidate->value = kValue;
  candidate->content_value = kValue;
  candidate->key = kKey;
  candidate->content_key = kKey;
  // "わたしの, 私の", "わたし, 私"
  // UTF-8 byte lengths: "わたしの" = 12, "私の" = 6, "わたし" = 9, "私" = 3.
  // NOTE(review): argument order is presumably (key, value, content key,
  // content value) lengths -- confirm against the declaration.
  candidate->PushBackInnerSegmentBoundary(12, 6, 9, 3);
  candidate->lid = 10;
  candidate->rid = 10;

  // Promote the prepared candidate to the top and mark the segment as
  // committed by the user so that it is eligible for learning.
  segments.mutable_segment(0)->move_candidate(1, 0);
  segments.mutable_segment(0)->mutable_candidate(0)->attributes |=
      Segment::Candidate::RERANKED;
  segments.mutable_segment(0)->set_segment_type(Segment::FIXED_VALUE);

  {
    const Segments learning_segments = UserSegmentHistoryRewriterTestPeer::
        MakeLearningSegmentsFromInnerSegments(segments);
    EXPECT_EQ(learning_segments.segments_size(), 1);
    EXPECT_EQ(learning_segments.segment(0).key(), "わたしの");
    EXPECT_EQ(learning_segments.segment(0).candidate(0).key, "わたしの");
    EXPECT_EQ(learning_segments.segment(0).candidate(0).value, "私の");
    EXPECT_EQ(learning_segments.segment(0).candidate(0).content_key,
              "わたし");
    // The boundary pushed above gives the content value a length of 3 bytes,
    // i.e. "私"; an empty string would contradict both the fixture and the
    // content_key expectation right above.
    EXPECT_EQ(learning_segments.segment(0).candidate(0).content_value, "私");
    EXPECT_EQ(learning_segments.segment(0).candidate(0).lid, 10);
    EXPECT_EQ(learning_segments.segment(0).candidate(0).rid, 10);
    EXPECT_EQ(learning_segments.segment(0).segment_type(),
              Segment::FIXED_VALUE);
  }

  ConversionRequest convreq = CreateConversionRequest();
  rewriter->Finish(convreq, &segments);
}

{
segments.Clear();
InitSegments(&segments, 1, 2);
Expand Down
1 change: 1 addition & 0 deletions src/unix/ibus/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

load("@com_google_protobuf//bazel:cc_proto_library.bzl", "cc_proto_library")
load(
"//:build_defs.bzl",
"mozc_cc_binary",
Expand Down
1 change: 1 addition & 0 deletions src/usage_stats/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
# and update any rules that should be different.

load("@bazel_skylib//rules:run_binary.bzl", "run_binary")
load("@com_google_protobuf//bazel:cc_proto_library.bzl", "cc_proto_library")
load(
"//:build_defs.bzl",
"mozc_cc_library",
Expand Down
1 change: 1 addition & 0 deletions src/win32/cache_service/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
# Visibility: please choose a more appropriate default for the package,
# and update any rules that should be different.

load("@com_google_protobuf//bazel:cc_proto_library.bzl", "cc_proto_library")
load(
"//:build_defs.bzl",
"MOZC_TAGS",
Expand Down

0 comments on commit a6ad2cf

Please sign in to comment.