Skip to content

Commit

Permalink
Merge pull request #683 from Automattic/harperjs-mutable-dictionary
Browse files Browse the repository at this point in the history
feat(harper.js): allow modification to the dictionary.
  • Loading branch information
elijah-potter authored Feb 17, 2025
2 parents b1e4a31 + e9e76c2 commit 1a6010e
Show file tree
Hide file tree
Showing 14 changed files with 173 additions and 45 deletions.
1 change: 0 additions & 1 deletion harper-core/dictionary.dict
Original file line number Diff line number Diff line change
Expand Up @@ -34026,7 +34026,6 @@ notional/51Y
notoriety/1M
notorious/5Y
notwithstanding/7+
notwork/1S
nougat/1MS
noun/14KMS
nourish/14DSLG
Expand Down
2 changes: 0 additions & 2 deletions harper-core/src/linting/lint_group.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
use paste::paste;
use serde::{Deserialize, Serialize};

//

use super::an_a::AnA;
use super::avoid_curses::AvoidCurses;
use super::boring_words::BoringWords;
Expand Down
3 changes: 3 additions & 0 deletions harper-core/src/spell/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ pub trait Dictionary: Send + Sync {
/// Iterate over the words in the dictionary.
fn words_iter(&self) -> Box<dyn Iterator<Item = &'_ [char]> + Send + '_>;

/// The number of words in the dictionary.
fn word_count(&self) -> usize;

/// Iterate over all the words in the dictionary of a given length
fn words_with_len_iter(&self, len: usize) -> Box<dyn Iterator<Item = &'_ [char]> + Send + '_>;
}
4 changes: 4 additions & 0 deletions harper-core/src/spell/fst_dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,10 @@ impl Dictionary for FstDictionary {
self.full_dict.words_with_len_iter(len)
}

/// Report the number of words by delegating to the wrapped `full_dict`.
fn word_count(&self) -> usize {
self.full_dict.word_count()
}

/// Check for an exact match of `word` by delegating to the wrapped `full_dict`.
fn contains_exact_word(&self, word: &[char]) -> bool {
self.full_dict.contains_exact_word(word)
}
Expand Down
4 changes: 4 additions & 0 deletions harper-core/src/spell/merged_dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,4 +127,8 @@ impl Dictionary for MergedDictionary {
.take(max_results)
.collect()
}

/// Total the word counts reported by every child dictionary.
///
/// The per-child counts are summed as-is, so a word reported by more than one
/// child contributes once per child to the result.
fn word_count(&self) -> usize {
self.children.iter().map(|d| d.word_count()).sum()
}
}
8 changes: 7 additions & 1 deletion harper-core/src/spell/mutable_dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,9 @@ impl MutableDictionary {
) {
let pairs: Vec<_> = words
.into_iter()
.map(|(v, m)| (v.as_ref().to_smallvec(), m))
.filter_map(|(v, m)| {
(!self.contains_word(v.as_ref())).then(|| (v.as_ref().to_smallvec(), m))
})
.collect();

self.words.extend(pairs.iter().map(|(v, _)| v.clone()));
Expand Down Expand Up @@ -284,6 +286,10 @@ impl Dictionary for MutableDictionary {
Box::new(self.words[start..end].iter().map(|v| v.as_slice()))
}

/// The number of entries currently held in `self.words`.
fn word_count(&self) -> usize {
self.words.len()
}

/// Whether the normalized form of `word` (as produced by `seq_to_normalized`)
/// is a key in `word_map`.
fn contains_exact_word(&self, word: &[char]) -> bool {
self.word_map.contains_key(seq_to_normalized(word).as_ref())
}
Expand Down
62 changes: 58 additions & 4 deletions harper-wasm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ use std::sync::Arc;
use harper_core::language_detection::is_doc_likely_english;
use harper_core::linting::{LintGroup, LintGroupConfig, Linter as _};
use harper_core::parsers::{IsolateEnglish, Markdown, Parser, PlainEnglish};
use harper_core::{remove_overlaps, Document, FstDictionary, IgnoredLints, Lrc, MutableDictionary};
use harper_core::{
remove_overlaps, CharString, Dictionary, Document, FstDictionary, IgnoredLints, Lrc,
MergedDictionary, MutableDictionary, WordMetadata,
};
use serde::{Deserialize, Serialize};
use wasm_bindgen::prelude::wasm_bindgen;
use wasm_bindgen::JsValue;
Expand Down Expand Up @@ -61,8 +64,12 @@ impl Language {

#[wasm_bindgen]
pub struct Linter {
lint_group: LintGroup<Arc<FstDictionary>>,
dictionary: Arc<FstDictionary>,
lint_group: LintGroup<Arc<MergedDictionary>>,
/// The user-supplied dictionary.
///
/// To make changes affect linting, run [`Self::synchronize_lint_dict`].
user_dictionary: MutableDictionary,
dictionary: Arc<MergedDictionary>,
ignored_lints: IgnoredLints,
}

Expand All @@ -72,15 +79,35 @@ impl Linter {
/// Note that this can mean constructing the curated dictionary, which is the most expensive operation
/// in Harper.
pub fn new() -> Self {
let dictionary = FstDictionary::curated();
let dictionary = Self::construct_merged_dict(MutableDictionary::default());

Self {
lint_group: LintGroup::new(LintGroupConfig::default(), dictionary.clone()),
user_dictionary: MutableDictionary::new(),
dictionary,
ignored_lints: IgnoredLints::default(),
}
}

/// Update the dictionary inside [`Self::lint_group`] to include [`Self::user_dictionary`].
/// This clears any linter caches, so use it sparingly.
fn synchronize_lint_dict(&mut self) {
let lint_config = self.lint_group.config;
self.dictionary = Self::construct_merged_dict(self.user_dictionary.clone());
self.lint_group = LintGroup::new(lint_config, self.dictionary.clone());
}

/// Construct the actual dictionary to be used for linting and parsing from the
/// curated dictionary and the supplied `user_dictionary`.
///
/// `user_dictionary` is taken by value and moved straight into the merged
/// dictionary; callers that need to keep their own copy clone before calling.
fn construct_merged_dict(user_dictionary: MutableDictionary) -> Arc<MergedDictionary> {
    let mut lint_dict = MergedDictionary::new();

    lint_dict.add_dictionary(FstDictionary::curated());
    // The argument is already owned, so move it instead of cloning it again.
    lint_dict.add_dictionary(Arc::new(user_dictionary));

    Arc::new(lint_dict)
}

/// Helper method to quickly check if a plain string is likely intended to be English
pub fn is_likely_english(&self, text: String) -> bool {
let document = Document::new_plain_english(&text, &self.dictionary);
Expand Down Expand Up @@ -179,6 +206,33 @@ impl Linter {
pub fn clear_ignored_lints(&mut self) {
self.ignored_lints = IgnoredLints::new();
}

/// Import words into the user dictionary.
///
/// Each word is added with default metadata. The lint dictionary is only
/// re-synchronized when the user dictionary's word count actually grew.
pub fn import_words(&mut self, additional_words: Vec<String>) {
    let count_before = self.user_dictionary.word_count();

    let entries = additional_words.iter().map(|word| {
        let chars: CharString = word.chars().collect();
        (chars, WordMetadata::default())
    });
    self.user_dictionary.extend_words(entries);

    // Skip the resynchronization when nothing new was added.
    let grew = self.user_dictionary.word_count() > count_before;
    if grew {
        self.synchronize_lint_dict();
    }
}

/// Export words from the dictionary.
///
/// Note: this only returns the words held in the user dictionary, i.e. those
/// previously added by [`Self::import_words`].
///
/// This only reads from the user dictionary, so a shared borrow is sufficient.
pub fn export_words(&self) -> Vec<String> {
    self.user_dictionary
        .words_iter()
        .map(|word| word.iter().collect::<String>())
        .collect()
}
}

impl Default for Linter {
Expand Down
2 changes: 1 addition & 1 deletion packages/harper.js/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"scripts": {
"dev": "vite",
"build": "tsc && vite build",
"test": "vitest run --browser chromium && vitest run --browser firefox"
"test": "vitest run --browser chromium"
},
"devDependencies": {
"@microsoft/api-documenter": "^7.26.5",
Expand Down
14 changes: 14 additions & 0 deletions packages/harper.js/src/Linter.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,20 @@ for (const [linterName, Linter] of Object.entries(linters)) {
expect(firstLints.length).toBeGreaterThan(secondLints.length);
expect(secondLints.length).toBe(0);
});

// Verifies that importing a word removes the lint the sentence previously produced.
test(`${linterName} can add words to the dictionary`, async () => {
const source = 'asdf is not a word';

const linter = new Linter();
let lints = await linter.lint(source);

// Before the import, exactly one lint is expected for this sentence.
expect(lints).toHaveLength(1);

await linter.importWords(['asdf']);
lints = await linter.lint(source);

// After the import, the same sentence should produce no lints.
expect(lints).toHaveLength(0);
});
}

test('Linters have the same config format', async () => {
Expand Down
7 changes: 7 additions & 0 deletions packages/harper.js/src/Linter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,11 @@ export default interface Linter {

/** Clear records of all previously ignored lints. */
clearIgnoredLints(): Promise<void>;

/** Import words into the dictionary. This is a significant operation, so try to batch words. */
importWords(words: string[]): Promise<void>;

/** Export all added words from the dictionary. Note that this will NOT export anything from the curated dictionary,
* only words from previous calls to `this.importWords`. */
exportWords(): Promise<string[]>;
}
12 changes: 12 additions & 0 deletions packages/harper.js/src/LocalLinter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -124,4 +124,16 @@ export default class LocalLinter implements Linter {

return this.inner!.clear_ignored_lints();
}

/** Import words into the dictionary, awaiting `initialize()` before calling the inner linter. */
async importWords(words: string[]): Promise<void> {
await this.initialize();

return this.inner!.import_words(words);
}

/** Export words from the inner linter, awaiting `initialize()` first. */
async exportWords(): Promise<string[]> {
await this.initialize();

return this.inner!.export_words();
}
}
8 changes: 8 additions & 0 deletions packages/harper.js/src/WorkerLinter/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,14 @@ export default class WorkerLinter implements Linter {
return this.rpc('clearIgnoredLints', []);
}

/** Forward the word import to the worker through the `importWords` RPC procedure. */
async importWords(words: string[]): Promise<void> {
return this.rpc('importWords', [words]);
}

/** Request the word list from the worker through the `exportWords` RPC procedure. */
async exportWords(): Promise<string[]> {
return this.rpc('exportWords', []);
}

/** Run a procedure on the remote worker. */
private async rpc(procName: string, args: any[]): Promise<any> {
const promise = new Promise((resolve, reject) => {
Expand Down
90 changes: 55 additions & 35 deletions packages/obsidian-plugin/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,17 @@ function suggestionToLabel(sug: Suggestion) {
if (sug.kind() == SuggestionKind.Remove) {
return 'Remove';
} else if (sug.kind() == SuggestionKind.Replace) {
return `Replace with "${sug.get_replacement_text()}"`;
return `Replace with ${sug.get_replacement_text()}`;
} else if (sug.kind() == SuggestionKind.InsertAfter) {
return `Insert "${sug.get_replacement_text()}" after this.`;
return `Insert ${sug.get_replacement_text()} after this.`;
}
}

/** Settings consumed and produced by the plugin. */
export type Settings = {
/** Value handed to `harper.importIgnoredLints` and produced by `harper.exportIgnoredLints`. */
ignoredLints?: string;
useWebWorker: boolean;
/** Value handed to `harper.setLintConfig` and produced by `harper.getLintConfig`. */
lintSettings: LintConfig;
/** Words handed to `harper.importWords` (when present and non-empty) and produced by `harper.exportWords`. */
userDictionary?: string[];
};

export default class HarperPlugin extends Plugin {
Expand Down Expand Up @@ -54,6 +55,10 @@ export default class HarperPlugin extends Plugin {
await this.harper.importIgnoredLints(settings.ignoredLints);
}

if (settings.userDictionary != null && settings.userDictionary.length > 0) {
await this.harper.importWords(settings.userDictionary);
}

await this.harper.setLintConfig(settings.lintSettings);
this.harper.setup();

Expand All @@ -77,7 +82,8 @@ export default class HarperPlugin extends Plugin {
return {
ignoredLints: await this.harper.exportIgnoredLints(),
useWebWorker: usingWebWorker,
lintSettings: await this.harper.getLintConfig()
lintSettings: await this.harper.getLintConfig(),
userDictionary: await this.harper.exportWords()
};
}

Expand Down Expand Up @@ -178,6 +184,51 @@ export default class HarperPlugin extends Plugin {
span.start = charIndexToCodePointIndex(span.start, chars);
span.end = charIndexToCodePointIndex(span.end, chars);

// One editor action per suggestion; the suggestion kind decides which range is rewritten.
const actions = lint.suggestions().map((sug) => {
	return {
		name: suggestionToLabel(sug),
		apply: (view) => {
			switch (sug.kind()) {
				case SuggestionKind.Remove:
					// Delete the flagged span.
					view.dispatch({
						changes: { from: span.start, to: span.end, insert: '' }
					});
					break;
				case SuggestionKind.Replace:
					// Swap the flagged span for the replacement text.
					view.dispatch({
						changes: { from: span.start, to: span.end, insert: sug.get_replacement_text() }
					});
					break;
				case SuggestionKind.InsertAfter:
					// Zero-width change at the end of the span, i.e. a pure insertion.
					view.dispatch({
						changes: { from: span.end, to: span.end, insert: sug.get_replacement_text() }
					});
					break;
			}
		}
	};
});

// Spelling lints get an extra action that adds the flagged word to the user dictionary.
if (lint.lint_kind() == 'Spelling') {
	const word = lint.get_problem_text();

	actions.push({
		name: `Add “${word}” to your dictionary`,
		apply: async () => {
			// Wait for the import to finish before reinitializing, so the
			// reinitialization sees the newly added word.
			await this.harper.importWords([word]);
			await this.reinitialize();
		}
	});
}

return {
from: span.start,
to: span.end,
Expand All @@ -188,38 +239,7 @@ export default class HarperPlugin extends Plugin {
await this.harper.ignoreLint(lint);
await this.reinitialize();
},
actions: lint.suggestions().map((sug) => {
return {
name: suggestionToLabel(sug),
apply: (view) => {
if (sug.kind() === SuggestionKind.Remove) {
view.dispatch({
changes: {
from: span.start,
to: span.end,
insert: ''
}
});
} else if (sug.kind() === SuggestionKind.Replace) {
view.dispatch({
changes: {
from: span.start,
to: span.end,
insert: sug.get_replacement_text()
}
});
} else if (sug.kind() === SuggestionKind.InsertAfter) {
view.dispatch({
changes: {
from: span.end,
to: span.end,
insert: sug.get_replacement_text()
}
});
}
}
};
})
actions
};
});
},
Expand Down
1 change: 0 additions & 1 deletion packages/obsidian-plugin/src/lint.ts
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,6 @@ const baseTheme = EditorView.baseTheme({
},

'.cm-diagnosticIgnore': {
color: 'black',
padding: 'var(--size-4-1) 0px',
fontSize: 'var(--font-ui-small)'
},
Expand Down

0 comments on commit 1a6010e

Please sign in to comment.