Skip to content

Commit

Permalink
"isJapanese" check move (#730)
Browse files Browse the repository at this point in the history
* Move isStringPartiallyJapanese out of ClipboardMonitor

* Create isStringPartiallyJapanese function

* Add textMayBeTranslatable

* Rename API function

* Rename internal function

* Add helper

* Update translatable check

* Pass language to TextScanner

* Pass language explicitly

* Use textMayBeTranslatable

* No redundant translatable check

* Update eslint

* Remove double newline

* Collapse

* Rename
  • Loading branch information
toasted-nutbread committed Feb 25, 2024
1 parent 73169f0 commit 2e9ea19
Show file tree
Hide file tree
Showing 18 changed files with 132 additions and 36 deletions.
1 change: 1 addition & 0 deletions .eslintrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,7 @@
"ext/js/display/sandbox/structured-content-generator.js",
"ext/js/dom/sandbox/css-style-applier.js",
"ext/js/language/ja/japanese.js",
"ext/js/language/text-utilities.js",
"ext/js/templates/sandbox/anki-template-renderer-content-manager.js",
"ext/js/templates/sandbox/anki-template-renderer.js",
"ext/js/templates/sandbox/template-renderer-frame-api.js",
Expand Down
1 change: 1 addition & 0 deletions ext/js/app/frontend.js
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,7 @@ export class Frontend {
await this._updatePopup();

const preventMiddleMouse = this._getPreventMiddleMouseValueForPageType(scanningOptions.preventMiddleMouse);
this._textScanner.language = options.general.language;
this._textScanner.setOptions({
inputs: scanningOptions.inputs,
deepContentScan: scanningOptions.deepDomScan,
Expand Down
18 changes: 11 additions & 7 deletions ext/js/background/backend.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ import {arrayBufferToBase64} from '../data/sandbox/array-buffer-util.js';
import {DictionaryDatabase} from '../dictionary/dictionary-database.js';
import {Environment} from '../extension/environment.js';
import {ObjectPropertyAccessor} from '../general/object-property-accessor.js';
import {distributeFuriganaInflected, isCodePointJapanese, isStringPartiallyJapanese, convertKatakanaToHiragana as jpConvertKatakanaToHiragana} from '../language/ja/japanese.js';
import {getLanguageSummaries} from '../language/languages.js';
import {distributeFuriganaInflected, isCodePointJapanese, convertKatakanaToHiragana as jpConvertKatakanaToHiragana} from '../language/ja/japanese.js';
import {getLanguageSummaries, isTextLookupWorthy} from '../language/languages.js';
import {Translator} from '../language/translator.js';
import {AudioDownloader} from '../media/audio-downloader.js';
import {getFileExtensionFromAudioMediaType, getFileExtensionFromImageMediaType} from '../media/media-util.js';
Expand Down Expand Up @@ -175,7 +175,7 @@ export class Backend {
['isTabSearchPopup', this._onApiIsTabSearchPopup.bind(this)],
['triggerDatabaseUpdated', this._onApiTriggerDatabaseUpdated.bind(this)],
['testMecab', this._onApiTestMecab.bind(this)],
['textHasJapaneseCharacters', this._onApiTextHasJapaneseCharacters.bind(this)],
['isTextLookupWorthy', this._onApiIsTextLookupWorthy.bind(this)],
['getTermFrequencies', this._onApiGetTermFrequencies.bind(this)],
['findAnkiNotes', this._onApiFindAnkiNotes.bind(this)],
['openCrossFramePort', this._onApiOpenCrossFramePort.bind(this)],
Expand Down Expand Up @@ -310,7 +310,11 @@ export class Backend {
* @param {import('clipboard-monitor').EventArgument<'change'>} details
*/
async _onClipboardTextChange({text}) {
const {clipboard: {maximumSearchLength}} = this._getProfileOptions({current: true}, false);
const {
general: {language},
clipboard: {maximumSearchLength}
} = this._getProfileOptions({current: true}, false);
if (!isTextLookupWorthy(text, language)) { return; }
if (text.length > maximumSearchLength) {
text = text.substring(0, maximumSearchLength);
}
Expand Down Expand Up @@ -839,9 +843,9 @@ export class Backend {
return true;
}

/** @type {import('api').ApiHandler<'textHasJapaneseCharacters'>} */
_onApiTextHasJapaneseCharacters({text}) {
return isStringPartiallyJapanese(text);
/** @type {import('api').ApiHandler<'isTextLookupWorthy'>} */
_onApiIsTextLookupWorthy({text, language}) {
return isTextLookupWorthy(text, language);
}

/** @type {import('api').ApiHandler<'getTermFrequencies'>} */
Expand Down
9 changes: 5 additions & 4 deletions ext/js/comm/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -321,11 +321,12 @@ export class API {
}

/**
* @param {import('api').ApiParam<'textHasJapaneseCharacters', 'text'>} text
* @returns {Promise<import('api').ApiReturn<'textHasJapaneseCharacters'>>}
* @param {import('api').ApiParam<'isTextLookupWorthy', 'text'>} text
* @param {import('api').ApiParam<'isTextLookupWorthy', 'language'>} language
* @returns {Promise<import('api').ApiReturn<'isTextLookupWorthy'>>}
*/
textHasJapaneseCharacters(text) {
return this._invoke('textHasJapaneseCharacters', {text});
isTextLookupWorthy(text, language) {
return this._invoke('isTextLookupWorthy', {text, language});
}

/**
Expand Down
3 changes: 1 addition & 2 deletions ext/js/comm/clipboard-monitor.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
*/

import {EventDispatcher} from '../core/event-dispatcher.js';
import {isStringPartiallyJapanese} from '../language/ja/japanese.js';

/**
* @augments EventDispatcher<import('clipboard-monitor').Events>
Expand Down Expand Up @@ -71,7 +70,7 @@ export class ClipboardMonitor extends EventDispatcher {
text !== this._previousText
) {
this._previousText = text;
if (canChange && isStringPartiallyJapanese(text)) {
if (canChange) {
this.trigger('change', {text});
}
}
Expand Down
32 changes: 20 additions & 12 deletions ext/js/display/display-generator.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ import {ExtensionError} from '../core/extension-error.js';
import {isObject} from '../core/utilities.js';
import {getDisambiguations, getGroupedPronunciations, getTermFrequency, groupKanjiFrequencies, groupTermFrequencies, groupTermTags, isNonNounVerbOrAdjective} from '../dictionary/dictionary-data-util.js';
import {HtmlTemplateCollection} from '../dom/html-template-collection.js';
import {distributeFurigana, getKanaMorae, getPitchCategory, isCodePointKanji, isStringPartiallyJapanese} from '../language/ja/japanese.js';
import {distributeFurigana, getKanaMorae, getPitchCategory, isCodePointKanji} from '../language/ja/japanese.js';
import {getLanguageFromText} from '../language/text-utilities.js';
import {createPronunciationDownstepPosition, createPronunciationGraph, createPronunciationText} from './sandbox/pronunciation-generator.js';
import {StructuredContentGenerator} from './sandbox/structured-content-generator.js';

Expand Down Expand Up @@ -991,12 +992,7 @@ export class DisplayGenerator {
* @param {string} [language]
*/
_setTextContent(node, value, language) {
if (typeof language === 'string') {
node.lang = language;
} else if (isStringPartiallyJapanese(value)) {
node.lang = 'ja';
}

this._setElementLanguage(node, language, value);
node.textContent = value;
}

Expand All @@ -1008,11 +1004,7 @@ export class DisplayGenerator {
_setMultilineTextContent(node, value, language) {
// This can't just call _setTextContent because the lack of <br> elements will
// cause the text to not copy correctly.
if (typeof language === 'string') {
node.lang = language;
} else if (isStringPartiallyJapanese(value)) {
node.lang = 'ja';
}
this._setElementLanguage(node, language, value);

let start = 0;
while (true) {
Expand All @@ -1028,6 +1020,22 @@ export class DisplayGenerator {
}
}

/**
* @param {HTMLElement} element
* @param {string|undefined} language
* @param {string} content
*/
_setElementLanguage(element, language, content) {
if (typeof language === 'string') {
element.lang = language;
} else {
const language2 = getLanguageFromText(content);
if (language2 !== null) {
element.lang = language2;
}
}
}

/**
* @param {string} reading
* @param {import('dictionary').TermPronunciation[]} termPronunciations
Expand Down
2 changes: 2 additions & 0 deletions ext/js/display/display.js
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,7 @@ export class Display extends EventDispatcher {
readingMode: options.parsing.readingMode,
useInternalParser: options.parsing.enableScanningParser,
useMecabParser: options.parsing.enableMecabParser,
language: options.general.language,
scanning: {
inputs: scanningOptions.inputs,
deepContentScan: scanningOptions.deepDomScan,
Expand Down Expand Up @@ -1834,6 +1835,7 @@ export class Display extends EventDispatcher {
}

const {scanning: scanningOptions, sentenceParsing: sentenceParsingOptions} = options;
this._contentTextScanner.language = options.general.language;
this._contentTextScanner.setOptions({
inputs: [{
include: 'mouse0',
Expand Down
3 changes: 2 additions & 1 deletion ext/js/display/query-parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ export class QueryParser extends EventDispatcher {
/**
* @param {import('display').QueryParserOptions} display
*/
setOptions({selectedParser, termSpacing, readingMode, useInternalParser, useMecabParser, scanning}) {
setOptions({selectedParser, termSpacing, readingMode, useInternalParser, useMecabParser, language, scanning}) {
let selectedParserChanged = false;
if (selectedParser === null || typeof selectedParser === 'string') {
selectedParserChanged = (this._selectedParser !== selectedParser);
Expand All @@ -115,6 +115,7 @@ export class QueryParser extends EventDispatcher {
if (typeof scanLength === 'number') {
this._scanLength = scanLength;
}
this._textScanner.language = language;
this._textScanner.setOptions(scanning);
}
this._textScanner.setEnabled(true);
Expand Down
9 changes: 6 additions & 3 deletions ext/js/display/sandbox/structured-content-generator.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

import {isStringPartiallyJapanese} from '../../language/ja/japanese.js';
import {getLanguageFromText} from '../../language/text-utilities.js';

export class StructuredContentGenerator {
/**
Expand Down Expand Up @@ -163,8 +163,11 @@ export class StructuredContentGenerator {
if (typeof content === 'string') {
if (content.length > 0) {
container.appendChild(this._createTextNode(content));
if (language === null && isStringPartiallyJapanese(content)) {
container.lang = 'ja';
if (language === null) {
const language2 = getLanguageFromText(content);
if (language2 !== null) {
container.lang = language2;
}
}
}
return;
Expand Down
21 changes: 19 additions & 2 deletions ext/js/display/search-display-controller.js
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ export class SearchDisplayController {
this._searchBackButton.addEventListener('click', this._onSearchBackButtonClick.bind(this), false);
this._wanakanaEnableCheckbox.addEventListener('change', this._onWanakanaEnableChange.bind(this));
window.addEventListener('copy', this._onCopy.bind(this));
this._clipboardMonitor.on('change', this._onExternalSearchUpdate.bind(this));
this._clipboardMonitor.on('change', this._onClipboardMonitorChange.bind(this));
this._clipboardMonitorEnableCheckbox.addEventListener('change', this._onClipboardMonitorEnableChange.bind(this));
this._display.hotkeyHandler.on('keydownNonHotkey', this._onKeyDown.bind(this));

Expand Down Expand Up @@ -271,9 +271,26 @@ export class SearchDisplayController {
}

/** @type {import('application').ApiHandler<'searchDisplayControllerUpdateSearchQuery'>} */
_onExternalSearchUpdate({text, animate = true}) {
_onExternalSearchUpdate({text, animate}) {
void this._updateSearchFromClipboard(text, animate, false);
}

/**
* @param {import('clipboard-monitor').Events['change']} event
*/
_onClipboardMonitorChange({text}) {
void this._updateSearchFromClipboard(text, true, true);
}

/**
* @param {string} text
* @param {boolean} animate
* @param {boolean} checkText
*/
async _updateSearchFromClipboard(text, animate, checkText) {
const options = this._display.getOptions();
if (options === null) { return; }
if (checkText && !await this._display.application.api.isTextLookupWorthy(text, options.general.language)) { return; }
const {clipboard: {autoSearchContent, maximumSearchLength}} = options;
if (text.length > maximumSearchLength) {
text = text.substring(0, maximumSearchLength);
Expand Down
2 changes: 2 additions & 0 deletions ext/js/language/language-descriptors.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import {removeArabicScriptDiacritics} from './ar/arabic-text-preprocessors.js';
import {eszettPreprocessor} from './de/german-text-preprocessors.js';
import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js';
import {isStringPartiallyJapanese} from './ja/japanese.js';
import {removeLatinDiacritics} from './la/latin-text-preprocessors.js';
import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js';
import {capitalizeFirstLetter, decapitalize} from './text-preprocessors.js';
Expand Down Expand Up @@ -114,6 +115,7 @@ const languageDescriptors = [
iso: 'ja',
name: 'Japanese',
exampleText: '読め',
isTextLookupWorthy: isStringPartiallyJapanese,
textPreprocessors: {
convertHalfWidthCharacters,
convertNumericCharacters,
Expand Down
11 changes: 11 additions & 0 deletions ext/js/language/languages.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,14 @@ export function getAllLanguageTextPreprocessors() {
}
return results;
}

/**
 * Determines whether a piece of text is worth looking up for a given language.
 * An unrecognized language is never lookup-worthy. A recognized language which
 * does not define its own check accepts all text.
 * @param {string} text The text to test.
 * @param {string} language The key used to find the language descriptor.
 * @returns {boolean} Whether the text should be looked up.
 */
export function isTextLookupWorthy(text, language) {
    const languageDescriptor = languageDescriptorMap.get(language);
    if (typeof languageDescriptor === 'undefined') {
        return false;
    }
    if (typeof languageDescriptor.isTextLookupWorthy === 'undefined') {
        // No language-specific filter was provided, so everything passes.
        return true;
    }
    return languageDescriptor.isTextLookupWorthy(text);
}
12 changes: 9 additions & 3 deletions ext/js/language/text-scanner.js
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ export class TextScanner extends EventDispatcher {
this._includeSelector = null;
/** @type {?string} */
this._excludeSelector = null;
/** @type {?string} */
this._language = null;

/** @type {?import('text-scanner').InputInfo} */
this._inputInfoCurrent = null;
Expand Down Expand Up @@ -188,6 +190,10 @@ export class TextScanner extends EventDispatcher {
this._excludeSelector = value;
}

/** @type {?string} */
get language() { return this._language; }
set language(value) { this._language = value; }

/** */
prepare() {
this._isPrepared = true;
Expand Down Expand Up @@ -449,7 +455,7 @@ export class TextScanner extends EventDispatcher {
const result = await this._findDictionaryEntries(textSource, searchTerms, searchKanji, optionsContext);
if (result !== null) {
({dictionaryEntries, sentence, type} = result);
} else if (textSource !== null && textSource instanceof TextSourceElement && await this._hasJapanese(textSource.fullContent)) {
} else if (textSource !== null && textSource instanceof TextSourceElement && await this._isTextLookupWorthy(textSource.fullContent)) {
dictionaryEntries = [];
sentence = {text: '', offset: 0};
}
Expand Down Expand Up @@ -1549,9 +1555,9 @@ export class TextScanner extends EventDispatcher {
* @param {string} text
* @returns {Promise<boolean>}
*/
async _hasJapanese(text) {
async _isTextLookupWorthy(text) {
try {
return await this._api.textHasJapaneseCharacters(text);
return this._language !== null && await this._api.isTextLookupWorthy(text, this._language);
} catch (e) {
return false;
}
Expand Down
29 changes: 29 additions & 0 deletions ext/js/language/text-utilities.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
* Copyright (C) 2024 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

import {isStringPartiallyJapanese} from './ja/japanese.js';

/**
 * Makes a heuristic guess at the language a string is written in.
 * The only check performed is whether the text is at least partially Japanese.
 * @param {string} text The text to inspect.
 * @returns {?string} The ISO code of the guessed language, or `null` if no language could be determined.
 */
export function getLanguageFromText(text) {
    return isStringPartiallyJapanese(text) ? 'ja' : null;
}
3 changes: 2 additions & 1 deletion types/ext/api.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -344,9 +344,10 @@ type ApiSurface = {
params: void;
return: true;
};
textHasJapaneseCharacters: {
isTextLookupWorthy: {
params: {
text: string;
language: string;
};
return: boolean;
};
Expand Down
2 changes: 1 addition & 1 deletion types/ext/application.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ export type ApiSurface = {
searchDisplayControllerUpdateSearchQuery: {
params: {
text: string;
animate?: boolean;
animate: boolean;
};
return: void;
};
Expand Down
1 change: 1 addition & 0 deletions types/ext/display.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ export type QueryParserOptions = {
readingMode: Settings.ParsingReadingMode;
useInternalParser: boolean;
useMecabParser: boolean;
language: string;
scanning: TextScannerTypes.Options;
};

Expand Down
9 changes: 9 additions & 0 deletions types/ext/language-descriptors.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,19 @@
import type {TextPreprocessor, BidirectionalConversionPreprocessor} from './language';
import type {SafeAny} from './core';

/**
 * A predicate which receives a string of text and returns whether that text
 * is considered worth looking up.
 */
export type IsTextLookupWorthyFunction = (text: string) => boolean;

/**
 * Describes a single language: its code, its name, an example text,
 * an optional lookup filter, and its text preprocessors.
 */
type LanguageDescriptor<TIso extends string, TTextPreprocessorDescriptor extends TextPreprocessorDescriptor> = {
// The language code, e.g. 'ja'.
iso: TIso;
// The name of the language, e.g. 'Japanese'.
name: string;
// A short sample of text in the language, e.g. '読め'.
// NOTE(review): where this sample is used is not visible here - confirm before relying on it.
exampleText: string;
/**
* An optional function which returns whether or not a given string may be translatable.
* This is used as a filter for several situations, such as whether the clipboard monitor
* window should activate when text is copied to the clipboard.
* If no value is provided, `true` is assumed for all inputs.
*/
isTextLookupWorthy?: IsTextLookupWorthyFunction;
// The text preprocessors for the language, keyed by preprocessor name.
textPreprocessors: TTextPreprocessorDescriptor;
};

Expand Down

0 comments on commit 2e9ea19

Please sign in to comment.