damus

nostr ios client
git clone git://jb55.com/damus
Log | Files | Refs | README | LICENSE

commit bf78c0a3a00602cb06f03b6281fa84dfd6216e5f
parent eb41846bb91960234606add39cf60e95ddd5e3d3
Author: Terry Yiu <git@tyiu.xyz>
Date:   Sun,  7 Jan 2024 14:07:09 -0500

translation: add workaround to reduce wasteful translation requests

Signed-off-by: Terry Yiu <git@tyiu.xyz>
Reviewed-by: William Casarin <jb55@jb55.com>
Signed-off-by: William Casarin <jb55@jb55.com>
Changelog-Fixed: Add workaround to fix note language recognition and reduce wasteful translation requests

Diffstat:
M damus/Util/Translator.swift |  7 ++++++-
M nostrdb/NdbNote.swift       | 30 ++++++++++++++++++++++++++----
2 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/damus/Util/Translator.swift b/damus/Util/Translator.swift
@@ -23,6 +23,11 @@ public struct Translator {
 }
 public func translate(_ text: String, from sourceLanguage: String, to targetLanguage: String) async throws -> String? {
+ // Do not attempt to translate if the source and target languages are the same.
+ guard sourceLanguage != targetLanguage else {
+ return nil
+ }
+
 switch userSettingsStore.translation_service {
 case .purple:
 return try await translateWithPurple(text, from: sourceLanguage, to: targetLanguage)
@@ -35,7 +40,7 @@ public struct Translator {
 case .deepl:
 return try await translateWithDeepL(text, from: sourceLanguage, to: targetLanguage)
 case .none:
- return text
+ return nil
 }
 }
diff --git a/nostrdb/NdbNote.swift b/nostrdb/NdbNote.swift
@@ -411,7 +411,25 @@ extension NdbNote {
 let originalBlocks = self.blocks(keypair).blocks
 let originalOnlyText = originalBlocks.compactMap {
 if case .text(let txt) = $0 {
- return txt
+ // Replacing right single quotation marks (’) with "typewriter or ASCII apostrophes" (')
+ // as a workaround to get Apple's language recognizer to predict language the correctly.
+ // It is important to add this workaround to get the language right because it wastes users' money to send translation requests.
+ // Until Apple fixes their language model, this workaround will be kept in place.
+ // See https://en.wikipedia.org/wiki/Apostrophe#Unicode for an explanation of the differences between the two characters.
+ //
+ // For example,
+ // "nevent1qqs0wsknetaju06xk39cv8sttd064amkykqalvfue7ydtg3p0lyfksqzyrhxagf6h8l9cjngatumrg60uq22v66qz979pm32v985ek54ndh8gj42wtp"
+ // has the note content "It’s a meme".
+ // Without the character replacement, it is 61% confident that the text is in Turkish (tr) and 8% confident that the text is in English (en),
+ // which is a wildly incorrect hypothesis.
+ // With the character replacement, it is 65% confident that the text is in English (en) and 24% confident that the text is in Turkish (tr), which is more accurate.
+ //
+ // Similarly,
+ // "nevent1qqspjqlln6wvxrqg6kzl2p7gk0rgr5stc7zz5sstl34cxlw55gvtylgpp4mhxue69uhkummn9ekx7mqpr4mhxue69uhkummnw3ez6ur4vgh8wetvd3hhyer9wghxuet5qy28wumn8ghj7un9d3shjtnwdaehgu3wvfnsygpx6655ve67vqlcme9ld7ww73pqx7msclhwzu8lqmkhvuluxnyc7yhf3xut"
+ // has the note content "You’re funner".
+ // Without the character replacement, it is 52% confident that the text is in Norwegian Bokmål (nb) and 41% confident that the text is in English (en).
+ // With the character replacement, it is 93% confident that the text is in English (en) and 4% confident that the text is in Norwegian Bokmål (nb).
+ return txt.replacingOccurrences(of: "’", with: "'")
 }
 else {
 return nil
@@ -419,13 +437,17 @@ extension NdbNote {
 }
 .joined(separator: " ")
- // Only accept language recognition hypothesis if there's at least a 50% probability that it's accurate.
+ // If there is no text, there's nothing to use to detect language.
+ guard !originalOnlyText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else {
+ return nil
+ }
+
 let languageRecognizer = NLLanguageRecognizer()
 languageRecognizer.processString(originalOnlyText)
+ // Only accept language recognition hypothesis if there's at least a 50% probability that it's accurate.
 guard let locale = languageRecognizer.languageHypotheses(withMaximum: 1).first(where: { $0.value >= 0.5 })?.key.rawValue else {
- let nstr: String? = nil
- return nstr
+ return nil
 }
 // Remove the variant component and just take the language part as translation services typically only supports the variant-less language.