From 34c55b4eb28c9850472720ce8b1a6f848796d7e5 Mon Sep 17 00:00:00 2001 From: Andrei Gavrilescu Date: Thu, 9 Sep 2021 16:23:03 +0300 Subject: [PATCH] additional bcp47 languages --- config.js | 5 +- react/features/transcribing/functions.js | 27 +++--- ...P47LocaleMap.json => jitsi-bcp47-map.json} | 4 +- react/features/transcribing/logger.js | 5 ++ .../transcribing/transcriber-langs.json | 89 +++++++++++++++++++ 5 files changed, 116 insertions(+), 14 deletions(-) rename react/features/transcribing/{jitsiToBCP47LocaleMap.json => jitsi-bcp47-map.json} (93%) create mode 100644 react/features/transcribing/logger.js create mode 100644 react/features/transcribing/transcriber-langs.json diff --git a/config.js b/config.js index 7569e47d9..30ecdcd48 100644 --- a/config.js +++ b/config.js @@ -242,8 +242,9 @@ var config = { // transcribeWithAppLanguage: true, // Transcriber language. This settings will only work if "transcribeWithAppLanguage" is explicitly set to false. - // Available languages can be found in lang/language.json. - // preferredTranscribeLanguage: 'en', + // Available languages can be found in + // ./src/react/features/transcribing/transcriber-langs.json. + // preferredTranscribeLanguage: 'en-US', // Enables automatic turning on captions when recording is started // autoCaptionOnRecord: false, diff --git a/react/features/transcribing/functions.js b/react/features/transcribing/functions.js index 13bacd5ce..fdbb0e500 100644 --- a/react/features/transcribing/functions.js +++ b/react/features/transcribing/functions.js @@ -2,11 +2,12 @@ import i18next from 'i18next'; -import JITSI_TO_BCP47_MAP from './jitsiToBCP47LocaleMap.json'; +import JITSI_TO_BCP47_MAP from './jitsi-bcp47-map.json'; +import logger from './logger'; +import TRANSCRIBER_LANGS from './transcriber-langs.json'; const DEFAULT_TRANSCRIBER_LANG = 'en-US'; - /** * Determine which language to use for transcribing. * @@ -14,17 +15,23 @@ const DEFAULT_TRANSCRIBER_LANG = 'en-US'; * @returns {string} */ export function determineTranscriptionLanguage(config: Object) { - const { preferredTranscribeLanguage, transcribeWithAppLanguage = true } = config; // Depending on the config either use the language that the app automatically detected or the hardcoded - // config value. - const jitsiLocale = transcribeWithAppLanguage ? i18next.language : preferredTranscribeLanguage; + // config BCP47 value. + // Jitsi language detections uses custom language tags, but the transcriber expects BCP-47 compliant tags, + // we use a mapping file to convert them. + const bcp47Locale = transcribeWithAppLanguage ? JITSI_TO_BCP47_MAP[i18next.language] : preferredTranscribeLanguage; - // Jitsi uses custom language tags, but the transcriber expects BCP-47 compliant tags. We use a mapping file - // to convert them. - // Not all languages that the app might detect are supported by the transcriber in which case use the default. - const { [jitsiLocale]: bcp47Locale = DEFAULT_TRANSCRIBER_LANG } = JITSI_TO_BCP47_MAP; + // Check if the obtained language is supported by the transcriber + let safeBCP47Locale = TRANSCRIBER_LANGS[bcp47Locale] && bcp47Locale; - return bcp47Locale; + if (!safeBCP47Locale) { + safeBCP47Locale = DEFAULT_TRANSCRIBER_LANG; + logger.warn(`Transcriber language ${bcp47Locale} is not supported, using default ${DEFAULT_TRANSCRIBER_LANG}`); + } + + logger.info(`Transcriber language set to ${safeBCP47Locale}`); + + return safeBCP47Locale; } diff --git a/react/features/transcribing/jitsiToBCP47LocaleMap.json b/react/features/transcribing/jitsi-bcp47-map.json similarity index 93% rename from react/features/transcribing/jitsiToBCP47LocaleMap.json rename to react/features/transcribing/jitsi-bcp47-map.json index e769ef101..80e025b82 100644 --- a/react/features/transcribing/jitsiToBCP47LocaleMap.json +++ b/react/features/transcribing/jitsi-bcp47-map.json @@ -46,6 +46,6 @@ "tr": "tr-TR", "uk": "uk-UA", "vi": "vi-VN", - "zhCN": "zh (cmn-Hans-CN)", - "zhTW": "zh-TW (cmn-Hant-TW)" + "zhCN": "zh", + "zhTW": "zh-TW" } \ No newline at end of file diff --git a/react/features/transcribing/logger.js b/react/features/transcribing/logger.js new file mode 100644 index 000000000..2a56afa18 --- /dev/null +++ b/react/features/transcribing/logger.js @@ -0,0 +1,5 @@ +// @flow + +import { getLogger } from '../base/logging/functions'; + +export default getLogger('features/transcribing'); diff --git a/react/features/transcribing/transcriber-langs.json b/react/features/transcribing/transcriber-langs.json new file mode 100644 index 000000000..f277b53eb --- /dev/null +++ b/react/features/transcribing/transcriber-langs.json @@ -0,0 +1,89 @@ +{ + "af-ZA": "Afrikaans (South Africa)", + "id-ID": "Indonesian (Indonesia)", + "ms-MY": "Malay (Malaysia)", + "ca-ES": "Catalan (Spain)", + "cs-CZ": "Czech (Czech Republic)", + "da-DK": "Danish (Denmark)", + "de-DE": "German (Germany)", + "en-AU": "English (Australia)", + "en-CA": "English (Canada)", + "en-GB": "English (United Kingdom)", + "en-IN": "English (India)", + "en-IE": "English (Ireland)", + "en-NZ": "English (New Zealand)", + "en-PH": "English (Philippines)", + "en-ZA": "English (South Africa)", + "en-US": "English (United States)", + "es-AR": "Spanish (Argentina)", + "es-BO": "Spanish (Bolivia)", + "es-CL": "Spanish (Chile)", + "es-CO": "Spanish (Colombia)", + "es-CR": "Spanish (Costa Rica)", + "es-EC": "Spanish (Ecuador)", + "es-SV": "Spanish (El Salvador)", + "es-ES": "Spanish (Spain)", + "es-US": "Spanish (United States)", + "es-GT": "Spanish (Guatemala)", + "es-HN": "Spanish (Honduras)", + "es-MX": "Spanish (Mexico)", + "es-NI": "Spanish (Nicaragua)", + "es-PA": "Spanish (Panama)", + "es-PY": "Spanish (Paraguay)", + "es-PE": "Spanish (Peru)", + "es-PR": "Spanish (Puerto Rico)", + "es-DO": "Spanish (Dominican Republic)", + "es-UY": "Spanish (Uruguay)", + "es-VE": "Spanish (Venezuela)", + "eu-ES": "Basque (Spain)", + "fil-PH": "Filipino (Philippines)", + "fr-CA": "French (Canada)", + "fr-FR": "French (France)", + "gl-ES": "Galician (Spain)", + "hr-HR": "Croatian (Croatia)", + "zu-ZA": "Zulu (South Africa)", + "is-IS": "Icelandic (Iceland)", + "it-IT": "Italian (Italy)", + "lt-LT": "Lithuanian (Lithuania)", + "hu-HU": "Hungarian (Hungary)", + "nl-NL": "Dutch (Netherlands)", + "no-NO": "Norwegian Bokmål (Norway)", + "pl-PL": "Polish (Poland)", + "pt-BR": "Portuguese (Brazil)", + "pt-PT": "Portuguese (Portugal)", + "ro-RO": "Romanian (Romania)", + "sk-SK": "Slovak (Slovakia)", + "sl-SI": "Slovenian (Slovenia)", + "fi-FI": "Finnish (Finland)", + "sv-SE": "Swedish (Sweden)", + "vi-VN": "Vietnamese (Vietnam)", + "tr-TR": "Turkish (Turkey)", + "el-GR": "Greek (Greece)", + "bg-BG": "Bulgarian (Bulgaria)", + "ru-RU": "Russian (Russia)", + "sr-RS": "Serbian (Serbia)", + "uk-UA": "Ukrainian (Ukraine)", + "iw-IL": "Hebrew", + "ar-IL": "Arabic (Israel)", + "ar-JO": "Arabic (Jordan)", + "ar-AE": "Arabic (United Arab Emirates)", + "ar-BH": "Arabic (Bahrain)", + "ar-DZ": "Arabic (Algeria)", + "ar-SA": "Arabic (Saudi Arabia)", + "ar-IQ": "Arabic (Iraq)", + "ar-KW": "Arabic (Kuwait)", + "ar-MA": "Arabic (Morocco)", + "ar-TN": "Arabic (Tunisia)", + "ar-OM": "Arabic (Oman)", + "ar-PS": "Arabic (State of Palestine)", + "ar-QA": "Arabic (Qatar)", + "ar-LB": "Arabic (Lebanon)", + "ar-EG": "Arabic (Egypt)", + "fa-IR": "Persian (Iran)", + "hi-IN": "Hindi (India)", + "th-TH": "Thai (Thailand)", + "ko-KR": "Korean (South Korea)", + "zh-TW": "Chinese Mandarin (Traditional, Taiwan)", + "ja-JP": "Japanese (Japan)", + "zh": "Chinese Mandarin (Simplified, China)" +} \ No newline at end of file