additional bcp47 languages

This commit is contained in:
Andrei Gavrilescu 2021-09-09 16:23:03 +03:00 committed by Дамян Минков
parent d83d822818
commit 34c55b4eb2
5 changed files with 116 additions and 14 deletions

View File

@ -242,8 +242,9 @@ var config = {
// transcribeWithAppLanguage: true, // transcribeWithAppLanguage: true,
// Transcriber language. This setting will only work if "transcribeWithAppLanguage" is explicitly set to false. // Transcriber language. This setting will only work if "transcribeWithAppLanguage" is explicitly set to false.
// Available languages can be found in lang/language.json. // Available languages can be found in
// preferredTranscribeLanguage: 'en', // ./src/react/features/transcribing/transcriber-langs.json.
// preferredTranscribeLanguage: 'en-US',
// Enables automatic turning on captions when recording is started // Enables automatic turning on captions when recording is started
// autoCaptionOnRecord: false, // autoCaptionOnRecord: false,

View File

@ -2,11 +2,12 @@
import i18next from 'i18next'; import i18next from 'i18next';
import JITSI_TO_BCP47_MAP from './jitsiToBCP47LocaleMap.json'; import JITSI_TO_BCP47_MAP from './jitsi-bcp47-map.json';
import logger from './logger';
import TRANSCRIBER_LANGS from './transcriber-langs.json';
const DEFAULT_TRANSCRIBER_LANG = 'en-US'; const DEFAULT_TRANSCRIBER_LANG = 'en-US';
/** /**
* Determine which language to use for transcribing. * Determine which language to use for transcribing.
* *
@ -14,17 +15,23 @@ const DEFAULT_TRANSCRIBER_LANG = 'en-US';
* @returns {string} * @returns {string}
*/ */
export function determineTranscriptionLanguage(config: Object) { export function determineTranscriptionLanguage(config: Object) {
const { preferredTranscribeLanguage, transcribeWithAppLanguage = true } = config; const { preferredTranscribeLanguage, transcribeWithAppLanguage = true } = config;
// Depending on the config either use the language that the app automatically detected or the hardcoded // Depending on the config either use the language that the app automatically detected or the hardcoded
// config value. // config BCP47 value.
const jitsiLocale = transcribeWithAppLanguage ? i18next.language : preferredTranscribeLanguage; // Jitsi language detection uses custom language tags, but the transcriber expects BCP-47 compliant tags,
// so we use a mapping file to convert them.
const bcp47Locale = transcribeWithAppLanguage ? JITSI_TO_BCP47_MAP[i18next.language] : preferredTranscribeLanguage;
// Jitsi uses custom language tags, but the transcriber expects BCP-47 compliant tags. We use a mapping file // Check if the obtained language is supported by the transcriber
// to convert them. let safeBCP47Locale = TRANSCRIBER_LANGS[bcp47Locale] && bcp47Locale;
// Not all languages that the app might detect are supported by the transcriber in which case use the default.
const { [jitsiLocale]: bcp47Locale = DEFAULT_TRANSCRIBER_LANG } = JITSI_TO_BCP47_MAP;
return bcp47Locale; if (!safeBCP47Locale) {
safeBCP47Locale = DEFAULT_TRANSCRIBER_LANG;
logger.warn(`Transcriber language ${bcp47Locale} is not supported, using default ${DEFAULT_TRANSCRIBER_LANG}`);
}
logger.info(`Transcriber language set to ${safeBCP47Locale}`);
return safeBCP47Locale;
} }

View File

@ -46,6 +46,6 @@
"tr": "tr-TR", "tr": "tr-TR",
"uk": "uk-UA", "uk": "uk-UA",
"vi": "vi-VN", "vi": "vi-VN",
"zhCN": "zh (cmn-Hans-CN)", "zhCN": "zh",
"zhTW": "zh-TW (cmn-Hant-TW)" "zhTW": "zh-TW"
} }

View File

@ -0,0 +1,5 @@
// @flow
import { getLogger } from '../base/logging/functions';
export default getLogger('features/transcribing');

View File

@ -0,0 +1,89 @@
{
"af-ZA": "Afrikaans (South Africa)",
"id-ID": "Indonesian (Indonesia)",
"ms-MY": "Malay (Malaysia)",
"ca-ES": "Catalan (Spain)",
"cs-CZ": "Czech (Czech Republic)",
"da-DK": "Danish (Denmark)",
"de-DE": "German (Germany)",
"en-AU": "English (Australia)",
"en-CA": "English (Canada)",
"en-GB": "English (United Kingdom)",
"en-IN": "English (India)",
"en-IE": "English (Ireland)",
"en-NZ": "English (New Zealand)",
"en-PH": "English (Philippines)",
"en-ZA": "English (South Africa)",
"en-US": "English (United States)",
"es-AR": "Spanish (Argentina)",
"es-BO": "Spanish (Bolivia)",
"es-CL": "Spanish (Chile)",
"es-CO": "Spanish (Colombia)",
"es-CR": "Spanish (Costa Rica)",
"es-EC": "Spanish (Ecuador)",
"es-SV": "Spanish (El Salvador)",
"es-ES": "Spanish (Spain)",
"es-US": "Spanish (United States)",
"es-GT": "Spanish (Guatemala)",
"es-HN": "Spanish (Honduras)",
"es-MX": "Spanish (Mexico)",
"es-NI": "Spanish (Nicaragua)",
"es-PA": "Spanish (Panama)",
"es-PY": "Spanish (Paraguay)",
"es-PE": "Spanish (Peru)",
"es-PR": "Spanish (Puerto Rico)",
"es-DO": "Spanish (Dominican Republic)",
"es-UY": "Spanish (Uruguay)",
"es-VE": "Spanish (Venezuela)",
"eu-ES": "Basque (Spain)",
"fil-PH": "Filipino (Philippines)",
"fr-CA": "French (Canada)",
"fr-FR": "French (France)",
"gl-ES": "Galician (Spain)",
"hr-HR": "Croatian (Croatia)",
"zu-ZA": "Zulu (South Africa)",
"is-IS": "Icelandic (Iceland)",
"it-IT": "Italian (Italy)",
"lt-LT": "Lithuanian (Lithuania)",
"hu-HU": "Hungarian (Hungary)",
"nl-NL": "Dutch (Netherlands)",
"no-NO": "Norwegian Bokmål (Norway)",
"pl-PL": "Polish (Poland)",
"pt-BR": "Portuguese (Brazil)",
"pt-PT": "Portuguese (Portugal)",
"ro-RO": "Romanian (Romania)",
"sk-SK": "Slovak (Slovakia)",
"sl-SI": "Slovenian (Slovenia)",
"fi-FI": "Finnish (Finland)",
"sv-SE": "Swedish (Sweden)",
"vi-VN": "Vietnamese (Vietnam)",
"tr-TR": "Turkish (Turkey)",
"el-GR": "Greek (Greece)",
"bg-BG": "Bulgarian (Bulgaria)",
"ru-RU": "Russian (Russia)",
"sr-RS": "Serbian (Serbia)",
"uk-UA": "Ukrainian (Ukraine)",
"iw-IL": "Hebrew",
"ar-IL": "Arabic (Israel)",
"ar-JO": "Arabic (Jordan)",
"ar-AE": "Arabic (United Arab Emirates)",
"ar-BH": "Arabic (Bahrain)",
"ar-DZ": "Arabic (Algeria)",
"ar-SA": "Arabic (Saudi Arabia)",
"ar-IQ": "Arabic (Iraq)",
"ar-KW": "Arabic (Kuwait)",
"ar-MA": "Arabic (Morocco)",
"ar-TN": "Arabic (Tunisia)",
"ar-OM": "Arabic (Oman)",
"ar-PS": "Arabic (State of Palestine)",
"ar-QA": "Arabic (Qatar)",
"ar-LB": "Arabic (Lebanon)",
"ar-EG": "Arabic (Egypt)",
"fa-IR": "Persian (Iran)",
"hi-IN": "Hindi (India)",
"th-TH": "Thai (Thailand)",
"ko-KR": "Korean (South Korea)",
"zh-TW": "Chinese Mandarin (Traditional, Taiwan)",
"ja-JP": "Japanese (Japan)",
"zh": "Chinese Mandarin (Simplified, China)"
}