diff --git a/source/net/yacy/kelondro/util/ISO639.java b/source/net/yacy/kelondro/util/ISO639.java index 52818f64b..3ef1f94cb 100644 --- a/source/net/yacy/kelondro/util/ISO639.java +++ b/source/net/yacy/kelondro/util/ISO639.java @@ -1,4 +1,4 @@ -// iso639.java +// ISO639.java // (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany // first published 19.09.2008 on http://yacy.net // @@ -30,110 +30,172 @@ import java.util.Locale; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +/** + * Support for ISO 639 language codes. + * @see Wikipedia list of ISO 639-1 codes + * @see Language Code List from the ISO 639-2 Registration Authority (Library of Congress) + * @see Home page of the ISO 639-3 Registration Authority (SIL International) + * @see IANA language subtag registry + * @see Code Changes history from the ISO 639-2 Registration Authority + */ public class ISO639 { + + /* + * Note : using icu4j package classes such as com.ibm.icu.impl.LocaleIDs may be + * considered to maintain a more up to date support of ISO 639 codes, notably to + * support ISO 639 3 letters language codes. + */ + /** ISO 639-1 language codes table : [two letters code] - [ISO Reference name] */ private static final String[] codes = { "aa-Afar", "ab-Abkhazian", + "ae-Avestan", "af-Afrikaans", + "ak-Akan", "am-Amharic", + "an-Aragonese", "ar-Arabic", "as-Assamese", + "av-Avaric", "ay-Aymara", "az-Azerbaijani", "ba-Bashkir", - "be-Byelorussian", + "be-Belarusian", "bg-Bulgarian", - "bh-Bihari", + "bh-Bihari", // collective language code for bho-Bhojpuri, mag-Magahi, and mai-Maithili "bi-Bislama", - "bn-Bengali;-Bangla", + "bm-Bambara", + "bn-Bengali", "bo-Tibetan", "br-Breton", + "bs-Bosnian", "ca-Catalan", + "ce-Chechen", + "ch-Chamorro", "co-Corsican", + "cr-Cree", "cs-Czech", + "cu-Church Slavic", + "cv-Chuvash", "cy-Welsh", "da-Danish", "de-German", - "dz-Bhutani", - "el-Greek", + "dv-Dhivehi", + "dz-Dzongkha", + "ee-Ewe", + "el-Modern Greek (1453-)", "en-English", "eo-Esperanto", "es-Spanish", "et-Estonian", "eu-Basque", "fa-Persian", + "ff-Fulah", "fi-Finnish", - "fj-Fiji", - "fo-Faeroese", + "fj-Fijian", + "fo-Faroese", "fr-French", - "fy-Frisian", + "fy-Western Frisian", "ga-Irish", - "gd-Scots-Gaelic", + "gd-Scottish Gaelic", "gl-Galician", "gn-Guarani", "gu-Gujarati", + "gv-Manx", "ha-Hausa", + "he-Hebrew", "hi-Hindi", + "ho-Hiri Motu", "hr-Croatian", + "ht-Haitian", "hu-Hungarian", "hy-Armenian", + "hz-Herero", "ia-Interlingua", + "id-Indonesian", "ie-Interlingue", - "ik-Inupiak", - "in-Indonesian", + "ig-Igbo", + "ii-Sichuan Yi", + "ik-Inupiaq", + "in-Indonesian", // deprecated on 1989-03-11 in favor of id-Indonesian + "io-Ido", "is-Icelandic", "it-Italian", - "iw-Hebrew", + "iu-Inuktitut", + "iw-Hebrew", // deprecated on 1989-03-11 in favor of he-Hebrew "ja-Japanese", - "ji-Yiddish", - "jw-Javanese", + "ji-Yiddish", // deprecated on 1989-03-11 in favor of yi-Yiddish + "jv-Javanese", "ka-Georgian", + "kg-Kongo", + "ki-Kikuyu", + "kj-Kuanyama", "kk-Kazakh", - "kl-Greenlandic", - "km-Cambodian", + "kl-Kalaallisut; Greenlandic", + "km-Central Khmer", "kn-Kannada", "ko-Korean", + "kr-Kanuri", "ks-Kashmiri", "ku-Kurdish", + "kv-Komi", + "kw-Cornish", "ky-Kirghiz", "la-Latin", + "lb-Luxembourgish", + "lg-Ganda", + "li-Limburgan", "ln-Lingala", - "lo-Laothian", + "lo-Lao", "lt-Lithuanian", - "lv-Latvian,-Lettish", + "lu-Luba-Katanga", + "lv-Latvian", "mg-Malagasy", + "mh-Marshallese", "mi-Maori", "mk-Macedonian", "ml-Malayalam", "mn-Mongolian", - //"mo-Moldavian", // this maps on 'mozilla' :( + //"mo-Moldavian", // this maps on 'mozilla' :( // deprecated on 2008-11-03 in favor of ro-Romanian to be used for the variant of the Romanian language also known as Moldavian "mr-Marathi", "ms-Malay", "mt-Maltese", "my-Burmese", "na-Nauru", + "nb-Norwegian Bokmål", + "nd-North Ndebele", "ne-Nepali", + "ng-Ndonga", "nl-Dutch", + "nn-Norwegian Nynorsk", "no-Norwegian", - "oc-Occitan", - "om-(Afan)-Oromo", + "nr-South Ndebele", + "nv-Navajo", + "ny-Nyanja", + "oc-Occitan (post 1500)", + "oj-Ojibwa", + "om-Oromo", "or-Oriya", - "pa-Punjabi", + "os-Ossetian", + "pa-Panjabi; Punjabi", + "pi-Pali", "pl-Polish", - "ps-Pashto,-Pushto", + "ps-Pushto; Pashto", "pt-Portuguese", "qu-Quechua", - "rm-Rhaeto-Romance", - "rn-Kirundi", + "rm-Romansh", + "rn-Rundi", "ro-Romanian", "ru-Russian", "rw-Kinyarwanda", "sa-Sanskrit", + "sc-Sardinian", "sd-Sindhi", - "sg-Sangro", + "se-Northern Sami", + "sg-Sango", "sh-Serbo-Croatian", - "si-Singhalese", + "si-Sinhala; Sinhalese", "sk-Slovak", "sl-Slovenian", "sm-Samoan", @@ -141,35 +203,42 @@ public class ISO639 { "so-Somali", "sq-Albanian", "sr-Serbian", - "ss-Siswati", - "st-Sesotho", + "ss-Swati", + "st-Southern Sotho", "su-Sundanese", "sv-Swedish", "sw-Swahili", "ta-Tamil", - "te-Tegulu", + "te-Telugu", "tg-Tajik", "th-Thai", "ti-Tigrinya", "tk-Turkmen", "tl-Tagalog", - "tn-Setswana", - "to-Tonga", + "tn-Tswana", + "to-Tonga (Tonga Islands)", "tr-Turkish", "ts-Tsonga", "tt-Tatar", "tw-Twi", + "ty-Tahitian", + "ug-Uighur", "uk-Ukrainian", "ur-Urdu", "uz-Uzbek", + "ve-Venda", "vi-Vietnamese", - "vo-Volapuk", + "vo-Volapük", + "wa-Walloon", "wo-Wolof", "xh-Xhosa", + "yi-Yiddish", "yo-Yoruba", + "za-Zhuang", "zh-Chinese", "zu-Zulu"}; + /** Mapping from 2 letters ISO 639-1 code to ISO language reference name in English. */ private static Map mapping = new ConcurrentHashMap(codes.length); static {