diff --git a/source/net/yacy/kelondro/util/ISO639.java b/source/net/yacy/kelondro/util/ISO639.java
index 52818f64b..3ef1f94cb 100644
--- a/source/net/yacy/kelondro/util/ISO639.java
+++ b/source/net/yacy/kelondro/util/ISO639.java
@@ -1,4 +1,4 @@
-// iso639.java
+// ISO639.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 19.09.2008 on http://yacy.net
//
@@ -30,110 +30,172 @@ import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
+/**
+ * Support for ISO 639 language codes.
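+ * @see <a href="https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes">Wikipedia list of ISO 639-1 codes</a>
+ * @see <a href="https://www.loc.gov/standards/iso639-2/php/code_list.php">Language Code List from the ISO 639-2 Registration Authority (Library of Congress)</a>
+ * @see <a href="https://iso639-3.sil.org/">Home page of the ISO 639-3 Registration Authority (SIL International)</a>
+ * @see <a href="https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry">IANA language subtag registry</a>
+ * @see <a href="https://www.loc.gov/standards/iso639-2/php/code_changes.php">Code Changes history from the ISO 639-2 Registration Authority</a>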
+ */
public class ISO639 {
+
+ /*
+ * Note: the icu4j package classes, such as com.ibm.icu.impl.LocaleIDs, could be
+ * used instead to keep ISO 639 support more up to date, notably to support
+ * the three-letter ISO 639 language codes.
+ */
+ /** ISO 639-1 language code table. Each entry is "[two-letter code]-[ISO reference name]"; some names list alternatives separated by "; ". */
private static final String[] codes = {
"aa-Afar",
"ab-Abkhazian",
+ "ae-Avestan",
"af-Afrikaans",
+ "ak-Akan",
"am-Amharic",
+ "an-Aragonese",
"ar-Arabic",
"as-Assamese",
+ "av-Avaric",
"ay-Aymara",
"az-Azerbaijani",
"ba-Bashkir",
- "be-Byelorussian",
+ "be-Belarusian",
"bg-Bulgarian",
- "bh-Bihari",
+ "bh-Bihari", // collective language code for bho-Bhojpuri, mag-Magahi, and mai-Maithili
"bi-Bislama",
- "bn-Bengali;-Bangla",
+ "bm-Bambara",
+ "bn-Bengali",
"bo-Tibetan",
"br-Breton",
+ "bs-Bosnian",
"ca-Catalan",
+ "ce-Chechen",
+ "ch-Chamorro",
"co-Corsican",
+ "cr-Cree",
"cs-Czech",
+ "cu-Church Slavic",
+ "cv-Chuvash",
"cy-Welsh",
"da-Danish",
"de-German",
- "dz-Bhutani",
- "el-Greek",
+ "dv-Dhivehi",
+ "dz-Dzongkha",
+ "ee-Ewe",
+ "el-Modern Greek (1453-)",
"en-English",
"eo-Esperanto",
"es-Spanish",
"et-Estonian",
"eu-Basque",
"fa-Persian",
+ "ff-Fulah",
"fi-Finnish",
- "fj-Fiji",
- "fo-Faeroese",
+ "fj-Fijian",
+ "fo-Faroese",
"fr-French",
- "fy-Frisian",
+ "fy-Western Frisian",
"ga-Irish",
- "gd-Scots-Gaelic",
+ "gd-Scottish Gaelic",
"gl-Galician",
"gn-Guarani",
"gu-Gujarati",
+ "gv-Manx",
"ha-Hausa",
+ "he-Hebrew",
"hi-Hindi",
+ "ho-Hiri Motu",
"hr-Croatian",
+ "ht-Haitian",
"hu-Hungarian",
"hy-Armenian",
+ "hz-Herero",
"ia-Interlingua",
+ "id-Indonesian",
"ie-Interlingue",
- "ik-Inupiak",
- "in-Indonesian",
+ "ig-Igbo",
+ "ii-Sichuan Yi",
+ "ik-Inupiaq",
+ "in-Indonesian", // deprecated on 1989-03-11 in favor of id-Indonesian
+ "io-Ido",
"is-Icelandic",
"it-Italian",
- "iw-Hebrew",
+ "iu-Inuktitut",
+ "iw-Hebrew", // deprecated on 1989-03-11 in favor of he-Hebrew
"ja-Japanese",
- "ji-Yiddish",
- "jw-Javanese",
+ "ji-Yiddish", // deprecated on 1989-03-11 in favor of yi-Yiddish
+ "jv-Javanese", // replaces the former jw entry; NOTE(review): unlike in/iw/ji, the old code jw is not kept as an alias - confirm this is intended
"ka-Georgian",
+ "kg-Kongo",
+ "ki-Kikuyu",
+ "kj-Kuanyama",
"kk-Kazakh",
- "kl-Greenlandic",
- "km-Cambodian",
+ "kl-Kalaallisut; Greenlandic",
+ "km-Central Khmer",
"kn-Kannada",
"ko-Korean",
+ "kr-Kanuri",
"ks-Kashmiri",
"ku-Kurdish",
+ "kv-Komi",
+ "kw-Cornish",
"ky-Kirghiz",
"la-Latin",
+ "lb-Luxembourgish",
+ "lg-Ganda",
+ "li-Limburgan",
"ln-Lingala",
- "lo-Laothian",
+ "lo-Lao",
"lt-Lithuanian",
- "lv-Latvian,-Lettish",
+ "lu-Luba-Katanga",
+ "lv-Latvian",
"mg-Malagasy",
+ "mh-Marshallese",
"mi-Maori",
"mk-Macedonian",
"ml-Malayalam",
"mn-Mongolian",
- //"mo-Moldavian", // this maps on 'mozilla' :(
+ //"mo-Moldavian", // this maps on 'mozilla' :( // deprecated on 2008-11-03 in favor of ro-Romanian to be used for the variant of the Romanian language also known as Moldavian
"mr-Marathi",
"ms-Malay",
"mt-Maltese",
"my-Burmese",
"na-Nauru",
+ "nb-Norwegian Bokmål",
+ "nd-North Ndebele",
"ne-Nepali",
+ "ng-Ndonga",
"nl-Dutch",
+ "nn-Norwegian Nynorsk",
"no-Norwegian",
- "oc-Occitan",
- "om-(Afan)-Oromo",
+ "nr-South Ndebele",
+ "nv-Navajo",
+ "ny-Nyanja",
+ "oc-Occitan (post 1500)",
+ "oj-Ojibwa",
+ "om-Oromo",
"or-Oriya",
- "pa-Punjabi",
+ "os-Ossetian",
+ "pa-Panjabi; Punjabi",
+ "pi-Pali",
"pl-Polish",
- "ps-Pashto,-Pushto",
+ "ps-Pushto; Pashto",
"pt-Portuguese",
"qu-Quechua",
- "rm-Rhaeto-Romance",
- "rn-Kirundi",
+ "rm-Romansh",
+ "rn-Rundi",
"ro-Romanian",
"ru-Russian",
"rw-Kinyarwanda",
"sa-Sanskrit",
+ "sc-Sardinian",
"sd-Sindhi",
- "sg-Sangro",
+ "se-Northern Sami",
+ "sg-Sango",
"sh-Serbo-Croatian",
- "si-Singhalese",
+ "si-Sinhala; Sinhalese",
"sk-Slovak",
"sl-Slovenian",
"sm-Samoan",
@@ -141,35 +203,42 @@ public class ISO639 {
"so-Somali",
"sq-Albanian",
"sr-Serbian",
- "ss-Siswati",
- "st-Sesotho",
+ "ss-Swati",
+ "st-Southern Sotho",
"su-Sundanese",
"sv-Swedish",
"sw-Swahili",
"ta-Tamil",
- "te-Tegulu",
+ "te-Telugu",
"tg-Tajik",
"th-Thai",
"ti-Tigrinya",
"tk-Turkmen",
"tl-Tagalog",
- "tn-Setswana",
- "to-Tonga",
+ "tn-Tswana",
+ "to-Tonga (Tonga Islands)",
"tr-Turkish",
"ts-Tsonga",
"tt-Tatar",
"tw-Twi",
+ "ty-Tahitian",
+ "ug-Uighur",
"uk-Ukrainian",
"ur-Urdu",
"uz-Uzbek",
+ "ve-Venda",
"vi-Vietnamese",
- "vo-Volapuk",
+ "vo-Volapük",
+ "wa-Walloon",
"wo-Wolof",
"xh-Xhosa",
+ "yi-Yiddish",
"yo-Yoruba",
+ "za-Zhuang",
"zh-Chinese",
"zu-Zulu"};
+ /** Mapping from two-letter ISO 639-1 code to the ISO language reference name in English; initial capacity is the number of entries in codes. NOTE(review): declared here with raw Map/ConcurrentHashMap types - consider {@code Map<String, String>}. */
private static Map mapping = new ConcurrentHashMap(codes.length);
static {