diff --git a/htroot/DictionaryLoader_p.html b/htroot/DictionaryLoader_p.html index fd48f6c6a..8d3d0fdfe 100644 --- a/htroot/DictionaryLoader_p.html +++ b/htroot/DictionaryLoader_p.html @@ -65,9 +65,11 @@ Geolocalization will enable YaCy to present locations from OpenStreetMap according to given search words.

GeoNames

-

With this file it is possible to find cities with a population > 1000 all over the world.

+

With this file it is possible to find cities all over the world.

+
Content
+
cities with a population > 1000 all over the world
#[geon0URL]#
@@ -99,6 +101,74 @@
Result
cannot activate dictionary file: #[error]#
#(/geon0ActionActivated)#
+
+
Content
+
cities with a population > 5000 all over the world
+
+
#[geon1URL]#
+
+
#[geon1Storage]#
+
+
#(geon1Status)#
not loaded
::
loaded
::deactivated#(/geon1Status)#
+
Action
+
#(geon1Status)# + :: + + :: + + + #(/geon1Status)#
+ #(geon1ActionLoaded)#:: +
Result
loaded and activated dictionary file
:: +
Result
loading of dictionary file failed: #[error]#
+ #(/geon1ActionLoaded)# + #(geon1ActionRemoved)#:: +
Result
deactivated and removed dictionary file
:: +
Result
cannot remove dictionary file: #[error]#
+ #(/geon1ActionRemoved)# + #(geon1ActionDeactivated)#:: +
Result
deactivated dictionary file
:: +
Result
cannot deactivate dictionary file: #[error]#
+ #(/geon1ActionDeactivated)# + #(geon1ActionActivated)#:: +
Result
activated dictionary file
:: +
Result
cannot activate dictionary file: #[error]#
+ #(/geon1ActionActivated)# +
+
+
Content
+
cities with a population > 100000 all over the world (the set is reduced to cities > 100000)
+
+
#[geon2URL]#
+
+
#[geon2Storage]#
+
+
#(geon2Status)#
not loaded
::
loaded
::deactivated#(/geon2Status)#
+
Action
+
#(geon2Status)# + :: + + :: + + + #(/geon2Status)#
+ #(geon2ActionLoaded)#:: +
Result
loaded and activated dictionary file
:: +
Result
loading of dictionary file failed: #[error]#
+ #(/geon2ActionLoaded)# + #(geon2ActionRemoved)#:: +
Result
deactivated and removed dictionary file
:: +
Result
cannot remove dictionary file: #[error]#
+ #(/geon2ActionRemoved)# + #(geon2ActionDeactivated)#:: +
Result
deactivated dictionary file
:: +
Result
cannot deactivate dictionary file: #[error]#
+ #(/geon2ActionDeactivated)# + #(geon2ActionActivated)#:: +
Result
activated dictionary file
:: +
Result
cannot activate dictionary file: #[error]#
+ #(/geon2ActionActivated)# +

OpenGeoDB

With this file it is possible to find locations in Germany using the location (city) name, a zip code, a car sign or a telephone pre-dial number.

diff --git a/htroot/DictionaryLoader_p.java b/htroot/DictionaryLoader_p.java index c5906fdb9..70ba014ee 100644 --- a/htroot/DictionaryLoader_p.java +++ b/htroot/DictionaryLoader_p.java @@ -68,7 +68,7 @@ public class DictionaryLoader_p { final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false); final byte[] b = response.getContent(); FileUtils.copy(b, LibraryProvider.Dictionary.GEON0.file()); - LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null)); + LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null, -1)); LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc); prop.put("geon0Status", LibraryProvider.Dictionary.GEON0.file().exists() ? 1 : 0); prop.put("geon0ActionLoaded", 1); @@ -98,11 +98,95 @@ public class DictionaryLoader_p { if (post.containsKey("geon0Activate")) { LibraryProvider.Dictionary.GEON0.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON0.file()); - LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null)); + LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null, -1)); LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc); prop.put("geon0ActionActivated", 1); } + // GEON1 + if (post.containsKey("geon1Load")) { + // load from the net + try { + final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false); + final byte[] b = response.getContent(); + FileUtils.copy(b, LibraryProvider.Dictionary.GEON1.file()); + 
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON1.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON1.file(), null, -1)); + LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc); + prop.put("geon1Status", LibraryProvider.Dictionary.GEON1.file().exists() ? 1 : 0); + prop.put("geon1ActionLoaded", 1); + } catch (final MalformedURLException e) { + Log.logException(e); + prop.put("geon1ActionLoaded", 2); + prop.put("geon1ActionLoaded_error", e.getMessage()); + } catch (final IOException e) { + Log.logException(e); + prop.put("geon1ActionLoaded", 2); + prop.put("geon1ActionLoaded_error", e.getMessage()); + } + } + + if (post.containsKey("geon1Remove")) { + FileUtils.deletedelete(LibraryProvider.Dictionary.GEON1.file()); + FileUtils.deletedelete(LibraryProvider.Dictionary.GEON1.fileDisabled()); + LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEON1.nickname); + prop.put("geon1ActionRemoved", 1); + } + + if (post.containsKey("geon1Deactivate")) { + LibraryProvider.Dictionary.GEON1.file().renameTo(LibraryProvider.Dictionary.GEON1.fileDisabled()); + LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEON1.nickname); + prop.put("geon1ActionDeactivated", 1); + } + + if (post.containsKey("geon1Activate")) { + LibraryProvider.Dictionary.GEON1.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON1.file()); + LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON1.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON1.file(), null, -1)); + LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc); + prop.put("geon1ActionActivated", 1); + } + + // GEON2 + if (post.containsKey("geon2Load")) { + // load from the net + try { + final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON2.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false); + final byte[] b = response.getContent(); + FileUtils.copy(b, 
LibraryProvider.Dictionary.GEON2.file()); + LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON2.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON2.file(), null, 100000)); + LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc); + prop.put("geon2Status", LibraryProvider.Dictionary.GEON2.file().exists() ? 1 : 0); + prop.put("geon2ActionLoaded", 1); + } catch (final MalformedURLException e) { + Log.logException(e); + prop.put("geon2ActionLoaded", 2); + prop.put("geon2ActionLoaded_error", e.getMessage()); + } catch (final IOException e) { + Log.logException(e); + prop.put("geon2ActionLoaded", 2); + prop.put("geon2ActionLoaded_error", e.getMessage()); + } + } + + if (post.containsKey("geon2Remove")) { + FileUtils.deletedelete(LibraryProvider.Dictionary.GEON2.file()); + FileUtils.deletedelete(LibraryProvider.Dictionary.GEON2.fileDisabled()); + LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEON2.nickname); + prop.put("geon2ActionRemoved", 1); + } + + if (post.containsKey("geon2Deactivate")) { + LibraryProvider.Dictionary.GEON2.file().renameTo(LibraryProvider.Dictionary.GEON2.fileDisabled()); + LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEON2.nickname); + prop.put("geon2ActionDeactivated", 1); + } + + if (post.containsKey("geon2Activate")) { + LibraryProvider.Dictionary.GEON2.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON2.file()); + LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON2.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON2.file(), null, 100000)); + LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc); + prop.put("geon2ActionActivated", 1); + } + // GEO1 if (post.containsKey("geo1Load")) { // load from the net @@ -110,7 +194,7 @@ public class DictionaryLoader_p { final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEODB1.url), false, true), CacheStrategy.NOCACHE, 
Integer.MAX_VALUE, false); final byte[] b = response.getContent(); FileUtils.copy(b, LibraryProvider.Dictionary.GEODB1.file()); - LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB0.nickname); + LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB1.nickname); LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), null)); LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc); prop.put("geo1Status", LibraryProvider.Dictionary.GEODB1.file().exists() ? 1 : 0); diff --git a/source/net/yacy/cora/lod/vocabulary/Tagging.java b/source/net/yacy/cora/lod/vocabulary/Tagging.java index 57604d0aa..3239deb48 100644 --- a/source/net/yacy/cora/lod/vocabulary/Tagging.java +++ b/source/net/yacy/cora/lod/vocabulary/Tagging.java @@ -36,6 +36,7 @@ import java.util.regex.Pattern; import net.yacy.cora.storage.Files; import net.yacy.document.WordCache.Dictionary; +import net.yacy.document.geolocation.GeoLocation; import net.yacy.document.geolocation.Locations; public class Tagging { @@ -52,6 +53,39 @@ public class Tagging { private String predicate, namespace, objectspace; + /** + * helper class: Synonym and Objectlink tuple + */ + public static class SOTuple { + private final String synonyms; + private final String objectlink; + + public SOTuple(String synonyms, String objectlink) { + this.synonyms = synonyms; + this.objectlink = objectlink; + } + + public SOTuple(String[] synonyms, String objectlink) { + StringBuilder sb = new StringBuilder(synonyms.length * 10); + for (String s: synonyms) sb.append(',').append(s); + this.synonyms = sb.substring(1); + this.objectlink = objectlink; + } + + public String getSynonymsCSV() { + return this.synonyms; + } + + public String[] getSynonymsList() { + return this.synonyms.split(","); + } + + public String getObjectlink() { + return this.objectlink; + } + + } + public Tagging(String name) { 
this.navigatorName = name; this.synonym2term = new ConcurrentHashMap(); @@ -136,37 +170,104 @@ public class Tagging { } } - /** - * helper class: Synonym and Objectlink tuple - */ - public static class SOTuple { - private final String synonyms; - private final String objectlink; - - public SOTuple(String synonyms, String objectlink) { - this.synonyms = synonyms; - this.objectlink = objectlink; - } - - public SOTuple(String[] synonyms, String objectlink) { - StringBuilder sb = new StringBuilder(synonyms.length * 10); - for (String s: synonyms) sb.append(',').append(s); - this.synonyms = sb.substring(1); - this.objectlink = objectlink; + public Tagging(String name, Locations location) { + this(name); + Set locNames = location.locationNames(); + TreeSet geo; + GeoLocation g; + for (String loc: locNames) { + String syn = normalizeTerm(loc); + this.synonym2term.put(syn, loc); + this.term2synonym.put(loc, syn); + geo = location.find(loc, true); + if (geo.size() > 0) { + g = geo.iterator().next(); + this.term2objectlink.put(loc, "http://www.openstreetmap.org/?lat=" + g.lat() + "&lon=" + g.lon() + "&zoom=16"); + } } + } - public String getSynonymsCSV() { - return this.synonyms; + public Tagging(String name, Dictionary dictionary) { + this(name); + Set words = dictionary.getWords(); + String s; + for (StringBuilder word: words) { + s = word.toString(); + this.synonym2term.put(s.toLowerCase(), s); + this.term2synonym.put(s, s.toLowerCase()); } + } - public String[] getSynonymsList() { - return this.synonyms.split(","); - } + public void init() throws IOException { + if (this.propFile == null) return; + this.synonym2term.clear(); + this.term2synonym.clear(); + this.term2objectlink.clear(); + this.synonym2synonyms.clear(); + this.namespace = DEFAULT_NAMESPACE; + this.predicate = this.namespace + this.navigatorName; + this.objectspace = null; - public String getObjectlink() { - return this.objectlink; + BlockingQueue list = Files.concurentLineReader(this.propFile, 1000); + 
String term, v; + String[] tags; + int p; + String line; + try { + vocloop: while ((line = list.take()) != Files.POISON_LINE) { + line = line.trim(); + p = line.indexOf('#'); + if (p >= 0) { + String comment = line.substring(p + 1).trim(); + if (comment.startsWith("namespace:")) { + this.namespace = comment.substring(10).trim(); + if (!this.namespace.endsWith("/") && !this.namespace.endsWith("#") && this.namespace.length() > 0) this.namespace += "#"; + this.predicate = this.namespace + this.navigatorName; + continue vocloop; + } + if (comment.startsWith("objectspace:")) { + this.objectspace = comment.substring(12).trim(); + if (!this.objectspace.endsWith("/") && !this.objectspace.endsWith("#") && this.objectspace.length() > 0) this.objectspace += "#"; + continue vocloop; + } + } + String[] pl = parseLine(line); + if (pl == null) { + continue vocloop; + } + if (pl[1] == null) { + term = normalizeKey(pl[0]); + v = normalizeTerm(pl[0]); + this.synonym2term.put(v, term); + this.term2synonym.put(term, v); + if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]); + continue vocloop; + } + term = normalizeKey(pl[0]); + v = pl[1]; + tags = v.split(","); + Set synonyms = new HashSet(); + synonyms.add(term); + tagloop: for (String synonym: tags) { + if (synonym.length() == 0) continue tagloop; + synonyms.add(synonym); + synonym = normalizeTerm(synonym); + if (synonym.length() == 0) continue tagloop; + synonyms.add(synonym); + this.synonym2term.put(synonym, term); + this.term2synonym.put(term, synonym); + } + String synonym = normalizeTerm(term); + this.synonym2term.put(synonym, term); + this.term2synonym.put(term, synonym); + if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]); + synonyms.add(synonym); + for (String s: synonyms) { + this.synonym2synonyms.put(s, synonyms); + } + } + } catch (InterruptedException e) { } - } public int size() { @@ -362,99 +463,6 @@ public class Tagging { return new String[]{line.substring(0, 
p), line.substring(p + 1), c}; } - public void init() throws IOException { - if (this.propFile == null) return; - this.synonym2term.clear(); - this.term2synonym.clear(); - this.term2objectlink.clear(); - this.synonym2synonyms.clear(); - this.namespace = DEFAULT_NAMESPACE; - this.predicate = this.namespace + this.navigatorName; - this.objectspace = null; - - BlockingQueue list = Files.concurentLineReader(this.propFile, 1000); - String term, v; - String[] tags; - int p; - String line; - try { - vocloop: while ((line = list.take()) != Files.POISON_LINE) { - line = line.trim(); - p = line.indexOf('#'); - if (p >= 0) { - String comment = line.substring(p + 1).trim(); - if (comment.startsWith("namespace:")) { - this.namespace = comment.substring(10).trim(); - if (!this.namespace.endsWith("/") && !this.namespace.endsWith("#") && this.namespace.length() > 0) this.namespace += "#"; - this.predicate = this.namespace + this.navigatorName; - continue vocloop; - } - if (comment.startsWith("objectspace:")) { - this.objectspace = comment.substring(12).trim(); - if (!this.objectspace.endsWith("/") && !this.objectspace.endsWith("#") && this.objectspace.length() > 0) this.objectspace += "#"; - continue vocloop; - } - } - String[] pl = parseLine(line); - if (pl == null) { - continue vocloop; - } - if (pl[1] == null) { - term = normalizeKey(pl[0]); - v = normalizeTerm(pl[0]); - this.synonym2term.put(v, term); - this.term2synonym.put(term, v); - if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]); - continue vocloop; - } - term = normalizeKey(pl[0]); - v = pl[1]; - tags = v.split(","); - Set synonyms = new HashSet(); - synonyms.add(term); - tagloop: for (String synonym: tags) { - if (synonym.length() == 0) continue tagloop; - synonyms.add(synonym); - synonym = normalizeTerm(synonym); - if (synonym.length() == 0) continue tagloop; - synonyms.add(synonym); - this.synonym2term.put(synonym, term); - this.term2synonym.put(term, synonym); - } - String synonym = 
normalizeTerm(term); - this.synonym2term.put(synonym, term); - this.term2synonym.put(term, synonym); - if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]); - synonyms.add(synonym); - for (String s: synonyms) { - this.synonym2synonyms.put(s, synonyms); - } - } - } catch (InterruptedException e) { - } - } - - public Tagging(String name, Locations localization) { - this(name); - Set locNames = localization.locationNames(); - for (String loc: locNames) { - String syn = normalizeTerm(loc); - this.synonym2term.put(syn, loc); - this.term2synonym.put(loc, syn); - } - } - - public Tagging(String name, Dictionary dictionary) { - this(name); - Set words = dictionary.getWords(); - String s; - for (StringBuilder word: words) { - s = word.toString(); - this.synonym2term.put(s.toLowerCase(), s); - this.term2synonym.put(s, s.toLowerCase()); - } - } - /** * get the predicate name which already contains the prefix url stub * @return diff --git a/source/net/yacy/document/LibraryProvider.java b/source/net/yacy/document/LibraryProvider.java index c0bf959f8..440d07147 100644 --- a/source/net/yacy/document/LibraryProvider.java +++ b/source/net/yacy/document/LibraryProvider.java @@ -75,6 +75,8 @@ public class LibraryProvider { "http://downloads.sourceforge.net/project/opengeodb/Data/0.2.5a/opengeodb-0.2.5a-UTF8-sql.gz" ), GEODB1( "geo1", "http://fa-technik.adfc.de/code/opengeodb/dump/opengeodb-02624_2011-10-17.sql.gz" ), GEON0( "geon0", "http://download.geonames.org/export/dump/cities1000.zip" ), + GEON1( "geon1", "http://download.geonames.org/export/dump/cities5000.zip" ), + GEON2( "geon2", "http://download.geonames.org/export/dump/cities15000.zip" ), DRW0( "drw0", "http://www.ids-mannheim.de/kl/derewo/derewo-v-100000t-2009-04-30-0.1.zip" ), PND0( "pnd0", "http://downloads.dbpedia.org/3.7-i18n/de/pnd_de.nt.bz2" ); @@ -119,7 +121,9 @@ public class LibraryProvider { activateDeReWo(); initDidYouMean(); integrateOpenGeoDB(); - integrateGeonames(); + 
integrateGeonames0(-1); + integrateGeonames1(-1); + integrateGeonames2(100000); activatePND(); Set allTags = new HashSet() ; allTags.addAll(autotagging.allTags()); // we must copy this into a clone to prevent circularity @@ -144,10 +148,24 @@ public class LibraryProvider { } } - public static void integrateGeonames() { + public static void integrateGeonames0(long minPopulation) { final File geon = Dictionary.GEON0.file(); if ( geon.exists() ) { - geoLoc.activateLocation(Dictionary.GEON0.nickname, new GeonamesLocation(geon, dymLib)); + geoLoc.activateLocation(Dictionary.GEON0.nickname, new GeonamesLocation(geon, dymLib, minPopulation)); + return; + } + } + public static void integrateGeonames1(long minPopulation) { + final File geon = Dictionary.GEON1.file(); + if ( geon.exists() ) { + geoLoc.activateLocation(Dictionary.GEON1.nickname, new GeonamesLocation(geon, dymLib, minPopulation)); + return; + } + } + public static void integrateGeonames2(long minPopulation) { + final File geon = Dictionary.GEON2.file(); + if ( geon.exists() ) { + geoLoc.activateLocation(Dictionary.GEON2.nickname, new GeonamesLocation(geon, dymLib, minPopulation)); return; } } @@ -296,13 +314,6 @@ public class LibraryProvider { InputStream derewoTxtEntry; try { final ZipFile zip = new ZipFile(file); - /* - final Enumeration i = zip.entries(); - while (i.hasMoreElements()) { - final ZipEntry e = i.nextElement(); - System.out.println("loadDeReWo: " + e.getName()); - } - */ derewoTxtEntry = zip.getInputStream(zip.getEntry("derewo-v-100000t-2009-04-30-0.1")); } catch ( final ZipException e ) { Log.logException(e); diff --git a/source/net/yacy/document/geolocation/GeonamesLocation.java b/source/net/yacy/document/geolocation/GeonamesLocation.java index f3f2d6ab2..5b5842b26 100644 --- a/source/net/yacy/document/geolocation/GeonamesLocation.java +++ b/source/net/yacy/document/geolocation/GeonamesLocation.java @@ -73,8 +73,7 @@ public class GeonamesLocation implements Locations private final Map id2loc; 
private final TreeMap> name2ids; private final File file; - - public GeonamesLocation(final File file, WordCache dymLib) { + public GeonamesLocation(final File file, WordCache dymLib, long minPopulation) { // this is a processing of the cities1000.zip file from http://download.geonames.org/export/dump/ this.file = file; @@ -88,7 +87,9 @@ public class GeonamesLocation implements Locations BufferedReader reader; try { final ZipFile zf = new ZipFile(file); - final ZipEntry ze = zf.getEntry("cities1000.txt"); + String entryName = file.getName(); + entryName = entryName.substring(0, entryName.length() - 3) + "txt"; + final ZipEntry ze = zf.getEntry(entryName); final InputStream is = zf.getInputStream(ze); reader = new BufferedReader(new InputStreamReader(is, "UTF-8")); } catch ( final IOException e ) { @@ -97,6 +98,28 @@ public class GeonamesLocation implements Locations } // when an error occurs after this line, just accept it and work on +/* parse this fields: +--------------------------------------------------- +00 geonameid : integer id of record in geonames database +01 name : name of geographical point (utf8) varchar(200) +02 asciiname : name of geographical point in plain ascii characters, varchar(200) +03 alternatenames : alternatenames, comma separated varchar(5000) +04 latitude : latitude in decimal degrees (wgs84) +05 longitude : longitude in decimal degrees (wgs84) +06 feature class : see http://www.geonames.org/export/codes.html, char(1) +07 feature code : see http://www.geonames.org/export/codes.html, varchar(10) +08 country code : ISO-3166 2-letter country code, 2 characters +09 cc2 : alternate country codes, comma separated, ISO-3166 2-letter country code, 60 characters +10 admin1 code : fipscode (subject to change to iso code), see exceptions below, see file admin1Codes.txt for display names of this code; varchar(20) +11 admin2 code : code for the second administrative division, a county in the US, see file admin2Codes.txt; varchar(80) +12 admin3 code : 
code for third level administrative division, varchar(20) +13 admin4 code : code for fourth level administrative division, varchar(20) +14 population : bigint (8 byte int) +15 elevation : in meters, integer +16 dem : digital elevation model, srtm3 or gtopo30, average elevation of 3''x3'' (ca 90mx90m) or 30''x30'' (ca 900mx900m) area in meters, integer. srtm processed by cgiar/ciat. +17 timezone : the timezone id (see file timeZone.txt) varchar(40) +18 modification date : date of last modification in yyyy-MM-dd format +*/ try { String line; String[] fields; @@ -106,7 +129,9 @@ public class GeonamesLocation implements Locations continue; } fields = line.split("\t"); - final int id = Integer.parseInt(fields[0]); + final long population = Long.parseLong(fields[14]); + if (minPopulation > 0 && population < minPopulation) continue; + final int geonameid = Integer.parseInt(fields[0]); locnames = new HashSet(); locnames.add(new StringBuilder(fields[1])); locnames.add(new StringBuilder(fields[2])); @@ -116,7 +141,7 @@ public class GeonamesLocation implements Locations final GeoLocation c = new GeoLocation(Float.parseFloat(fields[4]), Float.parseFloat(fields[5]), fields[1]); c.setPopulation((int) Long.parseLong(fields[14])); - this.id2loc.put(id, c); + this.id2loc.put(geonameid, c); for ( final StringBuilder name : locnames ) { if (dymLib != null && dymLib.contains(name)) continue; if (name.length() < OverarchingLocation.MINIMUM_NAME_LENGTH) continue; @@ -124,7 +149,7 @@ public class GeonamesLocation implements Locations if ( locs == null ) { locs = new ArrayList(1); } - locs.add(id); + locs.add(geonameid); this.name2ids.put(name, locs); } }