diff --git a/htroot/DictionaryLoader_p.html b/htroot/DictionaryLoader_p.html
index fd48f6c6a..8d3d0fdfe 100644
--- a/htroot/DictionaryLoader_p.html
+++ b/htroot/DictionaryLoader_p.html
@@ -65,9 +65,11 @@
Geolocalization will enable YaCy to present locations from OpenStreetMap according to given search words.
GeoNames
- With this file it is possible to find cities with a population > 1000 all over the world.
+ With this file it is possible to find cities all over the world.
+ Content
+ cities with a population > 1000 all over the world
Download from
#[geon0URL]#
Storage location
@@ -99,6 +101,74 @@
Result cannot activate dictionary file: #[error]#
#(/geon0ActionActivated)#
+
+ Content
+ cities with a population > 5000 all over the world
+ Download from
+ #[geon1URL]#
+ Storage location
+ #[geon1Storage]#
+ Status
+ #(geon1Status)#not loaded
::loaded
::deactivated#(/geon1Status)#
+ Action
+ #(geon1Status)#
+ ::
+
+ ::
+
+
+ #(/geon1Status)#
+ #(geon1ActionLoaded)#::
+ Result loaded and activated dictionary file
::
+ Result loading of dictionary file failed: #[error]#
+ #(/geon1ActionLoaded)#
+ #(geon1ActionRemoved)#::
+ Result deactivated and removed dictionary file
::
+ Result cannot remove dictionary file: #[error]#
+ #(/geon1ActionRemoved)#
+ #(geon1ActionDeactivated)#::
+ Result deactivated dictionary file
::
+ Result cannot deactivate dictionary file: #[error]#
+ #(/geon1ActionDeactivated)#
+ #(geon1ActionActivated)#::
+ Result activated dictionary file
::
+ Result cannot activate dictionary file: #[error]#
+ #(/geon1ActionActivated)#
+
+
+ Content
+ cities with a population > 100000 all over the world (the set is reduced to cities > 100000)
+ Download from
+ #[geon2URL]#
+ Storage location
+ #[geon2Storage]#
+ Status
+ #(geon2Status)#not loaded
::loaded
::deactivated#(/geon2Status)#
+ Action
+ #(geon2Status)#
+ ::
+
+ ::
+
+
+ #(/geon2Status)#
+ #(geon2ActionLoaded)#::
+ Result loaded and activated dictionary file
::
+ Result loading of dictionary file failed: #[error]#
+ #(/geon2ActionLoaded)#
+ #(geon2ActionRemoved)#::
+ Result deactivated and removed dictionary file
::
+ Result cannot remove dictionary file: #[error]#
+ #(/geon2ActionRemoved)#
+ #(geon2ActionDeactivated)#::
+ Result deactivated dictionary file
::
+ Result cannot deactivate dictionary file: #[error]#
+ #(/geon2ActionDeactivated)#
+ #(geon2ActionActivated)#::
+ Result activated dictionary file
::
+ Result cannot activate dictionary file: #[error]#
+ #(/geon2ActionActivated)#
+
OpenGeoDB
With this file it is possible to find locations in Germany using the location (city) name, a zip code, a car sign or a telephone pre-dial number.
diff --git a/htroot/DictionaryLoader_p.java b/htroot/DictionaryLoader_p.java
index c5906fdb9..70ba014ee 100644
--- a/htroot/DictionaryLoader_p.java
+++ b/htroot/DictionaryLoader_p.java
@@ -68,7 +68,7 @@ public class DictionaryLoader_p {
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false);
final byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.GEON0.file());
- LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null));
+ LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null, -1));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geon0Status", LibraryProvider.Dictionary.GEON0.file().exists() ? 1 : 0);
prop.put("geon0ActionLoaded", 1);
@@ -98,11 +98,95 @@ public class DictionaryLoader_p {
if (post.containsKey("geon0Activate")) {
LibraryProvider.Dictionary.GEON0.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON0.file());
- LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null));
+ LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null, -1));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geon0ActionActivated", 1);
}
+ // GEON1
+ if (post.containsKey("geon1Load")) {
+ // load from the net
+ try {
+ final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false);
+ final byte[] b = response.getContent();
+ FileUtils.copy(b, LibraryProvider.Dictionary.GEON1.file());
+ LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON1.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON1.file(), null, -1));
+ LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
+ prop.put("geon1Status", LibraryProvider.Dictionary.GEON1.file().exists() ? 1 : 0);
+ prop.put("geon1ActionLoaded", 1);
+ } catch (final MalformedURLException e) {
+ Log.logException(e);
+ prop.put("geon1ActionLoaded", 2);
+ prop.put("geon1ActionLoaded_error", e.getMessage());
+ } catch (final IOException e) {
+ Log.logException(e);
+ prop.put("geon1ActionLoaded", 2);
+ prop.put("geon1ActionLoaded_error", e.getMessage());
+ }
+ }
+
+ if (post.containsKey("geon1Remove")) {
+ FileUtils.deletedelete(LibraryProvider.Dictionary.GEON1.file());
+ FileUtils.deletedelete(LibraryProvider.Dictionary.GEON1.fileDisabled());
+ LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEON1.nickname);
+ prop.put("geon1ActionRemoved", 1);
+ }
+
+ if (post.containsKey("geon1Deactivate")) {
+ LibraryProvider.Dictionary.GEON1.file().renameTo(LibraryProvider.Dictionary.GEON1.fileDisabled());
+ LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEON1.nickname);
+ prop.put("geon1ActionDeactivated", 1);
+ }
+
+ if (post.containsKey("geon1Activate")) {
+ LibraryProvider.Dictionary.GEON1.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON1.file());
+ LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON1.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON1.file(), null, -1));
+ LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
+ prop.put("geon1ActionActivated", 1);
+ }
+
+ // GEON2
+ if (post.containsKey("geon2Load")) {
+ // load from the net
+ try {
+ final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON2.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false);
+ final byte[] b = response.getContent();
+ FileUtils.copy(b, LibraryProvider.Dictionary.GEON2.file());
+ LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON2.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON2.file(), null, 100000));
+ LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
+ prop.put("geon2Status", LibraryProvider.Dictionary.GEON2.file().exists() ? 1 : 0);
+ prop.put("geon2ActionLoaded", 1);
+ } catch (final MalformedURLException e) {
+ Log.logException(e);
+ prop.put("geon2ActionLoaded", 2);
+ prop.put("geon2ActionLoaded_error", e.getMessage());
+ } catch (final IOException e) {
+ Log.logException(e);
+ prop.put("geon2ActionLoaded", 2);
+ prop.put("geon2ActionLoaded_error", e.getMessage());
+ }
+ }
+
+ if (post.containsKey("geon2Remove")) {
+ FileUtils.deletedelete(LibraryProvider.Dictionary.GEON2.file());
+ FileUtils.deletedelete(LibraryProvider.Dictionary.GEON2.fileDisabled());
+ LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEON2.nickname);
+ prop.put("geon2ActionRemoved", 1);
+ }
+
+ if (post.containsKey("geon2Deactivate")) {
+ LibraryProvider.Dictionary.GEON2.file().renameTo(LibraryProvider.Dictionary.GEON2.fileDisabled());
+ LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEON2.nickname);
+ prop.put("geon2ActionDeactivated", 1);
+ }
+
+ if (post.containsKey("geon2Activate")) {
+ LibraryProvider.Dictionary.GEON2.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON2.file());
+ LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON2.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON2.file(), null, 100000));
+ LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
+ prop.put("geon2ActionActivated", 1);
+ }
+
// GEO1
if (post.containsKey("geo1Load")) {
// load from the net
@@ -110,7 +194,7 @@ public class DictionaryLoader_p {
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEODB1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false);
final byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.GEODB1.file());
- LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB0.nickname);
+ LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB1.nickname);
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), null));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geo1Status", LibraryProvider.Dictionary.GEODB1.file().exists() ? 1 : 0);
diff --git a/source/net/yacy/cora/lod/vocabulary/Tagging.java b/source/net/yacy/cora/lod/vocabulary/Tagging.java
index 57604d0aa..3239deb48 100644
--- a/source/net/yacy/cora/lod/vocabulary/Tagging.java
+++ b/source/net/yacy/cora/lod/vocabulary/Tagging.java
@@ -36,6 +36,7 @@ import java.util.regex.Pattern;
import net.yacy.cora.storage.Files;
import net.yacy.document.WordCache.Dictionary;
+import net.yacy.document.geolocation.GeoLocation;
import net.yacy.document.geolocation.Locations;
public class Tagging {
@@ -52,6 +53,39 @@ public class Tagging {
private String predicate, namespace, objectspace;
+ /**
+ * helper class: Synonym and Objectlink tuple
+ */
+ public static class SOTuple {
+ private final String synonyms;
+ private final String objectlink;
+
+ public SOTuple(String synonyms, String objectlink) {
+ this.synonyms = synonyms;
+ this.objectlink = objectlink;
+ }
+
+ public SOTuple(String[] synonyms, String objectlink) {
+ StringBuilder sb = new StringBuilder(synonyms.length * 10);
+ for (String s: synonyms) sb.append(',').append(s);
+ this.synonyms = sb.substring(1);
+ this.objectlink = objectlink;
+ }
+
+ public String getSynonymsCSV() {
+ return this.synonyms;
+ }
+
+ public String[] getSynonymsList() {
+ return this.synonyms.split(",");
+ }
+
+ public String getObjectlink() {
+ return this.objectlink;
+ }
+
+ }
+
public Tagging(String name) {
this.navigatorName = name;
this.synonym2term = new ConcurrentHashMap();
@@ -136,37 +170,104 @@ public class Tagging {
}
}
- /**
- * helper class: Synonym and Objectlink tuple
- */
- public static class SOTuple {
- private final String synonyms;
- private final String objectlink;
-
- public SOTuple(String synonyms, String objectlink) {
- this.synonyms = synonyms;
- this.objectlink = objectlink;
- }
-
- public SOTuple(String[] synonyms, String objectlink) {
- StringBuilder sb = new StringBuilder(synonyms.length * 10);
- for (String s: synonyms) sb.append(',').append(s);
- this.synonyms = sb.substring(1);
- this.objectlink = objectlink;
+ public Tagging(String name, Locations location) {
+ this(name);
+ Set locNames = location.locationNames();
+ TreeSet geo;
+ GeoLocation g;
+ for (String loc: locNames) {
+ String syn = normalizeTerm(loc);
+ this.synonym2term.put(syn, loc);
+ this.term2synonym.put(loc, syn);
+ geo = location.find(loc, true);
+ if (geo.size() > 0) {
+ g = geo.iterator().next();
+ this.term2objectlink.put(loc, "http://www.openstreetmap.org/?lat=" + g.lat() + "&lon=" + g.lon() + "&zoom=16");
+ }
}
+ }
- public String getSynonymsCSV() {
- return this.synonyms;
+ public Tagging(String name, Dictionary dictionary) {
+ this(name);
+ Set words = dictionary.getWords();
+ String s;
+ for (StringBuilder word: words) {
+ s = word.toString();
+ this.synonym2term.put(s.toLowerCase(), s);
+ this.term2synonym.put(s, s.toLowerCase());
}
+ }
- public String[] getSynonymsList() {
- return this.synonyms.split(",");
- }
+ public void init() throws IOException {
+ if (this.propFile == null) return;
+ this.synonym2term.clear();
+ this.term2synonym.clear();
+ this.term2objectlink.clear();
+ this.synonym2synonyms.clear();
+ this.namespace = DEFAULT_NAMESPACE;
+ this.predicate = this.namespace + this.navigatorName;
+ this.objectspace = null;
- public String getObjectlink() {
- return this.objectlink;
+ BlockingQueue list = Files.concurentLineReader(this.propFile, 1000);
+ String term, v;
+ String[] tags;
+ int p;
+ String line;
+ try {
+ vocloop: while ((line = list.take()) != Files.POISON_LINE) {
+ line = line.trim();
+ p = line.indexOf('#');
+ if (p >= 0) {
+ String comment = line.substring(p + 1).trim();
+ if (comment.startsWith("namespace:")) {
+ this.namespace = comment.substring(10).trim();
+ if (!this.namespace.endsWith("/") && !this.namespace.endsWith("#") && this.namespace.length() > 0) this.namespace += "#";
+ this.predicate = this.namespace + this.navigatorName;
+ continue vocloop;
+ }
+ if (comment.startsWith("objectspace:")) {
+ this.objectspace = comment.substring(12).trim();
+ if (!this.objectspace.endsWith("/") && !this.objectspace.endsWith("#") && this.objectspace.length() > 0) this.objectspace += "#";
+ continue vocloop;
+ }
+ }
+ String[] pl = parseLine(line);
+ if (pl == null) {
+ continue vocloop;
+ }
+ if (pl[1] == null) {
+ term = normalizeKey(pl[0]);
+ v = normalizeTerm(pl[0]);
+ this.synonym2term.put(v, term);
+ this.term2synonym.put(term, v);
+ if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]);
+ continue vocloop;
+ }
+ term = normalizeKey(pl[0]);
+ v = pl[1];
+ tags = v.split(",");
+ Set synonyms = new HashSet();
+ synonyms.add(term);
+ tagloop: for (String synonym: tags) {
+ if (synonym.length() == 0) continue tagloop;
+ synonyms.add(synonym);
+ synonym = normalizeTerm(synonym);
+ if (synonym.length() == 0) continue tagloop;
+ synonyms.add(synonym);
+ this.synonym2term.put(synonym, term);
+ this.term2synonym.put(term, synonym);
+ }
+ String synonym = normalizeTerm(term);
+ this.synonym2term.put(synonym, term);
+ this.term2synonym.put(term, synonym);
+ if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]);
+ synonyms.add(synonym);
+ for (String s: synonyms) {
+ this.synonym2synonyms.put(s, synonyms);
+ }
+ }
+ } catch (InterruptedException e) {
}
-
}
public int size() {
@@ -362,99 +463,6 @@ public class Tagging {
return new String[]{line.substring(0, p), line.substring(p + 1), c};
}
- public void init() throws IOException {
- if (this.propFile == null) return;
- this.synonym2term.clear();
- this.term2synonym.clear();
- this.term2objectlink.clear();
- this.synonym2synonyms.clear();
- this.namespace = DEFAULT_NAMESPACE;
- this.predicate = this.namespace + this.navigatorName;
- this.objectspace = null;
-
- BlockingQueue list = Files.concurentLineReader(this.propFile, 1000);
- String term, v;
- String[] tags;
- int p;
- String line;
- try {
- vocloop: while ((line = list.take()) != Files.POISON_LINE) {
- line = line.trim();
- p = line.indexOf('#');
- if (p >= 0) {
- String comment = line.substring(p + 1).trim();
- if (comment.startsWith("namespace:")) {
- this.namespace = comment.substring(10).trim();
- if (!this.namespace.endsWith("/") && !this.namespace.endsWith("#") && this.namespace.length() > 0) this.namespace += "#";
- this.predicate = this.namespace + this.navigatorName;
- continue vocloop;
- }
- if (comment.startsWith("objectspace:")) {
- this.objectspace = comment.substring(12).trim();
- if (!this.objectspace.endsWith("/") && !this.objectspace.endsWith("#") && this.objectspace.length() > 0) this.objectspace += "#";
- continue vocloop;
- }
- }
- String[] pl = parseLine(line);
- if (pl == null) {
- continue vocloop;
- }
- if (pl[1] == null) {
- term = normalizeKey(pl[0]);
- v = normalizeTerm(pl[0]);
- this.synonym2term.put(v, term);
- this.term2synonym.put(term, v);
- if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]);
- continue vocloop;
- }
- term = normalizeKey(pl[0]);
- v = pl[1];
- tags = v.split(",");
- Set synonyms = new HashSet();
- synonyms.add(term);
- tagloop: for (String synonym: tags) {
- if (synonym.length() == 0) continue tagloop;
- synonyms.add(synonym);
- synonym = normalizeTerm(synonym);
- if (synonym.length() == 0) continue tagloop;
- synonyms.add(synonym);
- this.synonym2term.put(synonym, term);
- this.term2synonym.put(term, synonym);
- }
- String synonym = normalizeTerm(term);
- this.synonym2term.put(synonym, term);
- this.term2synonym.put(term, synonym);
- if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]);
- synonyms.add(synonym);
- for (String s: synonyms) {
- this.synonym2synonyms.put(s, synonyms);
- }
- }
- } catch (InterruptedException e) {
- }
- }
-
- public Tagging(String name, Locations localization) {
- this(name);
- Set locNames = localization.locationNames();
- for (String loc: locNames) {
- String syn = normalizeTerm(loc);
- this.synonym2term.put(syn, loc);
- this.term2synonym.put(loc, syn);
- }
- }
-
- public Tagging(String name, Dictionary dictionary) {
- this(name);
- Set words = dictionary.getWords();
- String s;
- for (StringBuilder word: words) {
- s = word.toString();
- this.synonym2term.put(s.toLowerCase(), s);
- this.term2synonym.put(s, s.toLowerCase());
- }
- }
-
/**
* get the predicate name which already contains the prefix url stub
* @return
diff --git a/source/net/yacy/document/LibraryProvider.java b/source/net/yacy/document/LibraryProvider.java
index c0bf959f8..440d07147 100644
--- a/source/net/yacy/document/LibraryProvider.java
+++ b/source/net/yacy/document/LibraryProvider.java
@@ -75,6 +75,8 @@ public class LibraryProvider {
"http://downloads.sourceforge.net/project/opengeodb/Data/0.2.5a/opengeodb-0.2.5a-UTF8-sql.gz" ),
GEODB1( "geo1", "http://fa-technik.adfc.de/code/opengeodb/dump/opengeodb-02624_2011-10-17.sql.gz" ),
GEON0( "geon0", "http://download.geonames.org/export/dump/cities1000.zip" ),
+ GEON1( "geon1", "http://download.geonames.org/export/dump/cities5000.zip" ),
+ GEON2( "geon2", "http://download.geonames.org/export/dump/cities15000.zip" ),
DRW0( "drw0", "http://www.ids-mannheim.de/kl/derewo/derewo-v-100000t-2009-04-30-0.1.zip" ),
PND0( "pnd0", "http://downloads.dbpedia.org/3.7-i18n/de/pnd_de.nt.bz2" );
@@ -119,7 +121,9 @@ public class LibraryProvider {
activateDeReWo();
initDidYouMean();
integrateOpenGeoDB();
- integrateGeonames();
+ integrateGeonames0(-1);
+ integrateGeonames1(-1);
+ integrateGeonames2(100000);
activatePND();
Set allTags = new HashSet() ;
allTags.addAll(autotagging.allTags()); // we must copy this into a clone to prevent circularity
@@ -144,10 +148,24 @@ public class LibraryProvider {
}
}
- public static void integrateGeonames() {
+ public static void integrateGeonames0(long minPopulation) {
final File geon = Dictionary.GEON0.file();
if ( geon.exists() ) {
- geoLoc.activateLocation(Dictionary.GEON0.nickname, new GeonamesLocation(geon, dymLib));
+ geoLoc.activateLocation(Dictionary.GEON0.nickname, new GeonamesLocation(geon, dymLib, minPopulation));
+ return;
+ }
+ }
+ public static void integrateGeonames1(long minPopulation) {
+ final File geon = Dictionary.GEON1.file();
+ if ( geon.exists() ) {
+ geoLoc.activateLocation(Dictionary.GEON1.nickname, new GeonamesLocation(geon, dymLib, minPopulation));
+ return;
+ }
+ }
+ public static void integrateGeonames2(long minPopulation) {
+ final File geon = Dictionary.GEON2.file();
+ if ( geon.exists() ) {
+ geoLoc.activateLocation(Dictionary.GEON2.nickname, new GeonamesLocation(geon, dymLib, minPopulation));
return;
}
}
@@ -296,13 +314,6 @@ public class LibraryProvider {
InputStream derewoTxtEntry;
try {
final ZipFile zip = new ZipFile(file);
- /*
- final Enumeration extends ZipEntry> i = zip.entries();
- while (i.hasMoreElements()) {
- final ZipEntry e = i.nextElement();
- System.out.println("loadDeReWo: " + e.getName());
- }
- */
derewoTxtEntry = zip.getInputStream(zip.getEntry("derewo-v-100000t-2009-04-30-0.1"));
} catch ( final ZipException e ) {
Log.logException(e);
diff --git a/source/net/yacy/document/geolocation/GeonamesLocation.java b/source/net/yacy/document/geolocation/GeonamesLocation.java
index f3f2d6ab2..5b5842b26 100644
--- a/source/net/yacy/document/geolocation/GeonamesLocation.java
+++ b/source/net/yacy/document/geolocation/GeonamesLocation.java
@@ -73,8 +73,7 @@ public class GeonamesLocation implements Locations
private final Map id2loc;
private final TreeMap> name2ids;
private final File file;
-
- public GeonamesLocation(final File file, WordCache dymLib) {
+ public GeonamesLocation(final File file, WordCache dymLib, long minPopulation) {
// this is a processing of the cities1000.zip file from http://download.geonames.org/export/dump/
this.file = file;
@@ -88,7 +87,9 @@ public class GeonamesLocation implements Locations
BufferedReader reader;
try {
final ZipFile zf = new ZipFile(file);
- final ZipEntry ze = zf.getEntry("cities1000.txt");
+ String entryName = file.getName();
+ entryName = entryName.substring(0, entryName.length() - 3) + "txt";
+ final ZipEntry ze = zf.getEntry(entryName);
final InputStream is = zf.getInputStream(ze);
reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
} catch ( final IOException e ) {
@@ -97,6 +98,28 @@ public class GeonamesLocation implements Locations
}
// when an error occurs after this line, just accept it and work on
+/* parse these fields:
+---------------------------------------------------
+00 geonameid : integer id of record in geonames database
+01 name : name of geographical point (utf8) varchar(200)
+02 asciiname : name of geographical point in plain ascii characters, varchar(200)
+03 alternatenames : alternatenames, comma separated varchar(5000)
+04 latitude : latitude in decimal degrees (wgs84)
+05 longitude : longitude in decimal degrees (wgs84)
+06 feature class : see http://www.geonames.org/export/codes.html, char(1)
+07 feature code : see http://www.geonames.org/export/codes.html, varchar(10)
+08 country code : ISO-3166 2-letter country code, 2 characters
+09 cc2 : alternate country codes, comma separated, ISO-3166 2-letter country code, 60 characters
+10 admin1 code : fipscode (subject to change to iso code), see exceptions below, see file admin1Codes.txt for display names of this code; varchar(20)
+11 admin2 code : code for the second administrative division, a county in the US, see file admin2Codes.txt; varchar(80)
+12 admin3 code : code for third level administrative division, varchar(20)
+13 admin4 code : code for fourth level administrative division, varchar(20)
+14 population : bigint (8 byte int)
+15 elevation : in meters, integer
+16 dem : digital elevation model, srtm3 or gtopo30, average elevation of 3''x3'' (ca 90mx90m) or 30''x30'' (ca 900mx900m) area in meters, integer. srtm processed by cgiar/ciat.
+17 timezone : the timezone id (see file timeZone.txt) varchar(40)
+18 modification date : date of last modification in yyyy-MM-dd format
+*/
try {
String line;
String[] fields;
@@ -106,7 +129,9 @@ public class GeonamesLocation implements Locations
continue;
}
fields = line.split("\t");
- final int id = Integer.parseInt(fields[0]);
+ final long population = Long.parseLong(fields[14]);
+ if (minPopulation > 0 && population < minPopulation) continue;
+ final int geonameid = Integer.parseInt(fields[0]);
locnames = new HashSet();
locnames.add(new StringBuilder(fields[1]));
locnames.add(new StringBuilder(fields[2]));
@@ -116,7 +141,7 @@ public class GeonamesLocation implements Locations
final GeoLocation c =
new GeoLocation(Float.parseFloat(fields[4]), Float.parseFloat(fields[5]), fields[1]);
c.setPopulation((int) Long.parseLong(fields[14]));
- this.id2loc.put(id, c);
+ this.id2loc.put(geonameid, c);
for ( final StringBuilder name : locnames ) {
if (dymLib != null && dymLib.contains(name)) continue;
if (name.length() < OverarchingLocation.MINIMUM_NAME_LENGTH) continue;
@@ -124,7 +149,7 @@ public class GeonamesLocation implements Locations
if ( locs == null ) {
locs = new ArrayList(1);
}
- locs.add(id);
+ locs.add(geonameid);
this.name2ids.put(name, locs);
}
}