also integrate geonames library files that contain fewer cities. these are more

useful for tagging since fewer ordinary words are falsely identified as
locations
pull/1/head
Michael Peter Christen 13 years ago
parent 5a41e739b4
commit 24bbe359ca

@ -65,9 +65,11 @@
Geolocalization will enable YaCy to present locations from OpenStreetMap according to given search words.
<h4>GeoNames</h4>
<p>With this file it is possible to find cities with a population > 1000 all over the world.</p>
<p>With this file it is possible to find cities all over the world.</p>
<dl>
<dt>Content</dt>
<dd>cities with a population > 1000 all over the world</dd>
<dt><label>Download from</label></dt>
<dd>#[geon0URL]#</dd>
<dt><label>Storage location</label></dt>
@ -99,6 +101,74 @@
<dt>Result</dt><dd><div class="error">cannot activate dictionary file: #[error]#</div></dd>
#(/geon0ActionActivated)#
</dl>
<dl>
<dt>Content</dt>
<dd>cities with a population > 5000 all over the world</dd>
<dt><label>Download from</label></dt>
<dd>#[geon1URL]#</dd>
<dt><label>Storage location</label></dt>
<dd>#[geon1Storage]#</dd>
<dt><label>Status</label></dt>
<dd>#(geon1Status)#<div class="info">not loaded</div>::<div class="commit">loaded</div>::deactivated#(/geon1Status)#</dd>
<dt>Action</dt>
<dd>#(geon1Status)#
<input type="submit" name="geon1Load" value="Load" />::
<input type="submit" name="geon1Deactivate" value="Deactivate" />
<input type="submit" name="geon1Remove" value="Remove" />::
<input type="submit" name="geon1Activate" value="Activate" />
<input type="submit" name="geon1Remove" value="Remove" />
#(/geon1Status)#</dd>
#(geon1ActionLoaded)#::
<dt>Result</dt><dd><div class="commit">loaded and activated dictionary file</div></dd>::
<dt>Result</dt><dd><div class="error">loading of dictionary file failed: #[error]#</div></dd>
#(/geon1ActionLoaded)#
#(geon1ActionRemoved)#::
<dt>Result</dt><dd><div class="commit">deactivated and removed dictionary file</div></dd>::
<dt>Result</dt><dd><div class="error">cannot remove dictionary file: #[error]#</div></dd>
#(/geon1ActionRemoved)#
#(geon1ActionDeactivated)#::
<dt>Result</dt><dd><div class="commit">deactivated dictionary file</div></dd>::
<dt>Result</dt><dd><div class="error">cannot deactivate dictionary file: #[error]#</div></dd>
#(/geon1ActionDeactivated)#
#(geon1ActionActivated)#::
<dt>Result</dt><dd><div class="commit">activated dictionary file</div></dd>::
<dt>Result</dt><dd><div class="error">cannot activate dictionary file: #[error]#</div></dd>
#(/geon1ActionActivated)#
</dl>
<dl>
<dt>Content</dt>
<dd>cities with a population > 100000 all over the world (the set is reduced to cities > 100000)</dd>
<dt><label>Download from</label></dt>
<dd>#[geon2URL]#</dd>
<dt><label>Storage location</label></dt>
<dd>#[geon2Storage]#</dd>
<dt><label>Status</label></dt>
<dd>#(geon2Status)#<div class="info">not loaded</div>::<div class="commit">loaded</div>::deactivated#(/geon2Status)#</dd>
<dt>Action</dt>
<dd>#(geon2Status)#
<input type="submit" name="geon2Load" value="Load" />::
<input type="submit" name="geon2Deactivate" value="Deactivate" />
<input type="submit" name="geon2Remove" value="Remove" />::
<input type="submit" name="geon2Activate" value="Activate" />
<input type="submit" name="geon2Remove" value="Remove" />
#(/geon2Status)#</dd>
#(geon2ActionLoaded)#::
<dt>Result</dt><dd><div class="commit">loaded and activated dictionary file</div></dd>::
<dt>Result</dt><dd><div class="error">loading of dictionary file failed: #[error]#</div></dd>
#(/geon2ActionLoaded)#
#(geon2ActionRemoved)#::
<dt>Result</dt><dd><div class="commit">deactivated and removed dictionary file</div></dd>::
<dt>Result</dt><dd><div class="error">cannot remove dictionary file: #[error]#</div></dd>
#(/geon2ActionRemoved)#
#(geon2ActionDeactivated)#::
<dt>Result</dt><dd><div class="commit">deactivated dictionary file</div></dd>::
<dt>Result</dt><dd><div class="error">cannot deactivate dictionary file: #[error]#</div></dd>
#(/geon2ActionDeactivated)#
#(geon2ActionActivated)#::
<dt>Result</dt><dd><div class="commit">activated dictionary file</div></dd>::
<dt>Result</dt><dd><div class="error">cannot activate dictionary file: #[error]#</div></dd>
#(/geon2ActionActivated)#
</dl>
<h4>OpenGeoDB</h4>
<p>With this file it is possible to find locations in Germany using the location (city) name, a zip code, a car sign or a telephone pre-dial number.</p>

@ -68,7 +68,7 @@ public class DictionaryLoader_p {
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false);
final byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.GEON0.file());
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null));
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null, -1));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geon0Status", LibraryProvider.Dictionary.GEON0.file().exists() ? 1 : 0);
prop.put("geon0ActionLoaded", 1);
@ -98,11 +98,95 @@ public class DictionaryLoader_p {
if (post.containsKey("geon0Activate")) {
LibraryProvider.Dictionary.GEON0.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON0.file());
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null));
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null, -1));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geon0ActionActivated", 1);
}
// GEON1
// Handles the four form actions of the GEON1 dictionary: Load, Remove, Deactivate and Activate.
// Each action reports its outcome through a "geon1Action..." property; the load action uses
// 1 for success and 2 (plus a "..._error" message) for failure.
if (post.containsKey("geon1Load")) {
// load from the net
try {
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false);
final byte[] b = response.getContent();
// write the downloaded bytes to the dictionary file, then register the file as a location source
FileUtils.copy(b, LibraryProvider.Dictionary.GEON1.file());
// third constructor argument is the minimum population; NOTE(review): -1 presumably disables the filter - confirm in GeonamesLocation
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON1.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON1.file(), null, -1));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geon1Status", LibraryProvider.Dictionary.GEON1.file().exists() ? 1 : 0);
prop.put("geon1ActionLoaded", 1);
} catch (final MalformedURLException e) {
Log.logException(e);
prop.put("geon1ActionLoaded", 2);
prop.put("geon1ActionLoaded_error", e.getMessage());
} catch (final IOException e) {
Log.logException(e);
prop.put("geon1ActionLoaded", 2);
prop.put("geon1ActionLoaded_error", e.getMessage());
}
}
if (post.containsKey("geon1Remove")) {
// delete the file in both its active and its disabled location, then unregister the source
FileUtils.deletedelete(LibraryProvider.Dictionary.GEON1.file());
FileUtils.deletedelete(LibraryProvider.Dictionary.GEON1.fileDisabled());
LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEON1.nickname);
prop.put("geon1ActionRemoved", 1);
}
if (post.containsKey("geon1Deactivate")) {
// move the file to its disabled location and unregister the source
// NOTE(review): the boolean result of renameTo is ignored, so success is reported even if the rename failed
LibraryProvider.Dictionary.GEON1.file().renameTo(LibraryProvider.Dictionary.GEON1.fileDisabled());
LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEON1.nickname);
prop.put("geon1ActionDeactivated", 1);
}
if (post.containsKey("geon1Activate")) {
// move the file back from its disabled location and register it again
// NOTE(review): the boolean result of renameTo is ignored here as well
LibraryProvider.Dictionary.GEON1.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON1.file());
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON1.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON1.file(), null, -1));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geon1ActionActivated", 1);
}
// GEON2
// Handles the four form actions of the GEON2 dictionary: Load, Remove, Deactivate and Activate.
// Each action reports its outcome through a "geon2Action..." property; the load action uses
// 1 for success and 2 (plus a "..._error" message) for failure.
if (post.containsKey("geon2Load")) {
// load from the net
try {
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON2.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false);
final byte[] b = response.getContent();
// write the downloaded bytes to the dictionary file, then register the file as a location source
FileUtils.copy(b, LibraryProvider.Dictionary.GEON2.file());
// third constructor argument is the minimum population: this set is loaded with a threshold of 100000
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON2.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON2.file(), null, 100000));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geon2Status", LibraryProvider.Dictionary.GEON2.file().exists() ? 1 : 0);
prop.put("geon2ActionLoaded", 1);
} catch (final MalformedURLException e) {
Log.logException(e);
prop.put("geon2ActionLoaded", 2);
prop.put("geon2ActionLoaded_error", e.getMessage());
} catch (final IOException e) {
Log.logException(e);
prop.put("geon2ActionLoaded", 2);
prop.put("geon2ActionLoaded_error", e.getMessage());
}
}
if (post.containsKey("geon2Remove")) {
// delete the file in both its active and its disabled location, then unregister the source
FileUtils.deletedelete(LibraryProvider.Dictionary.GEON2.file());
FileUtils.deletedelete(LibraryProvider.Dictionary.GEON2.fileDisabled());
LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEON2.nickname);
prop.put("geon2ActionRemoved", 1);
}
if (post.containsKey("geon2Deactivate")) {
// move the file to its disabled location and unregister the source
// NOTE(review): the boolean result of renameTo is ignored, so success is reported even if the rename failed
LibraryProvider.Dictionary.GEON2.file().renameTo(LibraryProvider.Dictionary.GEON2.fileDisabled());
LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEON2.nickname);
prop.put("geon2ActionDeactivated", 1);
}
if (post.containsKey("geon2Activate")) {
// move the file back from its disabled location and register it again with the same 100000 threshold
// NOTE(review): the boolean result of renameTo is ignored here as well
LibraryProvider.Dictionary.GEON2.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON2.file());
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON2.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON2.file(), null, 100000));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geon2ActionActivated", 1);
}
// GEO1
if (post.containsKey("geo1Load")) {
// load from the net
@ -110,7 +194,7 @@ public class DictionaryLoader_p {
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEODB1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false);
final byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.GEODB1.file());
LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB0.nickname);
LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB1.nickname);
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), null));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geo1Status", LibraryProvider.Dictionary.GEODB1.file().exists() ? 1 : 0);

@ -36,6 +36,7 @@ import java.util.regex.Pattern;
import net.yacy.cora.storage.Files;
import net.yacy.document.WordCache.Dictionary;
import net.yacy.document.geolocation.GeoLocation;
import net.yacy.document.geolocation.Locations;
public class Tagging {
@ -52,6 +53,39 @@ public class Tagging {
private String predicate, namespace, objectspace;
/**
 * Helper class: an immutable tuple of a synonym list and an object link.
 * The synonyms are held as one comma-separated string.
 */
public static class SOTuple {
    /** synonyms joined by ',' */
    private final String synonyms;
    /** object link stored alongside the synonyms */
    private final String objectlink;

    /**
     * Creates a tuple from an already comma-separated synonym string.
     *
     * @param synonyms   comma-separated synonyms, stored exactly as given
     * @param objectlink the object link belonging to the synonyms
     */
    public SOTuple(String synonyms, String objectlink) {
        this.synonyms = synonyms;
        this.objectlink = objectlink;
    }

    /**
     * Creates a tuple from an array of synonyms, joined with ',' in array order.
     * An empty array produces an empty synonym string instead of failing.
     *
     * @param synonyms   the synonyms to join; must not be null
     * @param objectlink the object link belonging to the synonyms
     */
    public SOTuple(String[] synonyms, String objectlink) {
        StringBuilder sb = new StringBuilder(synonyms.length * 10);
        // join by index so that empty elements keep their separators
        for (int i = 0; i < synonyms.length; i++) {
            if (i > 0) sb.append(',');
            sb.append(synonyms[i]);
        }
        this.synonyms = sb.toString();
        this.objectlink = objectlink;
    }

    /**
     * @return the synonyms as one comma-separated string
     */
    public String getSynonymsCSV() {
        return this.synonyms;
    }

    /**
     * @return the synonyms split at ','; an empty synonym string yields an array
     *         holding one empty string
     */
    public String[] getSynonymsList() {
        return this.synonyms.split(",");
    }

    /**
     * @return the object link
     */
    public String getObjectlink() {
        return this.objectlink;
    }
}
public Tagging(String name) {
this.navigatorName = name;
this.synonym2term = new ConcurrentHashMap<String, String>();
@ -136,37 +170,104 @@ public class Tagging {
}
}
/**
* helper class: Synonym and Objectlink tuple
*/
public static class SOTuple {
private final String synonyms;
private final String objectlink;
public SOTuple(String synonyms, String objectlink) {
this.synonyms = synonyms;
this.objectlink = objectlink;
}
public SOTuple(String[] synonyms, String objectlink) {
StringBuilder sb = new StringBuilder(synonyms.length * 10);
for (String s: synonyms) sb.append(',').append(s);
this.synonyms = sb.substring(1);
this.objectlink = objectlink;
/**
 * Creates a vocabulary from the names of a location library.
 * Every location name is registered as a term with its normalized form as synonym.
 * When a lookup of the name returns at least one location, the first result is used to
 * attach an OpenStreetMap link (latitude/longitude, zoom 16) to the term.
 *
 * @param name     the name of this vocabulary
 * @param location the location library that provides names and coordinates
 */
public Tagging(String name, Locations location) {
    this(name);
    for (final String placeName : location.locationNames()) {
        final String normalized = normalizeTerm(placeName);
        this.synonym2term.put(normalized, placeName);
        this.term2synonym.put(placeName, normalized);

        // only names that resolve to a location get an object link
        final TreeSet<GeoLocation> hits = location.find(placeName, true);
        if (hits.isEmpty()) {
            continue;
        }
        final GeoLocation first = hits.iterator().next();
        this.term2objectlink.put(placeName, "http://www.openstreetmap.org/?lat=" + first.lat() + "&lon=" + first.lon() + "&zoom=16");
    }
}
public String getSynonymsCSV() {
return this.synonyms;
/**
 * Creates a vocabulary from the words of a dictionary.
 * Each word becomes a term whose synonym is its lower-case form.
 *
 * @param name       the name of this vocabulary
 * @param dictionary the dictionary that provides the words
 */
public Tagging(String name, Dictionary dictionary) {
    this(name);
    for (final StringBuilder entry : dictionary.getWords()) {
        final String term = entry.toString();
        final String lowered = term.toLowerCase();
        this.synonym2term.put(lowered, term);
        this.term2synonym.put(term, lowered);
    }
}
public String[] getSynonymsList() {
return this.synonyms.split(",");
}
public void init() throws IOException {
if (this.propFile == null) return;
this.synonym2term.clear();
this.term2synonym.clear();
this.term2objectlink.clear();
this.synonym2synonyms.clear();
this.namespace = DEFAULT_NAMESPACE;
this.predicate = this.namespace + this.navigatorName;
this.objectspace = null;
public String getObjectlink() {
return this.objectlink;
BlockingQueue<String> list = Files.concurentLineReader(this.propFile, 1000);
String term, v;
String[] tags;
int p;
String line;
try {
vocloop: while ((line = list.take()) != Files.POISON_LINE) {
line = line.trim();
p = line.indexOf('#');
if (p >= 0) {
String comment = line.substring(p + 1).trim();
if (comment.startsWith("namespace:")) {
this.namespace = comment.substring(10).trim();
if (!this.namespace.endsWith("/") && !this.namespace.endsWith("#") && this.namespace.length() > 0) this.namespace += "#";
this.predicate = this.namespace + this.navigatorName;
continue vocloop;
}
if (comment.startsWith("objectspace:")) {
this.objectspace = comment.substring(12).trim();
if (!this.objectspace.endsWith("/") && !this.objectspace.endsWith("#") && this.objectspace.length() > 0) this.objectspace += "#";
continue vocloop;
}
}
String[] pl = parseLine(line);
if (pl == null) {
continue vocloop;
}
if (pl[1] == null) {
term = normalizeKey(pl[0]);
v = normalizeTerm(pl[0]);
this.synonym2term.put(v, term);
this.term2synonym.put(term, v);
if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]);
continue vocloop;
}
term = normalizeKey(pl[0]);
v = pl[1];
tags = v.split(",");
Set<String> synonyms = new HashSet<String>();
synonyms.add(term);
tagloop: for (String synonym: tags) {
if (synonym.length() == 0) continue tagloop;
synonyms.add(synonym);
synonym = normalizeTerm(synonym);
if (synonym.length() == 0) continue tagloop;
synonyms.add(synonym);
this.synonym2term.put(synonym, term);
this.term2synonym.put(term, synonym);
}
String synonym = normalizeTerm(term);
this.synonym2term.put(synonym, term);
this.term2synonym.put(term, synonym);
if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]);
synonyms.add(synonym);
for (String s: synonyms) {
this.synonym2synonyms.put(s, synonyms);
}
}
} catch (InterruptedException e) {
}
}
public int size() {
@ -362,99 +463,6 @@ public class Tagging {
return new String[]{line.substring(0, p), line.substring(p + 1), c};
}
/**
 * Reloads this vocabulary from the property file.
 * <p>
 * Does nothing when no property file is set. Otherwise all term, synonym and object-link
 * maps are cleared, namespace and predicate are reset to their defaults and the file is
 * processed line by line:
 * <ul>
 * <li>if the text after the first '#' starts with <code>namespace:</code> or
 *     <code>objectspace:</code>, the respective value is set; a '#' is appended unless the
 *     value is empty or already ends with '/' or '#'</li>
 * <li>every other line is passed to <code>parseLine</code>; lines it rejects are skipped</li>
 * <li>a parsed line without a synonym list registers the term with its normalized form</li>
 * <li>a parsed line with a comma-separated synonym list registers every non-empty synonym
 *     for the term and records the whole synonym group for each of its members</li>
 * </ul>
 * If the thread is interrupted while waiting for the next line, loading stops with the
 * entries read so far and the interrupt status of the thread is restored.
 *
 * @throws IOException NOTE(review): presumably raised when the line reader cannot open the file - confirm
 */
public void init() throws IOException {
    if (this.propFile == null) return;
    this.synonym2term.clear();
    this.term2synonym.clear();
    this.term2objectlink.clear();
    this.synonym2synonyms.clear();
    this.namespace = DEFAULT_NAMESPACE;
    this.predicate = this.namespace + this.navigatorName;
    this.objectspace = null;

    BlockingQueue<String> list = Files.concurentLineReader(this.propFile, 1000);
    String term, v;
    String[] tags;
    int p;
    String line;
    try {
        // the reader signals the end of input with the POISON_LINE sentinel, compared by identity
        vocloop: while ((line = list.take()) != Files.POISON_LINE) {
            line = line.trim();
            p = line.indexOf('#');
            if (p >= 0) {
                String comment = line.substring(p + 1).trim();
                if (comment.startsWith("namespace:")) {
                    this.namespace = comment.substring(10).trim();
                    if (!this.namespace.endsWith("/") && !this.namespace.endsWith("#") && this.namespace.length() > 0) this.namespace += "#";
                    this.predicate = this.namespace + this.navigatorName;
                    continue vocloop;
                }
                if (comment.startsWith("objectspace:")) {
                    this.objectspace = comment.substring(12).trim();
                    if (!this.objectspace.endsWith("/") && !this.objectspace.endsWith("#") && this.objectspace.length() > 0) this.objectspace += "#";
                    continue vocloop;
                }
            }
            // pl[0]: term, pl[1]: synonym list or null, pl[2]: object link
            String[] pl = parseLine(line);
            if (pl == null) {
                continue vocloop;
            }
            if (pl[1] == null) {
                // term without explicit synonyms
                term = normalizeKey(pl[0]);
                v = normalizeTerm(pl[0]);
                this.synonym2term.put(v, term);
                this.term2synonym.put(term, v);
                if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]);
                continue vocloop;
            }
            term = normalizeKey(pl[0]);
            v = pl[1];
            tags = v.split(",");
            Set<String> synonyms = new HashSet<String>();
            synonyms.add(term);
            tagloop: for (String synonym: tags) {
                if (synonym.length() == 0) continue tagloop;
                // the group holds both the raw and the normalized form of each synonym
                synonyms.add(synonym);
                synonym = normalizeTerm(synonym);
                if (synonym.length() == 0) continue tagloop;
                synonyms.add(synonym);
                this.synonym2term.put(synonym, term);
                this.term2synonym.put(term, synonym);
            }
            // the normalized term itself is registered last, so it ends up as the term's synonym
            String synonym = normalizeTerm(term);
            this.synonym2term.put(synonym, term);
            this.term2synonym.put(term, synonym);
            if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]);
            synonyms.add(synonym);
            for (String s: synonyms) {
                this.synonym2synonyms.put(s, synonyms);
            }
        }
    } catch (InterruptedException e) {
        // keep what was loaded, but do not hide the interruption from the caller
        Thread.currentThread().interrupt();
    }
}
/**
 * Creates a vocabulary from the names of a location library.
 * Every location name is registered as a term with its normalized form as synonym.
 *
 * @param name         the name of this vocabulary
 * @param localization the location library that provides the names
 */
public Tagging(String name, Locations localization) {
    this(name);
    for (final String placeName : localization.locationNames()) {
        final String normalized = normalizeTerm(placeName);
        this.synonym2term.put(normalized, placeName);
        this.term2synonym.put(placeName, normalized);
    }
}
/**
 * Creates a vocabulary from the words of a dictionary.
 * Each word becomes a term whose synonym is its lower-case form.
 *
 * @param name       the name of this vocabulary
 * @param dictionary the dictionary that provides the words
 */
public Tagging(String name, Dictionary dictionary) {
    this(name);
    for (final StringBuilder entry : dictionary.getWords()) {
        final String term = entry.toString();
        final String lowered = term.toLowerCase();
        this.synonym2term.put(lowered, term);
        this.term2synonym.put(term, lowered);
    }
}
/**
* get the predicate name which already contains the prefix url stub
* @return

@ -75,6 +75,8 @@ public class LibraryProvider {
"http://downloads.sourceforge.net/project/opengeodb/Data/0.2.5a/opengeodb-0.2.5a-UTF8-sql.gz" ),
GEODB1( "geo1", "http://fa-technik.adfc.de/code/opengeodb/dump/opengeodb-02624_2011-10-17.sql.gz" ),
GEON0( "geon0", "http://download.geonames.org/export/dump/cities1000.zip" ),
GEON1( "geon1", "http://download.geonames.org/export/dump/cities5000.zip" ),
GEON2( "geon2", "http://download.geonames.org/export/dump/cities15000.zip" ),
DRW0( "drw0", "http://www.ids-mannheim.de/kl/derewo/derewo-v-100000t-2009-04-30-0.1.zip" ),
PND0( "pnd0", "http://downloads.dbpedia.org/3.7-i18n/de/pnd_de.nt.bz2" );
@ -119,7 +121,9 @@ public class LibraryProvider {
activateDeReWo();
initDidYouMean();
integrateOpenGeoDB();
integrateGeonames();
integrateGeonames0(-1);
integrateGeonames1(-1);
integrateGeonames2(100000);
activatePND();
Set<String> allTags = new HashSet<String>() ;
allTags.addAll(autotagging.allTags()); // we must copy this into a clone to prevent circularity
@ -144,10 +148,24 @@ public class LibraryProvider {
}
}
public static void integrateGeonames() {
public static void integrateGeonames0(long minPopulation) {
final File geon = Dictionary.GEON0.file();
if ( geon.exists() ) {
geoLoc.activateLocation(Dictionary.GEON0.nickname, new GeonamesLocation(geon, dymLib));
geoLoc.activateLocation(Dictionary.GEON0.nickname, new GeonamesLocation(geon, dymLib, minPopulation));
return;
}
}
/**
 * Activates the GEON1 geonames dictionary as a location source if its file exists;
 * does nothing otherwise.
 *
 * @param minPopulation population threshold handed to the GeonamesLocation constructor
 */
public static void integrateGeonames1(long minPopulation) {
    final File dictionaryFile = Dictionary.GEON1.file();
    if (!dictionaryFile.exists()) {
        return;
    }
    geoLoc.activateLocation(
            Dictionary.GEON1.nickname,
            new GeonamesLocation(dictionaryFile, dymLib, minPopulation));
}
/**
 * Activates the GEON2 geonames dictionary as a location source if its file exists;
 * does nothing otherwise.
 *
 * @param minPopulation population threshold handed to the GeonamesLocation constructor
 */
public static void integrateGeonames2(long minPopulation) {
    final File dictionaryFile = Dictionary.GEON2.file();
    if (!dictionaryFile.exists()) {
        return;
    }
    geoLoc.activateLocation(
            Dictionary.GEON2.nickname,
            new GeonamesLocation(dictionaryFile, dymLib, minPopulation));
}
@ -296,13 +314,6 @@ public class LibraryProvider {
InputStream derewoTxtEntry;
try {
final ZipFile zip = new ZipFile(file);
/*
final Enumeration<? extends ZipEntry> i = zip.entries();
while (i.hasMoreElements()) {
final ZipEntry e = i.nextElement();
System.out.println("loadDeReWo: " + e.getName());
}
*/
derewoTxtEntry = zip.getInputStream(zip.getEntry("derewo-v-100000t-2009-04-30-0.1"));
} catch ( final ZipException e ) {
Log.logException(e);

@ -73,8 +73,7 @@ public class GeonamesLocation implements Locations
private final Map<Integer, GeoLocation> id2loc;
private final TreeMap<StringBuilder, List<Integer>> name2ids;
private final File file;
public GeonamesLocation(final File file, WordCache dymLib) {
public GeonamesLocation(final File file, WordCache dymLib, long minPopulation) {
// this is a processing of the cities1000.zip file from http://download.geonames.org/export/dump/
this.file = file;
@ -88,7 +87,9 @@ public class GeonamesLocation implements Locations
BufferedReader reader;
try {
final ZipFile zf = new ZipFile(file);
final ZipEntry ze = zf.getEntry("cities1000.txt");
String entryName = file.getName();
entryName = entryName.substring(0, entryName.length() - 3) + "txt";
final ZipEntry ze = zf.getEntry(entryName);
final InputStream is = zf.getInputStream(ze);
reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
} catch ( final IOException e ) {
@ -97,6 +98,28 @@ public class GeonamesLocation implements Locations
}
// when an error occurs after this line, just accept it and work on
/* parse these fields:
---------------------------------------------------
00 geonameid : integer id of record in geonames database
01 name : name of geographical point (utf8) varchar(200)
02 asciiname : name of geographical point in plain ascii characters, varchar(200)
03 alternatenames : alternatenames, comma separated varchar(5000)
04 latitude : latitude in decimal degrees (wgs84)
05 longitude : longitude in decimal degrees (wgs84)
06 feature class : see http://www.geonames.org/export/codes.html, char(1)
07 feature code : see http://www.geonames.org/export/codes.html, varchar(10)
08 country code : ISO-3166 2-letter country code, 2 characters
09 cc2 : alternate country codes, comma separated, ISO-3166 2-letter country code, 60 characters
10 admin1 code : fipscode (subject to change to iso code), see exceptions below, see file admin1Codes.txt for display names of this code; varchar(20)
11 admin2 code : code for the second administrative division, a county in the US, see file admin2Codes.txt; varchar(80)
12 admin3 code : code for third level administrative division, varchar(20)
13 admin4 code : code for fourth level administrative division, varchar(20)
14 population : bigint (8 byte int)
15 elevation : in meters, integer
16 dem : digital elevation model, srtm3 or gtopo30, average elevation of 3''x3'' (ca 90mx90m) or 30''x30'' (ca 900mx900m) area in meters, integer. srtm processed by cgiar/ciat.
17 timezone : the timezone id (see file timeZone.txt) varchar(40)
18 modification date : date of last modification in yyyy-MM-dd format
*/
try {
String line;
String[] fields;
@ -106,7 +129,9 @@ public class GeonamesLocation implements Locations
continue;
}
fields = line.split("\t");
final int id = Integer.parseInt(fields[0]);
final long population = Long.parseLong(fields[14]);
if (minPopulation > 0 && population < minPopulation) continue;
final int geonameid = Integer.parseInt(fields[0]);
locnames = new HashSet<StringBuilder>();
locnames.add(new StringBuilder(fields[1]));
locnames.add(new StringBuilder(fields[2]));
@ -116,7 +141,7 @@ public class GeonamesLocation implements Locations
final GeoLocation c =
new GeoLocation(Float.parseFloat(fields[4]), Float.parseFloat(fields[5]), fields[1]);
c.setPopulation((int) Long.parseLong(fields[14]));
this.id2loc.put(id, c);
this.id2loc.put(geonameid, c);
for ( final StringBuilder name : locnames ) {
if (dymLib != null && dymLib.contains(name)) continue;
if (name.length() < OverarchingLocation.MINIMUM_NAME_LENGTH) continue;
@ -124,7 +149,7 @@ public class GeonamesLocation implements Locations
if ( locs == null ) {
locs = new ArrayList<Integer>(1);
}
locs.add(id);
locs.add(geonameid);
this.name2ids.put(name, locs);
}
}

Loading…
Cancel
Save