added another geolocalization data source: GeoNames

- added downloader option in DictionaryLoader
- added generalization (interfaces and overarching localization)
- more abstraction using the libraries

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6879 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent c9862e0ca9
commit e43e61e502

@ -1,4 +1,4 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 1<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml"> <html xmlns="http://www.w3.org/1999/xhtml">
<head> <head>
<title>YaCy '#[clientname]#': Dictionary Loader</title> <title>YaCy '#[clientname]#': Dictionary Loader</title>
@ -18,8 +18,46 @@
<form action="DictionaryLoader_p.html" method="post" enctype="multipart/form-data"> <form action="DictionaryLoader_p.html" method="post" enctype="multipart/form-data">
<fieldset> <fieldset>
<legend>Geolocalization</legend> <legend>Geolocalization</legend>
The geolocalization file will enable YaCy to present locations from OpenStreetMap according to given search words. Geolocalization will enable YaCy to present locations from OpenStreetMap according to given search words.
With this file it is possible to find locations using the location (city) name, a zip code, a car sign or a telephone pre-dial number.
<h4>GeoNames</h4>
<p>With this file it is possible to find cities with a population > 1000 all over the world.</p>
<dl>
<dt><label>Download from</label></dt>
<dd>#[geon0URL]#</dd>
<dt><label>Storage location</label></dt>
<dd>#[geon0Storage]#</dd>
<dt><label>Status</label></dt>
<dd>#(geon0Status)#<div class="info">not loaded</div>::<div class="commit">loaded</div>::de-activated#(/geon0Status)#</dd>
<dt></dt>
<dd>#(geon0Status)#
<input type="submit" name="geon0Load" value="Load" />::
<input type="submit" name="geon0Deactivate" value="de-Activate" />
<input type="submit" name="geon0Remove" value="Remove" />::
<input type="submit" name="geon0Activate" value="Activate" />
<input type="submit" name="geon0Remove" value="Remove" />
#(/geon0Status)#</dd>
#(geon0ActionLoaded)#::
<dt></dt><dd><div class="commit">loaded and activated dictionary file</div></dd>::
<dt></dt><dd><div class="error">loading of dictionary file failed: #[error]#</div></dd>
#(/geon0ActionLoaded)#
#(geon0ActionRemoved)#::
<dt></dt><dd><div class="commit">de-activated and removed dictionary file</div></dd>::
<dt></dt><dd><div class="error">cannot remove dictionary file: #[error]#</div></dd>
#(/geon0ActionRemoved)#
#(geon0ActionDeactivated)#::
<dt></dt><dd><div class="commit">de-activated dictionary file</div></dd>::
<dt></dt><dd><div class="error">cannot de-activate dictionary file: #[error]#</div></dd>
#(/geon0ActionDeactivated)#
#(geon0ActionActivated)#::
<dt></dt><dd><div class="commit">activated dictionary file</div></dd>::
<dt></dt><dd><div class="error">cannot activate dictionary file: #[error]#</div></dd>
#(/geon0ActionActivated)#
</dl>
<h4>OpenGeoDB</h4>
<p>With this file it is possible to find locations in Germany using the location (city) name, a zip code, a car sign or a telephone pre-dial number.</p>
<dl> <dl>
<dt><label>Download from</label></dt> <dt><label>Download from</label></dt>

@ -21,7 +21,8 @@
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import net.yacy.document.geolocalization.OpenGeoDB; import net.yacy.document.geolocalization.GeonamesLocalization;
import net.yacy.document.geolocalization.OpenGeoDBLocalization;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.FileUtils;
@ -58,15 +59,56 @@ public class DictionaryLoader_p {
if (post == null) return prop; if (post == null) return prop;
// GEON0
if (post.containsKey("geon0Load")) {
// load from the net
try {
Response response = sb.loader.load(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true, CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE);
byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.GEON0.file());
LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocalization(LibraryProvider.Dictionary.GEON0.file()));
prop.put("geon0Status", LibraryProvider.Dictionary.GEON0.file().exists() ? 1 : 0);
prop.put("geon0ActionLoaded", 1);
} catch (MalformedURLException e) {
Log.logException(e);
prop.put("geon0ActionLoaded", 2);
prop.put("geon0ActionLoaded_error", e.getMessage());
} catch (IOException e) {
Log.logException(e);
prop.put("geon0ActionLoaded", 2);
prop.put("geon0ActionLoaded_error", e.getMessage());
}
}
if (post.containsKey("geon0Remove")) {
FileUtils.deletedelete(LibraryProvider.Dictionary.GEON0.file());
FileUtils.deletedelete(LibraryProvider.Dictionary.GEON0.fileDisabled());
LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEON0.nickname);
prop.put("geon0ActionRemoved", 1);
}
if (post.containsKey("geon0Deactivate")) {
LibraryProvider.Dictionary.GEON0.file().renameTo(LibraryProvider.Dictionary.GEON0.fileDisabled());
LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEON0.nickname);
prop.put("geon0ActionDeactivated", 1);
}
if (post.containsKey("geon0Activate")) {
LibraryProvider.Dictionary.GEON0.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON0.file());
LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocalization(LibraryProvider.Dictionary.GEON0.file()));
prop.put("geon0ActionActivated", 1);
}
// GEO1 // GEO1
if (post.containsKey("geo1Load")) { if (post.containsKey("geo1Load")) {
// load from the net // load from the net
try { try {
Response response = sb.loader.load(new DigestURI(LibraryProvider.Dictionary.GEO1.url), false, true, CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE); Response response = sb.loader.load(new DigestURI(LibraryProvider.Dictionary.GEODB1.url), false, true, CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE);
byte[] b = response.getContent(); byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.GEO1.file()); FileUtils.copy(b, LibraryProvider.Dictionary.GEODB1.file());
LibraryProvider.geoDB = new OpenGeoDB(LibraryProvider.Dictionary.GEO1.file(), false); LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB0.nickname);
prop.put("geo1Status", LibraryProvider.Dictionary.GEO1.file().exists() ? 1 : 0); LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocalization(LibraryProvider.Dictionary.GEODB1.file(), false));
prop.put("geo1Status", LibraryProvider.Dictionary.GEODB1.file().exists() ? 1 : 0);
prop.put("geo1ActionLoaded", 1); prop.put("geo1ActionLoaded", 1);
} catch (MalformedURLException e) { } catch (MalformedURLException e) {
Log.logException(e); Log.logException(e);
@ -80,25 +122,24 @@ public class DictionaryLoader_p {
} }
if (post.containsKey("geo1Remove")) { if (post.containsKey("geo1Remove")) {
FileUtils.deletedelete(LibraryProvider.Dictionary.GEO1.file()); FileUtils.deletedelete(LibraryProvider.Dictionary.GEODB1.file());
FileUtils.deletedelete(LibraryProvider.Dictionary.GEO1.fileDisabled()); FileUtils.deletedelete(LibraryProvider.Dictionary.GEODB1.fileDisabled());
LibraryProvider.geoDB = new OpenGeoDB(null, true); LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB1.nickname);
prop.put("geo1ActionRemoved", 1); prop.put("geo1ActionRemoved", 1);
} }
if (post.containsKey("geo1Deactivate")) { if (post.containsKey("geo1Deactivate")) {
LibraryProvider.Dictionary.GEO1.file().renameTo(LibraryProvider.Dictionary.GEO1.fileDisabled()); LibraryProvider.Dictionary.GEODB1.file().renameTo(LibraryProvider.Dictionary.GEODB1.fileDisabled());
LibraryProvider.geoDB = new OpenGeoDB(null, true); LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB1.nickname);
prop.put("geo1ActionDeactivated", 1); prop.put("geo1ActionDeactivated", 1);
} }
if (post.containsKey("geo1Activate")) { if (post.containsKey("geo1Activate")) {
LibraryProvider.Dictionary.GEO1.fileDisabled().renameTo(LibraryProvider.Dictionary.GEO1.file()); LibraryProvider.Dictionary.GEODB1.fileDisabled().renameTo(LibraryProvider.Dictionary.GEODB1.file());
LibraryProvider.geoDB = new OpenGeoDB(LibraryProvider.Dictionary.GEO1.file(), false); LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocalization(LibraryProvider.Dictionary.GEODB1.file(), false));
prop.put("geo1ActionActivated", 1); prop.put("geo1ActionActivated", 1);
} }
// check status again // check status again
for (LibraryProvider.Dictionary dictionary: LibraryProvider.Dictionary.values()) { for (LibraryProvider.Dictionary dictionary: LibraryProvider.Dictionary.values()) {
prop.put(dictionary.nickname + "Status", dictionary.file().exists() ? 1 : dictionary.fileDisabled().exists() ? 2 : 0); prop.put(dictionary.nickname + "Status", dictionary.file().exists() ? 1 : dictionary.fileDisabled().exists() ? 2 : 0);

@ -151,7 +151,7 @@ var progressbar = new Progressbar(#[results]#, document.getElementById("results"
</div> </div>
#{/loc}# #{/loc}#
</p> </p>
<p class="urlinfo" style="clear:left;">Geographic information provided by <a href="http://opengeodb.hoppe-media.com">OpenGeoDB</a>, Map provided by <a href="http://www.openstreetmap.org">OpenStreetMap</a></p> <p class="urlinfo" style="clear:left;">Map (c) by <a href="http://www.openstreetmap.org">OpenStreetMap</a> and contributors, CC-BY-SA</p>
</div> </div>
#(/geoinfo)# #(/geoinfo)#

@ -560,7 +560,7 @@ public class yacysearch {
} }
// find geographic info // find geographic info
Set<Location> coordinates = LibraryProvider.geoDB.find(originalquerystring, true, false, true, true, true); Set<Location> coordinates = LibraryProvider.geoLoc.find(originalquerystring, false);
if (coordinates == null || coordinates.isEmpty() || offset > 0) { if (coordinates == null || coordinates.isEmpty() || offset > 0) {
prop.put("geoinfo", "0"); prop.put("geoinfo", "0");
} else { } else {

@ -69,7 +69,7 @@ public class yacysearch_location {
String subject = ""; String subject = "";
for (String s: message.getSubject()) subject += " " + s; for (String s: message.getSubject()) subject += " " + s;
words += subject; words += subject;
for (String word: words.split(" ")) if (word.length() >= 3) locations.addAll(LibraryProvider.geoDB.find(word, true, true, false, false, false)); for (String word: words.split(" ")) if (word.length() >= 3) locations.addAll(LibraryProvider.geoLoc.find(word, true));
String locnames = ""; String locnames = "";
for (Location location: locations) locnames += ", " + location.getName(); for (Location location: locations) locnames += ", " + location.getName();

@ -221,7 +221,7 @@ public class DidYouMean {
public void test(final String s) throws InterruptedException { public void test(final String s) throws InterruptedException {
Set<String> libr = LibraryProvider.dymLib.recommend(s); Set<String> libr = LibraryProvider.dymLib.recommend(s);
libr.addAll(LibraryProvider.geoDB.recommend(s)); libr.addAll(LibraryProvider.geoLoc.recommend(s));
if (!libr.isEmpty()) createGen = false; if (!libr.isEmpty()) createGen = false;
for (final String t: libr) { for (final String t: libr) {
guessLib.put(t); guessLib.put(t);

@ -1,28 +1,24 @@
// LibraryProvider.java /**
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany * LibraryProvider.java
// first published 01.10.2009 on http://yacy.net * Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// * first published 01.10.2009 on http://yacy.net
// This is a part of YaCy *
// * This file is part of YaCy Content Integration
// $LastChangedDate$ *
// $LastChangedRevision$ * This program is free software: you can redistribute it and/or modify
// $LastChangedBy$ * it under the terms of the GNU Lesser General Public License as published by
// * the Free Software Foundation, either version 3 of the License, or
// LICENSE * (at your option) any later version.
// *
// This program is free software; you can redistribute it and/or modify * This program is distributed in the hope that it will be useful,
// it under the terms of the GNU General Public License as published by * but WITHOUT ANY WARRANTY; without even the implied warranty of
// the Free Software Foundation; either version 2 of the License, or * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// (at your option) any later version. * GNU Lesser General Public License for more details.
// *
// This program is distributed in the hope that it will be useful, * You should have received a copy of the GNU Lesser General Public License
// but WITHOUT ANY WARRANTY; without even the implied warranty of * along with this program in the file COPYING.LESSER.
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * If not, see <http://www.gnu.org/licenses/>.
// GNU General Public License for more details. */
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.data; package de.anomic.data;
@ -39,7 +35,9 @@ import java.util.List;
import java.util.Set; import java.util.Set;
import java.util.TreeSet; import java.util.TreeSet;
import net.yacy.document.geolocalization.OpenGeoDB; import net.yacy.document.geolocalization.GeonamesLocalization;
import net.yacy.document.geolocalization.OpenGeoDBLocalization;
import net.yacy.document.geolocalization.OverarchingLocalization;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
public class LibraryProvider { public class LibraryProvider {
@ -50,17 +48,20 @@ public class LibraryProvider {
public static final String disabledExtension = ".disabled"; public static final String disabledExtension = ".disabled";
public static DidYouMeanLibrary dymLib = new DidYouMeanLibrary(null); public static DidYouMeanLibrary dymLib = new DidYouMeanLibrary(null);
public static OpenGeoDB geoDB = new OpenGeoDB(null, true); public static OverarchingLocalization geoLoc = new OverarchingLocalization();
private static File dictSource = null; private static File dictSource = null;
private static File dictRoot = null; private static File dictRoot = null;
public static enum Dictionary { public static enum Dictionary {
GEO0("geo0", GEODB0("geo0",
"http://downloads.sourceforge.net/project/opengeodb/Data/0.2.5a/opengeodb-0.2.5a-UTF8-sql.gz", "http://downloads.sourceforge.net/project/opengeodb/Data/0.2.5a/opengeodb-0.2.5a-UTF8-sql.gz",
"opengeodb-0.2.5a-UTF8-sql.gz"), "opengeodb-0.2.5a-UTF8-sql.gz"),
GEO1("geo1", GEODB1("geo1",
"http://fa-technik.adfc.de/code/opengeodb/dump/opengeodb-02621_2010-03-16.sql.gz", "http://fa-technik.adfc.de/code/opengeodb/dump/opengeodb-02621_2010-03-16.sql.gz",
"opengeodb-02621_2010-03-16.sql.gz"); "opengeodb-02621_2010-03-16.sql.gz"),
GEON0("geon0",
"http://download.geonames.org/export/dump/cities1000.zip",
"cities1000.zip");
public String nickname, url, filename; public String nickname, url, filename;
private Dictionary(String nickname, String url, String filename) { private Dictionary(String nickname, String url, String filename) {
@ -95,18 +96,27 @@ public class LibraryProvider {
integrateDeReWo(); integrateDeReWo();
initDidYouMean(); initDidYouMean();
integrateOpenGeoDB(); integrateOpenGeoDB();
integrateGeonames();
} }
public static void integrateOpenGeoDB() { public static void integrateOpenGeoDB() {
File geo1 = Dictionary.GEO1.file(); File geo1 = Dictionary.GEODB1.file();
File geo0 = Dictionary.GEO0.file(); File geo0 = Dictionary.GEODB0.file();
if (geo1.exists()) { if (geo1.exists()) {
if (geo0.exists()) geo0.renameTo(Dictionary.GEO0.fileDisabled()); if (geo0.exists()) geo0.renameTo(Dictionary.GEODB0.fileDisabled());
geoDB = new OpenGeoDB(geo1, false); geoLoc.addLocalization(Dictionary.GEODB1.nickname, new OpenGeoDBLocalization(geo1, false));
return; return;
} }
if (geo0.exists()) { if (geo0.exists()) {
geoDB = new OpenGeoDB(geo0, true); geoLoc.addLocalization(Dictionary.GEODB0.nickname, new OpenGeoDBLocalization(geo0, false));
return;
}
}
public static void integrateGeonames() {
File geon = Dictionary.GEON0.file();
if (geon.exists()) {
geoLoc.addLocalization(Dictionary.GEON0.nickname, new GeonamesLocalization(geon));
return; return;
} }
} }

@ -53,7 +53,6 @@ import java.util.Iterator;
import net.yacy.kelondro.index.Column; import net.yacy.kelondro.index.Column;
import net.yacy.kelondro.index.Row; import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.index.Row.Entry;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.NaturalOrder; import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.kelondro.table.Table; import net.yacy.kelondro.table.Table;

@ -1,28 +1,24 @@
// Coordinates.java /**
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany * Coordinates.java
// first published 04.10.2009 on http://yacy.net * Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// * first published 04.10.2009 on http://yacy.net
// This is a part of YaCy *
// * This file is part of YaCy Content Integration
// $LastChangedDate$ *
// $LastChangedRevision$ * This program is free software: you can redistribute it and/or modify
// $LastChangedBy$ * it under the terms of the GNU Lesser General Public License as published by
// * the Free Software Foundation, either version 3 of the License, or
// LICENSE * (at your option) any later version.
// *
// This program is free software; you can redistribute it and/or modify * This program is distributed in the hope that it will be useful,
// it under the terms of the GNU General Public License as published by * but WITHOUT ANY WARRANTY; without even the implied warranty of
// the Free Software Foundation; either version 2 of the License, or * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// (at your option) any later version. * GNU Lesser General Public License for more details.
// *
// This program is distributed in the hope that it will be useful, * You should have received a copy of the GNU Lesser General Public License
// but WITHOUT ANY WARRANTY; without even the implied warranty of * along with this program in the file COPYING.LESSER.
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * If not, see <http://www.gnu.org/licenses/>.
// GNU General Public License for more details. */
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.document.geolocalization; package net.yacy.document.geolocalization;

@ -0,0 +1,169 @@
/**
* GeonamesLocalization.java
* Copyright 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* first published 16.05.2010 on http://yacy.net
*
* This file is part of YaCy Content Integration
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file COPYING.LESSER.
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document.geolocalization;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.Collator;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import net.yacy.kelondro.logging.Log;
/**
 * Localization service backed by the GeoNames "cities1000" dump.
 *
 * The constructor reads the tab-separated file <code>cities1000.txt</code> out of the
 * given zip archive and keeps two in-memory indexes: one from the geoname id to the
 * location, and one from every known name (name, ascii name, alternate names) to the
 * ids carrying that name. The name index is ordered by a collator that does not
 * distinguish between lowercase and uppercase letters.
 */
public class GeonamesLocalization implements Localization {

    /*
        The main 'geoname' table has the following fields :
        ---------------------------------------------------
        geonameid         : integer id of record in geonames database
        name              : name of geographical point (utf8) varchar(200)
        asciiname         : name of geographical point in plain ascii characters, varchar(200)
        alternatenames    : alternatenames, comma separated varchar(5000)
        latitude          : latitude in decimal degrees (wgs84)
        longitude         : longitude in decimal degrees (wgs84)
        feature class     : see http://www.geonames.org/export/codes.html, char(1)
        feature code      : see http://www.geonames.org/export/codes.html, varchar(10)
        country code      : ISO-3166 2-letter country code, 2 characters
        cc2               : alternate country codes, comma separated, ISO-3166 2-letter country code, 60 characters
        admin1 code       : fipscode (subject to change to iso code), see exceptions below, see file admin1Codes.txt for display names of this code; varchar(20)
        admin2 code       : code for the second administrative division, a county in the US, see file admin2Codes.txt; varchar(80)
        admin3 code       : code for third level administrative division, varchar(20)
        admin4 code       : code for fourth level administrative division, varchar(20)
        population        : bigint (8 byte int)
        elevation         : in meters, integer
        gtopo30           : average elevation of 30'x30' (ca 900mx900m) area in meters, integer
        timezone          : the timezone id (see file timeZone.txt)
        modification date : date of last modification in yyyy-MM-dd format
    */

    // name of the data file inside the downloaded zip archive
    private static final String CITIES_ENTRY = "cities1000.txt";

    // number of leading columns that are read from each line (geonameid .. longitude)
    private static final int REQUIRED_FIELDS = 6;

    // use a collator to relax when distinguishing between lowercase and uppercase letters
    private static final Collator insensitiveCollator = Collator.getInstance(Locale.US);
    static {
        insensitiveCollator.setStrength(Collator.SECONDARY);
        insensitiveCollator.setDecomposition(Collator.NO_DECOMPOSITION);
    }

    private final HashMap<Integer, Location> id2loc;
    private final TreeMap<String, List<Integer>> name2ids;
    private final File file;

    /**
     * Load the GeoNames data from a zip archive.
     * If the file is null or does not exist, or if the archive cannot be read,
     * the instance stays empty (or partially filled); the problem is logged and
     * no exception is thrown.
     * @param file the cities1000.zip file from http://download.geonames.org/export/dump/
     */
    public GeonamesLocalization(final File file) {
        this.file = file;
        this.id2loc = new HashMap<Integer, Location>();
        this.name2ids = new TreeMap<String, List<Integer>>(insensitiveCollator);

        if (file == null || !file.exists()) return;

        ZipFile zf = null;
        try {
            zf = new ZipFile(file);
            final ZipEntry ze = zf.getEntry(CITIES_ENTRY);
            if (ze == null) {
                // getInputStream(null) would throw a NullPointerException; report the real cause instead
                Log.logException(new IOException("entry '" + CITIES_ENTRY + "' not found in " + file));
                return;
            }
            final BufferedReader reader = new BufferedReader(new InputStreamReader(zf.getInputStream(ze), "UTF-8"));
            // when an error occurs after this line, just accept it and work on
            String line;
            while ((line = reader.readLine()) != null) {
                parseLine(line);
            }
        } catch (IOException e) {
            Log.logException(e);
        } finally {
            // closing the zip file also closes the entry stream that was opened from it
            if (zf != null) {
                try {
                    zf.close();
                } catch (IOException e) {
                    Log.logException(e);
                }
            }
        }
    }

    /**
     * Parse one line of the geoname table and add it to the indexes.
     * Empty lines and lines that are too short or carry unparsable numbers are skipped.
     * @param line one tab-separated record
     */
    private void parseLine(final String line) {
        if (line.length() == 0) return;
        final String[] fields = line.split("\t");
        if (fields.length < REQUIRED_FIELDS) return;

        int id;
        Location loc;
        try {
            id = Integer.parseInt(fields[0]);
            // the Location constructor is called with the longitude first, then the latitude
            loc = new Location(Double.parseDouble(fields[5]), Double.parseDouble(fields[4]), fields[1]);
        } catch (NumberFormatException e) {
            // a single broken record must not abort the whole import
            return;
        }
        this.id2loc.put(id, loc);

        final Set<String> locnames = new HashSet<String>();
        locnames.add(fields[1]);
        locnames.add(fields[2]);
        for (final String s : fields[3].split(",")) locnames.add(s);

        for (final String name : locnames) {
            // an empty alternatenames column produces an empty name; do not index it
            if (name.length() == 0) continue;
            List<Integer> ids = this.name2ids.get(name);
            if (ids == null) {
                ids = new ArrayList<Integer>(1);
                this.name2ids.put(name, ids);
            }
            // names that differ only by case share one key; avoid storing the same id twice in a row
            if (ids.isEmpty() || ids.get(ids.size() - 1).intValue() != id) ids.add(id);
        }
    }

    /**
     * Find locations by name.
     * @param anyname a name of a location
     * @param locationexact if true, only the entry stored for exactly this name (as judged by the
     *        case-insensitive collator) is used; if false, all names starting with the given name are used
     * @return a set of locations, empty if nothing was found or the name is null
     */
    public Set<Location> find(String anyname, boolean locationexact) {
        final Set<Location> found = new HashSet<Location>();
        if (anyname == null) return found;

        final Set<Integer> ids = new HashSet<Integer>();
        if (locationexact) {
            final List<Integer> c = this.name2ids.get(anyname);
            if (c != null) ids.addAll(c);
        } else {
            final String prefix = anyname.toLowerCase();
            final SortedMap<String, List<Integer>> cities = this.name2ids.tailMap(anyname);
            for (final Map.Entry<String, List<Integer>> e : cities.entrySet()) {
                if (!e.getKey().toLowerCase().startsWith(prefix)) break;
                ids.addAll(e.getValue());
            }
        }

        for (final Integer id : ids) {
            final Location w = this.id2loc.get(id);
            if (w != null) found.add(w);
        }
        return found;
    }

    /**
     * Recommend names that start with the given string.
     * The comparison ignores case on both sides; the names are returned as they are stored.
     * @param s a possibly partially matching name
     * @return the set of stored names starting with s, empty if s is null
     */
    public Set<String> recommend(String s) {
        final Set<String> a = new HashSet<String>();
        if (s == null) return a;
        final String prefix = s.trim().toLowerCase();
        final SortedMap<String, List<Integer>> t = this.name2ids.tailMap(prefix);
        for (final String r : t.keySet()) {
            // the stored key keeps its original case, so it must be lower-cased before the comparison
            if (!r.toLowerCase().startsWith(prefix)) break;
            a.add(r);
        }
        return a;
    }

    /**
     * @return the name of the source file, or an empty string if this instance was created without a file
     */
    public String nickname() {
        return this.file == null ? "" : this.file.getName();
    }

    @Override
    public int hashCode() {
        return this.nickname().hashCode();
    }

    @Override
    public boolean equals(Object other) {
        if (!(other instanceof Localization)) return false;
        return this.nickname().equals(((Localization) other).nickname());
    }
}

@ -0,0 +1,68 @@
/**
* Localization.java
* Copyright 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* first published 16.05.2010 on http://yacy.net
*
* This file is part of YaCy Content Integration
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file COPYING.LESSER.
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document.geolocalization;
import java.util.Set;
/**
 * Common contract of all localization services: a service maps names of
 * places to locations and can propose names for a partially typed name.
 * @author Michael Peter Christen
 */
public interface Localization {

    /**
     * Look up locations by name.
     * @param anyname the name of a location
     * @param locationexact if true, only exact matches with the location name are returned;
     *        if false, partially matching names are returned as well
     * @return the set of matching locations
     */
    Set<Location> find(String anyname, boolean locationexact);

    /**
     * Propose names for a given, possibly incomplete name.
     * @param s a possibly partially matching name
     * @return the names from the local dictionary of names that match the given name
     */
    Set<String> recommend(String s);

    /**
     * Get the nickname of this localization service.
     * @return the nickname
     */
    String nickname();

    /**
     * Hash code used to distinguish localization services in hash sets.
     * @return the hash code; it may be derived from the nickname
     */
    int hashCode();

    /**
     * Compare localization services; used for hash sets of localization services.
     * @param other the object to compare with
     * @return true if both objects are localization services and have the same nickname
     */
    boolean equals(Object other);

}

@ -1,28 +1,24 @@
// Coordinates.java /**
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany * Location.java
// first published 08.10.2009 on http://yacy.net * Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// * first published 08.10.2009 on http://yacy.net
// This is a part of YaCy *
// * This file is part of YaCy Content Integration
// $LastChangedDate$ *
// $LastChangedRevision$ * This program is free software: you can redistribute it and/or modify
// $LastChangedBy$ * it under the terms of the GNU Lesser General Public License as published by
// * the Free Software Foundation, either version 3 of the License, or
// LICENSE * (at your option) any later version.
// *
// This program is free software; you can redistribute it and/or modify * This program is distributed in the hope that it will be useful,
// it under the terms of the GNU General Public License as published by * but WITHOUT ANY WARRANTY; without even the implied warranty of
// the Free Software Foundation; either version 2 of the License, or * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// (at your option) any later version. * GNU Lesser General Public License for more details.
// *
// This program is distributed in the hope that it will be useful, * You should have received a copy of the GNU Lesser General Public License
// but WITHOUT ANY WARRANTY; without even the implied warranty of * along with this program in the file COPYING.LESSER.
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * If not, see <http://www.gnu.org/licenses/>.
// GNU General Public License for more details. */
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.document.geolocalization; package net.yacy.document.geolocalization;
@ -49,4 +45,9 @@ public class Location extends Coordinates {
return this.name; return this.name;
} }
/**
 * Compare this location with another object.
 * Two locations are equal if the superclass considers them equal
 * and both carry the same name.
 * @param loc the object to compare with
 * @return true if loc is a Location that equals this one in the superclass sense and in its name
 */
public boolean equals(Object loc) {
    if (!(loc instanceof Location)) return false;
    final Location other = (Location) loc;
    if (!super.equals(loc)) return false;
    // compare name with name; comparing the name with the Location object itself is never true
    return this.name == null ? other.name == null : this.name.equals(other.name);
}
} }

@ -1,28 +1,24 @@
// OpenGeoDB.java /**
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany * OpenGeoDBLocalization
// first published 04.10.2009 on http://yacy.net * Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// * first published 04.10.2009 on http://yacy.net
// This is a part of YaCy *
// * This file is part of YaCy Content Integration
// $LastChangedDate$ *
// $LastChangedRevision$ * This program is free software: you can redistribute it and/or modify
// $LastChangedBy$ * it under the terms of the GNU Lesser General Public License as published by
// * the Free Software Foundation, either version 3 of the License, or
// LICENSE * (at your option) any later version.
// *
// This program is free software; you can redistribute it and/or modify * This program is distributed in the hope that it will be useful,
// it under the terms of the GNU General Public License as published by * but WITHOUT ANY WARRANTY; without even the implied warranty of
// the Free Software Foundation; either version 2 of the License, or * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// (at your option) any later version. * GNU Lesser General Public License for more details.
// *
// This program is distributed in the hope that it will be useful, * You should have received a copy of the GNU Lesser General Public License
// but WITHOUT ANY WARRANTY; without even the implied warranty of * along with this program in the file COPYING.LESSER.
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * If not, see <http://www.gnu.org/licenses/>.
// GNU General Public License for more details. */
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.document.geolocalization; package net.yacy.document.geolocalization;
@ -59,9 +55,9 @@ import net.yacy.kelondro.logging.Log;
* This class will provide a super-fast access to the OpenGeoDB, * This class will provide a super-fast access to the OpenGeoDB,
* since all request are evaluated using data in the RAM. * since all request are evaluated using data in the RAM.
*/ */
public class OpenGeoDB { public class OpenGeoDBLocalization implements Localization {
// use a collator to relax when distinguishing between lowercase und uppercase letters // use a collator to relax when distinguishing between lowercase und uppercase letters
private static final Collator insensitiveCollator = Collator.getInstance(Locale.US); private static final Collator insensitiveCollator = Collator.getInstance(Locale.US);
static { static {
insensitiveCollator.setStrength(Collator.SECONDARY); insensitiveCollator.setStrength(Collator.SECONDARY);
@ -71,17 +67,19 @@ public class OpenGeoDB {
private final HashMap<Integer, String> locTypeHash2locType; private final HashMap<Integer, String> locTypeHash2locType;
private final HashMap<Integer, Location> id2loc; private final HashMap<Integer, Location> id2loc;
private final HashMap<Integer, Integer> id2locTypeHash; private final HashMap<Integer, Integer> id2locTypeHash;
private final TreeMap<String, List<Integer>> locationName2ids; private final TreeMap<String, List<Integer>> name2ids;
private final TreeMap<String, List<Integer>> kfz2ids; private final TreeMap<String, List<Integer>> kfz2ids;
private final HashMap<String, List<Integer>> predial2ids; private final HashMap<String, List<Integer>> predial2ids;
private final HashMap<String, Integer> zip2id; private final HashMap<String, Integer> zip2id;
private final File file;
public OpenGeoDB(final File file, boolean lonlat) { public OpenGeoDBLocalization(final File file, boolean lonlat) {
this.file = file;
this.locTypeHash2locType = new HashMap<Integer, String>(); this.locTypeHash2locType = new HashMap<Integer, String>();
this.id2loc = new HashMap<Integer, Location>(); this.id2loc = new HashMap<Integer, Location>();
this.id2locTypeHash = new HashMap<Integer, Integer>(); this.id2locTypeHash = new HashMap<Integer, Integer>();
this.locationName2ids = new TreeMap<String, List<Integer>>(insensitiveCollator); this.name2ids = new TreeMap<String, List<Integer>>(insensitiveCollator);
this.kfz2ids = new TreeMap<String, List<Integer>>(insensitiveCollator); this.kfz2ids = new TreeMap<String, List<Integer>>(insensitiveCollator);
this.predial2ids = new HashMap<String, List<Integer>>(); this.predial2ids = new HashMap<String, List<Integer>>();
this.zip2id = new HashMap<String, Integer>(); this.zip2id = new HashMap<String, Integer>();
@ -123,10 +121,10 @@ public class OpenGeoDB {
if (v[1].equals("500100000")) { // Ortsname if (v[1].equals("500100000")) { // Ortsname
id = Integer.parseInt(v[0]); id = Integer.parseInt(v[0]);
h = removeQuotes(v[2]); h = removeQuotes(v[2]);
List<Integer> l = this.locationName2ids.get(h); List<Integer> l = this.name2ids.get(h);
if (l == null) l = new ArrayList<Integer>(1); if (l == null) l = new ArrayList<Integer>(1);
l.add(id); l.add(id);
this.locationName2ids.put(h, l); this.name2ids.put(h, l);
Location loc = this.id2loc.get(id); Location loc = this.id2loc.get(id);
if (loc != null) loc.setName(h); if (loc != null) loc.setName(h);
} else if (v[1].equals("500400000")) { // Vorwahl } else if (v[1].equals("500400000")) { // Vorwahl
@ -181,22 +179,20 @@ public class OpenGeoDB {
* @param anyname * @param anyname
* @return * @return
*/ */
public HashSet<Location> find(String anyname, boolean location, boolean locationexact, boolean kfz, boolean predial, boolean zip) { public HashSet<Location> find(String anyname, boolean locationexact) {
HashSet<Integer> r = new HashSet<Integer>(); HashSet<Integer> r = new HashSet<Integer>();
List<Integer> c; List<Integer> c;
if (location) { if (locationexact) {
if (locationexact) { c = this.name2ids.get(anyname); if (c != null) r.addAll(c);
c = this.locationName2ids.get(anyname); if (c != null) r.addAll(c); } else {
} else { SortedMap<String, List<Integer>> cities = this.name2ids.tailMap(anyname);
SortedMap<String, List<Integer>> cities = this.locationName2ids.tailMap(anyname); for (Map.Entry<String, List<Integer>> e: cities.entrySet()) {
for (Map.Entry<String, List<Integer>> e: cities.entrySet()) { if (e.getKey().toLowerCase().startsWith(anyname.toLowerCase())) r.addAll(e.getValue()); else break;
if (e.getKey().toLowerCase().startsWith(anyname.toLowerCase())) r.addAll(e.getValue()); else break;
}
} }
c = this.kfz2ids.get(anyname); if (c != null) r.addAll(c);
c = this.predial2ids.get(anyname); if (c != null) r.addAll(c);
Integer i = this.zip2id.get(anyname); if (i != null) r.add(i);
} }
if (kfz) {c = this.kfz2ids.get(anyname); if (c != null) r.addAll(c);}
if (predial) {c = this.predial2ids.get(anyname); if (c != null) r.addAll(c);}
if (zip) {Integer i = this.zip2id.get(anyname); if (i != null) r.add(i);}
HashSet<Location> a = new HashSet<Location>(); HashSet<Location> a = new HashSet<Location>();
for (Integer e: r) { for (Integer e: r) {
Location w = this.id2loc.get(e); Location w = this.id2loc.get(e);
@ -213,10 +209,23 @@ public class OpenGeoDB {
public Set<String> recommend(String s) { public Set<String> recommend(String s) {
Set<String> a = new HashSet<String>(); Set<String> a = new HashSet<String>();
s = s.trim().toLowerCase(); s = s.trim().toLowerCase();
SortedMap<String, List<Integer>> t = this.locationName2ids.tailMap(s); SortedMap<String, List<Integer>> t = this.name2ids.tailMap(s);
for (String r: t.keySet()) { for (String r: t.keySet()) {
if (r.startsWith(s)) a.add(r); else break; if (r.startsWith(s)) a.add(r); else break;
} }
return a; return a;
} }
public String nickname() {
return this.file.getName();
}
public int hashCode() {
return this.nickname().hashCode();
}
public boolean equals(Object other) {
if (!(other instanceof Localization)) return false;
return this.nickname().equals(((Localization) other).nickname());
}
} }

@ -0,0 +1,93 @@
/**
* OverarchingLocalization.java
* Copyright 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* first published 16.05.2010 on http://yacy.net
*
* This file is part of YaCy Content Integration
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file COPYING.LESSER.
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document.geolocalization;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
/**
 * A {@link Localization} that bundles several other localization services
 * and answers each request by merging the answers of all registered services.
 */
public class OverarchingLocalization implements Localization {

    // registered services, keyed by the nickname they were added under
    private final Map<String, Localization> services = new HashMap<String, Localization>();

    /**
     * Creates an overarching localization that has no services registered yet.
     */
    public OverarchingLocalization() {
        // the service map is initialised at its declaration
    }

    /**
     * Registers a localization service. A service that was registered earlier
     * under the same nickname is replaced.
     *
     * @param nickname the nickname of the service
     * @param service the service
     */
    public void addLocalization(String nickname, Localization service) {
        services.put(nickname, service);
    }

    /**
     * Unregisters the localization service stored under the given nickname.
     * Nothing happens if no service is registered under that nickname.
     *
     * @param nickname the nickname the service was added under
     */
    public void removeLocalization(String nickname) {
        services.remove(nickname);
    }

    /**
     * Asks every registered service for locations and merges the results.
     *
     * @param anyname the name to look up; passed unchanged to each service
     * @param locationexact passed unchanged to each service
     * @return a new set holding the locations found by all services
     */
    public Set<Location> find(String anyname, boolean locationexact) {
        final Set<Location> merged = new HashSet<Location>();
        for (final Localization delegate : services.values()) {
            merged.addAll(delegate.find(anyname, locationexact));
        }
        return merged;
    }

    /**
     * Asks every registered service for name recommendations and merges the results.
     *
     * @param s the input to recommend names for; passed unchanged to each service
     * @return a new set holding the recommendations of all services
     */
    public Set<String> recommend(String s) {
        final Set<String> merged = new HashSet<String>();
        for (final Localization delegate : services.values()) {
            merged.addAll(delegate.recommend(s));
        }
        return merged;
    }

    /**
     * @return the fixed nickname of this aggregate, "oa"
     */
    public String nickname() {
        return "oa";
    }

    /**
     * The hash code is derived from the nickname only, matching {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        return nickname().hashCode();
    }

    /**
     * Two localizations are considered equal when their nicknames are equal.
     *
     * @param other the object to compare with
     * @return true if other is a Localization with the same nickname
     */
    @Override
    public boolean equals(Object other) {
        if (other instanceof Localization) {
            final Localization that = (Localization) other;
            return nickname().equals(that.nickname());
        }
        return false;
    }
}
Loading…
Cancel
Save