From e43e61e502bcac185122aac65b2bf1a3955d8067 Mon Sep 17 00:00:00 2001 From: orbiter Date: Sat, 15 May 2010 23:49:30 +0000 Subject: [PATCH] added another geolocalization data source: GeoNames - added downloader option in DictionaryLoader - added generalization (interfaces and overarching localization) - more abstraction using the libraries git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6879 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/DictionaryLoader_p.html | 44 ++++- htroot/DictionaryLoader_p.java | 67 +++++-- htroot/yacysearch.html | 2 +- htroot/yacysearch.java | 2 +- htroot/yacysearch_location.java | 2 +- source/de/anomic/data/DidYouMean.java | 2 +- source/de/anomic/data/LibraryProvider.java | 80 +++++---- source/de/anomic/yacy/yacyNewsQueue.java | 1 - .../document/geolocalization/Coordinates.java | 46 +++-- .../geolocalization/GeonamesLocalization.java | 169 ++++++++++++++++++ .../geolocalization/Localization.java | 68 +++++++ .../document/geolocalization/Location.java | 51 +++--- ...nGeoDB.java => OpenGeoDBLocalization.java} | 99 +++++----- .../OverarchingLocalization.java | 93 ++++++++++ 14 files changed, 575 insertions(+), 151 deletions(-) create mode 100644 source/net/yacy/document/geolocalization/GeonamesLocalization.java create mode 100644 source/net/yacy/document/geolocalization/Localization.java rename source/net/yacy/document/geolocalization/{OpenGeoDB.java => OpenGeoDBLocalization.java} (72%) create mode 100644 source/net/yacy/document/geolocalization/OverarchingLocalization.java diff --git a/htroot/DictionaryLoader_p.html b/htroot/DictionaryLoader_p.html index 6cc04f465..684e3cddc 100644 --- a/htroot/DictionaryLoader_p.html +++ b/htroot/DictionaryLoader_p.html @@ -1,4 +1,4 @@ - +1 YaCy '#[clientname]#': Dictionary Loader @@ -18,8 +18,46 @@
Geolocalization - The geolocalization file will enable YaCy to present locations from OpenStreetMap according to given search words. - With this file it is possible to find locations using the location (city) name, a zip code, a car sign or a telephone pre-dial number. + Geolocalization will enable YaCy to present locations from OpenStreetMap according to given search words. + +

GeoNames

+

With this file it is possible to find cities with a population > 1000 all over the world.

+ +
+
+
#[geon0URL]#
+
+
#[geon0Storage]#
+
+
#(geon0Status)#
not loaded
::
loaded
::de-activated#(/geon0Status)#
+
+
#(geon0Status)# + :: + + :: + + + #(/geon0Status)#
+ #(geon0ActionLoaded)#:: +
loaded and activated dictionary file
:: +
loading of dictionary file failed: #[error]#
+ #(/geon0ActionLoaded)# + #(geon0ActionRemoved)#:: +
de-activated and removed dictionary file
:: +
cannot remove dictionary file: #[error]#
+ #(/geon0ActionRemoved)# + #(geon0ActionDeactivated)#:: +
de-activated dictionary file
:: +
cannot de-activate dictionary file: #[error]#
+ #(/geon0ActionDeactivated)# + #(geon0ActionActivated)#:: +
activated dictionary file
:: +
cannot activate dictionary file: #[error]#
+ #(/geon0ActionActivated)# +
+ +

OpenGeoDB

+

With this file it is possible to find locations in Germany using the location (city) name, a zip code, a car sign or a telephone pre-dial number.

diff --git a/htroot/DictionaryLoader_p.java b/htroot/DictionaryLoader_p.java index c104c2b26..787570a9d 100644 --- a/htroot/DictionaryLoader_p.java +++ b/htroot/DictionaryLoader_p.java @@ -21,7 +21,8 @@ import java.io.IOException; import java.net.MalformedURLException; -import net.yacy.document.geolocalization.OpenGeoDB; +import net.yacy.document.geolocalization.GeonamesLocalization; +import net.yacy.document.geolocalization.OpenGeoDBLocalization; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.FileUtils; @@ -58,15 +59,56 @@ public class DictionaryLoader_p { if (post == null) return prop; + // GEON0 + if (post.containsKey("geon0Load")) { + // load from the net + try { + Response response = sb.loader.load(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true, CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE); + byte[] b = response.getContent(); + FileUtils.copy(b, LibraryProvider.Dictionary.GEON0.file()); + LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocalization(LibraryProvider.Dictionary.GEON0.file())); + prop.put("geon0Status", LibraryProvider.Dictionary.GEON0.file().exists() ? 
1 : 0); + prop.put("geon0ActionLoaded", 1); + } catch (MalformedURLException e) { + Log.logException(e); + prop.put("geon0ActionLoaded", 2); + prop.put("geon0ActionLoaded_error", e.getMessage()); + } catch (IOException e) { + Log.logException(e); + prop.put("geon0ActionLoaded", 2); + prop.put("geon0ActionLoaded_error", e.getMessage()); + } + } + + if (post.containsKey("geon0Remove")) { + FileUtils.deletedelete(LibraryProvider.Dictionary.GEON0.file()); + FileUtils.deletedelete(LibraryProvider.Dictionary.GEON0.fileDisabled()); + LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEON0.nickname); + prop.put("geon0ActionRemoved", 1); + } + + if (post.containsKey("geon0Deactivate")) { + LibraryProvider.Dictionary.GEON0.file().renameTo(LibraryProvider.Dictionary.GEON0.fileDisabled()); + LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEON0.nickname); + prop.put("geon0ActionDeactivated", 1); + } + + if (post.containsKey("geon0Activate")) { + LibraryProvider.Dictionary.GEON0.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON0.file()); + LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocalization(LibraryProvider.Dictionary.GEON0.file())); + prop.put("geon0ActionActivated", 1); + } + // GEO1 if (post.containsKey("geo1Load")) { // load from the net try { - Response response = sb.loader.load(new DigestURI(LibraryProvider.Dictionary.GEO1.url), false, true, CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE); + Response response = sb.loader.load(new DigestURI(LibraryProvider.Dictionary.GEODB1.url), false, true, CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE); byte[] b = response.getContent(); - FileUtils.copy(b, LibraryProvider.Dictionary.GEO1.file()); - LibraryProvider.geoDB = new OpenGeoDB(LibraryProvider.Dictionary.GEO1.file(), false); - prop.put("geo1Status", LibraryProvider.Dictionary.GEO1.file().exists() ? 
1 : 0); + FileUtils.copy(b, LibraryProvider.Dictionary.GEODB1.file()); + LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB0.nickname); + LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocalization(LibraryProvider.Dictionary.GEODB1.file(), false)); + prop.put("geo1Status", LibraryProvider.Dictionary.GEODB1.file().exists() ? 1 : 0); prop.put("geo1ActionLoaded", 1); } catch (MalformedURLException e) { Log.logException(e); @@ -80,25 +122,24 @@ public class DictionaryLoader_p { } if (post.containsKey("geo1Remove")) { - FileUtils.deletedelete(LibraryProvider.Dictionary.GEO1.file()); - FileUtils.deletedelete(LibraryProvider.Dictionary.GEO1.fileDisabled()); - LibraryProvider.geoDB = new OpenGeoDB(null, true); + FileUtils.deletedelete(LibraryProvider.Dictionary.GEODB1.file()); + FileUtils.deletedelete(LibraryProvider.Dictionary.GEODB1.fileDisabled()); + LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB1.nickname); prop.put("geo1ActionRemoved", 1); } if (post.containsKey("geo1Deactivate")) { - LibraryProvider.Dictionary.GEO1.file().renameTo(LibraryProvider.Dictionary.GEO1.fileDisabled()); - LibraryProvider.geoDB = new OpenGeoDB(null, true); + LibraryProvider.Dictionary.GEODB1.file().renameTo(LibraryProvider.Dictionary.GEODB1.fileDisabled()); + LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB1.nickname); prop.put("geo1ActionDeactivated", 1); } if (post.containsKey("geo1Activate")) { - LibraryProvider.Dictionary.GEO1.fileDisabled().renameTo(LibraryProvider.Dictionary.GEO1.file()); - LibraryProvider.geoDB = new OpenGeoDB(LibraryProvider.Dictionary.GEO1.file(), false); + LibraryProvider.Dictionary.GEODB1.fileDisabled().renameTo(LibraryProvider.Dictionary.GEODB1.file()); + LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocalization(LibraryProvider.Dictionary.GEODB1.file(), false)); 
prop.put("geo1ActionActivated", 1); } - // check status again for (LibraryProvider.Dictionary dictionary: LibraryProvider.Dictionary.values()) { prop.put(dictionary.nickname + "Status", dictionary.file().exists() ? 1 : dictionary.fileDisabled().exists() ? 2 : 0); diff --git a/htroot/yacysearch.html b/htroot/yacysearch.html index 527c8f363..ad628f9a1 100644 --- a/htroot/yacysearch.html +++ b/htroot/yacysearch.html @@ -151,7 +151,7 @@ var progressbar = new Progressbar(#[results]#, document.getElementById("results" #{/loc}#

-

Geographic information provided by OpenGeoDB, Map provided by OpenStreetMap

+

Map (c) by OpenStreetMap and contributors, CC-BY-SA

#(/geoinfo)# diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 32db77d73..5f3ed1044 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -560,7 +560,7 @@ public class yacysearch { } // find geographic info - Set coordinates = LibraryProvider.geoDB.find(originalquerystring, true, false, true, true, true); + Set coordinates = LibraryProvider.geoLoc.find(originalquerystring, false); if (coordinates == null || coordinates.isEmpty() || offset > 0) { prop.put("geoinfo", "0"); } else { diff --git a/htroot/yacysearch_location.java b/htroot/yacysearch_location.java index 7e7736385..09e04ac5e 100644 --- a/htroot/yacysearch_location.java +++ b/htroot/yacysearch_location.java @@ -69,7 +69,7 @@ public class yacysearch_location { String subject = ""; for (String s: message.getSubject()) subject += " " + s; words += subject; - for (String word: words.split(" ")) if (word.length() >= 3) locations.addAll(LibraryProvider.geoDB.find(word, true, true, false, false, false)); + for (String word: words.split(" ")) if (word.length() >= 3) locations.addAll(LibraryProvider.geoLoc.find(word, true)); String locnames = ""; for (Location location: locations) locnames += ", " + location.getName(); diff --git a/source/de/anomic/data/DidYouMean.java b/source/de/anomic/data/DidYouMean.java index f800e7536..4bc86050f 100644 --- a/source/de/anomic/data/DidYouMean.java +++ b/source/de/anomic/data/DidYouMean.java @@ -221,7 +221,7 @@ public class DidYouMean { public void test(final String s) throws InterruptedException { Set libr = LibraryProvider.dymLib.recommend(s); - libr.addAll(LibraryProvider.geoDB.recommend(s)); + libr.addAll(LibraryProvider.geoLoc.recommend(s)); if (!libr.isEmpty()) createGen = false; for (final String t: libr) { guessLib.put(t); diff --git a/source/de/anomic/data/LibraryProvider.java b/source/de/anomic/data/LibraryProvider.java index e2de6183f..e864f1970 100644 --- a/source/de/anomic/data/LibraryProvider.java +++ 
b/source/de/anomic/data/LibraryProvider.java @@ -1,28 +1,24 @@ -// LibraryProvider.java -// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany -// first published 01.10.2009 on http://yacy.net -// -// This is a part of YaCy -// -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ -// -// LICENSE -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +/** + * LibraryProvider.java + * Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany + * first published 01.10.2009 on http://yacy.net + * + * This file is part of YaCy Content Integration + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file COPYING.LESSER. + * If not, see . 
+ */ package de.anomic.data; @@ -39,7 +35,9 @@ import java.util.List; import java.util.Set; import java.util.TreeSet; -import net.yacy.document.geolocalization.OpenGeoDB; +import net.yacy.document.geolocalization.GeonamesLocalization; +import net.yacy.document.geolocalization.OpenGeoDBLocalization; +import net.yacy.document.geolocalization.OverarchingLocalization; import net.yacy.kelondro.logging.Log; public class LibraryProvider { @@ -50,17 +48,20 @@ public class LibraryProvider { public static final String disabledExtension = ".disabled"; public static DidYouMeanLibrary dymLib = new DidYouMeanLibrary(null); - public static OpenGeoDB geoDB = new OpenGeoDB(null, true); + public static OverarchingLocalization geoLoc = new OverarchingLocalization(); private static File dictSource = null; private static File dictRoot = null; public static enum Dictionary { - GEO0("geo0", + GEODB0("geo0", "http://downloads.sourceforge.net/project/opengeodb/Data/0.2.5a/opengeodb-0.2.5a-UTF8-sql.gz", "opengeodb-0.2.5a-UTF8-sql.gz"), - GEO1("geo1", + GEODB1("geo1", "http://fa-technik.adfc.de/code/opengeodb/dump/opengeodb-02621_2010-03-16.sql.gz", - "opengeodb-02621_2010-03-16.sql.gz"); + "opengeodb-02621_2010-03-16.sql.gz"), + GEON0("geon0", + "http://download.geonames.org/export/dump/cities1000.zip", + "cities1000.zip"); public String nickname, url, filename; private Dictionary(String nickname, String url, String filename) { @@ -95,18 +96,27 @@ public class LibraryProvider { integrateDeReWo(); initDidYouMean(); integrateOpenGeoDB(); + integrateGeonames(); } public static void integrateOpenGeoDB() { - File geo1 = Dictionary.GEO1.file(); - File geo0 = Dictionary.GEO0.file(); + File geo1 = Dictionary.GEODB1.file(); + File geo0 = Dictionary.GEODB0.file(); if (geo1.exists()) { - if (geo0.exists()) geo0.renameTo(Dictionary.GEO0.fileDisabled()); - geoDB = new OpenGeoDB(geo1, false); + if (geo0.exists()) geo0.renameTo(Dictionary.GEODB0.fileDisabled()); + 
geoLoc.addLocalization(Dictionary.GEODB1.nickname, new OpenGeoDBLocalization(geo1, false)); return; } if (geo0.exists()) { - geoDB = new OpenGeoDB(geo0, true); + geoLoc.addLocalization(Dictionary.GEODB0.nickname, new OpenGeoDBLocalization(geo0, false)); + return; + } + } + + public static void integrateGeonames() { + File geon = Dictionary.GEON0.file(); + if (geon.exists()) { + geoLoc.addLocalization(Dictionary.GEON0.nickname, new GeonamesLocalization(geon)); return; } } diff --git a/source/de/anomic/yacy/yacyNewsQueue.java b/source/de/anomic/yacy/yacyNewsQueue.java index 016adb3f1..32258f9af 100644 --- a/source/de/anomic/yacy/yacyNewsQueue.java +++ b/source/de/anomic/yacy/yacyNewsQueue.java @@ -53,7 +53,6 @@ import java.util.Iterator; import net.yacy.kelondro.index.Column; import net.yacy.kelondro.index.Row; import net.yacy.kelondro.index.RowSpaceExceededException; -import net.yacy.kelondro.index.Row.Entry; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.NaturalOrder; import net.yacy.kelondro.table.Table; diff --git a/source/net/yacy/document/geolocalization/Coordinates.java b/source/net/yacy/document/geolocalization/Coordinates.java index abf17a886..e7cb7a2ee 100644 --- a/source/net/yacy/document/geolocalization/Coordinates.java +++ b/source/net/yacy/document/geolocalization/Coordinates.java @@ -1,28 +1,24 @@ -// Coordinates.java -// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany -// first published 04.10.2009 on http://yacy.net -// -// This is a part of YaCy -// -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ -// -// LICENSE -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. 
-// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +/** + * Coordinates.java + * Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany + * first published 04.10.2009 on http://yacy.net + * + * This file is part of YaCy Content Integration + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file COPYING.LESSER. + * If not, see . + */ package net.yacy.document.geolocalization; diff --git a/source/net/yacy/document/geolocalization/GeonamesLocalization.java b/source/net/yacy/document/geolocalization/GeonamesLocalization.java new file mode 100644 index 000000000..3a9bd98de --- /dev/null +++ b/source/net/yacy/document/geolocalization/GeonamesLocalization.java @@ -0,0 +1,169 @@ +/** + * GeonamesLocalization.java + * Copyright 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. 
M., Germany + * first published 16.05.2010 on http://yacy.net + * + * This file is part of YaCy Content Integration + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file COPYING.LESSER. + * If not, see . + */ + +package net.yacy.document.geolocalization; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.text.Collator; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; + +import net.yacy.kelondro.logging.Log; + +public class GeonamesLocalization implements Localization { + + /* + The main 'geoname' table has the following fields : + --------------------------------------------------- + geonameid : integer id of record in geonames database + name : name of geographical point (utf8) varchar(200) + asciiname : name of geographical point in plain ascii characters, varchar(200) + alternatenames : alternatenames, comma separated varchar(5000) + latitude : latitude in decimal degrees (wgs84) + longitude : longitude in decimal degrees (wgs84) + feature class : see http://www.geonames.org/export/codes.html, char(1) + feature code : see 
http://www.geonames.org/export/codes.html, varchar(10) + country code : ISO-3166 2-letter country code, 2 characters + cc2 : alternate country codes, comma separated, ISO-3166 2-letter country code, 60 characters + admin1 code : fipscode (subject to change to iso code), see exceptions below, see file admin1Codes.txt for display names of this code; varchar(20) + admin2 code : code for the second administrative division, a county in the US, see file admin2Codes.txt; varchar(80) + admin3 code : code for third level administrative division, varchar(20) + admin4 code : code for fourth level administrative division, varchar(20) + population : bigint (8 byte int) + elevation : in meters, integer + gtopo30 : average elevation of 30'x30' (ca 900mx900m) area in meters, integer + timezone : the timezone id (see file timeZone.txt) + modification date : date of last modification in yyyy-MM-dd format + */ + + // use a collator to relax when distinguishing between lowercase and uppercase letters + private static final Collator insensitiveCollator = Collator.getInstance(Locale.US); + static { + insensitiveCollator.setStrength(Collator.SECONDARY); + insensitiveCollator.setDecomposition(Collator.NO_DECOMPOSITION); + } + + private final HashMap id2loc; + private final TreeMap> name2ids; + private final File file; + + public GeonamesLocalization(final File file) { + // this is a processing of the cities1000.zip file from http://download.geonames.org/export/dump/ + + this.file = file; + this.id2loc = new HashMap(); + this.name2ids = new TreeMap>(insensitiveCollator); + + if (file == null || !file.exists()) return; + BufferedReader reader; + try { + ZipFile zf = new ZipFile(file); + ZipEntry ze = zf.getEntry("cities1000.txt"); + InputStream is = zf.getInputStream(ze); + reader = new BufferedReader(new InputStreamReader(is, "UTF-8")); + } catch (IOException e) { + Log.logException(e); + return; + } + + // when an error occurs after this line, just accept it and work on + try { + String 
line; + String[] fields; + Set locnames; + while ((line = reader.readLine()) != null) { + if (line.length() == 0) continue; + fields = line.split("\t"); + int id = Integer.parseInt(fields[0]); + locnames = new HashSet(); + locnames.add(fields[1]); + locnames.add(fields[2]); + for (String s: fields[3].split(",")) locnames.add(s); + Location c = new Location(Double.parseDouble(fields[5]), Double.parseDouble(fields[4]), fields[1]); + this.id2loc.put(id, c); + for (String name: locnames) { + List locs = this.name2ids.get(name); + if (locs == null) locs = new ArrayList(1); + locs.add(id); + this.name2ids.put(name, locs); + } + } + } catch (IOException e) { + Log.logException(e); + } + } + + public Set find(String anyname, boolean locationexact) { + HashSet r = new HashSet(); + List c; + if (locationexact) { + c = this.name2ids.get(anyname); if (c != null) r.addAll(c); + } else { + SortedMap> cities = this.name2ids.tailMap(anyname); + for (Map.Entry> e: cities.entrySet()) { + if (e.getKey().toLowerCase().startsWith(anyname.toLowerCase())) r.addAll(e.getValue()); else break; + } + } + HashSet a = new HashSet(); + for (Integer e: r) { + Location w = this.id2loc.get(e); + if (w != null) a.add(w); + } + return a; + } + + public Set recommend(String s) { + Set a = new HashSet(); + s = s.trim().toLowerCase(); + SortedMap> t = this.name2ids.tailMap(s); + for (String r: t.keySet()) { + if (r.startsWith(s)) a.add(r); else break; + } + return a; + } + + public String nickname() { + return this.file.getName(); + } + + public int hashCode() { + return this.nickname().hashCode(); + } + + public boolean equals(Object other) { + if (!(other instanceof Localization)) return false; + return this.nickname().equals(((Localization) other).nickname()); + } +} diff --git a/source/net/yacy/document/geolocalization/Localization.java b/source/net/yacy/document/geolocalization/Localization.java new file mode 100644 index 000000000..9ac8aa0b1 --- /dev/null +++ 
b/source/net/yacy/document/geolocalization/Localization.java @@ -0,0 +1,68 @@ +/** + * Localization.java + * Copyright 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany + * first published 16.05.2010 on http://yacy.net + * + * This file is part of YaCy Content Integration + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file COPYING.LESSER. + * If not, see . + */ + + +package net.yacy.document.geolocalization; + +import java.util.Set; + +/** + * localization interface + * @author Michael Peter Christen + * + */ +public interface Localization { + + /** + * find a location by name + * @param anyname - a name of a location + * @param locationexact - if true, then only exact matches with the location are returned. 
if false also partially matching names + * @return a set of locations + */ + public Set find(String anyname, boolean locationexact); + + /** + * recommend a set of names according to a given name + * @param s a possibly partially matching name + * @return a set of names that match with the given name using the local dictionary of names + */ + public Set recommend(String s); + + /** + * return a nickname of the localization service + * @return the nickname + */ + public String nickname(); + + /** + * hashCode that must be used to distinguish localization services in hash sets + * @return the hash code, may be derived from the nickname + */ + public int hashCode(); + + /** + * compare localization services; to be used for hash sets with localization services + * @param other + * @return true if both objects are localization services and have the same nickname + */ + public boolean equals(Object other); +} diff --git a/source/net/yacy/document/geolocalization/Location.java b/source/net/yacy/document/geolocalization/Location.java index 52391066f..4fae94d32 100644 --- a/source/net/yacy/document/geolocalization/Location.java +++ b/source/net/yacy/document/geolocalization/Location.java @@ -1,28 +1,24 @@ -// Coordinates.java -// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany -// first published 08.10.2009 on http://yacy.net -// -// This is a part of YaCy -// -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ -// -// LICENSE -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. 
-// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +/** + * Location.java + * Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany + * first published 08.10.2009 on http://yacy.net + * + * This file is part of YaCy Content Integration + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file COPYING.LESSER. + * If not, see . + */ package net.yacy.document.geolocalization; @@ -49,4 +45,9 @@ public class Location extends Coordinates { return this.name; } + public boolean equals(Object loc) { + if (!(loc instanceof Location)) return false; + return super.equals(loc) && this.name.equals((Location) loc); + } + } diff --git a/source/net/yacy/document/geolocalization/OpenGeoDB.java b/source/net/yacy/document/geolocalization/OpenGeoDBLocalization.java similarity index 72% rename from source/net/yacy/document/geolocalization/OpenGeoDB.java rename to source/net/yacy/document/geolocalization/OpenGeoDBLocalization.java index 3b156c614..51fb2ca2d 100644 --- a/source/net/yacy/document/geolocalization/OpenGeoDB.java +++ b/source/net/yacy/document/geolocalization/OpenGeoDBLocalization.java @@ -1,28 +1,24 @@ -// OpenGeoDB.java -// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. 
M., Germany -// first published 04.10.2009 on http://yacy.net -// -// This is a part of YaCy -// -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ -// -// LICENSE -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +/** + * OpenGeoDBLocalization + * Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany + * first published 04.10.2009 on http://yacy.net + * + * This file is part of YaCy Content Integration + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file COPYING.LESSER. + * If not, see . 
+ */ package net.yacy.document.geolocalization; @@ -59,9 +55,9 @@ import net.yacy.kelondro.logging.Log; * This class will provide a super-fast access to the OpenGeoDB, * since all request are evaluated using data in the RAM. */ -public class OpenGeoDB { +public class OpenGeoDBLocalization implements Localization { - // use a collator to relax when distinguishing between lowercase und uppercase letters + // use a collator to relax when distinguishing between lowercase und uppercase letters private static final Collator insensitiveCollator = Collator.getInstance(Locale.US); static { insensitiveCollator.setStrength(Collator.SECONDARY); @@ -71,17 +67,19 @@ public class OpenGeoDB { private final HashMap locTypeHash2locType; private final HashMap id2loc; private final HashMap id2locTypeHash; - private final TreeMap> locationName2ids; + private final TreeMap> name2ids; private final TreeMap> kfz2ids; private final HashMap> predial2ids; private final HashMap zip2id; + private final File file; - public OpenGeoDB(final File file, boolean lonlat) { + public OpenGeoDBLocalization(final File file, boolean lonlat) { + this.file = file; this.locTypeHash2locType = new HashMap(); this.id2loc = new HashMap(); this.id2locTypeHash = new HashMap(); - this.locationName2ids = new TreeMap>(insensitiveCollator); + this.name2ids = new TreeMap>(insensitiveCollator); this.kfz2ids = new TreeMap>(insensitiveCollator); this.predial2ids = new HashMap>(); this.zip2id = new HashMap(); @@ -123,10 +121,10 @@ public class OpenGeoDB { if (v[1].equals("500100000")) { // Ortsname id = Integer.parseInt(v[0]); h = removeQuotes(v[2]); - List l = this.locationName2ids.get(h); + List l = this.name2ids.get(h); if (l == null) l = new ArrayList(1); l.add(id); - this.locationName2ids.put(h, l); + this.name2ids.put(h, l); Location loc = this.id2loc.get(id); if (loc != null) loc.setName(h); } else if (v[1].equals("500400000")) { // Vorwahl @@ -181,22 +179,20 @@ public class OpenGeoDB { * @param anyname * @return */ 
- public HashSet find(String anyname, boolean location, boolean locationexact, boolean kfz, boolean predial, boolean zip) { + public HashSet find(String anyname, boolean locationexact) { HashSet r = new HashSet(); List c; - if (location) { - if (locationexact) { - c = this.locationName2ids.get(anyname); if (c != null) r.addAll(c); - } else { - SortedMap> cities = this.locationName2ids.tailMap(anyname); - for (Map.Entry> e: cities.entrySet()) { - if (e.getKey().toLowerCase().startsWith(anyname.toLowerCase())) r.addAll(e.getValue()); else break; - } + if (locationexact) { + c = this.name2ids.get(anyname); if (c != null) r.addAll(c); + } else { + SortedMap> cities = this.name2ids.tailMap(anyname); + for (Map.Entry> e: cities.entrySet()) { + if (e.getKey().toLowerCase().startsWith(anyname.toLowerCase())) r.addAll(e.getValue()); else break; } + c = this.kfz2ids.get(anyname); if (c != null) r.addAll(c); + c = this.predial2ids.get(anyname); if (c != null) r.addAll(c); + Integer i = this.zip2id.get(anyname); if (i != null) r.add(i); } - if (kfz) {c = this.kfz2ids.get(anyname); if (c != null) r.addAll(c);} - if (predial) {c = this.predial2ids.get(anyname); if (c != null) r.addAll(c);} - if (zip) {Integer i = this.zip2id.get(anyname); if (i != null) r.add(i);} HashSet a = new HashSet(); for (Integer e: r) { Location w = this.id2loc.get(e); @@ -213,10 +209,23 @@ public class OpenGeoDB { public Set recommend(String s) { Set a = new HashSet(); s = s.trim().toLowerCase(); - SortedMap> t = this.locationName2ids.tailMap(s); + SortedMap> t = this.name2ids.tailMap(s); for (String r: t.keySet()) { if (r.startsWith(s)) a.add(r); else break; } return a; } + + public String nickname() { + return this.file.getName(); + } + + public int hashCode() { + return this.nickname().hashCode(); + } + + public boolean equals(Object other) { + if (!(other instanceof Localization)) return false; + return this.nickname().equals(((Localization) other).nickname()); + } } diff --git 
a/source/net/yacy/document/geolocalization/OverarchingLocalization.java b/source/net/yacy/document/geolocalization/OverarchingLocalization.java new file mode 100644 index 000000000..ed5566b36 --- /dev/null +++ b/source/net/yacy/document/geolocalization/OverarchingLocalization.java @@ -0,0 +1,93 @@ +/** + * OverarchingLocalization.java + * Copyright 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany + * first published 16.05.2010 on http://yacy.net + * + * This file is part of YaCy Content Integration + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file COPYING.LESSER. + * If not, see . 
+ */ + +package net.yacy.document.geolocalization; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +public class OverarchingLocalization implements Localization { + + private Map services; + + /** + * create a new overarching localization object + */ + public OverarchingLocalization() { + this.services = new HashMap(); + } + + /** + * add a localization service + * @param nickname the nickname of the service + * @param service the service + */ + public void addLocalization(String nickname, Localization service) { + this.services.put(nickname, service); + } + + /** + * remove a localization service + * @param nickname + */ + public void removeLocalization(String nickname) { + this.services.remove(nickname); + } + + /** + * find (a set of) locations + */ + public Set find(String anyname, boolean locationexact) { + Set locations = new HashSet(); + for (Localization service: this.services.values()) { + locations.addAll(service.find(anyname, locationexact)); + } + return locations; + } + + /** + * recommend location names + */ + public Set recommend(String s) { + Set recommendations = new HashSet(); + for (Localization service: this.services.values()) { + recommendations.addAll(service.recommend(s)); + } + return recommendations; + } + + public String nickname() { + return "oa"; + } + + public int hashCode() { + return this.nickname().hashCode(); + } + + public boolean equals(Object other) { + if (!(other instanceof Localization)) return false; + return this.nickname().equals(((Localization) other).nickname()); + } + +}