From e43e61e502bcac185122aac65b2bf1a3955d8067 Mon Sep 17 00:00:00 2001
From: orbiter
Date: Sat, 15 May 2010 23:49:30 +0000
Subject: [PATCH] added another geolocalization data source: GeoNames - added
downloader option in DictionaryLoader - added generalization (interfaces and
overarching localization) - more abstraction using the libraries
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6879 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
htroot/DictionaryLoader_p.html | 44 ++++-
htroot/DictionaryLoader_p.java | 67 +++++--
htroot/yacysearch.html | 2 +-
htroot/yacysearch.java | 2 +-
htroot/yacysearch_location.java | 2 +-
source/de/anomic/data/DidYouMean.java | 2 +-
source/de/anomic/data/LibraryProvider.java | 80 +++++----
source/de/anomic/yacy/yacyNewsQueue.java | 1 -
.../document/geolocalization/Coordinates.java | 46 +++--
.../geolocalization/GeonamesLocalization.java | 169 ++++++++++++++++++
.../geolocalization/Localization.java | 68 +++++++
.../document/geolocalization/Location.java | 51 +++---
...nGeoDB.java => OpenGeoDBLocalization.java} | 99 +++++-----
.../OverarchingLocalization.java | 93 ++++++++++
14 files changed, 575 insertions(+), 151 deletions(-)
create mode 100644 source/net/yacy/document/geolocalization/GeonamesLocalization.java
create mode 100644 source/net/yacy/document/geolocalization/Localization.java
rename source/net/yacy/document/geolocalization/{OpenGeoDB.java => OpenGeoDBLocalization.java} (72%)
create mode 100644 source/net/yacy/document/geolocalization/OverarchingLocalization.java
diff --git a/htroot/DictionaryLoader_p.html b/htroot/DictionaryLoader_p.html
index 6cc04f465..684e3cddc 100644
--- a/htroot/DictionaryLoader_p.html
+++ b/htroot/DictionaryLoader_p.html
@@ -1,4 +1,4 @@
-
+1
YaCy '#[clientname]#': Dictionary Loader
@@ -18,8 +18,46 @@
- Geographic information provided by OpenGeoDB, Map provided by OpenStreetMap
+ Map (c) by OpenStreetMap and contributors, CC-BY-SA
#(/geoinfo)#
diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java
index 32db77d73..5f3ed1044 100644
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@@ -560,7 +560,7 @@ public class yacysearch {
}
// find geographic info
- Set coordinates = LibraryProvider.geoDB.find(originalquerystring, true, false, true, true, true);
+ Set coordinates = LibraryProvider.geoLoc.find(originalquerystring, false);
if (coordinates == null || coordinates.isEmpty() || offset > 0) {
prop.put("geoinfo", "0");
} else {
diff --git a/htroot/yacysearch_location.java b/htroot/yacysearch_location.java
index 7e7736385..09e04ac5e 100644
--- a/htroot/yacysearch_location.java
+++ b/htroot/yacysearch_location.java
@@ -69,7 +69,7 @@ public class yacysearch_location {
String subject = "";
for (String s: message.getSubject()) subject += " " + s;
words += subject;
- for (String word: words.split(" ")) if (word.length() >= 3) locations.addAll(LibraryProvider.geoDB.find(word, true, true, false, false, false));
+ for (String word: words.split(" ")) if (word.length() >= 3) locations.addAll(LibraryProvider.geoLoc.find(word, true));
String locnames = "";
for (Location location: locations) locnames += ", " + location.getName();
diff --git a/source/de/anomic/data/DidYouMean.java b/source/de/anomic/data/DidYouMean.java
index f800e7536..4bc86050f 100644
--- a/source/de/anomic/data/DidYouMean.java
+++ b/source/de/anomic/data/DidYouMean.java
@@ -221,7 +221,7 @@ public class DidYouMean {
public void test(final String s) throws InterruptedException {
Set libr = LibraryProvider.dymLib.recommend(s);
- libr.addAll(LibraryProvider.geoDB.recommend(s));
+ libr.addAll(LibraryProvider.geoLoc.recommend(s));
if (!libr.isEmpty()) createGen = false;
for (final String t: libr) {
guessLib.put(t);
diff --git a/source/de/anomic/data/LibraryProvider.java b/source/de/anomic/data/LibraryProvider.java
index e2de6183f..e864f1970 100644
--- a/source/de/anomic/data/LibraryProvider.java
+++ b/source/de/anomic/data/LibraryProvider.java
@@ -1,28 +1,24 @@
-// LibraryProvider.java
-// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
-// first published 01.10.2009 on http://yacy.net
-//
-// This is a part of YaCy
-//
-// $LastChangedDate$
-// $LastChangedRevision$
-// $LastChangedBy$
-//
-// LICENSE
-//
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+/**
+ * LibraryProvider.java
+ * Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
+ * first published 01.10.2009 on http://yacy.net
+ *
+ * This file is part of YaCy Content Integration
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file COPYING.LESSER.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
package de.anomic.data;
@@ -39,7 +35,9 @@ import java.util.List;
import java.util.Set;
import java.util.TreeSet;
-import net.yacy.document.geolocalization.OpenGeoDB;
+import net.yacy.document.geolocalization.GeonamesLocalization;
+import net.yacy.document.geolocalization.OpenGeoDBLocalization;
+import net.yacy.document.geolocalization.OverarchingLocalization;
import net.yacy.kelondro.logging.Log;
public class LibraryProvider {
@@ -50,17 +48,20 @@ public class LibraryProvider {
public static final String disabledExtension = ".disabled";
public static DidYouMeanLibrary dymLib = new DidYouMeanLibrary(null);
- public static OpenGeoDB geoDB = new OpenGeoDB(null, true);
+ public static OverarchingLocalization geoLoc = new OverarchingLocalization();
private static File dictSource = null;
private static File dictRoot = null;
public static enum Dictionary {
- GEO0("geo0",
+ GEODB0("geo0",
"http://downloads.sourceforge.net/project/opengeodb/Data/0.2.5a/opengeodb-0.2.5a-UTF8-sql.gz",
"opengeodb-0.2.5a-UTF8-sql.gz"),
- GEO1("geo1",
+ GEODB1("geo1",
"http://fa-technik.adfc.de/code/opengeodb/dump/opengeodb-02621_2010-03-16.sql.gz",
- "opengeodb-02621_2010-03-16.sql.gz");
+ "opengeodb-02621_2010-03-16.sql.gz"),
+ GEON0("geon0",
+ "http://download.geonames.org/export/dump/cities1000.zip",
+ "cities1000.zip");
public String nickname, url, filename;
private Dictionary(String nickname, String url, String filename) {
@@ -95,18 +96,27 @@ public class LibraryProvider {
integrateDeReWo();
initDidYouMean();
integrateOpenGeoDB();
+ integrateGeonames();
}
public static void integrateOpenGeoDB() {
- File geo1 = Dictionary.GEO1.file();
- File geo0 = Dictionary.GEO0.file();
+ File geo1 = Dictionary.GEODB1.file();
+ File geo0 = Dictionary.GEODB0.file();
if (geo1.exists()) {
- if (geo0.exists()) geo0.renameTo(Dictionary.GEO0.fileDisabled());
- geoDB = new OpenGeoDB(geo1, false);
+ if (geo0.exists()) geo0.renameTo(Dictionary.GEODB0.fileDisabled());
+ geoLoc.addLocalization(Dictionary.GEODB1.nickname, new OpenGeoDBLocalization(geo1, false));
return;
}
if (geo0.exists()) {
- geoDB = new OpenGeoDB(geo0, true);
+ geoLoc.addLocalization(Dictionary.GEODB0.nickname, new OpenGeoDBLocalization(geo0, false));
+ return;
+ }
+ }
+
+ public static void integrateGeonames() {
+ File geon = Dictionary.GEON0.file();
+ if (geon.exists()) {
+ geoLoc.addLocalization(Dictionary.GEON0.nickname, new GeonamesLocalization(geon));
return;
}
}
diff --git a/source/de/anomic/yacy/yacyNewsQueue.java b/source/de/anomic/yacy/yacyNewsQueue.java
index 016adb3f1..32258f9af 100644
--- a/source/de/anomic/yacy/yacyNewsQueue.java
+++ b/source/de/anomic/yacy/yacyNewsQueue.java
@@ -53,7 +53,6 @@ import java.util.Iterator;
import net.yacy.kelondro.index.Column;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.RowSpaceExceededException;
-import net.yacy.kelondro.index.Row.Entry;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.kelondro.table.Table;
diff --git a/source/net/yacy/document/geolocalization/Coordinates.java b/source/net/yacy/document/geolocalization/Coordinates.java
index abf17a886..e7cb7a2ee 100644
--- a/source/net/yacy/document/geolocalization/Coordinates.java
+++ b/source/net/yacy/document/geolocalization/Coordinates.java
@@ -1,28 +1,24 @@
-// Coordinates.java
-// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
-// first published 04.10.2009 on http://yacy.net
-//
-// This is a part of YaCy
-//
-// $LastChangedDate$
-// $LastChangedRevision$
-// $LastChangedBy$
-//
-// LICENSE
-//
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+/**
+ * Coordinates.java
+ * Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
+ * first published 04.10.2009 on http://yacy.net
+ *
+ * This file is part of YaCy Content Integration
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file COPYING.LESSER.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
package net.yacy.document.geolocalization;
diff --git a/source/net/yacy/document/geolocalization/GeonamesLocalization.java b/source/net/yacy/document/geolocalization/GeonamesLocalization.java
new file mode 100644
index 000000000..3a9bd98de
--- /dev/null
+++ b/source/net/yacy/document/geolocalization/GeonamesLocalization.java
@@ -0,0 +1,169 @@
+/**
+ * GeonamesLocalization.java
+ * Copyright 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
+ * first published 16.05.2010 on http://yacy.net
+ *
+ * This file is part of YaCy Content Integration
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file COPYING.LESSER.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.document.geolocalization;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.text.Collator;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
+
+import net.yacy.kelondro.logging.Log;
+
+public class GeonamesLocalization implements Localization {
+
+    /*
+     The main 'geoname' table has the following fields :
+     ---------------------------------------------------
+     geonameid : integer id of record in geonames database
+     name : name of geographical point (utf8) varchar(200)
+     asciiname : name of geographical point in plain ascii characters, varchar(200)
+     alternatenames : alternatenames, comma separated varchar(5000)
+     latitude : latitude in decimal degrees (wgs84)
+     longitude : longitude in decimal degrees (wgs84)
+     feature class : see http://www.geonames.org/export/codes.html, char(1)
+     feature code : see http://www.geonames.org/export/codes.html, varchar(10)
+     country code : ISO-3166 2-letter country code, 2 characters
+     cc2 : alternate country codes, comma separated, ISO-3166 2-letter country code, 60 characters
+     admin1 code : fipscode (subject to change to iso code), see exceptions below, see file admin1Codes.txt for display names of this code; varchar(20)
+     admin2 code : code for the second administrative division, a county in the US, see file admin2Codes.txt; varchar(80)
+     admin3 code : code for third level administrative division, varchar(20)
+     admin4 code : code for fourth level administrative division, varchar(20)
+     population : bigint (8 byte int)
+     elevation : in meters, integer
+     gtopo30 : average elevation of 30'x30' (ca 900mx900m) area in meters, integer
+     timezone : the timezone id (see file timeZone.txt)
+     modification date : date of last modification in yyyy-MM-dd format
+     */
+
+    // use a collator to relax when distinguishing between lowercase and uppercase letters
+    private static final Collator insensitiveCollator = Collator.getInstance(Locale.US);
+    static {
+        insensitiveCollator.setStrength(Collator.SECONDARY);
+        insensitiveCollator.setDecomposition(Collator.NO_DECOMPOSITION);
+    }
+
+    private final HashMap<Integer, Location> id2loc;          // geonameid -> location
+    private final TreeMap<String, List<Integer>> name2ids;    // any known name -> geonameids carrying that name
+    private final File file;
+    /** load the dump from the given zip file; a null or missing file yields an empty localization */
+    public GeonamesLocalization(final File file) {
+        // this is a processing of the cities1000.zip file from http://download.geonames.org/export/dump/
+
+        this.file = file;
+        this.id2loc = new HashMap<Integer, Location>();
+        this.name2ids = new TreeMap<String, List<Integer>>(insensitiveCollator);
+
+        if (file == null || !file.exists()) return;
+        ZipFile zf = null;
+        try {
+            zf = new ZipFile(file);
+            ZipEntry ze = zf.getEntry("cities1000.txt");
+            if (ze == null) throw new IOException("cities1000.txt not found in " + file);
+            BufferedReader reader = new BufferedReader(new InputStreamReader(zf.getInputStream(ze), "UTF-8"));
+            String line;
+            while ((line = reader.readLine()) != null) {
+                // skip empty or truncated records; the fields 0..5 are needed below
+                String[] fields = line.split("\t");
+                if (fields.length < 6) continue;
+                int id;
+                Location c;
+                try {
+                    id = Integer.parseInt(fields[0]);
+                    c = new Location(Double.parseDouble(fields[5]), Double.parseDouble(fields[4]), fields[1]);
+                } catch (NumberFormatException e) {
+                    continue; // malformed record: just accept it and work on
+                }
+                Set<String> locnames = new HashSet<String>();
+                locnames.add(fields[1]);
+                locnames.add(fields[2]);
+                for (String s: fields[3].split(",")) locnames.add(s);
+                this.id2loc.put(id, c);
+                for (String name: locnames) {
+                    List<Integer> locs = this.name2ids.get(name);
+                    if (locs == null) locs = new ArrayList<Integer>(1);
+                    locs.add(id);
+                    this.name2ids.put(name, locs);
+                }
+            }
+        } catch (IOException e) {
+            Log.logException(e);
+        } finally {
+            // closing the zip file also closes the entry stream that was opened from it
+            if (zf != null) try { zf.close(); } catch (IOException e) { Log.logException(e); }
+        }
+    }
+    /** find locations by name: exact lookup if locationexact is true, otherwise a case-insensitive prefix match */
+    public Set<Location> find(String anyname, boolean locationexact) {
+        HashSet<Integer> r = new HashSet<Integer>();
+        if (locationexact) {
+            List<Integer> c = this.name2ids.get(anyname); if (c != null) r.addAll(c);
+        } else {
+            final String prefix = anyname.toLowerCase(); // loop-invariant, computed only once
+            SortedMap<String, List<Integer>> cities = this.name2ids.tailMap(anyname);
+            for (Map.Entry<String, List<Integer>> e: cities.entrySet()) {
+                if (e.getKey().toLowerCase().startsWith(prefix)) r.addAll(e.getValue()); else break;
+            }
+        }
+        HashSet<Location> a = new HashSet<Location>();
+        for (Integer e: r) {
+            Location w = this.id2loc.get(e);
+            if (w != null) a.add(w);
+        }
+        return a;
+    }
+    /** collect the known names that start with the given (trimmed, lower-cased) string */
+    public Set<String> recommend(String s) {
+        Set<String> a = new HashSet<String>();
+        s = s.trim().toLowerCase();
+        SortedMap<String, List<Integer>> t = this.name2ids.tailMap(s);
+        for (String r: t.keySet()) {
+            if (r.toLowerCase().startsWith(s)) a.add(r); else break; // keys keep their original case, s is lower case
+        }
+        return a;
+    }
+    /** the name of the source file; an empty string if this object was created without a file */
+    public String nickname() {
+        return (this.file == null) ? "" : this.file.getName();
+    }
+    /** hash code derived from the nickname, consistent with equals */
+    public int hashCode() {
+        return this.nickname().hashCode();
+    }
+    /** two localization services are considered equal if they have the same nickname */
+    public boolean equals(Object other) {
+        if (!(other instanceof Localization)) return false;
+        return this.nickname().equals(((Localization) other).nickname());
+    }
+}
diff --git a/source/net/yacy/document/geolocalization/Localization.java b/source/net/yacy/document/geolocalization/Localization.java
new file mode 100644
index 000000000..9ac8aa0b1
--- /dev/null
+++ b/source/net/yacy/document/geolocalization/Localization.java
@@ -0,0 +1,68 @@
+/**
+ * Localization.java
+ * Copyright 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
+ * first published 16.05.2010 on http://yacy.net
+ *
+ * This file is part of YaCy Content Integration
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file COPYING.LESSER.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+package net.yacy.document.geolocalization;
+
+import java.util.Set;
+
+/**
+ * localization interface: a service that maps location names to locations
+ * @author Michael Peter Christen
+ *
+ */
+public interface Localization {
+
+    /**
+     * find a location by name
+     * @param anyname - a name of a location
+     * @param locationexact - if true, then only exact matches with the location are returned. if false also partially matching names
+     * @return a set of locations
+     */
+    public Set<Location> find(String anyname, boolean locationexact);
+
+    /**
+     * recommend a set of names according to a given name
+     * @param s a possibly partially matching name
+     * @return a set of names that match with the given name using the local dictionary of names
+     */
+    public Set<String> recommend(String s);
+
+    /**
+     * return a nickname of the localization service
+     * @return the nickname
+     */
+    public String nickname();
+
+    /**
+     * hashCode that must be used to distinguish localization services in hash sets
+     * @return the hash code, may be derived from the nickname
+     */
+    public int hashCode();
+
+    /**
+     * compare localization services; to be used for hash sets with localization services
+     * @param other the object to compare with
+     * @return true if both objects are localization services and have the same nickname
+     */
+    public boolean equals(Object other);
+}
diff --git a/source/net/yacy/document/geolocalization/Location.java b/source/net/yacy/document/geolocalization/Location.java
index 52391066f..4fae94d32 100644
--- a/source/net/yacy/document/geolocalization/Location.java
+++ b/source/net/yacy/document/geolocalization/Location.java
@@ -1,28 +1,24 @@
-// Coordinates.java
-// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
-// first published 08.10.2009 on http://yacy.net
-//
-// This is a part of YaCy
-//
-// $LastChangedDate$
-// $LastChangedRevision$
-// $LastChangedBy$
-//
-// LICENSE
-//
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+/**
+ * Location.java
+ * Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
+ * first published 08.10.2009 on http://yacy.net
+ *
+ * This file is part of YaCy Content Integration
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file COPYING.LESSER.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
package net.yacy.document.geolocalization;
@@ -49,4 +45,9 @@ public class Location extends Coordinates {
return this.name;
}
+    public boolean equals(Object loc) {
+        if (!(loc instanceof Location) || !super.equals(loc)) return false; // superclass part must be equal, too
+        final String otherName = ((Location) loc).name; // compare name with name, not with the Location object
+        return (this.name == null) ? otherName == null : this.name.equals(otherName); // null-safe name comparison
+    }
}
diff --git a/source/net/yacy/document/geolocalization/OpenGeoDB.java b/source/net/yacy/document/geolocalization/OpenGeoDBLocalization.java
similarity index 72%
rename from source/net/yacy/document/geolocalization/OpenGeoDB.java
rename to source/net/yacy/document/geolocalization/OpenGeoDBLocalization.java
index 3b156c614..51fb2ca2d 100644
--- a/source/net/yacy/document/geolocalization/OpenGeoDB.java
+++ b/source/net/yacy/document/geolocalization/OpenGeoDBLocalization.java
@@ -1,28 +1,24 @@
-// OpenGeoDB.java
-// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
-// first published 04.10.2009 on http://yacy.net
-//
-// This is a part of YaCy
-//
-// $LastChangedDate$
-// $LastChangedRevision$
-// $LastChangedBy$
-//
-// LICENSE
-//
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+/**
+ * OpenGeoDBLocalization
+ * Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
+ * first published 04.10.2009 on http://yacy.net
+ *
+ * This file is part of YaCy Content Integration
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file COPYING.LESSER.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
package net.yacy.document.geolocalization;
@@ -59,9 +55,9 @@ import net.yacy.kelondro.logging.Log;
* This class will provide a super-fast access to the OpenGeoDB,
* since all request are evaluated using data in the RAM.
*/
-public class OpenGeoDB {
+public class OpenGeoDBLocalization implements Localization {
- // use a collator to relax when distinguishing between lowercase und uppercase letters
+ // use a collator to relax when distinguishing between lowercase and uppercase letters
private static final Collator insensitiveCollator = Collator.getInstance(Locale.US);
static {
insensitiveCollator.setStrength(Collator.SECONDARY);
@@ -71,17 +67,19 @@ public class OpenGeoDB {
private final HashMap locTypeHash2locType;
private final HashMap id2loc;
private final HashMap id2locTypeHash;
- private final TreeMap> locationName2ids;
+ private final TreeMap> name2ids;
private final TreeMap> kfz2ids;
private final HashMap> predial2ids;
private final HashMap zip2id;
+ private final File file;
- public OpenGeoDB(final File file, boolean lonlat) {
+ public OpenGeoDBLocalization(final File file, boolean lonlat) {
+ this.file = file;
this.locTypeHash2locType = new HashMap();
this.id2loc = new HashMap();
this.id2locTypeHash = new HashMap();
- this.locationName2ids = new TreeMap>(insensitiveCollator);
+ this.name2ids = new TreeMap>(insensitiveCollator);
this.kfz2ids = new TreeMap>(insensitiveCollator);
this.predial2ids = new HashMap>();
this.zip2id = new HashMap();
@@ -123,10 +121,10 @@ public class OpenGeoDB {
if (v[1].equals("500100000")) { // Ortsname
id = Integer.parseInt(v[0]);
h = removeQuotes(v[2]);
- List l = this.locationName2ids.get(h);
+ List l = this.name2ids.get(h);
if (l == null) l = new ArrayList(1);
l.add(id);
- this.locationName2ids.put(h, l);
+ this.name2ids.put(h, l);
Location loc = this.id2loc.get(id);
if (loc != null) loc.setName(h);
} else if (v[1].equals("500400000")) { // Vorwahl
@@ -181,22 +179,20 @@ public class OpenGeoDB {
* @param anyname
* @return
*/
- public HashSet find(String anyname, boolean location, boolean locationexact, boolean kfz, boolean predial, boolean zip) {
+ public HashSet find(String anyname, boolean locationexact) {
HashSet r = new HashSet();
List c;
- if (location) {
- if (locationexact) {
- c = this.locationName2ids.get(anyname); if (c != null) r.addAll(c);
- } else {
- SortedMap> cities = this.locationName2ids.tailMap(anyname);
- for (Map.Entry> e: cities.entrySet()) {
- if (e.getKey().toLowerCase().startsWith(anyname.toLowerCase())) r.addAll(e.getValue()); else break;
- }
+ if (locationexact) {
+ c = this.name2ids.get(anyname); if (c != null) r.addAll(c);
+ } else {
+ SortedMap> cities = this.name2ids.tailMap(anyname);
+ for (Map.Entry> e: cities.entrySet()) {
+ if (e.getKey().toLowerCase().startsWith(anyname.toLowerCase())) r.addAll(e.getValue()); else break;
}
+ c = this.kfz2ids.get(anyname); if (c != null) r.addAll(c);
+ c = this.predial2ids.get(anyname); if (c != null) r.addAll(c);
+ Integer i = this.zip2id.get(anyname); if (i != null) r.add(i);
}
- if (kfz) {c = this.kfz2ids.get(anyname); if (c != null) r.addAll(c);}
- if (predial) {c = this.predial2ids.get(anyname); if (c != null) r.addAll(c);}
- if (zip) {Integer i = this.zip2id.get(anyname); if (i != null) r.add(i);}
HashSet a = new HashSet();
for (Integer e: r) {
Location w = this.id2loc.get(e);
@@ -213,10 +209,23 @@ public class OpenGeoDB {
public Set recommend(String s) {
Set a = new HashSet();
s = s.trim().toLowerCase();
- SortedMap> t = this.locationName2ids.tailMap(s);
+ SortedMap> t = this.name2ids.tailMap(s);
for (String r: t.keySet()) {
if (r.startsWith(s)) a.add(r); else break;
}
return a;
}
+
+ public String nickname() {
+ return this.file.getName();
+ }
+
+ public int hashCode() {
+ return this.nickname().hashCode();
+ }
+
+ public boolean equals(Object other) {
+ if (!(other instanceof Localization)) return false;
+ return this.nickname().equals(((Localization) other).nickname());
+ }
}
diff --git a/source/net/yacy/document/geolocalization/OverarchingLocalization.java b/source/net/yacy/document/geolocalization/OverarchingLocalization.java
new file mode 100644
index 000000000..ed5566b36
--- /dev/null
+++ b/source/net/yacy/document/geolocalization/OverarchingLocalization.java
@@ -0,0 +1,93 @@
+/**
+ * OverarchingLocalization.java
+ * Copyright 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
+ * first published 16.05.2010 on http://yacy.net
+ *
+ * This file is part of YaCy Content Integration
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file COPYING.LESSER.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.document.geolocalization;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+public class OverarchingLocalization implements Localization {
+    // the registered services, keyed by the nickname given to addLocalization
+    private final Map<String, Localization> services;
+
+    /**
+     * create a new overarching localization object
+     */
+    public OverarchingLocalization() {
+        this.services = new HashMap<String, Localization>();
+    }
+
+    /**
+     * add a localization service
+     * @param nickname the nickname of the service
+     * @param service the service
+     */
+    public void addLocalization(String nickname, Localization service) {
+        this.services.put(nickname, service);
+    }
+
+    /**
+     * remove a localization service
+     * @param nickname the nickname that was used when the service was added
+     */
+    public void removeLocalization(String nickname) {
+        this.services.remove(nickname);
+    }
+
+    /**
+     * find (a set of) locations: the union of the results of all registered services
+     */
+    public Set<Location> find(String anyname, boolean locationexact) {
+        Set<Location> locations = new HashSet<Location>();
+        for (Localization service: this.services.values()) {
+            locations.addAll(service.find(anyname, locationexact));
+        }
+        return locations;
+    }
+
+    /**
+     * recommend location names: the union of the recommendations of all registered services
+     */
+    public Set<String> recommend(String s) {
+        Set<String> recommendations = new HashSet<String>();
+        for (Localization service: this.services.values()) {
+            recommendations.addAll(service.recommend(s));
+        }
+        return recommendations;
+    }
+    /** the fixed nickname of the overarching service */
+    public String nickname() {
+        return "oa";
+    }
+    /** hash code derived from the nickname, consistent with equals */
+    public int hashCode() {
+        return this.nickname().hashCode();
+    }
+    /** two localization services are considered equal if they have the same nickname */
+    public boolean equals(Object other) {
+        if (!(other instanceof Localization)) return false;
+        return this.nickname().equals(((Localization) other).nickname());
+    }
+
+}