From 789c6b26ce0698bcad19f93f0acfe3e30017c6b6 Mon Sep 17 00:00:00 2001 From: orbiter Date: Tue, 11 May 2010 12:58:05 +0000 Subject: [PATCH] added a location search service: using the following servlet/example: http://localhost:8080/yacysearch_location.kml?query=berlin&maximumTime=2000&maximumRecords=100 This will open any application that can consume kml data (which will probably be google earth) on your computer and displays the search result as positions on a map git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6865 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/yacysearch.java | 2 +- htroot/yacysearch_location.java | 98 +++++++++++++++++++ htroot/yacysearch_location.kml | 20 ++++ htroot/yacysearch_location.xml | 20 ++++ htroot/yacysearchitem.java | 5 +- htroot/yacysearchitem.xml | 6 +- source/de/anomic/search/Switchboard.java | 20 ---- source/de/anomic/yacy/yacyClient.java | 3 +- .../net/yacy/document/content/RSSMessage.java | 4 + .../document/geolocalization/OpenGeoDB.java | 22 +++-- .../net/yacy/kelondro/util/DateFormatter.java | 13 ++- 11 files changed, 174 insertions(+), 39 deletions(-) create mode 100644 htroot/yacysearch_location.java create mode 100644 htroot/yacysearch_location.kml create mode 100644 htroot/yacysearch_location.xml diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 900e771a3..59b2393e8 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -560,7 +560,7 @@ public class yacysearch { } // find geographic info - Set coordinates = LibraryProvider.geoDB.find(originalquerystring, false); + Set coordinates = LibraryProvider.geoDB.find(originalquerystring, true, false, true, true, true); if (coordinates == null || coordinates.isEmpty() || offset > 0) { prop.put("geoinfo", "0"); } else { diff --git a/htroot/yacysearch_location.java b/htroot/yacysearch_location.java new file mode 100644 index 000000000..2dbb4358c --- /dev/null +++ b/htroot/yacysearch_location.java @@ -0,0 +1,98 @@ +// 
yacysearch_location.java
+// -----------------------
+// (C) 2010 by Michael Peter Christen; mc@yacy.net
+// first published 09.05.2010 in Frankfurt, Germany on http://yacy.net
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.concurrent.BlockingQueue;
+
+import net.yacy.document.content.RSSMessage;
+import net.yacy.document.geolocalization.Location;
+import de.anomic.data.LibraryProvider;
+import de.anomic.http.server.HeaderFramework;
+import de.anomic.http.server.RequestHeader;
+import de.anomic.server.serverObjects;
+import de.anomic.server.serverSwitch;
+import de.anomic.yacy.yacyClient;
+
+public class yacysearch_location {
+    /**
+     * Servlet for yacysearch_location.kml/.xml, e.g. http://localhost:8080/yacysearch_location.kml?query=berlin&maximumTime=2000&maximumRecords=100
+     * @param header request header; a CONNECTION_PROP_EXT value of "kml" or "xml" switches the placemark output on
+     * @param post request arguments "query", "maximumTime" (default 1000) and "maximumRecords" (default 100); may be null
+     * @param env server environment, not used by this servlet
+     * @return template properties; "kml" stays 0 when post is null or the extension is neither "kml" nor "xml"
+     */
+    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
+        final serverObjects prop = new serverObjects();
+        prop.put("kml", 0);
+        if (post == null) return prop;
+        if (header.get(HeaderFramework.CONNECTION_PROP_EXT, "").equals("kml") || header.get(HeaderFramework.CONNECTION_PROP_EXT, "").equals("xml")) {
+            prop.put("kml", 1);
+            String query = post.get("query", "");
+            long maximumTime = post.getLong("maximumTime", 1000);
+            int maximumRecords = post.getInt("maximumRecords", 100);
+
+            // get a queue of search results; the element type is needed so that take() yields an RSSMessage
+            BlockingQueue<RSSMessage> results = yacyClient.search(null, query, false, false, maximumTime, Integer.MAX_VALUE);
+            // take the results until the POISON marker arrives and compute some locations
+            RSSMessage message;
+            int placemarkCounter = 0;
+            try {
+                loop: while ((message = results.take()) != RSSMessage.POISON) {
+                    // find all associated locations: every word of at least 3 characters is looked up as an exact location name
+                    Set<Location> locations = new HashSet<Location>();
+                    String words = message.getTitle() + " " + message.getCopyright() + " " + message.getAuthor();
+                    String subject = "";
+                    for (String s: message.getSubject()) subject += " " + s;
+                    words += subject;
+                    for (String word: words.split(" ")) if (word.length() >= 3) locations.addAll(LibraryProvider.geoDB.find(word, true, true, false, false, false));
+
+                    if (locations.size() > 0) {
+                        String locnames = "";
+                        for (Location location: locations) locnames += ", " + location.getName();
+                        locnames = locnames.substring(2);
+                        // write for all locations a point to this message (NOTE(review): put() may not XML-escape these values - confirm)
+                        prop.put("kml_placemark_" + placemarkCounter + "_location", locnames);
+                        prop.put("kml_placemark_" + placemarkCounter + "_name", message.getTitle());
+                        prop.put("kml_placemark_" + placemarkCounter + "_author", message.getAuthor());
+                        prop.put("kml_placemark_" + placemarkCounter + "_copyright", message.getCopyright());
+                        prop.put("kml_placemark_" + placemarkCounter + "_subject", subject.trim());
+                        prop.put("kml_placemark_" + placemarkCounter + "_description", message.getDescription());
+                        prop.put("kml_placemark_" + placemarkCounter + "_date", message.getPubDate());
+                        prop.put("kml_placemark_" + placemarkCounter + "_url", message.getLink());
+                        int pc = 0;
+                        for (Location location: locations) {
+                            prop.put("kml_placemark_" + placemarkCounter + "_point_" + pc + "_name", location.getName());
+                            prop.put("kml_placemark_" + placemarkCounter + "_point_" + pc + "_lon", location.lon());
+                            prop.put("kml_placemark_" + placemarkCounter + "_point_" + pc + "_lat", location.lat());
+                            pc++;
+                        }
+                        prop.put("kml_placemark_" + placemarkCounter + "_point", pc);
+                        placemarkCounter++;
+                        if (placemarkCounter >= maximumRecords) break loop;
+                    }
+                }
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt(); // keep the interrupt visible to the caller; placemarks found so far are still counted below
+            }
+            prop.put("kml_placemark", placemarkCounter);
+        }
+        return prop;
+    }
+}
diff --git a/htroot/yacysearch_location.kml b/htroot/yacysearch_location.kml new file mode 100644 index 000000000..a8272e530 --- /dev/null +++ b/htroot/yacysearch_location.kml @@ -0,0 +1,20 @@ +#(kml)#:: + + + #{placemark}# + + #[name]# + Location: #[location]#

Author: #[author]#

Publisher: #[copyright]#

Subject: #[subject]#

Abstract: #[description]#

Source: #[url]#

]]>
+ #{point}# + + #[name]# + #[lon]#,#[lat]# + + #{/point}# + + #[date]# + +
+ #{/placemark}# +
+
#(/kml)# \ No newline at end of file diff --git a/htroot/yacysearch_location.xml b/htroot/yacysearch_location.xml new file mode 100644 index 000000000..a8272e530 --- /dev/null +++ b/htroot/yacysearch_location.xml @@ -0,0 +1,20 @@ +#(kml)#:: + + + #{placemark}# + + #[name]# + Location: #[location]#

Author: #[author]#

Publisher: #[copyright]#

Subject: #[subject]#

Abstract: #[description]#

Source: #[url]#

]]>
+ #{point}# + + #[name]# + #[lon]#,#[lat]# + + #{/point}# + + #[date]# + +
+ #{/placemark}# +
+
#(/kml)# \ No newline at end of file diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java index 705398054..6cb81ecd4 100644 --- a/htroot/yacysearchitem.java +++ b/htroot/yacysearchitem.java @@ -31,6 +31,7 @@ import java.util.ArrayList; import java.util.TreeSet; import net.yacy.kelondro.data.meta.DigestURI; +import net.yacy.kelondro.util.DateFormatter; import net.yacy.kelondro.util.EventTracker; import net.yacy.kelondro.util.Formatter; @@ -128,8 +129,8 @@ public class yacysearchitem { prop.put("content_urlhash", resulthashString); prop.put("content_urlhexhash", yacySeed.b64Hash2hexHash(resulthashString)); prop.putHTML("content_urlname", nxTools.shortenURLString(result.urlname(), urllength)); - prop.put("content_date", Switchboard.dateString(result.modified())); - prop.put("content_date822", Switchboard.dateString822(result.modified())); + prop.put("content_date", DateFormatter.formatRFC1123_short(result.modified())); + prop.put("content_date822", DateFormatter.formatRFC1123(result.modified())); //prop.put("content_ybr", RankingProcess.ybr(result.hash())); prop.putHTML("content_size", Integer.toString(result.filesize())); // we don't use putNUM here because that number shall be usable as sorting key. 
To print the size, use 'sizename' prop.putHTML("content_sizename", sizename(result.filesize())); diff --git a/htroot/yacysearchitem.xml b/htroot/yacysearchitem.xml index 28c868a7b..897983341 100644 --- a/htroot/yacysearchitem.xml +++ b/htroot/yacysearchitem.xml @@ -3,9 +3,9 @@ #[link]# #[description-xml]# #[date822]# -#[publisher]# -#[creator]# -#[subject]# + + + #[size]# #[sizename]# #[host]# diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java index 948432129..3778edbfc 100644 --- a/source/de/anomic/search/Switchboard.java +++ b/source/de/anomic/search/Switchboard.java @@ -52,14 +52,12 @@ import java.net.MalformedURLException; import java.security.NoSuchAlgorithmException; import java.security.PublicKey; import java.security.spec.InvalidKeySpecException; -import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.Hashtable; import java.util.Iterator; import java.util.List; -import java.util.Locale; import java.util.Map; import java.util.Properties; import java.util.TreeMap; @@ -1911,24 +1909,6 @@ public final class Switchboard extends serverSwitch { } } - private static SimpleDateFormat DateFormat1 = new SimpleDateFormat("EEE, dd MMM yyyy", Locale.US); - public static String dateString(final Date date) { - if (date == null) return ""; - return DateFormat1.format(date); - } - - // we need locale independent RFC-822 dates at some places - private static SimpleDateFormat DateFormatter822 = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss Z", Locale.US); - public static String dateString822(final Date date) { - if (date == null) return ""; - try { - return DateFormatter822.format(date); - } catch (Exception e) { - Log.logException(e); - return DateFormatter822.format(new Date()); - } - } - public int adminAuthenticated(final RequestHeader requestHeader) { // authorization for localhost, only if flag is set to grant localhost access as admin diff --git 
a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index 6dadc4f04..e62d6313b 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -517,7 +517,8 @@ public final class yacyClient { // send request try { - final byte[] result = wput(urlBase, uri.getHost(), post, (int) timeout); + final byte[] result = wput(urlBase, uri.getHost(), post, (int) timeout); + //String debug = new String(result); System.out.println("*** DEBUG: " + debug); final RSSReader reader = RSSReader.parse(result); if (reader == null) { yacyCore.log.logWarning("yacyClient.search failed asking peer '" + uri.getHost() + "': probably bad response from remote peer (1), reader == null"); diff --git a/source/net/yacy/document/content/RSSMessage.java b/source/net/yacy/document/content/RSSMessage.java index 63dcdc6a8..bf4e08859 100644 --- a/source/net/yacy/document/content/RSSMessage.java +++ b/source/net/yacy/document/content/RSSMessage.java @@ -161,4 +161,8 @@ public class RSSMessage { for (String s: map.values()) sb.append(s).append(" "); return sb.toString(); } + + public String toString() { + return this.map.toString(); + } } diff --git a/source/net/yacy/document/geolocalization/OpenGeoDB.java b/source/net/yacy/document/geolocalization/OpenGeoDB.java index 826a450e0..b091352f7 100644 --- a/source/net/yacy/document/geolocalization/OpenGeoDB.java +++ b/source/net/yacy/document/geolocalization/OpenGeoDB.java @@ -173,20 +173,22 @@ public class OpenGeoDB { * @param anyname * @return */ - public HashSet find(String anyname, boolean exact) { + public HashSet find(String anyname, boolean location, boolean locationexact, boolean kfz, boolean predial, boolean zip) { HashSet r = new HashSet(); List c; - if (exact) { - c = this.locationName2ids.get(anyname); if (c != null) r.addAll(c); - } else { - SortedMap> cities = this.locationName2ids.tailMap(anyname); - for (Map.Entry> e: cities.entrySet()) { - if 
(e.getKey().toLowerCase().startsWith(anyname.toLowerCase())) r.addAll(e.getValue()); else break; + if (location) { + if (locationexact) { + c = this.locationName2ids.get(anyname); if (c != null) r.addAll(c); + } else { + SortedMap> cities = this.locationName2ids.tailMap(anyname); + for (Map.Entry> e: cities.entrySet()) { + if (e.getKey().toLowerCase().startsWith(anyname.toLowerCase())) r.addAll(e.getValue()); else break; + } } } - c = this.kfz2ids.get(anyname); if (c != null) r.addAll(c); - c = this.predial2ids.get(anyname); if (c != null) r.addAll(c); - Integer i = this.zip2id.get(anyname); if (i != null) r.add(i); + if (kfz) {c = this.kfz2ids.get(anyname); if (c != null) r.addAll(c);} + if (predial) {c = this.predial2ids.get(anyname); if (c != null) r.addAll(c);} + if (zip) {Integer i = this.zip2id.get(anyname); if (i != null) r.add(i);} HashSet a = new HashSet(); for (Integer e: r) { Location w = this.id2loc.get(e); diff --git a/source/net/yacy/kelondro/util/DateFormatter.java b/source/net/yacy/kelondro/util/DateFormatter.java index 609c9a72b..946e7bb6d 100644 --- a/source/net/yacy/kelondro/util/DateFormatter.java +++ b/source/net/yacy/kelondro/util/DateFormatter.java @@ -45,6 +45,7 @@ public final class DateFormatter { /** default HTTP 1.1 header date format pattern */ public static final String PATTERN_RFC1123 = "EEE, dd MMM yyyy HH:mm:ss Z"; // with numeric time zone indicator as defined in RFC5322 + public static final String PATTERN_RFC1123_SHORT = "EEE, dd MMM yyyy"; /** date pattern used in older HTTP implementations */ public static final String PATTERN_ANSIC = "EEE MMM d HH:mm:ss yyyy"; @@ -74,7 +75,9 @@ public final class DateFormatter { private static final SimpleDateFormat FORMAT_RFC1123 = new SimpleDateFormat(PATTERN_RFC1123, Locale.US); private static final SimpleDateFormat FORMAT_RFC1036 = new SimpleDateFormat(PATTERN_RFC1036, Locale.US); private static final SimpleDateFormat FORMAT_ANSIC = new SimpleDateFormat(PATTERN_ANSIC, Locale.US); - + + 
private static final SimpleDateFormat FORMAT_RFC1123_SHORT = new SimpleDateFormat(PATTERN_RFC1123_SHORT, Locale.US); + /** * RFC 2616 requires that HTTP clients are able to parse all 3 different * formats. All times MUST be in GMT/UTC, but ... @@ -161,7 +164,13 @@ public final class DateFormatter { lastRFC1123string = s; return s; } - + + /** Formats date with PATTERN_RFC1123_SHORT; returns "" for null. Locks the shared SimpleDateFormat, which is not thread-safe. */ + public static String formatRFC1123_short(final Date date) { + if (date == null) return ""; + synchronized (FORMAT_RFC1123_SHORT) { return FORMAT_RFC1123_SHORT.format(date); } + } + /** * Parse dates as defined in {@linkplain http://www.w3.org/TR/NOTE-datetime}. * This format (also specified in ISO8601) allows different "precisions".