diff --git a/htroot/yacysearch.rss b/htroot/yacysearch.rss index 1a4f0132d..6c3ba5147 100644 --- a/htroot/yacysearch.rss +++ b/htroot/yacysearch.rss @@ -6,6 +6,7 @@ xmlns:media="http://search.yahoo.com/mrss/" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/" + xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" > diff --git a/htroot/yacysearch_location.html b/htroot/yacysearch_location.html index 202f26b6e..4734db426 100644 --- a/htroot/yacysearch_location.html +++ b/htroot/yacysearch_location.html @@ -9,11 +9,11 @@ var map; var searchLayer_md = null; var searchLayer_co = null; - var path_mdsearch = 'yacysearch_location.rss?dom=title,publisher,creator,subject&query='; + var path_mdsearch = 'yacysearch_location.rss?dom=metatag&query='; var path_cosearch = 'yacysearch_location.rss?dom=query&query='; var marker_md = new OpenLayers.Icon("/env/grafics/marker_red.png", new OpenLayers.Size(11,16)); var marker_co = new OpenLayers.Icon("/env/grafics/star_yellow.png", new OpenLayers.Size(25,25)); - // possible values for dom: query,mdall,title,publisher,creator,subject + // possible values for dom: query,metatag,alltext,title,publisher,creator,subject function init() { map = new OpenLayers.Map('map', { diff --git a/htroot/yacysearch_location.java b/htroot/yacysearch_location.java index e569c2add..e92d37edb 100644 --- a/htroot/yacysearch_location.java +++ b/htroot/yacysearch_location.java @@ -55,13 +55,13 @@ public class yacysearch_location { prop.put("kml", 1); if (post == null) return prop; String query = post.get("query", ""); - boolean search_all = !post.containsKey("dom") || post.get("dom", "").equals("all"); - boolean search_query = search_all || post.get("dom", "").indexOf("query") >= 0; - boolean search_mdall = search_all || post.get("dom", "").indexOf("mdall") >= 0; - boolean search_title = search_mdall || post.get("dom", "").indexOf("title") >= 0; - boolean search_publisher = search_mdall || post.get("dom", "").indexOf("publisher") >= 0; - boolean search_creator = search_mdall || post.get("dom", "").indexOf("creator") >= 0; - boolean search_subject = search_mdall || post.get("dom", "").indexOf("subject") >= 0; + boolean search_query = post.get("dom", "").indexOf("query") >= 0; + boolean metatag = post.get("dom", "").indexOf("metatag") >= 0; + boolean alltext = post.get("dom", "").indexOf("alltext") >= 0; + boolean search_title = alltext || post.get("dom", "").indexOf("title") >= 0; + boolean search_publisher = alltext || post.get("dom", "").indexOf("publisher") >= 0; + boolean search_creator = alltext || post.get("dom", "").indexOf("creator") >= 0; + boolean search_subject = alltext || post.get("dom", "").indexOf("subject") >= 0; long maximumTime = post.getLong("maximumTime", 3000); int maximumRecords = post.getInt("maximumRecords", 200); //i.e. http://localhost:8090/yacysearch_location.kml?query=berlin&maximumTime=2000&maximumRecords=100 @@ -89,7 +89,7 @@ public class yacysearch_location { } } - if (search_title || search_publisher || search_creator || search_subject) try { + if (metatag || search_title || search_publisher || search_creator || search_subject) try { // get a queue of search results String rssSearchServiceURL = "http://127.0.0.1:" + sb.getConfig("port", "8090") + "/yacysearch.rss"; BlockingQueue results = new LinkedBlockingQueue(); @@ -98,6 +98,7 @@ public class yacysearch_location { // take the results and compute some locations RSSMessage message; loop: while ((message = results.poll(maximumTime, TimeUnit.MILLISECONDS)) != RSSMessage.POISON) { + // find all associated locations Set locations = new HashSet(); StringBuilder words = new StringBuilder(120); @@ -112,6 +113,13 @@ public class yacysearch_location { for (int i = 0; i < wordlist.length - 1; i++) locations.addAll(LibraryProvider.geoLoc.find(wordlist[i] + space + wordlist[i + 1], true)); for (int i = 0; i < wordlist.length - 2; i++) locations.addAll(LibraryProvider.geoLoc.find(wordlist[i] + space + wordlist[i + 1] + space + wordlist[i + 2], true)); + // add locations from metatag + if (metatag) { + if (message.getLat() != 0.0f && message.getLon() != 0.0f) { + locations.add(new Location(message.getLon(), message.getLat(), message.getTitle().trim())); + } + } + for (Location location: locations) { // write for all locations a point to this message prop.put("kml_placemark_" + placemarkCounter + "_location", location.getName()); diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java index 0c29a1c7d..2111fcde3 100644 --- a/htroot/yacysearchitem.java +++ b/htroot/yacysearchitem.java @@ -193,6 +193,13 @@ public class yacysearchitem { } else { prop.put("content_code", ""); } + if (result.lat() == 0.0f || result.lon() == 0.0f) { + prop.put("content_loc", 0); + } else { + prop.put("content_loc", 1); + prop.put("content_loc_lat", result.lat()); + prop.put("content_loc_lon", result.lon()); + } theQuery.transmitcount = item + 1; return prop; } diff --git a/htroot/yacysearchitem.xml b/htroot/yacysearchitem.xml index 897983341..5fc71a6a2 100644 --- a/htroot/yacysearchitem.xml +++ b/htroot/yacysearchitem.xml @@ -12,6 +12,7 @@ #[path]# #[file]# #[urlhash]# +#(loc)#::#[lat]##[lon]##(/loc)# :: #(item)#:: #[name]# diff --git a/source/de/anomic/search/ResultEntry.java b/source/de/anomic/search/ResultEntry.java index a0c764506..565a52d72 100644 --- a/source/de/anomic/search/ResultEntry.java +++ b/source/de/anomic/search/ResultEntry.java @@ -168,6 +168,12 @@ public class ResultEntry implements Comparable, Comparator linkTags0 = new HashSet(9,0.99f); private static final Set linkTags1 = new HashSet(7,0.99f); @@ -121,6 +124,55 @@ public class ContentScraper extends AbstractScraper implements Scraper { public void scrapeText(final char[] newtext, final String insideTag) { // System.out.println("SCRAPE: " + UTF8.String(newtext)); + int p, q, s = 0; + + // try to find location information in text + location: while (s < newtext.length) { + p = CharBuffer.indexOf(newtext, s, degree); + if (p < 0) break location; + // try to find a coordinate + // N 50o 05.453'E 008o 30.191' + // N 52o 28.025 E 013o 20.299 + q = CharBuffer.indexOf(newtext, p, minuteCharsHTML); + if (q < 0) q = CharBuffer.indexOf(newtext, p, " E".toCharArray()); + if (q < 0) q = CharBuffer.indexOf(newtext, p, " W".toCharArray()); + if (q < 0 && newtext.length - p == 8) q = newtext.length; + if (q < 0) break location; + int r = p; + while (r-- > 1) { + if (newtext[r] == ' ') { + r--; + if (newtext[r] == 'N') { + this.lat = Float.parseFloat(new String(newtext, r + 2, p - r - 2)) + + Float.parseFloat(new String(newtext, p + 2, q - p - 2)) / 60.0f; + s = q + 6; + continue location; + } + if (newtext[r] == 'S') { + this.lat = -Float.parseFloat(new String(newtext, r + 2, p - r - 2)) - + Float.parseFloat(new String(newtext, p + 2, q - p - 2)) / 60.0f; + s = q + 6; + continue location; + } + if (newtext[r] == 'E') { + this.lon = Float.parseFloat(new String(newtext, r + 2, p - r - 2)) + + Float.parseFloat(new String(newtext, p + 2, q - p - 2)) / 60.0f; + s = q + 6; + continue location; + } + if (newtext[r] == 'W') { + this.lon = -Float.parseFloat(new String(newtext, r + 2, p - r - 2)) - + Float.parseFloat(new String(newtext, p + 2, q - p - 2)) / 60.0f; + s = q + 6; + continue location; + } + break location; + } + } + break location; + } + + // find tags inside text String b = cleanLine(super.stripAll(newtext)); if ((insideTag != null) && (!(insideTag.equals("a")))) { // texts inside tags sometimes have no punctuation at the line end @@ -132,7 +184,7 @@ public class ContentScraper extends AbstractScraper implements Scraper { //System.out.println("*** Appended dot: " + b.toString()); } // find http links inside text - int p, q, s = 0; + s = 0; String u; MultiProtocolURI url; while (s < b.length()) { diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java index 3b8cb4ee2..68f985faa 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java @@ -191,8 +191,8 @@ public class URIMetadataRow implements URIMetadata { final CharBuffer s = new CharBuffer(360); s.append(url.toNormalform(false, true)).append(10); s.append(dc_title).append(10); - s.append(dc_creator).append(10); - s.append(dc_subject).append(10); + s.append(dc_creator.length() > 80 ? dc_creator.substring(0, 80) : dc_creator).append(10); + s.append(dc_subject.length() > 120 ? dc_subject.substring(0, 120) : dc_subject).append(10); s.append(dc_publisher).append(10); if (lon == 0.0f && lat == 0.0f) s.append(10); else s.append(Float.toString(lat)).append(',').append(Float.toString(lon)).append(10); return UTF8.getBytes(s.toString()); diff --git a/source/net/yacy/kelondro/io/CharBuffer.java b/source/net/yacy/kelondro/io/CharBuffer.java index 892f005c6..919368968 100644 --- a/source/net/yacy/kelondro/io/CharBuffer.java +++ b/source/net/yacy/kelondro/io/CharBuffer.java @@ -246,6 +246,36 @@ public final class CharBuffer extends Writer { } return -1; } + + public static int indexOf(final char[] b, final char c) { + return indexOf(b, 0, c); + } + + public static int indexOf(final char[] b, final int offset, final char c) { + for (int i = offset; i < b.length; i++) if (b[i] == c) return i; + return -1; + } + + public static int indexOf(final char[] b, final char[] s) { + return indexOf(b, 0, s); + } + + public static int indexOf(final char[] b, final int start, final char[] bs) { + if (start + bs.length > b.length) return -1; + loop: for (int i = start; i <= b.length - bs.length; i++) { + // first test only first char + if (b[i] != bs[0]) continue loop; + + // then test all remaining char + for (int j = 1; j < bs.length; j++) { + if (b[i + j] != bs[j]) continue loop; + } + + // found hit + return i; + } + return -1; + } public int lastIndexOf(final char b) { for (int i = length - 1; i >= 0; i--) if (buffer[offset + i] == b) return i;