From 7de5b9cfa00ff066f7b1d2a7334ef172c02545a1 Mon Sep 17 00:00:00 2001 From: orbiter Date: Fri, 3 May 2013 00:24:39 +0200 Subject: [PATCH] fix for http://bugs.yacy.net/view.php?id=233 - check geolocation coordinates and accept only those that are well-formed - the Solr push process no longer stops crawling if Solr still does not accept the record after 20 requests. Instead, a severe log entry asks the user to file a bug report --- source/net/yacy/document/Document.java | 10 ++++++++-- .../kelondro/data/meta/URIMetadataRow.java | 20 ++++++++++--------- source/net/yacy/search/index/Segment.java | 17 +++++++--------- 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/source/net/yacy/document/Document.java b/source/net/yacy/document/Document.java index 030b97296..d5e7bbda6 100644 --- a/source/net/yacy/document/Document.java +++ b/source/net/yacy/document/Document.java @@ -119,8 +119,14 @@ public class Document { this.sections = new LinkedList() ; if (sections != null) this.sections.addAll(Arrays.asList(sections)); this.description = (abstrct == null) ? new StringBuilder(0) : new StringBuilder(abstrct); - this.lon = lon; - this.lat = lat; + if (lat >= -90.0d && lat <= 90.0d && lon >= -180.0d && lon <= 180.0d) { + this.lon = lon; + this.lat = lat; + } else { + // we ignore false values because otherwise solr will cause an error when we input the coordinates into the index + this.lon = 0.0d; + this.lat = 0.0d; + } this.anchors = (anchors == null) ? new HashMap(0) : anchors; this.rss = (rss == null) ? new HashMap(0) : rss; this.images = (images == null) ? 
new HashMap() : images; diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java index fc70af16c..65c163632 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java @@ -468,7 +468,7 @@ public class URIMetadataRow { private String urlRaw; private byte[] urlHash; private final String dc_title, dc_creator, dc_subject, dc_publisher; - private final String latlon; // a comma-separated tuple as "," where the coordinates are given as WGS84 spatial coordinates in decimal degrees + private String latlon; // a comma-separated tuple as "," where the coordinates are given as WGS84 spatial coordinates in decimal degrees public Components( final String urlRaw, @@ -511,11 +511,12 @@ public class URIMetadataRow { public double lat() { if (this.latlon == null || this.latlon.isEmpty()) return 0.0d; final int p = this.latlon.indexOf(','); - if (p < 0) { - return 0.0d; - } + if (p < 0) return 0.0d; try { - return this.latlon.charAt(0) > '9' ? 0.0d : Double.parseDouble(this.latlon.substring(0, p)); + double lat = this.latlon.charAt(0) > '9' ? 0.0d : Double.parseDouble(this.latlon.substring(0, p)); + if (lat >= -90.0d && lat <= 90.0d) return lat; + this.latlon = null; // wrong value + return 0.0d; } catch (NumberFormatException e) { return 0.0d; } @@ -523,11 +524,12 @@ public class URIMetadataRow { public double lon() { if (this.latlon == null || this.latlon.isEmpty()) return 0.0d; final int p = this.latlon.indexOf(','); - if (p < 0) { - return 0.0d; - } + if (p < 0) return 0.0d; try { - return this.latlon.charAt(p + 1) > '9' ? 0.0d : Double.parseDouble(this.latlon.substring(p + 1)); + double lon = this.latlon.charAt(p + 1) > '9' ? 
0.0d : Double.parseDouble(this.latlon.substring(p + 1)); + if (lon >= -180.0d && lon <= 180.0d) return lon; + this.latlon = null; // wrong value + return 0.0d; } catch (NumberFormatException e) { return 0.0d; } diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index c33c2bbf5..3e54d0323 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -74,8 +74,6 @@ import net.yacy.kelondro.util.ByteBuffer; import net.yacy.kelondro.util.ISO639; import net.yacy.kelondro.util.MemoryControl; import net.yacy.repository.LoaderDispatcher; -import net.yacy.search.Switchboard; -import net.yacy.search.SwitchboardConstants; import net.yacy.search.query.SearchEvent; import net.yacy.search.schema.CollectionConfiguration; import net.yacy.search.schema.CollectionSchema; @@ -556,8 +554,8 @@ public class Segment { this.fulltext.putDocument(vector); break tryloop; } catch ( final IOException e ) { - error = "failed to send " + urlNormalform + " to solr"; - Log.logWarning("SOLR", error + e.getMessage()); + error = "failed to send " + urlNormalform + " to solr: " + e.getMessage(); + Log.logWarning("SOLR", error); if (i == 10) this.fulltext.commit(false); try {Thread.sleep(1000);} catch (InterruptedException e1) {} continue tryloop; @@ -570,8 +568,8 @@ public class Segment { this.fulltext.putEdges(vector.getWebgraphDocuments()); break tryloop; } catch ( final IOException e ) { - error = "failed to send " + urlNormalform + " to solr"; - Log.logWarning("SOLR", error + e.getMessage()); + error = "failed to send " + urlNormalform + " to solr: " + e.getMessage(); + Log.logWarning("SOLR", error); if (i == 10) this.fulltext.commit(false); try {Thread.sleep(1000);} catch (InterruptedException e1) {} continue tryloop; @@ -579,10 +577,9 @@ public class Segment { } } if (error != null) { - Log.logWarning("SOLR", error + ", pausing Crawler!"); - // pause the crawler!!! 
- Switchboard.getSwitchboard().pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL, error); - Switchboard.getSwitchboard().pauseCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL, error); + Log.logSevere("SOLR", error + ", PLEASE REPORT TO bugs.yacy.net"); + //Switchboard.getSwitchboard().pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL, error); + //Switchboard.getSwitchboard().pauseCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL, error); } final long storageEndTime = System.currentTimeMillis();