- check geolocation coordinates and accept only those that are
well-formed (latitude within [-90, 90], longitude within [-180, 180])
- the Solr push process no longer pauses crawling if, after 20
requests, Solr still does not accept the record. Instead, a severe log
entry asks the user to file a bug report
pull/1/head
orbiter 12 years ago
parent e145afb8d6
commit 7de5b9cfa0

@ -119,8 +119,14 @@ public class Document {
this.sections = new LinkedList<String>() ;
if (sections != null) this.sections.addAll(Arrays.asList(sections));
this.description = (abstrct == null) ? new StringBuilder(0) : new StringBuilder(abstrct);
this.lon = lon;
this.lat = lat;
if (lat >= -90.0d && lat <= 90.0d && lon >= -180.0d && lon <= 180.0d) {
this.lon = lon;
this.lat = lat;
} else {
// we ignore false values because otherwise solr will cause an error when we input the coordinates into the index
this.lon = 0.0d;
this.lat = 0.0d;
}
this.anchors = (anchors == null) ? new HashMap<DigestURI, Properties>(0) : anchors;
this.rss = (rss == null) ? new HashMap<DigestURI, String>(0) : rss;
this.images = (images == null) ? new HashMap<DigestURI, ImageEntry>() : images;

@ -468,7 +468,7 @@ public class URIMetadataRow {
private String urlRaw;
private byte[] urlHash;
private final String dc_title, dc_creator, dc_subject, dc_publisher;
private final String latlon; // a comma-separated tuple as "<latitude>,<longitude>" where the coordinates are given as WGS84 spatial coordinates in decimal degrees
private String latlon; // a comma-separated tuple as "<latitude>,<longitude>" where the coordinates are given as WGS84 spatial coordinates in decimal degrees
public Components(
final String urlRaw,
@ -511,11 +511,12 @@ public class URIMetadataRow {
public double lat() {
if (this.latlon == null || this.latlon.isEmpty()) return 0.0d;
final int p = this.latlon.indexOf(',');
if (p < 0) {
return 0.0d;
}
if (p < 0) return 0.0d;
try {
return this.latlon.charAt(0) > '9' ? 0.0d : Double.parseDouble(this.latlon.substring(0, p));
double lat = this.latlon.charAt(0) > '9' ? 0.0d : Double.parseDouble(this.latlon.substring(0, p));
if (lat >= -90.0d && lat <= 90.0d) return lat;
this.latlon = null; // wrong value
return 0.0d;
} catch (NumberFormatException e) {
return 0.0d;
}
@ -523,11 +524,12 @@ public class URIMetadataRow {
public double lon() {
if (this.latlon == null || this.latlon.isEmpty()) return 0.0d;
final int p = this.latlon.indexOf(',');
if (p < 0) {
return 0.0d;
}
if (p < 0) return 0.0d;
try {
return this.latlon.charAt(p + 1) > '9' ? 0.0d : Double.parseDouble(this.latlon.substring(p + 1));
double lon = this.latlon.charAt(p + 1) > '9' ? 0.0d : Double.parseDouble(this.latlon.substring(p + 1));
if (lon >= -180.0d && lon <= 180.0d) return lon;
this.latlon = null; // wrong value
return 0.0d;
} catch (NumberFormatException e) {
return 0.0d;
}

@ -74,8 +74,6 @@ import net.yacy.kelondro.util.ByteBuffer;
import net.yacy.kelondro.util.ISO639;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.query.SearchEvent;
import net.yacy.search.schema.CollectionConfiguration;
import net.yacy.search.schema.CollectionSchema;
@ -556,8 +554,8 @@ public class Segment {
this.fulltext.putDocument(vector);
break tryloop;
} catch ( final IOException e ) {
error = "failed to send " + urlNormalform + " to solr";
Log.logWarning("SOLR", error + e.getMessage());
error = "failed to send " + urlNormalform + " to solr: " + e.getMessage();
Log.logWarning("SOLR", error);
if (i == 10) this.fulltext.commit(false);
try {Thread.sleep(1000);} catch (InterruptedException e1) {}
continue tryloop;
@ -570,8 +568,8 @@ public class Segment {
this.fulltext.putEdges(vector.getWebgraphDocuments());
break tryloop;
} catch ( final IOException e ) {
error = "failed to send " + urlNormalform + " to solr";
Log.logWarning("SOLR", error + e.getMessage());
error = "failed to send " + urlNormalform + " to solr: " + e.getMessage();
Log.logWarning("SOLR", error);
if (i == 10) this.fulltext.commit(false);
try {Thread.sleep(1000);} catch (InterruptedException e1) {}
continue tryloop;
@ -579,10 +577,9 @@ public class Segment {
}
}
if (error != null) {
Log.logWarning("SOLR", error + ", pausing Crawler!");
// pause the crawler!!!
Switchboard.getSwitchboard().pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL, error);
Switchboard.getSwitchboard().pauseCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL, error);
Log.logSevere("SOLR", error + ", PLEASE REPORT TO bugs.yacy.net");
//Switchboard.getSwitchboard().pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL, error);
//Switchboard.getSwitchboard().pauseCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL, error);
}
final long storageEndTime = System.currentTimeMillis();

Loading…
Cancel
Save