Prevent metadata records without a valid URL from entering the index

by throwing a MalformedURLException when a URIMetadataNode is created
pull/14/head
reger 10 years ago
parent 41c4eade51
commit e37a4f0b3d

@ -21,6 +21,7 @@ package net.yacy.cora.federate;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap; import java.util.HashMap;
@ -143,7 +144,7 @@ abstract public class AbstractFederateSearchConnector implements FederateSearchC
* @param remote result (with remote fieldnames) * @param remote result (with remote fieldnames)
* @return SolrDocument with field names according to the YaCy schema * @return SolrDocument with field names according to the YaCy schema
*/ */
protected URIMetadataNode toYaCySchema(final SolrDocument doc) { protected URIMetadataNode toYaCySchema(final SolrDocument doc) throws MalformedURLException {
// set YaCy id // set YaCy id
String urlstr; String urlstr;
if (localcfg.contains("sku")) { if (localcfg.contains("sku")) {
@ -156,7 +157,8 @@ abstract public class AbstractFederateSearchConnector implements FederateSearchC
} }
} }
URIMetadataNode newdoc = new URIMetadataNode(urlstr); final DigestURL url = new DigestURL(urlstr);
URIMetadataNode newdoc = new URIMetadataNode(url);
Iterator<Configuration.Entry> it = localcfg.entryIterator(); Iterator<Configuration.Entry> it = localcfg.entryIterator();
while (it.hasNext()) { while (it.hasNext()) {
Configuration.Entry et = it.next(); Configuration.Entry et = it.next();

@ -20,6 +20,7 @@
package net.yacy.cora.federate; package net.yacy.cora.federate;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
@ -101,8 +102,10 @@ public class SolrFederateSearchConnector extends AbstractFederateSearchConnector
SolrDocumentList docList = solrConnector.getDocumentListByParams(msp); SolrDocumentList docList = solrConnector.getDocumentListByParams(msp);
// convert to YaCy schema documentlist // convert to YaCy schema documentlist
for (SolrDocument doc : docList) { for (SolrDocument doc : docList) {
URIMetadataNode anew = toYaCySchema(doc); try {
docs.add(anew); URIMetadataNode anew = toYaCySchema(doc);
docs.add(anew);
} catch (MalformedURLException ex) { }
} }
} catch (IOException | SolrException e) { } catch (IOException | SolrException e) {
} finally { } finally {

@ -90,18 +90,13 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
private String alternative_urlname; private String alternative_urlname;
private TextSnippet textSnippet = null; private TextSnippet textSnippet = null;
public URIMetadataNode(final Properties prop, String collection) { public URIMetadataNode(final Properties prop, String collection) throws MalformedURLException {
// generates an plasmaLURLEntry using the properties from the argument // generates an plasmaLURLEntry using the properties from the argument
// the property names must correspond to the one from toString // the property names must correspond to the one from toString
//System.out.println("DEBUG-ENTRY: prop=" + prop.toString()); //System.out.println("DEBUG-ENTRY: prop=" + prop.toString());
super(); super();
final String urlRaw = crypt.simpleDecode(prop.getProperty("url", "")); final String urlRaw = crypt.simpleDecode(prop.getProperty("url", ""));
try { url = new DigestURL(urlRaw);
url = new DigestURL(urlRaw);
} catch (final MalformedURLException e) {
ConcurrentLog.logException(e);
this.url = null;
}
String descr = crypt.simpleDecode(prop.getProperty("descr", "")); if (descr == null) descr = ""; String descr = crypt.simpleDecode(prop.getProperty("descr", "")); if (descr == null) descr = "";
String dc_creator = crypt.simpleDecode(prop.getProperty("author", "")); if (dc_creator == null) dc_creator = ""; String dc_creator = crypt.simpleDecode(prop.getProperty("author", "")); if (dc_creator == null) dc_creator = "";
String tags = crypt.simpleDecode(prop.getProperty("tags", "")); if (tags == null) tags = ""; String tags = crypt.simpleDecode(prop.getProperty("tags", "")); if (tags == null) tags = "";
@ -161,7 +156,7 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
} }
} }
public URIMetadataNode(final SolrDocument doc) { public URIMetadataNode(final SolrDocument doc) throws MalformedURLException {
super(); super();
for (String name : doc.getFieldNames()) { for (String name : doc.getFieldNames()) {
this.addField(name, doc.getFieldValue(name)); this.addField(name, doc.getFieldValue(name));
@ -170,31 +165,15 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
this.score = scorex == null ? 0.0f : scorex.floatValue(); this.score = scorex == null ? 0.0f : scorex.floatValue();
final byte[] hash = ASCII.getBytes(getString(CollectionSchema.id)); // TODO: can we trust this id ? final byte[] hash = ASCII.getBytes(getString(CollectionSchema.id)); // TODO: can we trust this id ?
final String urlRaw = getString(CollectionSchema.sku); final String urlRaw = getString(CollectionSchema.sku);
try { this.url = new DigestURL(urlRaw, hash);
this.url = new DigestURL(urlRaw, hash);
} catch (final MalformedURLException e) {
ConcurrentLog.logException(e);
this.url = null;
}
} }
public URIMetadataNode(final SolrDocument doc, final WordReferenceVars searchedWord, final float scorex) { public URIMetadataNode(final SolrDocument doc, final WordReferenceVars searchedWord, final float scorex) throws MalformedURLException {
this(doc); this(doc);
this.word = searchedWord; this.word = searchedWord;
this.score = scorex; this.score = scorex;
} }
public URIMetadataNode (final String urlstr) {
super();
try {
url = new DigestURL(urlstr);
this.setField(CollectionSchema.sku.name(), url.toNormalform(true));
this.setField(CollectionSchema.id.name(), ASCII.String(url.hash()));
} catch (final MalformedURLException e) {
ConcurrentLog.logException(e);
this.url = null;
}
}
public URIMetadataNode(DigestURL theurl) { public URIMetadataNode(DigestURL theurl) {
super(); super();
url = theurl; url = theurl;
@ -520,7 +499,7 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
} }
try { try {
return new URIMetadataNode(MapTools.s2p(propStr.substring(1, propStr.length() - 1)), collection); return new URIMetadataNode(MapTools.s2p(propStr.substring(1, propStr.length() - 1)), collection);
} catch (final kelondroException e) { } catch (final kelondroException | MalformedURLException e) {
// wrong format // wrong format
ConcurrentLog.severe("URIMetadataNode", e.getMessage()); ConcurrentLog.severe("URIMetadataNode", e.getMessage());
return null; return null;

@ -48,6 +48,7 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.net.InetAddress; import java.net.InetAddress;
import java.net.MalformedURLException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap; import java.util.HashMap;
@ -1089,7 +1090,12 @@ public final class Protocol {
if ( doc == null ) { if ( doc == null ) {
continue; continue;
} }
URIMetadataNode urlEntry = new URIMetadataNode(doc); URIMetadataNode urlEntry;
try {
urlEntry = new URIMetadataNode(doc);
} catch (MalformedURLException ex) {
continue;
}
if ( blacklist.isListed(BlacklistType.SEARCH, urlEntry.url()) ) { if ( blacklist.isListed(BlacklistType.SEARCH, urlEntry.url()) ) {
if ( Network.log.isInfo() ) { if ( Network.log.isInfo() ) {

@ -165,6 +165,7 @@ import net.yacy.document.parser.pdfParser;
import net.yacy.document.parser.html.Evaluation; import net.yacy.document.parser.html.Evaluation;
import net.yacy.gui.Audio; import net.yacy.gui.Audio;
import net.yacy.gui.Tray; import net.yacy.gui.Tray;
import net.yacy.http.YaCyHttpServer;
import net.yacy.kelondro.blob.BEncodedHeap; import net.yacy.kelondro.blob.BEncodedHeap;
import net.yacy.kelondro.blob.Tables; import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.data.meta.URIMetadataNode; import net.yacy.kelondro.data.meta.URIMetadataNode;
@ -215,7 +216,6 @@ import net.yacy.visualization.CircleTool;
import com.google.common.io.Files; import com.google.common.io.Files;
import net.yacy.http.YaCyHttpServer;
public final class Switchboard extends serverSwitch { public final class Switchboard extends serverSwitch {
@ -2998,8 +2998,12 @@ public final class Switchboard extends serverSwitch {
final Seed initiatorPeer = this.peers.getConnected(queueEntry.initiator()); final Seed initiatorPeer = this.peers.getConnected(queueEntry.initiator());
if ( initiatorPeer != null ) { if ( initiatorPeer != null ) {
// start a thread for receipt sending to avoid a blocking here // start a thread for receipt sending to avoid a blocking here
SolrDocument sd = this.index.fulltext().getDefaultConfiguration().toSolrDocument(newEntry); try {
new Thread(new receiptSending(initiatorPeer, new URIMetadataNode(sd)), "sending receipt to " + ASCII.String(queueEntry.initiator())).start(); SolrDocument sd = this.index.fulltext().getDefaultConfiguration().toSolrDocument(newEntry);
new Thread(new receiptSending(initiatorPeer, new URIMetadataNode(sd)), "sending receipt to " + ASCII.String(queueEntry.initiator())).start();
} catch (MalformedURLException ex) {
this.log.info("malformed url: "+ex.getMessage());
}
} }
} }
} }

Loading…
Cancel
Save