Prevent metadata records without a valid URL from entering the index

by throwing MalformedURLException on URIMetadataNode creation
pull/14/head
reger 10 years ago
parent 41c4eade51
commit e37a4f0b3d

@ -21,6 +21,7 @@ package net.yacy.cora.federate;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
@ -143,7 +144,7 @@ abstract public class AbstractFederateSearchConnector implements FederateSearchC
* @param doc remote result (with remote field names)
* @return SolrDocument with field names according to the YaCy schema
*/
protected URIMetadataNode toYaCySchema(final SolrDocument doc) {
protected URIMetadataNode toYaCySchema(final SolrDocument doc) throws MalformedURLException {
// set YaCy id
String urlstr;
if (localcfg.contains("sku")) {
@ -156,7 +157,8 @@ abstract public class AbstractFederateSearchConnector implements FederateSearchC
}
}
URIMetadataNode newdoc = new URIMetadataNode(urlstr);
final DigestURL url = new DigestURL(urlstr);
URIMetadataNode newdoc = new URIMetadataNode(url);
Iterator<Configuration.Entry> it = localcfg.entryIterator();
while (it.hasNext()) {
Configuration.Entry et = it.next();

@ -20,6 +20,7 @@
package net.yacy.cora.federate;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
@ -101,8 +102,10 @@ public class SolrFederateSearchConnector extends AbstractFederateSearchConnector
SolrDocumentList docList = solrConnector.getDocumentListByParams(msp);
// convert to YaCy schema documentlist
for (SolrDocument doc : docList) {
URIMetadataNode anew = toYaCySchema(doc);
docs.add(anew);
try {
URIMetadataNode anew = toYaCySchema(doc);
docs.add(anew);
} catch (MalformedURLException ex) { }
}
} catch (IOException | SolrException e) {
} finally {

@ -90,18 +90,13 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
private String alternative_urlname;
private TextSnippet textSnippet = null;
public URIMetadataNode(final Properties prop, String collection) {
public URIMetadataNode(final Properties prop, String collection) throws MalformedURLException {
// generates an plasmaLURLEntry using the properties from the argument
// the property names must correspond to the one from toString
//System.out.println("DEBUG-ENTRY: prop=" + prop.toString());
super();
final String urlRaw = crypt.simpleDecode(prop.getProperty("url", ""));
try {
url = new DigestURL(urlRaw);
} catch (final MalformedURLException e) {
ConcurrentLog.logException(e);
this.url = null;
}
url = new DigestURL(urlRaw);
String descr = crypt.simpleDecode(prop.getProperty("descr", "")); if (descr == null) descr = "";
String dc_creator = crypt.simpleDecode(prop.getProperty("author", "")); if (dc_creator == null) dc_creator = "";
String tags = crypt.simpleDecode(prop.getProperty("tags", "")); if (tags == null) tags = "";
@ -161,7 +156,7 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
}
}
public URIMetadataNode(final SolrDocument doc) {
public URIMetadataNode(final SolrDocument doc) throws MalformedURLException {
super();
for (String name : doc.getFieldNames()) {
this.addField(name, doc.getFieldValue(name));
@ -170,31 +165,15 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
this.score = scorex == null ? 0.0f : scorex.floatValue();
final byte[] hash = ASCII.getBytes(getString(CollectionSchema.id)); // TODO: can we trust this id ?
final String urlRaw = getString(CollectionSchema.sku);
try {
this.url = new DigestURL(urlRaw, hash);
} catch (final MalformedURLException e) {
ConcurrentLog.logException(e);
this.url = null;
}
this.url = new DigestURL(urlRaw, hash);
}
public URIMetadataNode(final SolrDocument doc, final WordReferenceVars searchedWord, final float scorex) {
public URIMetadataNode(final SolrDocument doc, final WordReferenceVars searchedWord, final float scorex) throws MalformedURLException {
this(doc);
this.word = searchedWord;
this.score = scorex;
}
/**
 * Creates a metadata node from a URL given as a string.
 *
 * The string is parsed into a DigestURL; on success the sku field is set from
 * url.toNormalform(true) and the id field from the URL's hash.
 * If parsing fails, the MalformedURLException is only logged and url is set
 * to null: the node is still constructed, and this constructor sets neither
 * sku nor id in that case.
 *
 * @param urlstr the URL string to parse
 */
public URIMetadataNode (final String urlstr) {
super();
try {
url = new DigestURL(urlstr);
this.setField(CollectionSchema.sku.name(), url.toNormalform(true));
this.setField(CollectionSchema.id.name(), ASCII.String(url.hash()));
} catch (final MalformedURLException e) {
// the failure is not propagated to the caller; the node is left with a null url
ConcurrentLog.logException(e);
this.url = null;
}
}
public URIMetadataNode(DigestURL theurl) {
super();
url = theurl;
@ -520,7 +499,7 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
}
try {
return new URIMetadataNode(MapTools.s2p(propStr.substring(1, propStr.length() - 1)), collection);
} catch (final kelondroException e) {
} catch (final kelondroException | MalformedURLException e) {
// wrong format
ConcurrentLog.severe("URIMetadataNode", e.getMessage());
return null;

@ -48,6 +48,7 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.InetAddress;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
@ -1089,7 +1090,12 @@ public final class Protocol {
if ( doc == null ) {
continue;
}
URIMetadataNode urlEntry = new URIMetadataNode(doc);
URIMetadataNode urlEntry;
try {
urlEntry = new URIMetadataNode(doc);
} catch (MalformedURLException ex) {
continue;
}
if ( blacklist.isListed(BlacklistType.SEARCH, urlEntry.url()) ) {
if ( Network.log.isInfo() ) {

@ -165,6 +165,7 @@ import net.yacy.document.parser.pdfParser;
import net.yacy.document.parser.html.Evaluation;
import net.yacy.gui.Audio;
import net.yacy.gui.Tray;
import net.yacy.http.YaCyHttpServer;
import net.yacy.kelondro.blob.BEncodedHeap;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.data.meta.URIMetadataNode;
@ -215,7 +216,6 @@ import net.yacy.visualization.CircleTool;
import com.google.common.io.Files;
import net.yacy.http.YaCyHttpServer;
public final class Switchboard extends serverSwitch {
@ -2998,8 +2998,12 @@ public final class Switchboard extends serverSwitch {
final Seed initiatorPeer = this.peers.getConnected(queueEntry.initiator());
if ( initiatorPeer != null ) {
// start a thread for receipt sending to avoid a blocking here
SolrDocument sd = this.index.fulltext().getDefaultConfiguration().toSolrDocument(newEntry);
new Thread(new receiptSending(initiatorPeer, new URIMetadataNode(sd)), "sending receipt to " + ASCII.String(queueEntry.initiator())).start();
try {
SolrDocument sd = this.index.fulltext().getDefaultConfiguration().toSolrDocument(newEntry);
new Thread(new receiptSending(initiatorPeer, new URIMetadataNode(sd)), "sending receipt to " + ASCII.String(queueEntry.initiator())).start();
} catch (MalformedURLException ex) {
this.log.info("malformed url: "+ex.getMessage());
}
}
}
}

Loading…
Cancel
Save