Check the document's protocol and host values before forming the final URL.

pull/650/head
zutto 5 months ago
parent 962aaec0c0
commit 5268ae2ce9

@ -617,6 +617,10 @@ public class CrawlQueues {
deep = true; deep = true;
} }
DigestURL url; DigestURL url;
if (doc.getFieldValue("url_protocol_s") == null || doc.getFieldValue("host_s") == null) {
//Skip this document if either of these values is null.
continue;
}
final String u = doc.getFieldValue("url_protocol_s").toString() + "://" + doc.getFieldValue("host_s").toString(); final String u = doc.getFieldValue("url_protocol_s").toString() + "://" + doc.getFieldValue("host_s").toString();
try { try {
url = new DigestURL(u); url = new DigestURL(u);

Loading…
Cancel
Save