From 5268ae2ce93c07cd222909fc0732060cc378184a Mon Sep 17 00:00:00 2001 From: zutto Date: Sat, 29 Jun 2024 10:11:58 +0300 Subject: [PATCH] Check the document's protocol and host values before forming the final URL. --- source/net/yacy/crawler/data/CrawlQueues.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/net/yacy/crawler/data/CrawlQueues.java b/source/net/yacy/crawler/data/CrawlQueues.java index 30a5ea93a..38c8e8a2c 100644 --- a/source/net/yacy/crawler/data/CrawlQueues.java +++ b/source/net/yacy/crawler/data/CrawlQueues.java @@ -617,6 +617,10 @@ public class CrawlQueues { deep = true; } DigestURL url; + if (doc.getFieldValue("url_protocol_s") == null || doc.getFieldValue("host_s") == null) { + //Skip this document if either of these values is null. + continue; + } final String u = doc.getFieldValue("url_protocol_s").toString() + "://" + doc.getFieldValue("host_s").toString(); try { url = new DigestURL(u);