From a15ac8e0caa7a9b0fe3178c1fc2b6657289e4a52 Mon Sep 17 00:00:00 2001 From: luccioman Date: Tue, 19 Jun 2018 12:53:17 +0200 Subject: [PATCH] Made CrawlProfile loading tolerant to malformed json string attribute --- .../net/yacy/crawler/data/CrawlProfile.java | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/source/net/yacy/crawler/data/CrawlProfile.java b/source/net/yacy/crawler/data/CrawlProfile.java index 63a1f30db..c3973afd4 100644 --- a/source/net/yacy/crawler/data/CrawlProfile.java +++ b/source/net/yacy/crawler/data/CrawlProfile.java @@ -49,6 +49,7 @@ import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.util.CommonPattern; import net.yacy.cora.util.ConcurrentLog; import net.yacy.cora.util.JSONArray; +import net.yacy.cora.util.JSONException; import net.yacy.cora.util.JSONTokener; import net.yacy.crawler.CrawlSwitchboard; import net.yacy.document.VocabularyScraper; @@ -290,11 +291,32 @@ public class CrawlProfile extends ConcurrentHashMap implements M if (ext != null) putAll(ext); this.doms = new ConcurrentHashMap(); String jsonString = ext.get(CrawlAttribute.IGNORE_DIV_CLASS_NAME.key); - JSONArray a = jsonString == null ? new JSONArray() : new JSONArray(new JSONTokener(jsonString)); + JSONArray a; + if(jsonString == null) { + a = new JSONArray(); + } else { + try { + a = new JSONArray(new JSONTokener(jsonString)); + } catch(final JSONException e) { + ConcurrentLog.logException(e); + a = new JSONArray(); + } + } this.ignore_class_name = new HashSet(); for (int i = 0; i < a.length(); i++) this.ignore_class_name.add(a.getString(i)); jsonString = ext.get(CrawlAttribute.SCRAPER.key); - this.scraper = jsonString == null || jsonString.length() == 0 ? new VocabularyScraper() : new VocabularyScraper(jsonString); + if(jsonString == null || jsonString.length() == 0) { + this.scraper = new VocabularyScraper(); + } else { + VocabularyScraper loadedScraper; + try { + loadedScraper = new VocabularyScraper(jsonString); + } catch(final JSONException e) { + ConcurrentLog.logException(e); + loadedScraper = new VocabularyScraper(); + } + this.scraper = loadedScraper; + } } public Set ignoreDivClassName() {