From f425b2c61c09c32beaf408690245d458527e1da7 Mon Sep 17 00:00:00 2001 From: orbiter Date: Sat, 27 Jul 2013 10:56:02 +0200 Subject: [PATCH] re-try to fetch url after a soft commit --- htroot/CrawlResults.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/htroot/CrawlResults.java b/htroot/CrawlResults.java index 1aa7d95d8..c8fb7f258 100644 --- a/htroot/CrawlResults.java +++ b/htroot/CrawlResults.java @@ -182,7 +182,12 @@ public class CrawlResults { while (i.hasNext()) { entry = i.next(); try { - urle = sb.index.fulltext().getMetadata(UTF8.getBytes(entry.getKey())); + byte[] urlhash = UTF8.getBytes(entry.getKey()); + urle = sb.index.fulltext().getMetadata(urlhash); + if (urle == null) { + sb.index.fulltext().commit(true); + urle = sb.index.fulltext().getMetadata(urlhash); + } if (urle == null) { ConcurrentLog.warn("PLASMA", "CrawlResults: URL not in index with url hash " + entry.getKey()); urlstr = null;