From 52d307c735af8f173dc595b6d4411502ecddf03e Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Thu, 17 May 2012 05:18:52 +0200 Subject: [PATCH] prevent the snippet fetch process from removing catchall entries --- source/net/yacy/search/index/Segment.java | 4 ++-- source/net/yacy/search/query/SnippetProcess.java | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index d4ae0a852..cb2abc0a3 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -72,8 +72,8 @@ import de.anomic.crawler.retrieval.Response; public class Segment { // catchall word - final static String catchallString = "yacyall"; // a word that is always in all indexes; can be used for zero-word searches to find ALL documents - final static byte[] catchallHash; + public final static String catchallString = "yacyall"; // a word that is always in all indexes; can be used for zero-word searches to find ALL documents + public final static byte[] catchallHash; final static Word catchallWord = new Word(0, 0, 0); static { catchallHash = Word.word2hash(catchallString); // "KZzU-Vf6h5k-" diff --git a/source/net/yacy/search/query/SnippetProcess.java b/source/net/yacy/search/query/SnippetProcess.java index 9eeb816d0..028450ab9 100644 --- a/source/net/yacy/search/query/SnippetProcess.java +++ b/source/net/yacy/search/query/SnippetProcess.java @@ -62,6 +62,7 @@ import org.apache.solr.common.SolrDocumentList; import de.anomic.crawler.Cache; import de.anomic.data.WorkTables; +import net.yacy.search.index.Segment; import net.yacy.search.index.SolrField; public class SnippetProcess { @@ -606,6 +607,10 @@ public class SnippetProcess { return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, dbRetrievalTime, snippetComputationTime); // result without snippet } else { // problems with snippet fetch + if
(this.snippetFetchWordHashes.has(Segment.catchallHash)) { + // we accept that because the word cannot be on the page + return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, dbRetrievalTime, 0); + } final String reason = "no text snippet; errorCode = " + snippet.getErrorCode(); if (this.deleteIfSnippetFail) { this.workTables.failURLsRegisterMissingWord(this.query.getSegment().termIndex(), page.url(), this.query.queryHashes, reason);