prevent the snippet fetch process from removing catchall entries

pull/1/head
Michael Peter Christen 13 years ago
parent 7eece0256f
commit 52d307c735

@ -72,8 +72,8 @@ import de.anomic.crawler.retrieval.Response;
public class Segment { public class Segment {
// catchall word // catchall word
final static String catchallString = "yacyall"; // a word that is always in all indexes; can be used for zero-word searches to find ALL documents public final static String catchallString = "yacyall"; // a word that is always in all indexes; can be used for zero-word searches to find ALL documents
final static byte[] catchallHash; public final static byte[] catchallHash;
final static Word catchallWord = new Word(0, 0, 0); final static Word catchallWord = new Word(0, 0, 0);
static { static {
catchallHash = Word.word2hash(catchallString); // "KZzU-Vf6h5k-" catchallHash = Word.word2hash(catchallString); // "KZzU-Vf6h5k-"

@ -62,6 +62,7 @@ import org.apache.solr.common.SolrDocumentList;
import de.anomic.crawler.Cache; import de.anomic.crawler.Cache;
import de.anomic.data.WorkTables; import de.anomic.data.WorkTables;
import net.yacy.search.index.Segment;
import net.yacy.search.index.SolrField; import net.yacy.search.index.SolrField;
public class SnippetProcess { public class SnippetProcess {
@ -606,6 +607,10 @@ public class SnippetProcess {
return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, dbRetrievalTime, snippetComputationTime); // result without snippet return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, dbRetrievalTime, snippetComputationTime); // result without snippet
} else { } else {
// problems with snippet fetch // problems with snippet fetch
if (this.snippetFetchWordHashes.has(Segment.catchallHash)) {
// we accept that because the word cannot be on the page
return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, dbRetrievalTime, 0);
}
final String reason = "no text snippet; errorCode = " + snippet.getErrorCode(); final String reason = "no text snippet; errorCode = " + snippet.getErrorCode();
if (this.deleteIfSnippetFail) { if (this.deleteIfSnippetFail) {
this.workTables.failURLsRegisterMissingWord(this.query.getSegment().termIndex(), page.url(), this.query.queryHashes, reason); this.workTables.failURLsRegisterMissingWord(this.query.getSegment().termIndex(), page.url(), this.query.queryHashes, reason);

Loading…
Cancel
Save