diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index 2a98b692d..137f7050c 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -1246,7 +1246,7 @@ public final class SearchEvent { if (solrsnippet != null && solrsnippet.size() > 0) { OpensearchResponseWriter.removeSubsumedTitle(solrsnippet, node.dc_title()); final TextSnippet snippet = new TextSnippet(node.hash(), OpensearchResponseWriter.getLargestSnippet(solrsnippet), true, ResultClass.SOURCE_CACHE, ""); - ResultEntry re = new ResultEntry(node, this.query.getSegment(), this.peers, snippet, null, 0); + ResultEntry re = new ResultEntry(node, this.query.getSegment(), this.peers, snippet, 0); addResult(re); success = true; } else { @@ -1375,7 +1375,7 @@ public final class SearchEvent { ((this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_indexof))), SearchEvent.SNIPPET_MAX_LENGTH, !this.query.isLocal()); - return new ResultEntry(page, this.query.getSegment(), this.peers, snippet, null, 0); // result without snippet + return new ResultEntry(page, this.query.getSegment(), this.peers, snippet, 0); // result without snippet } // load snippet @@ -1396,16 +1396,16 @@ public final class SearchEvent { if (!snippet.getErrorCode().fail()) { // we loaded the file and found the snippet - return new ResultEntry(page, this.query.getSegment(), this.peers, snippet, null, snippetComputationTime); // result with snippet attached + return new ResultEntry(page, this.query.getSegment(), this.peers, snippet, snippetComputationTime); // result with snippet attached } else if (cacheStrategy.mustBeOffline()) { // we did not demand online loading, therefore a failure does not mean that the missing snippet causes a rejection of this result // this may happen during a remote search, because snippet loading is omitted to retrieve results faster - return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, snippetComputationTime); // result without snippet + return new ResultEntry(page, this.query.getSegment(), this.peers, null, snippetComputationTime); // result without snippet } else { // problems with snippet fetch if (this.snippetFetchWordHashes.has(Segment.catchallHash)) { // we accept that because the word cannot be on the page - return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, 0); + return new ResultEntry(page, this.query.getSegment(), this.peers, null, 0); } final String reason = "no text snippet; errorCode = " + snippet.getErrorCode(); if (this.deleteIfSnippetFail) { @@ -1415,7 +1415,7 @@ public final class SearchEvent { return null; } } - return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, 0); // result without snippet + return new ResultEntry(page, this.query.getSegment(), this.peers, null, 0); // result without snippet } public ResultEntry oneResult(final int item, final long timeout) { diff --git a/source/net/yacy/search/snippet/ResultEntry.java b/source/net/yacy/search/snippet/ResultEntry.java index 056e0cde0..7eafcdbd1 100644 --- a/source/net/yacy/search/snippet/ResultEntry.java +++ b/source/net/yacy/search/snippet/ResultEntry.java @@ -57,7 +57,6 @@ public class ResultEntry implements Comparable, Comparator mediaSnippets; private final Segment indexSegment; // statistic objects @@ -67,7 +66,6 @@ public class ResultEntry implements Comparable, Comparator mediaSnippets, final long snippetComputationTime) { this.urlentry = urlentry; this.urlentry.setField(CollectionSchema.text_t.getSolrFieldName(), ""); // clear the text field which eats up most of the space; it was used for snippet computation which is in a separate field here @@ -75,7 +73,6 @@ public class ResultEntry implements Comparable, Comparator, Comparator mediaSnippets() { - return this.mediaSnippets; - } public Date modified() { return this.urlentry.moddate(); } @@ -211,9 +205,6 @@ public class ResultEntry implements Comparable, Comparator, Comparator" + audioline; - //if (videoline != null) line += (line.isEmpty()) ? videoline : "
" + videoline; - //if (appline != null) line += (line.isEmpty()) ? appline : "
" + appline; - //if (hrefline != null) line += (line.isEmpty()) ? hrefline : "
" + hrefline; - //if (textline != null) snippetLine += (snippetLine.isEmpty()) ? textline : "
" + textline; - - if (snippetLine == null || !remainingHashes.isEmpty()) { + if (textline == null || !remainingHashes.isEmpty()) { init(url.hash(), null, false, ResultClass.ERROR_NO_MATCH, "no matching snippet found"); return; } if (snippetLine.length() > snippetMaxLength) snippetLine = snippetLine.substring(0, snippetMaxLength); // finally store this snippet in our own cache - snippetsCache.put(wordhashes, urls, snippetLine); - -// document.close(); + snippetsCache.put(wordhashes, urls, textline); init(url.hash(), snippetLine, false, source, null); } diff --git a/test/net/yacy/search/snippet/TextSnippetTest.java b/test/net/yacy/search/snippet/TextSnippetTest.java new file mode 100644 index 000000000..0c92f581f --- /dev/null +++ b/test/net/yacy/search/snippet/TextSnippetTest.java @@ -0,0 +1,120 @@ + +package net.yacy.search.snippet; + +import net.yacy.cora.document.encoding.ASCII; +import net.yacy.cora.document.id.DigestURL; +import net.yacy.cora.federate.yacy.CacheStrategy; +import net.yacy.cora.storage.HandleSet; +import net.yacy.kelondro.data.meta.URIMetadataNode; +import net.yacy.search.query.QueryGoal; +import net.yacy.search.schema.CollectionSchema; +import org.apache.solr.common.SolrDocument; +import static org.junit.Assert.*; +import org.junit.Before; +import org.junit.Test; + + +public class TextSnippetTest { + + // declare some required parameter + final CacheStrategy cacheStrategy = CacheStrategy.CACHEONLY; + final boolean pre = true; + final int snippetMaxLength = 220; + final boolean reindexing = false; + + SolrDocument doc; + + public TextSnippetTest() { + } + + @Before + public void setUp() throws Exception { + + // prepare a empty test document + doc = new SolrDocument(); + DigestURL url = new DigestURL("http://localhost/page.html"); + doc.addField(CollectionSchema.id.name(), ASCII.String(url.hash())); + doc.addField(CollectionSchema.sku.name(),url.toString()); + // for testcases add other fields + // fields involved in snippet extraction: + // url, title, keywords, author, text_t + } + + @Test + public void testTextSnippet() { + + URIMetadataNode testpage = new URIMetadataNode(doc); + testpage.addField(CollectionSchema.title.name(), "New test case"); + testpage.addField(CollectionSchema.keywords.name(), "junit"); + testpage.addField(CollectionSchema.author.name(), "test author"); + testpage.addField(CollectionSchema.text_t.name(), "A new testcase has been introduced. " + + "It includes a few test lines and one line that should match."); + + String querywords = "testcase line"; + QueryGoal qg = new QueryGoal(querywords); + HandleSet queryhashes = qg.getIncludeHashes(); + + TextSnippet ts = new TextSnippet( + null, + testpage, + queryhashes, + cacheStrategy, + pre, + snippetMaxLength, + reindexing + ); + String rstr = ts.getError(); + assertEquals("testTextSnippet Error Code: ", "", rstr); + + String[] wordlist = querywords.split(" "); + rstr = ts.toString(); + System.out.println("testTextSnippet: query=" + querywords); + System.out.println("testTextSnippet: snippet=" + rstr); + // check words included in snippet + for (String word : wordlist) { + assertTrue("testTextSnippet word included " + word, rstr.contains(word)); + } + + } + + /** + * Test of getLineMarked method, of class TextSnippet. + */ + @Test + public void testGetLineMarked() { + URIMetadataNode testpage = new URIMetadataNode(doc); + testpage.addField(CollectionSchema.title.name(), "New test case"); + testpage.addField(CollectionSchema.keywords.name(), "junit"); + testpage.addField(CollectionSchema.author.name(), "test author"); + testpage.addField(CollectionSchema.text_t.name(), + "A new testcase has been introduced. " + + "It includes a few test lines and one line that should match."); + + String querywords = "testcase line"; + QueryGoal qg = new QueryGoal(querywords); + HandleSet queryhashes = qg.getIncludeHashes(); + + TextSnippet ts = new TextSnippet( + null, + testpage, + queryhashes, + cacheStrategy, + pre, + snippetMaxLength, + reindexing + ); + + String rstr = ts.getError(); + assertEquals("testGetLineMarked Error Code: ", "", rstr); + + // check words marked in snippet + rstr = ts.getLineMarked(qg); + System.out.println("testGetLineMarked: query=" + querywords); + System.out.println("testGetLineMarked: snippet=" + rstr); + String[] wordlist = querywords.split(" "); + for (String wordstr : wordlist) { + assertTrue("testGetLineMarked marked word " + wordstr, rstr.contains("" + wordstr + "")); + } + } + +}