Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

11 years ago · 1cd4b2e8be
parent 8c52f0651b 431a5f9c4e
commit 1cd4b2e8be
4 changed files with 128 additions and 33 deletions
--- a/source/net/yacy/search/query/SearchEvent.java
+++ b/source/net/yacy/search/query/SearchEvent.java
@@ -1246,7 +1246,7 @@ public final class SearchEvent {
            if (solrsnippet != null && solrsnippet.size() > 0) {
                OpensearchResponseWriter.removeSubsumedTitle(solrsnippet, node.dc_title());
                final TextSnippet snippet = new TextSnippet(node.hash(), OpensearchResponseWriter.getLargestSnippet(solrsnippet), true, ResultClass.SOURCE_CACHE, "");
-                ResultEntry re = new ResultEntry(node, this.query.getSegment(), this.peers, snippet, null, 0);
+                ResultEntry re = new ResultEntry(node, this.query.getSegment(), this.peers, snippet, 0);
                addResult(re);
                success = true;
            } else {
@@ -1375,7 +1375,7 @@ public final class SearchEvent {
                    ((this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_indexof))),
                    SearchEvent.SNIPPET_MAX_LENGTH,
                    !this.query.isLocal());
-            return new ResultEntry(page, this.query.getSegment(), this.peers, snippet, null, 0); // result without snippet
+            return new ResultEntry(page, this.query.getSegment(), this.peers, snippet, 0); // result without snippet
        }

        // load snippet
@@ -1396,16 +1396,16 @@ public final class SearchEvent {

            if (!snippet.getErrorCode().fail()) {
                // we loaded the file and found the snippet
-                return new ResultEntry(page, this.query.getSegment(), this.peers, snippet, null, snippetComputationTime); // result with snippet attached
+                return new ResultEntry(page, this.query.getSegment(), this.peers, snippet, snippetComputationTime); // result with snippet attached
            } else if (cacheStrategy.mustBeOffline()) {
                // we did not demand online loading, therefore a failure does not mean that the missing snippet causes a rejection of this result
                // this may happen during a remote search, because snippet loading is omitted to retrieve results faster
-                return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, snippetComputationTime); // result without snippet
+                return new ResultEntry(page, this.query.getSegment(), this.peers, null, snippetComputationTime); // result without snippet
            } else {
                // problems with snippet fetch
                if (this.snippetFetchWordHashes.has(Segment.catchallHash)) {
                    // we accept that because the word cannot be on the page
-                    return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, 0);
+                    return new ResultEntry(page, this.query.getSegment(), this.peers, null, 0);
                }
                final String reason = "no text snippet; errorCode = " + snippet.getErrorCode();
                if (this.deleteIfSnippetFail) {
@@ -1415,7 +1415,7 @@ public final class SearchEvent {
                return null;
            }
        }
-        return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, 0); // result without snippet
+        return new ResultEntry(page, this.query.getSegment(), this.peers, null, 0); // result without snippet
    }
    
    public ResultEntry oneResult(final int item, final long timeout) {        
--- a/source/net/yacy/search/snippet/ResultEntry.java
+++ b/source/net/yacy/search/snippet/ResultEntry.java
@@ -57,7 +57,6 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
    private String alternative_urlstring;
    private String alternative_urlname;
    private final TextSnippet textSnippet;
-    private final List<MediaSnippet> mediaSnippets;
    private final Segment indexSegment;

    // statistic objects
@@ -67,7 +66,6 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
                       final Segment indexSegment,
                       SeedDB peers,
                       final TextSnippet textSnippet,
-                       final List<MediaSnippet> mediaSnippets,
                       final long snippetComputationTime) {
        this.urlentry = urlentry;
        this.urlentry.setField(CollectionSchema.text_t.getSolrFieldName(), ""); // clear the text field which eats up most of the space; it was used for snippet computation which is in a separate field here
@@ -75,7 +73,6 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
        this.alternative_urlstring = null;
        this.alternative_urlname = null;
        this.textSnippet = textSnippet;
-        this.mediaSnippets = mediaSnippets;
        this.snippetComputationTime = snippetComputationTime;
        final String host = urlentry.url().getHost();
        if (host != null && host.endsWith(".yacyh")) {
@@ -163,9 +160,6 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
    public TextSnippet textSnippet() {
        return this.textSnippet;
    }
-    public List<MediaSnippet> mediaSnippets() {
-        return this.mediaSnippets;
-    }
    public Date modified() {
        return this.urlentry.moddate();
    }
@@ -211,9 +205,6 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
    public boolean hasTextSnippet() {
        return (this.textSnippet != null) && (!this.textSnippet.getErrorCode().fail());
    }
-    public boolean hasMediaSnippets() {
-        return (this.mediaSnippets != null) && (!this.mediaSnippets.isEmpty());
-    }
    public String resource() {
        // generate transport resource
        if ((this.textSnippet == null) || (!this.textSnippet.exists())) {
--- a/source/net/yacy/search/snippet/TextSnippet.java
+++ b/source/net/yacy/search/snippet/TextSnippet.java
@@ -320,30 +320,14 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
            sentences = null;
        } //encapsulate potential expensive sentences END

-        // compute snippet from media - attention document closed above!
-        //String audioline = computeMediaSnippet(document.getAudiolinks(), queryhashes);
-        //String videoline = computeMediaSnippet(document.getVideolinks(), queryhashes);
-        //String appline = computeMediaSnippet(document.getApplinks(), queryhashes);
-        //String hrefline = computeMediaSnippet(document.getAnchors(), queryhashes);
-        //String imageline = computeMediaSnippet(document.getAudiolinks(), queryhashes);
-
-        snippetLine = "";
-        //if (audioline != null) line += (line.isEmpty()) ? audioline : "<br />" + audioline;
-        //if (videoline != null) line += (line.isEmpty()) ? videoline : "<br />" + videoline;
-        //if (appline   != null) line += (line.isEmpty()) ? appline   : "<br />" + appline;
-        //if (hrefline  != null) line += (line.isEmpty()) ? hrefline  : "<br />" + hrefline;
-        //if (textline  != null) snippetLine += (snippetLine.isEmpty()) ? textline  : "<br />" + textline;
-
-        if (snippetLine == null || !remainingHashes.isEmpty()) {
+        if (textline == null || !remainingHashes.isEmpty()) {
            init(url.hash(), null, false, ResultClass.ERROR_NO_MATCH, "no matching snippet found");
            return;
        }
        if (snippetLine.length() > snippetMaxLength) snippetLine = snippetLine.substring(0, snippetMaxLength);

        // finally store this snippet in our own cache
-        snippetsCache.put(wordhashes, urls, snippetLine);
-
-//        document.close();
+        snippetsCache.put(wordhashes, urls, textline);
        init(url.hash(), snippetLine, false, source, null);
    }

--- a/test/net/yacy/search/snippet/TextSnippetTest.java
+++ b/test/net/yacy/search/snippet/TextSnippetTest.java
@@ -0,0 +1,120 @@
+
+package net.yacy.search.snippet;
+
+import net.yacy.cora.document.encoding.ASCII;
+import net.yacy.cora.document.id.DigestURL;
+import net.yacy.cora.federate.yacy.CacheStrategy;
+import net.yacy.cora.storage.HandleSet;
+import net.yacy.kelondro.data.meta.URIMetadataNode;
+import net.yacy.search.query.QueryGoal;
+import net.yacy.search.schema.CollectionSchema;
+import org.apache.solr.common.SolrDocument;
+import static org.junit.Assert.*;
+import org.junit.Before;
+import org.junit.Test;
+
+
+public class TextSnippetTest { // JUnit 4 tests: snippet extraction and query-word marking of TextSnippet
+
+    // fixed arguments handed unchanged to the TextSnippet constructor in every test
+    final CacheStrategy cacheStrategy = CacheStrategy.CACHEONLY;
+    final boolean pre = true; // NOTE(review): meaning is defined by the TextSnippet constructor, not visible here
+    final int snippetMaxLength = 220; // length limit passed to the TextSnippet constructor
+    final boolean reindexing = false;
+
+    SolrDocument doc; // base document; rebuilt by setUp() before each test
+
+    public TextSnippetTest() {
+    }
+
+    @Before
+    public void setUp() throws Exception {
+
+        // prepare an empty test document that carries only its id (URL hash) and sku (URL string)
+        doc = new SolrDocument();
+        DigestURL url = new DigestURL("http://localhost/page.html");
+        doc.addField(CollectionSchema.id.name(), ASCII.String(url.hash()));
+        doc.addField(CollectionSchema.sku.name(),url.toString());
+        // the individual test cases add the remaining fields themselves;
+        // fields involved in snippet extraction:
+        // url, title, keywords, author, text_t
+    }
+
+    @Test
+    public void testTextSnippet() {
+        // expects an error-free snippet whose text contains every query word
+        URIMetadataNode testpage = new URIMetadataNode(doc);
+        testpage.addField(CollectionSchema.title.name(), "New test case");
+        testpage.addField(CollectionSchema.keywords.name(), "junit");
+        testpage.addField(CollectionSchema.author.name(), "test author");
+        testpage.addField(CollectionSchema.text_t.name(), "A new testcase has been introduced. "
+                + "It includes a few test lines and one line that should match.");
+
+        String querywords = "testcase line"; // both words occur in the text_t content above
+        QueryGoal qg = new QueryGoal(querywords);
+        HandleSet queryhashes = qg.getIncludeHashes();
+
+        TextSnippet ts = new TextSnippet(
+                null, // NOTE(review): first argument deliberately null; its role is defined by TextSnippet, not visible here
+                testpage,
+                queryhashes,
+                cacheStrategy,
+                pre,
+                snippetMaxLength,
+                reindexing
+        );
+        String rstr = ts.getError(); // the test requires an empty error string
+        assertEquals("testTextSnippet Error Code: ", "", rstr);
+
+        String[] wordlist = querywords.split(" ");
+        rstr = ts.toString(); // the test treats toString() as the snippet text
+        System.out.println("testTextSnippet: query=" + querywords);
+        System.out.println("testTextSnippet: snippet=" + rstr);
+        // every query word must occur literally in the snippet text
+        for (String word : wordlist) {
+            assertTrue("testTextSnippet word included " + word, rstr.contains(word));
+        }
+
+    }
+
+    /**
+     * Checks that TextSnippet.getLineMarked(QueryGoal) wraps each query word in bold tags.
+     */
+    @Test
+    public void testGetLineMarked() {
+        URIMetadataNode testpage = new URIMetadataNode(doc);
+        testpage.addField(CollectionSchema.title.name(), "New test case");
+        testpage.addField(CollectionSchema.keywords.name(), "junit");
+        testpage.addField(CollectionSchema.author.name(), "test author");
+        testpage.addField(CollectionSchema.text_t.name(),
+                "A new testcase has been introduced. "
+                + "It includes a few test lines and one line that should match.");
+
+        String querywords = "testcase line"; // both words occur in the text_t content above
+        QueryGoal qg = new QueryGoal(querywords);
+        HandleSet queryhashes = qg.getIncludeHashes();
+
+        TextSnippet ts = new TextSnippet(
+                null, // NOTE(review): first argument deliberately null; its role is defined by TextSnippet, not visible here
+                testpage,
+                queryhashes,
+                cacheStrategy,
+                pre,
+                snippetMaxLength,
+                reindexing
+        );
+
+        String rstr = ts.getError(); // the test requires an empty error string
+        assertEquals("testGetLineMarked Error Code: ", "", rstr);
+
+        // every query word must appear wrapped in <b>...</b> in the marked line
+        rstr = ts.getLineMarked(qg);
+        System.out.println("testGetLineMarked: query=" + querywords);
+        System.out.println("testGetLineMarked: snippet=" + rstr);
+        String[] wordlist = querywords.split(" ");
+        for (String wordstr : wordlist) {
+            assertTrue("testGetLineMarked marked word " + wordstr, rstr.contains("<b>" + wordstr + "</b>"));
+        }
+    }
+
+}