Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

pull/1/head
Michael Peter Christen 11 years ago
commit 1cd4b2e8be

@ -1246,7 +1246,7 @@ public final class SearchEvent {
if (solrsnippet != null && solrsnippet.size() > 0) {
OpensearchResponseWriter.removeSubsumedTitle(solrsnippet, node.dc_title());
final TextSnippet snippet = new TextSnippet(node.hash(), OpensearchResponseWriter.getLargestSnippet(solrsnippet), true, ResultClass.SOURCE_CACHE, "");
ResultEntry re = new ResultEntry(node, this.query.getSegment(), this.peers, snippet, null, 0);
ResultEntry re = new ResultEntry(node, this.query.getSegment(), this.peers, snippet, 0);
addResult(re);
success = true;
} else {
@ -1375,7 +1375,7 @@ public final class SearchEvent {
((this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_indexof))),
SearchEvent.SNIPPET_MAX_LENGTH,
!this.query.isLocal());
return new ResultEntry(page, this.query.getSegment(), this.peers, snippet, null, 0); // result without snippet
return new ResultEntry(page, this.query.getSegment(), this.peers, snippet, 0); // result without snippet
}
// load snippet
@ -1396,16 +1396,16 @@ public final class SearchEvent {
if (!snippet.getErrorCode().fail()) {
// we loaded the file and found the snippet
return new ResultEntry(page, this.query.getSegment(), this.peers, snippet, null, snippetComputationTime); // result with snippet attached
return new ResultEntry(page, this.query.getSegment(), this.peers, snippet, snippetComputationTime); // result with snippet attached
} else if (cacheStrategy.mustBeOffline()) {
// we did not demand online loading, therefore a failure does not mean that the missing snippet causes a rejection of this result
// this may happen during a remote search, because snippet loading is omitted to retrieve results faster
return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, snippetComputationTime); // result without snippet
return new ResultEntry(page, this.query.getSegment(), this.peers, null, snippetComputationTime); // result without snippet
} else {
// problems with snippet fetch
if (this.snippetFetchWordHashes.has(Segment.catchallHash)) {
// we accept that because the word cannot be on the page
return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, 0);
return new ResultEntry(page, this.query.getSegment(), this.peers, null, 0);
}
final String reason = "no text snippet; errorCode = " + snippet.getErrorCode();
if (this.deleteIfSnippetFail) {
@ -1415,7 +1415,7 @@ public final class SearchEvent {
return null;
}
}
return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, 0); // result without snippet
return new ResultEntry(page, this.query.getSegment(), this.peers, null, 0); // result without snippet
}
public ResultEntry oneResult(final int item, final long timeout) {

@ -57,7 +57,6 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
private String alternative_urlstring;
private String alternative_urlname;
private final TextSnippet textSnippet;
private final List<MediaSnippet> mediaSnippets;
private final Segment indexSegment;
// statistic objects
@ -67,7 +66,6 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
final Segment indexSegment,
SeedDB peers,
final TextSnippet textSnippet,
final List<MediaSnippet> mediaSnippets,
final long snippetComputationTime) {
this.urlentry = urlentry;
this.urlentry.setField(CollectionSchema.text_t.getSolrFieldName(), ""); // clear the text field which eats up most of the space; it was used for snippet computation which is in a separate field here
@ -75,7 +73,6 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
this.alternative_urlstring = null;
this.alternative_urlname = null;
this.textSnippet = textSnippet;
this.mediaSnippets = mediaSnippets;
this.snippetComputationTime = snippetComputationTime;
final String host = urlentry.url().getHost();
if (host != null && host.endsWith(".yacyh")) {
@ -163,9 +160,6 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
/**
 * Returns the text snippet that was handed to this result entry on construction.
 *
 * @return the stored text snippet; may be {@code null} when the entry was created without one
 */
public TextSnippet textSnippet() {
    return textSnippet;
}
/**
 * Returns the media snippets that were handed to this result entry on construction.
 *
 * @return the stored list of media snippets; may be {@code null} when none were supplied
 */
public List<MediaSnippet> mediaSnippets() {
    return mediaSnippets;
}
/**
 * Returns the modification date reported by the underlying url entry.
 *
 * @return the value of {@code urlentry.moddate()}
 */
public Date modified() {
    final Date lastModified = this.urlentry.moddate();
    return lastModified;
}
@ -211,9 +205,6 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
/**
 * Tells whether this entry carries a usable text snippet.
 *
 * @return {@code true} if a text snippet is present and its error code does not
 *         report a failure; {@code false} otherwise
 */
public boolean hasTextSnippet() {
    // no snippet attached at all
    if (this.textSnippet == null) {
        return false;
    }
    // a snippet is attached; it only counts when its error code is not a failure
    return !this.textSnippet.getErrorCode().fail();
}
/**
 * Tells whether this entry carries at least one media snippet.
 *
 * @return {@code true} if the media snippet list is present and not empty;
 *         {@code false} otherwise
 */
public boolean hasMediaSnippets() {
    // no list attached at all
    if (this.mediaSnippets == null) {
        return false;
    }
    return !this.mediaSnippets.isEmpty();
}
public String resource() {
// generate transport resource
if ((this.textSnippet == null) || (!this.textSnippet.exists())) {

@ -320,30 +320,14 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
sentences = null;
} //encapsulate potential expensive sentences END
// compute snippet from media - attention document closed above!
//String audioline = computeMediaSnippet(document.getAudiolinks(), queryhashes);
//String videoline = computeMediaSnippet(document.getVideolinks(), queryhashes);
//String appline = computeMediaSnippet(document.getApplinks(), queryhashes);
//String hrefline = computeMediaSnippet(document.getAnchors(), queryhashes);
//String imageline = computeMediaSnippet(document.getAudiolinks(), queryhashes);
snippetLine = "";
//if (audioline != null) line += (line.isEmpty()) ? audioline : "<br />" + audioline;
//if (videoline != null) line += (line.isEmpty()) ? videoline : "<br />" + videoline;
//if (appline != null) line += (line.isEmpty()) ? appline : "<br />" + appline;
//if (hrefline != null) line += (line.isEmpty()) ? hrefline : "<br />" + hrefline;
//if (textline != null) snippetLine += (snippetLine.isEmpty()) ? textline : "<br />" + textline;
if (snippetLine == null || !remainingHashes.isEmpty()) {
if (textline == null || !remainingHashes.isEmpty()) {
init(url.hash(), null, false, ResultClass.ERROR_NO_MATCH, "no matching snippet found");
return;
}
if (snippetLine.length() > snippetMaxLength) snippetLine = snippetLine.substring(0, snippetMaxLength);
// finally store this snippet in our own cache
snippetsCache.put(wordhashes, urls, snippetLine);
// document.close();
snippetsCache.put(wordhashes, urls, textline);
init(url.hash(), snippetLine, false, source, null);
}

@ -0,0 +1,120 @@
package net.yacy.search.snippet;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.storage.HandleSet;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.search.query.QueryGoal;
import net.yacy.search.schema.CollectionSchema;
import org.apache.solr.common.SolrDocument;
import static org.junit.Assert.*;
import org.junit.Before;
import org.junit.Test;
/**
 * JUnit tests for {@link TextSnippet}: a snippet is computed for a small,
 * in-memory test document and checked for the presence (and highlighting)
 * of the query words.
 */
public class TextSnippetTest {

    // parameters handed unchanged to the TextSnippet constructor in every test
    final CacheStrategy cacheStrategy = CacheStrategy.CACHEONLY;
    final boolean pre = true;
    final int snippetMaxLength = 220;
    final boolean reindexing = false;

    // base document, re-created before every test by setUp()
    SolrDocument doc;

    public TextSnippetTest() {
    }

    /**
     * Prepares an empty test document that only carries the id and url fields.
     * The fields involved in snippet extraction (title, keywords, author,
     * text_t) are added by the individual test cases.
     *
     * @throws Exception if the test url cannot be created
     */
    @Before
    public void setUp() throws Exception {
        doc = new SolrDocument();
        DigestURL url = new DigestURL("http://localhost/page.html");
        doc.addField(CollectionSchema.id.name(), ASCII.String(url.hash()));
        doc.addField(CollectionSchema.sku.name(), url.toString());
    }

    /**
     * Builds the page used by all tests: the base document from
     * {@link #setUp()} plus title, keywords, author and a two-sentence text.
     *
     * @return a metadata node wrapping the populated test document
     */
    private URIMetadataNode createTestPage() {
        final URIMetadataNode testpage = new URIMetadataNode(doc);
        testpage.addField(CollectionSchema.title.name(), "New test case");
        testpage.addField(CollectionSchema.keywords.name(), "junit");
        testpage.addField(CollectionSchema.author.name(), "test author");
        testpage.addField(CollectionSchema.text_t.name(),
                "A new testcase has been introduced. "
                + "It includes a few test lines and one line that should match.");
        return testpage;
    }

    /**
     * Computes a snippet for the given page and query using the shared test
     * parameters of this class.
     *
     * @param testpage the page to extract the snippet from
     * @param qg the query goal whose include hashes are searched for
     * @return the computed snippet
     */
    private TextSnippet createSnippet(final URIMetadataNode testpage, final QueryGoal qg) {
        final HandleSet queryhashes = qg.getIncludeHashes();
        // NOTE(review): the first constructor argument is passed as null, as in the
        // original test - presumably acceptable for this in-memory document; confirm
        return new TextSnippet(
                null,
                testpage,
                queryhashes,
                cacheStrategy,
                pre,
                snippetMaxLength,
                reindexing
        );
    }

    /**
     * Checks that a snippet is computed without error and that its string
     * form contains every word of the query.
     */
    @Test
    public void testTextSnippet() {
        final URIMetadataNode testpage = createTestPage();

        final String querywords = "testcase line";
        final QueryGoal qg = new QueryGoal(querywords);

        final TextSnippet ts = createSnippet(testpage, qg);

        // the test expects an empty error string for a successfully computed snippet
        assertEquals("testTextSnippet Error Code: ", "", ts.getError());

        final String rstr = ts.toString();
        System.out.println("testTextSnippet: query=" + querywords);
        System.out.println("testTextSnippet: snippet=" + rstr);

        // fail with a readable message instead of a NullPointerException below
        assertNotNull("testTextSnippet snippet text missing", rstr);

        // check words included in snippet
        for (final String word : querywords.split(" ")) {
            assertTrue("testTextSnippet word included " + word, rstr.contains(word));
        }
    }

    /**
     * Test of getLineMarked method, of class TextSnippet: every query word
     * must appear wrapped in {@code <b>...</b>} in the marked line.
     */
    @Test
    public void testGetLineMarked() {
        final URIMetadataNode testpage = createTestPage();

        final String querywords = "testcase line";
        final QueryGoal qg = new QueryGoal(querywords);

        final TextSnippet ts = createSnippet(testpage, qg);

        // the test expects an empty error string for a successfully computed snippet
        assertEquals("testGetLineMarked Error Code: ", "", ts.getError());

        // check words marked in snippet
        final String rstr = ts.getLineMarked(qg);
        System.out.println("testGetLineMarked: query=" + querywords);
        System.out.println("testGetLineMarked: snippet=" + rstr);

        // fail with a readable message instead of a NullPointerException below
        assertNotNull("testGetLineMarked marked line missing", rstr);

        for (final String wordstr : querywords.split(" ")) {
            assertTrue("testGetLineMarked marked word " + wordstr, rstr.contains("<b>" + wordstr + "</b>"));
        }
    }
}
Loading…
Cancel
Save