Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

pull/1/head
Michael Peter Christen 10 years ago
commit d9d3111d10

@ -83,7 +83,7 @@
<compilation-unit>
<package-root>source</package-root>
<package-root>htroot</package-root>
<classpath mode="compile">lib/J7Zip-modified.jar;lib/apache-mime4j-0.6.jar;lib/bcmail-jdk15-1.46.jar;lib/bcprov-jdk15-1.46.jar;lib/chardet.jar;lib/commons-codec-1.10.jar;lib/commons-compress-1.9.jar;lib/commons-fileupload-1.3.1.jar;lib/commons-io-2.3.jar;lib/commons-jxpath-1.3.jar;lib/commons-lang-2.6.jar;lib/commons-logging-1.2.jar;lib/fontbox-1.8.8.jar;lib/geronimo-stax-api_1.0_spec-1.0.1.jar;lib/guava-18.0.jar;lib/htmllexer.jar;lib/httpclient-4.4.jar;lib/httpcore-4.4.jar;lib/httpmime-4.4.jar;lib/icu4j-core.jar;lib/jakarta-oro-2.0.8.jar;lib/jaudiotagger-2.0.4-20111207.115108-15.jar;lib/javax.servlet-api-3.1.0.jar;lib/jcifs-1.3.17.jar;lib/jcl-over-slf4j-1.7.9.jar;lib/jempbox-1.8.8.jar;lib/jetty-client-9.2.7.v20150116.jar;lib/jetty-continuation-9.2.7.v20150116.jar;lib/jetty-deploy-9.2.7.v20150116.jar;lib/jetty-http-9.2.7.v20150116.jar;lib/jetty-io-9.2.7.v20150116.jar;lib/jetty-jmx-9.2.7.v20150116.jar;lib/jetty-proxy-9.2.7.v20150116.jar;lib/jetty-security-9.2.7.v20150116.jar;lib/jetty-server-9.2.7.v20150116.jar;lib/jetty-servlet-9.2.7.v20150116.jar;lib/jetty-servlets-9.2.7.v20150116.jar;lib/jetty-util-9.2.7.v20150116.jar;lib/jetty-webapp-9.2.7.v20150116.jar;lib/jetty-xml-9.2.7.v20150116.jar;lib/jsch-0.1.51.jar;lib/json-simple-1.1.1.jar;lib/jsoup-1.8.1.jar;lib/log4j-over-slf4j-1.7.9.jar;lib/lucene-analyzers-common-4.10.3.jar;lib/lucene-analyzers-phonetic-4.10.3.jar;lib/lucene-classification-4.10.3.jar;lib/lucene-codecs-4.10.3.jar;lib/lucene-core-4.10.3.jar;lib/lucene-facet-4.10.3.jar;lib/lucene-grouping-4.10.3.jar;lib/lucene-highlighter-4.10.3.jar;lib/lucene-join-4.10.3.jar;lib/lucene-memory-4.10.3.jar;lib/lucene-misc-4.10.3.jar;lib/lucene-queries-4.10.3.jar;lib/lucene-queryparser-4.10.3.jar;lib/lucene-spatial-4.10.3.jar;lib/lucene-suggest-4.10.3.jar;lib/metadata-extractor-2.7.0.jar;lib/noggit-0.5.jar;lib/org.restlet.jar;lib/pdfbox-1.8.8.jar;lib/poi-3.10.1.jar;lib/poi-scratchpad-3.10.1.jar;lib/slf4j-api-1.7.9.jar;lib/slf4j-jdk14-1.7.9.jar;lib/solr-core-4.10.3.jar;lib/
solr-solrj-4.10.3.jar;lib/spatial4j-0.4.1.jar;lib/webcat-0.1-swf.jar;lib/weupnp-0.1.2.jar;lib/wstx-asl-3.2.9.jar;lib/xercesImpl.jar;lib/xml-apis.jar;lib/xmpcore-5.1.2.jar;lib/zookeeper-3.4.6.jar</classpath>
<classpath mode="compile">lib/J7Zip-modified.jar;lib/apache-mime4j-0.6.jar;lib/bcmail-jdk15-1.46.jar;lib/bcprov-jdk15-1.46.jar;lib/chardet.jar;lib/commons-codec-1.10.jar;lib/commons-compress-1.9.jar;lib/commons-fileupload-1.3.1.jar;lib/commons-io-2.3.jar;lib/commons-jxpath-1.3.jar;lib/commons-lang-2.6.jar;lib/commons-logging-1.2.jar;lib/fontbox-1.8.8.jar;lib/geronimo-stax-api_1.0_spec-1.0.1.jar;lib/guava-18.0.jar;lib/htmllexer.jar;lib/httpclient-4.4.jar;lib/httpcore-4.4.jar;lib/httpmime-4.4.jar;lib/icu4j-core.jar;lib/jakarta-oro-2.0.8.jar;lib/jaudiotagger-2.0.4-20111207.115108-15.jar;lib/javax.servlet-api-3.1.0.jar;lib/jcifs-1.3.17.jar;lib/jcl-over-slf4j-1.7.9.jar;lib/jempbox-1.8.8.jar;lib/jetty-client-9.2.9.v20150224.jar;lib/jetty-continuation-9.2.9.v20150224.jar;lib/jetty-deploy-9.2.9.v20150224.jar;lib/jetty-http-9.2.9.v20150224.jar;lib/jetty-io-9.2.9.v20150224.jar;lib/jetty-jmx-9.2.9.v20150224.jar;lib/jetty-proxy-9.2.9.v20150224.jar;lib/jetty-security-9.2.9.v20150224.jar;lib/jetty-server-9.2.9.v20150224.jar;lib/jetty-servlet-9.2.9.v20150224.jar;lib/jetty-servlets-9.2.9.v20150224.jar;lib/jetty-util-9.2.9.v20150224.jar;lib/jetty-webapp-9.2.9.v20150224.jar;lib/jetty-xml-9.2.9.v20150224.jar;lib/jsch-0.1.51.jar;lib/json-simple-1.1.1.jar;lib/jsoup-1.8.1.jar;lib/log4j-over-slf4j-1.7.9.jar;lib/lucene-analyzers-common-4.10.3.jar;lib/lucene-analyzers-phonetic-4.10.3.jar;lib/lucene-classification-4.10.3.jar;lib/lucene-codecs-4.10.3.jar;lib/lucene-core-4.10.3.jar;lib/lucene-facet-4.10.3.jar;lib/lucene-grouping-4.10.3.jar;lib/lucene-highlighter-4.10.3.jar;lib/lucene-join-4.10.3.jar;lib/lucene-memory-4.10.3.jar;lib/lucene-misc-4.10.3.jar;lib/lucene-queries-4.10.3.jar;lib/lucene-queryparser-4.10.3.jar;lib/lucene-spatial-4.10.3.jar;lib/lucene-suggest-4.10.3.jar;lib/metadata-extractor-2.7.0.jar;lib/noggit-0.5.jar;lib/org.restlet.jar;lib/pdfbox-1.8.8.jar;lib/poi-3.10.1.jar;lib/poi-scratchpad-3.10.1.jar;lib/slf4j-api-1.7.9.jar;lib/slf4j-jdk14-1.7.9.jar;lib/solr-core-4.10.3.jar;lib/
solr-solrj-4.10.3.jar;lib/spatial4j-0.4.1.jar;lib/webcat-0.1-swf.jar;lib/weupnp-0.1.2.jar;lib/wstx-asl-3.2.9.jar;lib/xercesImpl.jar;lib/xml-apis.jar;lib/xmpcore-5.1.2.jar;lib/zookeeper-3.4.6.jar</classpath>
<built-to>lib/yacycore.jar</built-to>
<source-level>1.7</source-level>
</compilation-unit>

@ -35,6 +35,8 @@
<maven.compiler.target>1.7</maven.compiler.target>
<!-- the Solr version used in dependency section for all related dependencies -->
<solr.version>4.10.3</solr.version>
<!-- the Jetty version used in dependency section for all related dependencies -->
<jetty.version>9.2.9.v20150224</jetty.version>
<!-- properties used for filtering yacyBuildProperties.java -->
<REPL_DATE>${DSTAMP}</REPL_DATE>
@ -549,67 +551,67 @@
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
<version>9.2.7.v20150116</version>
<version>${jetty.version}</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-servlet</artifactId>
<version>9.2.7.v20150116</version>
<version>${jetty.version}</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-servlets</artifactId>
<version>9.2.7.v20150116</version>
<version>${jetty.version}</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-webapp</artifactId>
<version>9.2.7.v20150116</version>
<version>${jetty.version}</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-util</artifactId>
<version>9.2.7.v20150116</version>
<version>${jetty.version}</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-xml</artifactId>
<version>9.2.7.v20150116</version>
<version>${jetty.version}</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-http</artifactId>
<version>9.2.7.v20150116</version>
<version>${jetty.version}</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-security</artifactId>
<version>9.2.7.v20150116</version>
<version>${jetty.version}</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-io</artifactId>
<version>9.2.7.v20150116</version>
<version>${jetty.version}</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-continuation</artifactId>
<version>9.2.7.v20150116</version>
<version>${jetty.version}</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-jmx</artifactId>
<version>9.2.7.v20150116</version>
<version>${jetty.version}</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-proxy</artifactId>
<version>9.2.7.v20150116</version>
<version>${jetty.version}</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-deploy</artifactId>
<version>9.2.7.v20150116</version>
<version>${jetty.version}</version>
</dependency>
<dependency>
<groupId>org.bitlet</groupId>

@ -131,7 +131,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
}
private byte[] urlhash;
private String line;
private String line; // the raw (unmodified) line from source ( use getDescriptionLine() to get the html encoded version for display)
private boolean isMarked;
private String error;
private ResultClass resultStatus;
@ -344,9 +344,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
}
/**
* Init a snippet line for urlhash, used for display in search results.
* HTML code in text will be html-encoded.
* If the input is already marked (highlighted), <b>...</b> tags are kept as is.
* Init a snippet line for urlhash
*
* @param urlhash hash of the url for this snippet
* @param line text to use as snippet
@ -361,15 +359,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
final ResultClass errorCode,
final String errortext) {
this.urlhash = urlhash;
if (line != null) { // line may contain html code (possible to mess up result display)
if (isMarked) { // if marked, keep <b>...</b> html tags in place
this.line = CharacterCoding.unicode2html(line, false).replaceAll("&lt;b&gt;(.+?)&lt;/b&gt;", "<b>$1</b>");
} else { // otherwise encode all text for html display
this.line = CharacterCoding.unicode2html(line, false);
}
} else {
this.line = line;
}
this.line = line;
this.isMarked = isMarked;
this.resultStatus = errorCode;
this.error = errortext;
@ -437,9 +427,22 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
}
private String descriptionline = null;
/**
* Returns the snippet line prepared for display in a browser.
* The result is computed on the first call and cached in the
* {@code descriptionline} field; later calls return the cached value
* without looking at the {@code queryGoal} argument again.
* <p>
* If the snippet is already marked, the raw line is html encoded and the
* encoded {@code <b>..</b>} pairs are turned back into real tags, so only the
* highlighting markup survives. Otherwise the line is obtained from
* {@code getLineMarked(queryGoal)}.
*
* @param queryGoal passed to {@code getLineMarked} when the snippet is not yet marked
* @return snippet line for display, query words marked by {@code <b>..</b>}
*         (NOTE(review): html encoding of the unmarked branch happens, if at all,
*         inside getLineMarked - confirm)
*/
public String descriptionline(QueryGoal queryGoal) {
// return the cached value if it was computed before
if (descriptionline != null) return descriptionline;
// NOTE(review): this assignment is overwritten by both branches of the if/else below;
// it looks like the pre-change line kept by the diff rendering - confirm
descriptionline = this.isMarked() ? this.getLineRaw() : this.getLineMarked(queryGoal);
if (this.isMarked) {
// html encode source, keep <b>..</b>:
// encode everything first, then restore only the encoded <b>..</b> pairs (non-greedy match)
descriptionline = CharacterCoding.unicode2html(this.getLineRaw(), false).replaceAll("&lt;b&gt;(.+?)&lt;/b&gt;", "<b>$1</b>");
} else {
// not marked yet: let getLineMarked produce the line for this query
descriptionline = this.getLineMarked(queryGoal);
}
return descriptionline;
}

@ -1,6 +1,7 @@
package net.yacy.search.snippet;
import java.net.MalformedURLException;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.federate.yacy.CacheStrategy;
@ -118,4 +119,38 @@ public class TextSnippetTest {
}
}
/**
* Test of descriptionline method, of class TextSnippet.
* Checks proper encoding of html that remains in the raw snippet line:
* the display text must not contain a literal {@code <pre>} tag, neither for a
* snippet created with isMarked = true nor for one created with isMarked = false,
* and in the latter case the query word must be wrapped in {@code <b>..</b>}.
*
* @throws MalformedURLException declared because the test constructs a DigestURL from a string
*/
@Test
public void testDescriptionline() throws MalformedURLException {
String rawtestline = "Über großer test case </span> <pre> <hr><hr /></pre>"; // test line with html, risk of snippet format issue
DigestURL url = new DigestURL("http://localhost/page.html");
QueryGoal qg = new QueryGoal("test"); // query word expected to be marked in the second case
// case 1: snippet flagged as already marked, raw line used as is (no marking added by YaCy)
TextSnippet ts = new TextSnippet(
url.hash(),
rawtestline,
true, // isMarked
TextSnippet.ResultClass.SOURCE_METADATA, "");
String sniptxt = ts.descriptionline(qg); // snippet text for display
System.out.println("testDescriptionline: snippet=" + sniptxt);
assertFalse ("HTML code not allowed in snippet text",sniptxt.contains("<pre>")); // display text must not include unwanted html
// case 2: snippet not marked, descriptionline has to mark the query word
ts = new TextSnippet(
url.hash(),
rawtestline,
false, // isMarked
TextSnippet.ResultClass.SOURCE_METADATA, "");
sniptxt = ts.descriptionline(qg);
System.out.println("testDescriptionline: snippet=" + sniptxt);
assertFalse ("HTML code not allowed in snippet text",sniptxt.contains("<pre>")); // display text must not include unwanted html
assertTrue ("Query word not marked", sniptxt.contains("<b>test</b>")); // query word has to be marked
}
}

Loading…
Cancel
Save