From d7259419f397450b256bfedddea29408efc88f88 Mon Sep 17 00:00:00 2001 From: reger Date: Sat, 28 Feb 2015 19:02:18 +0100 Subject: [PATCH 1/3] postpone raw snippet html encoding upon use instead of during init of snippet addressing http://mantis.tokeek.de/view.php?id=551 --- .../net/yacy/search/snippet/TextSnippet.java | 31 ++++++++++--------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/source/net/yacy/search/snippet/TextSnippet.java b/source/net/yacy/search/snippet/TextSnippet.java index 163141a3e..fb19b266d 100644 --- a/source/net/yacy/search/snippet/TextSnippet.java +++ b/source/net/yacy/search/snippet/TextSnippet.java @@ -131,7 +131,7 @@ public class TextSnippet implements Comparable, Comparator, Comparator... tags are kept as is. + * Init a snippet line for urlhash * * @param urlhash hash of the url for this snippet * @param line text to use as snippet @@ -361,15 +359,7 @@ public class TextSnippet implements Comparable, Comparator... html tags in place - this.line = CharacterCoding.unicode2html(line, false).replaceAll("<b>(.+?)</b>", "$1"); - } else { // otherwise encode all text for html display - this.line = CharacterCoding.unicode2html(line, false); - } - } else { - this.line = line; - } + this.line = line; this.isMarked = isMarked; this.resultStatus = errorCode; this.error = errortext; @@ -437,9 +427,22 @@ public class TextSnippet implements Comparable, Comparator.. + * + * @param queryGoal + * @return html encoded snippet line + */ public String descriptionline(QueryGoal queryGoal) { if (descriptionline != null) return descriptionline; - descriptionline = this.isMarked() ? this.getLineRaw() : this.getLineMarked(queryGoal); + if (this.isMarked) { + // html encode source, keep .. 
+ descriptionline = CharacterCoding.unicode2html(this.getLineRaw(), false).replaceAll("<b>(.+?)</b>", "$1"); + } else { + descriptionline = this.getLineMarked(queryGoal); + } return descriptionline; } From a4629ad83be986587db24d1b5477219d0be0846a Mon Sep 17 00:00:00 2001 From: reger Date: Sat, 28 Feb 2015 19:48:29 +0100 Subject: [PATCH 2/3] upd pom --- nbproject/project.xml | 2 +- pom.xml | 30 ++++++++++++++++-------------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/nbproject/project.xml b/nbproject/project.xml index 19263c281..5396d7b0e 100644 --- a/nbproject/project.xml +++ b/nbproject/project.xml @@ -83,7 +83,7 @@ source htroot - lib/J7Zip-modified.jar;lib/apache-mime4j-0.6.jar;lib/bcmail-jdk15-1.46.jar;lib/bcprov-jdk15-1.46.jar;lib/chardet.jar;lib/commons-codec-1.10.jar;lib/commons-compress-1.9.jar;lib/commons-fileupload-1.3.1.jar;lib/commons-io-2.3.jar;lib/commons-jxpath-1.3.jar;lib/commons-lang-2.6.jar;lib/commons-logging-1.2.jar;lib/fontbox-1.8.8.jar;lib/geronimo-stax-api_1.0_spec-1.0.1.jar;lib/guava-18.0.jar;lib/htmllexer.jar;lib/httpclient-4.4.jar;lib/httpcore-4.4.jar;lib/httpmime-4.4.jar;lib/icu4j-core.jar;lib/jakarta-oro-2.0.8.jar;lib/jaudiotagger-2.0.4-20111207.115108-15.jar;lib/javax.servlet-api-3.1.0.jar;lib/jcifs-1.3.17.jar;lib/jcl-over-slf4j-1.7.9.jar;lib/jempbox-1.8.8.jar;lib/jetty-client-9.2.7.v20150116.jar;lib/jetty-continuation-9.2.7.v20150116.jar;lib/jetty-deploy-9.2.7.v20150116.jar;lib/jetty-http-9.2.7.v20150116.jar;lib/jetty-io-9.2.7.v20150116.jar;lib/jetty-jmx-9.2.7.v20150116.jar;lib/jetty-proxy-9.2.7.v20150116.jar;lib/jetty-security-9.2.7.v20150116.jar;lib/jetty-server-9.2.7.v20150116.jar;lib/jetty-servlet-9.2.7.v20150116.jar;lib/jetty-servlets-9.2.7.v20150116.jar;lib/jetty-util-9.2.7.v20150116.jar;lib/jetty-webapp-9.2.7.v20150116.jar;lib/jetty-xml-9.2.7.v20150116.jar;lib/jsch-0.1.51.jar;lib/json-simple-1.1.1.jar;lib/jsoup-1.8.1.jar;lib/log4j-over-slf4j-1.7.9.jar;lib/lucene-analyzers-common-4.10.3.jar;lib/lucene-analyzer
s-phonetic-4.10.3.jar;lib/lucene-classification-4.10.3.jar;lib/lucene-codecs-4.10.3.jar;lib/lucene-core-4.10.3.jar;lib/lucene-facet-4.10.3.jar;lib/lucene-grouping-4.10.3.jar;lib/lucene-highlighter-4.10.3.jar;lib/lucene-join-4.10.3.jar;lib/lucene-memory-4.10.3.jar;lib/lucene-misc-4.10.3.jar;lib/lucene-queries-4.10.3.jar;lib/lucene-queryparser-4.10.3.jar;lib/lucene-spatial-4.10.3.jar;lib/lucene-suggest-4.10.3.jar;lib/metadata-extractor-2.7.0.jar;lib/noggit-0.5.jar;lib/org.restlet.jar;lib/pdfbox-1.8.8.jar;lib/poi-3.10.1.jar;lib/poi-scratchpad-3.10.1.jar;lib/slf4j-api-1.7.9.jar;lib/slf4j-jdk14-1.7.9.jar;lib/solr-core-4.10.3.jar;lib/solr-solrj-4.10.3.jar;lib/spatial4j-0.4.1.jar;lib/webcat-0.1-swf.jar;lib/weupnp-0.1.2.jar;lib/wstx-asl-3.2.9.jar;lib/xercesImpl.jar;lib/xml-apis.jar;lib/xmpcore-5.1.2.jar;lib/zookeeper-3.4.6.jar + lib/J7Zip-modified.jar;lib/apache-mime4j-0.6.jar;lib/bcmail-jdk15-1.46.jar;lib/bcprov-jdk15-1.46.jar;lib/chardet.jar;lib/commons-codec-1.10.jar;lib/commons-compress-1.9.jar;lib/commons-fileupload-1.3.1.jar;lib/commons-io-2.3.jar;lib/commons-jxpath-1.3.jar;lib/commons-lang-2.6.jar;lib/commons-logging-1.2.jar;lib/fontbox-1.8.8.jar;lib/geronimo-stax-api_1.0_spec-1.0.1.jar;lib/guava-18.0.jar;lib/htmllexer.jar;lib/httpclient-4.4.jar;lib/httpcore-4.4.jar;lib/httpmime-4.4.jar;lib/icu4j-core.jar;lib/jakarta-oro-2.0.8.jar;lib/jaudiotagger-2.0.4-20111207.115108-15.jar;lib/javax.servlet-api-3.1.0.jar;lib/jcifs-1.3.17.jar;lib/jcl-over-slf4j-1.7.9.jar;lib/jempbox-1.8.8.jar;lib/jetty-client-9.2.9.v20150224.jar;lib/jetty-continuation-9.2.9.v20150224.jar;lib/jetty-deploy-9.2.9.v20150224.jar;lib/jetty-http-9.2.9.v20150224.jar;lib/jetty-io-9.2.9.v20150224.jar;lib/jetty-jmx-9.2.9.v20150224.jar;lib/jetty-proxy-9.2.9.v20150224.jar;lib/jetty-security-9.2.9.v20150224.jar;lib/jetty-server-9.2.9.v20150224.jar;lib/jetty-servlet-9.2.9.v20150224.jar;lib/jetty-servlets-9.2.9.v20150224.jar;lib/jetty-util-9.2.9.v20150224.jar;lib/jetty-webapp-9.2.9.v20150224.jar;lib/jetty-xml-9.2.
9.v20150224.jar;lib/jsch-0.1.51.jar;lib/json-simple-1.1.1.jar;lib/jsoup-1.8.1.jar;lib/log4j-over-slf4j-1.7.9.jar;lib/lucene-analyzers-common-4.10.3.jar;lib/lucene-analyzers-phonetic-4.10.3.jar;lib/lucene-classification-4.10.3.jar;lib/lucene-codecs-4.10.3.jar;lib/lucene-core-4.10.3.jar;lib/lucene-facet-4.10.3.jar;lib/lucene-grouping-4.10.3.jar;lib/lucene-highlighter-4.10.3.jar;lib/lucene-join-4.10.3.jar;lib/lucene-memory-4.10.3.jar;lib/lucene-misc-4.10.3.jar;lib/lucene-queries-4.10.3.jar;lib/lucene-queryparser-4.10.3.jar;lib/lucene-spatial-4.10.3.jar;lib/lucene-suggest-4.10.3.jar;lib/metadata-extractor-2.7.0.jar;lib/noggit-0.5.jar;lib/org.restlet.jar;lib/pdfbox-1.8.8.jar;lib/poi-3.10.1.jar;lib/poi-scratchpad-3.10.1.jar;lib/slf4j-api-1.7.9.jar;lib/slf4j-jdk14-1.7.9.jar;lib/solr-core-4.10.3.jar;lib/solr-solrj-4.10.3.jar;lib/spatial4j-0.4.1.jar;lib/webcat-0.1-swf.jar;lib/weupnp-0.1.2.jar;lib/wstx-asl-3.2.9.jar;lib/xercesImpl.jar;lib/xml-apis.jar;lib/xmpcore-5.1.2.jar;lib/zookeeper-3.4.6.jar lib/yacycore.jar 1.7 diff --git a/pom.xml b/pom.xml index 31f6fbe3a..992d05347 100644 --- a/pom.xml +++ b/pom.xml @@ -35,7 +35,9 @@ 1.7 4.10.3 - + + 9.2.9.v20150224 + ${DSTAMP} yacy_v${project.version}_${DSTAMP}_${releaseNr}.tar.gz @@ -549,67 +551,67 @@ org.eclipse.jetty jetty-server - 9.2.7.v20150116 + ${jetty.version} org.eclipse.jetty jetty-servlet - 9.2.7.v20150116 + ${jetty.version} org.eclipse.jetty jetty-servlets - 9.2.7.v20150116 + ${jetty.version} org.eclipse.jetty jetty-webapp - 9.2.7.v20150116 + ${jetty.version} org.eclipse.jetty jetty-util - 9.2.7.v20150116 + ${jetty.version} org.eclipse.jetty jetty-xml - 9.2.7.v20150116 + ${jetty.version} org.eclipse.jetty jetty-http - 9.2.7.v20150116 + ${jetty.version} org.eclipse.jetty jetty-security - 9.2.7.v20150116 + ${jetty.version} org.eclipse.jetty jetty-io - 9.2.7.v20150116 + ${jetty.version} org.eclipse.jetty jetty-continuation - 9.2.7.v20150116 + ${jetty.version} org.eclipse.jetty jetty-jmx - 9.2.7.v20150116 + 
${jetty.version} org.eclipse.jetty jetty-proxy - 9.2.7.v20150116 + ${jetty.version} org.eclipse.jetty jetty-deploy - 9.2.7.v20150116 + ${jetty.version} org.bitlet From 16bc267a3238cec0cdbf1346c0598e9bf788edf0 Mon Sep 17 00:00:00 2001 From: reger Date: Sun, 1 Mar 2015 23:50:17 +0100 Subject: [PATCH 3/3] add test case for snippet html encoding check --- .../yacy/search/snippet/TextSnippetTest.java | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/test/net/yacy/search/snippet/TextSnippetTest.java b/test/net/yacy/search/snippet/TextSnippetTest.java index bd93f58c3..bd92903c6 100644 --- a/test/net/yacy/search/snippet/TextSnippetTest.java +++ b/test/net/yacy/search/snippet/TextSnippetTest.java @@ -1,6 +1,7 @@ package net.yacy.search.snippet; +import java.net.MalformedURLException; import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.federate.yacy.CacheStrategy; @@ -118,4 +119,38 @@ public class TextSnippetTest { } } + /** + * Test of descriptionline method, of class TextSnippet. + * checking proper encoding of remaining html in raw snippet line. + */ + @Test + public void testDescriptionline() throws MalformedURLException { + String rawtestline = "Über großer test case
 

"; // test line with html, risk of snippet format issue + + DigestURL url = new DigestURL("http://localhost/page.html"); + QueryGoal qg = new QueryGoal("test"); + + // test with raw line (no marking added by YaCy) + TextSnippet ts = new TextSnippet( + url.hash(), + rawtestline, + true, // isMarked, + TextSnippet.ResultClass.SOURCE_METADATA, ""); + + String sniptxt = ts.descriptionline(qg); // snippet text for display + System.out.println("testDescriptionline: snippet=" + sniptxt); + assertFalse ("HTML code not allowed in snippet text",sniptxt.contains("
")); // display text not to include unwanted html
+
+        // test with marking of query word
+         ts = new TextSnippet(
+            url.hash(),
+            rawtestline,
+            false, // isMarked,
+            TextSnippet.ResultClass.SOURCE_METADATA, "");
+
+        sniptxt = ts.descriptionline(qg);
+        System.out.println("testDescriptionline: snippet=" + sniptxt);
+        assertFalse ("HTML code not allowed in snippet text",sniptxt.contains("
")); // display text not to include unwanted html
+        assertTrue ("Query word not marked", sniptxt.contains("test")); // query word to be marked
+    }
 }