From 0edec2b760e63f53b5c26da645ceec36434b71ae Mon Sep 17 00:00:00 2001
From: orbiter
Date: Wed, 22 Oct 2008 18:59:04 +0000
Subject: [PATCH] FULL redesign of algorithms in htmlTools to encode/decode
strings from/to unicode and html. The old process used an inefficient
way to detect html-encoded strings in texts. All calling methods have been
adapted to call the new class in an enhanced way with fewer parameters.
Many classes in interfaces used XML encoding only (instead of full html conversion from unicode to html); this behavior was not changed with this commit but should be reviewed again since it points to possible XSS leaks
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5295 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
htroot/Blacklist_p.java | 8 +-
htroot/Blog.java | 34 +--
htroot/CrawlProfileEditor_p.java | 2 +-
htroot/MessageSend_p.java | 10 +-
htroot/Messages_p.java | 24 +-
htroot/PerformanceQueues_p.java | 4 +-
htroot/Settings_p.java | 4 +-
htroot/Status.java | 6 +-
htroot/Surftips.java | 10 +-
htroot/Threaddump_p.java | 6 +-
htroot/ViewFile.java | 4 +-
htroot/opensearchdescription.java | 6 +-
htroot/xml/bookmarks/posts/all.java | 12 +-
htroot/xml/bookmarks/tags/get.java | 2 +-
htroot/xml/bookmarks/xbel/xbel.java | 14 +-
htroot/xml/feed.java | 10 +-
htroot/xml/queues_p.java | 10 +-
htroot/xml/util/getpageinfo_p.java | 12 +-
htroot/yacy/user/ysearch.java | 2 +-
htroot/yacy/user/ysearchitem.java | 6 +-
htroot/yacysearch.java | 2 +-
htroot/yacysearchitem.java | 6 +-
source/de/anomic/data/diff.java | 4 +-
source/de/anomic/data/wikiCode.java | 3 +-
.../htmlFilterCharacterCoding.java} | 231 +++++++++---------
.../htmlFilter/htmlFilterContentScraper.java | 5 +-
source/de/anomic/http/httpd.java | 4 +-
.../index/indexRepositoryReference.java | 10 +-
.../de/anomic/plasma/plasmaSearchQuery.java | 4 +-
source/de/anomic/server/serverObjects.java | 10 +-
30 files changed, 232 insertions(+), 233 deletions(-)
rename source/de/anomic/{data/htmlTools.java => htmlFilter/htmlFilterCharacterCoding.java} (54%)
diff --git a/htroot/Blacklist_p.java b/htroot/Blacklist_p.java
index 252249b06..540a2d1d9 100644
--- a/htroot/Blacklist_p.java
+++ b/htroot/Blacklist_p.java
@@ -319,7 +319,7 @@ prop.putHTML("asd", "0");
while ((peername = hostList.firstKey()) != null) {
final String Hash = hostList.get(peername);
prop.putHTML(DISABLED + "otherHosts_" + peerCount + "_hash", Hash);
- prop.putHTML(DISABLED + "otherHosts_" + peerCount + "_name", peername, true);
+ prop.putXML(DISABLED + "otherHosts_" + peerCount + "_name", peername);
hostList.remove(peername);
peerCount++;
}
@@ -332,14 +332,14 @@ prop.putHTML("asd", "0");
int blacklistCount = 0;
if (dirlist != null) {
for (int i = 0; i <= dirlist.length - 1; i++) {
- prop.putHTML(DISABLED + BLACKLIST + blacklistCount + "_name", dirlist[i], true);
+ prop.putXML(DISABLED + BLACKLIST + blacklistCount + "_name", dirlist[i]);
prop.put(DISABLED + BLACKLIST + blacklistCount + "_selected", "0");
if (dirlist[i].equals(blacklistToUse)) { //current List
prop.put(DISABLED + BLACKLIST + blacklistCount + "_selected", "1");
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
- prop.putHTML(DISABLED + "currentActiveFor_" + blTypes + "_blTypeName",supportedBlacklistTypes[blTypes], true);
+ prop.putXML(DISABLED + "currentActiveFor_" + blTypes + "_blTypeName",supportedBlacklistTypes[blTypes]);
prop.put(DISABLED + "currentActiveFor_" + blTypes + "_checked",
listManager.listSetContains(supportedBlacklistTypes[blTypes] + ".BlackLists",dirlist[i]) ? "0" : "1");
}
@@ -366,7 +366,7 @@ prop.putHTML("asd", "0");
}
prop.put(DISABLED + "blackLists", blacklistCount);
- prop.putHTML(DISABLED + "currentBlacklist", (blacklistToUse==null) ? "" : blacklistToUse, true);
+ prop.putXML(DISABLED + "currentBlacklist", (blacklistToUse==null) ? "" : blacklistToUse);
prop.put("disabled", (blacklistToUse == null) ? "1" : "0");
return prop;
}
diff --git a/htroot/Blog.java b/htroot/Blog.java
index 21fe0acaf..867a5e55d 100644
--- a/htroot/Blog.java
+++ b/htroot/Blog.java
@@ -180,9 +180,9 @@ public class Blog {
try {
prop.put("mode", "1"); //edit
prop.put("mode_commentMode", page.getCommentMode());
- prop.putHTML("mode_author", new String(page.getAuthor(),"UTF-8"), xml);
+ prop.putHTML("mode_author", new String(page.getAuthor(),"UTF-8"));
prop.put("mode_pageid", page.getKey());
- prop.putHTML("mode_subject", new String(page.getSubject(), "UTF-8"), xml);
+ prop.putHTML("mode_subject", new String(page.getSubject(), "UTF-8"));
prop.put("mode_page-code", new String(page.getPage(), "UTF-8"));
} catch (final UnsupportedEncodingException e) {}
}
@@ -195,16 +195,16 @@ public class Blog {
if(hasRights) {
prop.put("mode", "2");//preview
prop.put("mode_commentMode", post.getInt("commentMode", 1));
- prop.putHTML("mode_pageid", pagename, xml);
+ prop.putHTML("mode_pageid", pagename);
try {
- prop.putHTML("mode_author", new String(author, "UTF-8"), xml);
+ prop.putHTML("mode_author", new String(author, "UTF-8"));
} catch (final UnsupportedEncodingException e) {
- prop.putHTML("mode_author", new String(author), xml);
+ prop.putHTML("mode_author", new String(author));
}
- prop.putHTML("mode_subject", post.get("subject",""), xml);
+ prop.putHTML("mode_subject", post.get("subject",""));
prop.put("mode_date", dateString(new Date()));
prop.putWiki("mode_page", post.get("content", ""));
- prop.putHTML("mode_page-code", post.get("content", ""), xml);
+ prop.putHTML("mode_page-code", post.get("content", ""));
}
else {
prop.put("mode", "3"); //access denied (no rights)
@@ -213,16 +213,16 @@ public class Blog {
else if(post.get("delete", "").equals("try")) {
if(hasRights) {
prop.put("mode", "4");
- prop.putHTML("mode_pageid", pagename, xml);
+ prop.putHTML("mode_pageid", pagename);
try {
- prop.putHTML("mode_author",new String(page.getAuthor(), "UTF-8"), xml);
+ prop.putHTML("mode_author",new String(page.getAuthor(), "UTF-8"));
} catch (final UnsupportedEncodingException e) {
- prop.putHTML("mode_author",new String(page.getAuthor()), xml);
+ prop.putHTML("mode_author",new String(page.getAuthor()));
}
try {
- prop.putHTML("mode_subject",new String(page.getSubject(),"UTF-8"), xml);
+ prop.putHTML("mode_subject",new String(page.getSubject(),"UTF-8"));
} catch (final UnsupportedEncodingException e) {
- prop.putHTML("mode_subject",new String(page.getSubject()), xml);
+ prop.putHTML("mode_subject",new String(page.getSubject()));
}
}
else prop.put("mode", "3"); //access denied (no rights)
@@ -246,7 +246,7 @@ public class Blog {
if(pagename.equals(DEFAULT_PAGE)) {
// XXX: where are "peername" and "address" used in the template?
// XXX: "clientname" is already set to the peername, no need for a new setting
- prop.putHTML("peername", sb.webIndex.seedDB.mySeed().getName(), xml);
+ prop.putHTML("peername", sb.webIndex.seedDB.mySeed().getName());
prop.put("address", address);
//index all entries
putBlogDefault(prop, sb, address, start, num, hasRights, xml);
@@ -321,16 +321,16 @@ public class Blog {
{
// subject
try {
- prop.putHTML("mode_entries_" + number + "_subject", new String(entry.getSubject(),"UTF-8"), xml);
+ prop.putHTML("mode_entries_" + number + "_subject", new String(entry.getSubject(),"UTF-8"));
} catch (final UnsupportedEncodingException e) {
- prop.putHTML("mode_entries_" + number + "_subject", new String(entry.getSubject()), xml);
+ prop.putHTML("mode_entries_" + number + "_subject", new String(entry.getSubject()));
}
// author
try {
- prop.putHTML("mode_entries_" + number + "_author", new String(entry.getAuthor(),"UTF-8"), xml);
+ prop.putHTML("mode_entries_" + number + "_author", new String(entry.getAuthor(),"UTF-8"));
} catch (final UnsupportedEncodingException e) {
- prop.putHTML("mode_entries_" + number + "_author", new String(entry.getAuthor()), xml);
+ prop.putHTML("mode_entries_" + number + "_author", new String(entry.getAuthor()));
}
// comments
diff --git a/htroot/CrawlProfileEditor_p.java b/htroot/CrawlProfileEditor_p.java
index 6b321d91c..be6120a0a 100644
--- a/htroot/CrawlProfileEditor_p.java
+++ b/htroot/CrawlProfileEditor_p.java
@@ -212,7 +212,7 @@ public class CrawlProfileEditor_p {
prop.put("crawlProfiles_" + count + "_dark", dark ? "1" : "0");
prop.put("crawlProfiles_" + count + "_status", active ? "1" : "0");
prop.put("crawlProfiles_" + count + "_name", profile.name());
- prop.putHTML("crawlProfiles_" + count + "_startURL", profile.startURL(), true);
+ prop.putXML("crawlProfiles_" + count + "_startURL", profile.startURL());
prop.put("crawlProfiles_" + count + "_handle", profile.handle());
prop.put("crawlProfiles_" + count + "_depth", profile.generalDepth());
prop.put("crawlProfiles_" + count + "_filter", profile.generalFilter());
diff --git a/htroot/MessageSend_p.java b/htroot/MessageSend_p.java
index cdf498b91..863bb6e81 100644
--- a/htroot/MessageSend_p.java
+++ b/htroot/MessageSend_p.java
@@ -81,7 +81,7 @@ public class MessageSend_p {
peerName = targetPeer.get(yacySeed.NAME,"nameless");
}
- prop.putHTML("mode_permission_peerName", peerName, true);
+ prop.putXML("mode_permission_peerName", peerName);
final String response = (result == null) ? null : (String) result.get("response");
if (response == null || result == null) {
// we don't have permission or other peer does not exist
@@ -98,11 +98,11 @@ public class MessageSend_p {
final int messagesize = Integer.parseInt(result.get("messagesize"));
final int attachmentsize = Integer.parseInt(result.get("attachmentsize"));
- prop.putHTML("mode_permission_response", response, true);
+ prop.putXML("mode_permission_response", response);
prop.put("mode_permission_messagesize", messagesize);
prop.put("mode_permission_attachmentsize", attachmentsize);
- prop.putHTML("mode_permission_subject", subject, true);
- prop.putHTML("mode_permission_message", message, true);
+ prop.putXML("mode_permission_subject", subject);
+ prop.putXML("mode_permission_message", message);
prop.putHTML("mode_permission_hash", hash);
if (post.containsKey("preview")) {
prop.putWiki("mode_permission_previewmessage", message);
@@ -140,7 +140,7 @@ public class MessageSend_p {
prop.put("mode_status", "1");
// "unresolved pattern", the remote peer is alive but had an exception
- prop.putHTML("mode_status_message", message, true);
+ prop.putXML("mode_status_message", message);
}
}
return prop;
diff --git a/htroot/Messages_p.java b/htroot/Messages_p.java
index 0fe80b5f7..e4462e0b2 100644
--- a/htroot/Messages_p.java
+++ b/htroot/Messages_p.java
@@ -58,7 +58,7 @@ public class Messages_p {
final String peerAddress = sb.webIndex.seedDB.mySeed().getPublicAddress();
final String peerName = sb.webIndex.seedDB.mySeed().getName();
prop.put("peerAddress", peerAddress);
- prop.putHTML("peerName", peerName, true);
+ prop.putXML("peerName", peerName);
// List known hosts for message sending (from Blacklist_p.java)
if (sb.webIndex.seedDB != null && sb.webIndex.seedDB.sizeConnected() > 0) {
@@ -76,7 +76,7 @@ public class Messages_p {
while ((peername = hostList.firstKey()) != null) {
final String Hash = hostList.get(peername);
prop.put(PEERSKNOWN + "peers_" + peerCount + "_hash", Hash);
- prop.putHTML(PEERSKNOWN + "peers_" + peerCount + "_name", peername, true);
+ prop.putXML(PEERSKNOWN + "peers_" + peerCount + "_name", peername);
hostList.remove(peername);
peerCount++;
}
@@ -119,11 +119,11 @@ public class Messages_p {
message = sb.messageDB.read(key);
prop.put("mode_messages_"+count+"_dark", ((dark) ? "1" : "0") );
prop.put("mode_messages_"+count+"_date", dateString(message.date()));
- prop.putHTML("mode_messages_"+count+"_from", message.author(), true);
- prop.putHTML("mode_messages_"+count+"_to", message.recipient(), true);
- prop.putHTML("mode_messages_"+count+"_subject", message.subject(), true);
- prop.putHTML("mode_messages_"+count+"_category", message.category(), true);
- prop.putHTML("mode_messages_"+count+"_key", key, true);
+ prop.putXML("mode_messages_"+count+"_from", message.author());
+ prop.putXML("mode_messages_"+count+"_to", message.recipient());
+ prop.putXML("mode_messages_"+count+"_subject", message.subject());
+ prop.putXML("mode_messages_"+count+"_category", message.category());
+ prop.putXML("mode_messages_"+count+"_key", key);
prop.put("mode_messages_"+count+"_hash", message.authorHash());
if ((header.get(httpRequestHeader.CONNECTION_PROP_PATH)).endsWith(".rss")) {
@@ -135,7 +135,7 @@ public class Messages_p {
// also write out the message body (needed for the RSS feed)
try {
- prop.putHTML("mode_messages_"+count+"_body",new String(message.message(), "UTF-8"), true);
+ prop.putXML("mode_messages_"+count+"_body",new String(message.message(), "UTF-8"));
} catch (final UnsupportedEncodingException e) {
// can not happen, because UTF-8 must be supported by every JVM
}
@@ -157,10 +157,10 @@ public class Messages_p {
message = sb.messageDB.read(key);
if (message == null) throw new NullPointerException("Message with ID " + key + " does not exist");
- prop.putHTML("mode_from", message.author(), true);
- prop.putHTML("mode_to", message.recipient(), true);
+ prop.putXML("mode_from", message.author());
+ prop.putXML("mode_to", message.recipient());
prop.put("mode_date", dateString(message.date()));
- prop.putHTML("mode_subject", message.subject(), true);
+ prop.putXML("mode_subject", message.subject());
String theMessage = null;
try {
theMessage = new String(message.message(), "UTF-8");
@@ -169,7 +169,7 @@ public class Messages_p {
}
prop.putWiki("mode_message", theMessage);
prop.put("mode_hash", message.authorHash());
- prop.putHTML("mode_key", key, true);
+ prop.putXML("mode_key", key);
}
// return rewrite properties
diff --git a/htroot/PerformanceQueues_p.java b/htroot/PerformanceQueues_p.java
index 08bcd607e..76c2aa089 100644
--- a/htroot/PerformanceQueues_p.java
+++ b/htroot/PerformanceQueues_p.java
@@ -111,14 +111,14 @@ public class PerformanceQueues_p {
// set values to templates
prop.put("table_" + c + "_threadname", threadName);
- prop.putHTML("table_" + c + "_hasurl_shortdescr", thread.getShortDescription(), xml);
+ prop.putHTML("table_" + c + "_hasurl_shortdescr", thread.getShortDescription());
if(thread.getMonitorURL() == null) {
prop.put("table_"+c+"_hasurl", "0");
}else{
prop.put("table_"+c+"_hasurl", "1");
prop.put("table_" + c + "_hasurl_url", thread.getMonitorURL());
}
- prop.putHTML("table_" + c + "_longdescr", thread.getLongDescription(), xml);
+ prop.putHTML("table_" + c + "_longdescr", thread.getLongDescription());
queuesize = thread.getJobCount();
prop.put("table_" + c + "_queuesize", (queuesize == Integer.MAX_VALUE) ? "unlimited" : yFormatter.number(queuesize, !xml));
diff --git a/htroot/Settings_p.java b/htroot/Settings_p.java
index 753c7395f..93f10b21d 100644
--- a/htroot/Settings_p.java
+++ b/htroot/Settings_p.java
@@ -162,7 +162,7 @@ public final class Settings_p {
}
// clientIP
- prop.putHTML("clientIP", (String) header.get(httpRequestHeader.CONNECTION_PROP_CLIENTIP, ""), true); // read an artificial header addendum
+ prop.putXML("clientIP", (String) header.get(httpRequestHeader.CONNECTION_PROP_CLIENTIP, "")); // read an artificial header addendum
/*
* seed upload settings
@@ -239,7 +239,7 @@ public final class Settings_p {
while (availableParserIter.hasNext()) {
final ParserInfo parserInfo = availableParserIter.next();
prop.put("parser_" + parserIdx + "_name", parserInfo.parserName);
- prop.putHTML("parser_" + parserIdx + "_version", parserInfo.parserVersionNr, true);
+ prop.putXML("parser_" + parserIdx + "_version", parserInfo.parserVersionNr);
prop.put("parser_" + parserIdx + "_usage", parserInfo.usageCount);
prop.put("parser_" + parserIdx + "_colspan", configArray.length);
diff --git a/htroot/Status.java b/htroot/Status.java
index 23c555470..d148e3c87 100644
--- a/htroot/Status.java
+++ b/htroot/Status.java
@@ -170,8 +170,8 @@ public class Status {
if (sb.getConfig("remoteProxyUse", "false").equals("true")) {
prop.put("remoteProxy", "1");
- prop.putHTML("remoteProxy_host", sb.getConfig("remoteProxyHost", ""), true);
- prop.putHTML("remoteProxy_port", sb.getConfig("remoteProxyPort", ""), true);
+ prop.putXML("remoteProxy_host", sb.getConfig("remoteProxyHost", ""));
+ prop.putXML("remoteProxy_port", sb.getConfig("remoteProxyPort", ""));
prop.put("remoteProxy_4Yacy", sb.getConfig("remoteProxyUse4Yacy", "true").equalsIgnoreCase("true") ? "0" : "1");
} else {
prop.put("remoteProxy", "0"); // not used
@@ -201,7 +201,7 @@ public class Status {
} else {
prop.put("peerAddress", "1"); // Address
prop.put("peerAddress_address", sb.webIndex.seedDB.mySeed().getPublicAddress());
- prop.putHTML("peerAddress_peername", sb.getConfig("peerName", "").toLowerCase(), true);
+ prop.putXML("peerAddress_peername", sb.getConfig("peerName", "").toLowerCase());
}
}
final String peerStatus = ((sb.webIndex.seedDB.mySeed() == null) ? yacySeed.PEERTYPE_VIRGIN : sb.webIndex.seedDB.mySeed().get(yacySeed.PEERTYPE, yacySeed.PEERTYPE_VIRGIN));
diff --git a/htroot/Surftips.java b/htroot/Surftips.java
index 6b1fb1dd3..133a38c33 100644
--- a/htroot/Surftips.java
+++ b/htroot/Surftips.java
@@ -153,11 +153,11 @@ public class Surftips {
prop.put("surftips_results_" + i + "_authorized_recommend_display", display);
prop.put("surftips_results_" + i + "_authorized_recommend_showScore", (showScore ? "1" : "0"));
- prop.putHTML("surftips_results_" + i + "_authorized_urlhash", urlhash, true);
- prop.putHTML("surftips_results_" + i + "_url", url, true);
- prop.putHTML("surftips_results_" + i + "_urlname", nxTools.shortenURLString(url, 60), true);
- prop.putHTML("surftips_results_" + i + "_urlhash", urlhash, true);
- prop.putHTML("surftips_results_" + i + "_title", (showScore) ? ("(" + ranking.getScore(urlhash) + ") " + title) : title, true);
+ prop.putXML("surftips_results_" + i + "_authorized_urlhash", urlhash);
+ prop.putXML("surftips_results_" + i + "_url", url);
+ prop.putXML("surftips_results_" + i + "_urlname", nxTools.shortenURLString(url, 60));
+ prop.putXML("surftips_results_" + i + "_urlhash", urlhash);
+ prop.putXML("surftips_results_" + i + "_title", (showScore) ? ("(" + ranking.getScore(urlhash) + ") " + title) : title);
prop.putHTML("surftips_results_" + i + "_description", description);
i++;
diff --git a/htroot/Threaddump_p.java b/htroot/Threaddump_p.java
index 1e513d9e7..7b3f422ad 100644
--- a/htroot/Threaddump_p.java
+++ b/htroot/Threaddump_p.java
@@ -35,7 +35,7 @@ import java.util.Date;
import java.util.Map;
import java.util.Map.Entry;
-import de.anomic.data.htmlTools;
+import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.http.httpRequestHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverFileUtils;
@@ -122,9 +122,9 @@ public class Threaddump_p {
line = null;
}
if ((line != null) && (line.length() > 0)) {
- bufferappend(buffer, plain, tracename + "at " + htmlTools.encodeUnicode2html(ste.toString(), true) + " [" + line.trim() + "]");
+ bufferappend(buffer, plain, tracename + "at " + htmlFilterCharacterCoding.unicode2html(ste.toString(), true) + " [" + line.trim() + "]");
} else {
- bufferappend(buffer, plain, tracename + "at " + htmlTools.encodeUnicode2html(ste.toString(), true));
+ bufferappend(buffer, plain, tracename + "at " + htmlFilterCharacterCoding.unicode2html(ste.toString(), true));
}
}
bufferappend(buffer, plain, "");
diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java
index d2b281100..792052df7 100644
--- a/htroot/ViewFile.java
+++ b/htroot/ViewFile.java
@@ -33,8 +33,8 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
-import de.anomic.data.htmlTools;
import de.anomic.htmlFilter.htmlFilterImageEntry;
+import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.http.HttpClient;
import de.anomic.http.httpRequestHeader;
import de.anomic.http.httpResponseHeader;
@@ -352,7 +352,7 @@ public class ViewFile {
}
private static final String markup(final String[] wordArray, String message) {
- message = htmlTools.encodeUnicode2html(message, true);
+ message = htmlFilterCharacterCoding.unicode2html(message, true);
if (wordArray != null)
for (int j = 0; j < wordArray.length; j++) {
final String currentWord = wordArray[j].trim();
diff --git a/htroot/opensearchdescription.java b/htroot/opensearchdescription.java
index 7375db667..11a9a775e 100644
--- a/htroot/opensearchdescription.java
+++ b/htroot/opensearchdescription.java
@@ -43,9 +43,9 @@ public class opensearchdescription {
if (thisaddress.indexOf(":") == -1) thisaddress += ":" + serverCore.getPortNr(env.getConfig("port", "8080"));
final serverObjects prop = new serverObjects();
- prop.putHTML("thisaddress", thisaddress, true);
- prop.putHTML("SearchPageGreeting", promoteSearchPageGreeting, true);
- prop.putHTML("clientname", sb.webIndex.seedDB.mySeed().getName(), true);
+ prop.putXML("thisaddress", thisaddress);
+ prop.putXML("SearchPageGreeting", promoteSearchPageGreeting);
+ prop.putXML("clientname", sb.webIndex.seedDB.mySeed().getName());
// return rewrite properties
return prop;
diff --git a/htroot/xml/bookmarks/posts/all.java b/htroot/xml/bookmarks/posts/all.java
index d39133d48..3ca5ac4c3 100644
--- a/htroot/xml/bookmarks/posts/all.java
+++ b/htroot/xml/bookmarks/posts/all.java
@@ -59,13 +59,13 @@ public class all {
Date date;
while(it.hasNext()){
bookmark=switchboard.bookmarksDB.getBookmark(it.next());
- prop.putHTML("posts_"+count+"_url", bookmark.getUrl(), true);
- prop.putHTML("posts_"+count+"_title", bookmark.getTitle(), true);
- prop.putHTML("posts_"+count+"_description", bookmark.getDescription(), true);
- prop.putHTML("posts_"+count+"_md5", serverCodings.encodeMD5Hex(bookmark.getUrl()), true);
+ prop.putXML("posts_"+count+"_url", bookmark.getUrl());
+ prop.putXML("posts_"+count+"_title", bookmark.getTitle());
+ prop.putXML("posts_"+count+"_description", bookmark.getDescription());
+ prop.putXML("posts_"+count+"_md5", serverCodings.encodeMD5Hex(bookmark.getUrl()));
date=new Date(bookmark.getTimeStamp());
- prop.putHTML("posts_"+count+"_time", serverDate.formatISO8601(date), true);
- prop.putHTML("posts_"+count+"_tags", bookmark.getTagsString().replaceAll(","," "), true);
+ prop.putXML("posts_"+count+"_time", serverDate.formatISO8601(date));
+ prop.putXML("posts_"+count+"_tags", bookmark.getTagsString().replaceAll(","," "));
// additional XML tags
prop.put("posts_"+count+"_isExtended",extendedXML ? "1" : "0");
diff --git a/htroot/xml/bookmarks/tags/get.java b/htroot/xml/bookmarks/tags/get.java
index 7f7360ab9..fd36d6b2f 100644
--- a/htroot/xml/bookmarks/tags/get.java
+++ b/htroot/xml/bookmarks/tags/get.java
@@ -88,7 +88,7 @@ public class get {
while (it.hasNext()) {
tag = it.next();
if(!tag.getTagName().startsWith("/")) { // ignore folder tags
- prop.putHTML("tags_"+count+"_name", tag.getTagName(), true);
+ prop.putXML("tags_"+count+"_name", tag.getTagName());
prop.put("tags_"+count+"_count", tag.size());
count++;
}
diff --git a/htroot/xml/bookmarks/xbel/xbel.java b/htroot/xml/bookmarks/xbel/xbel.java
index c3e2626db..3c1f9058b 100644
--- a/htroot/xml/bookmarks/xbel/xbel.java
+++ b/htroot/xml/bookmarks/xbel/xbel.java
@@ -4,7 +4,7 @@ import java.util.Date;
import java.util.Iterator;
import de.anomic.data.bookmarksDB;
-import de.anomic.data.htmlTools;
+import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.http.httpRequestHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverDate;
@@ -83,7 +83,7 @@ public class xbel {
count++;
final String title = fn; // just to make sure fn stays untouched
- prop.put("xbel_"+count+"_elements", "" + htmlTools.encodeUnicode2xml(title.replaceAll("(/.[^/]*)*/", "")) + "");
+ prop.put("xbel_"+count+"_elements", "" + htmlFilterCharacterCoding.unicode2xml(title.replaceAll("(/.[^/]*)*/", ""), true) + "");
count++;
final Iterator bit=switchboard.bookmarksDB.getBookmarksIterator(fn, isAdmin);
count = print_XBEL(bit, count);
@@ -106,19 +106,19 @@ public class xbel {
bookmark=switchboard.bookmarksDB.getBookmark(bit.next());
date=new Date(bookmark.getTimeStamp());
prop.put("xbel_"+count+"_elements", "");
+ + "\" href=\"" + htmlFilterCharacterCoding.unicode2xml(bookmark.getUrl(), true)
+ + "\" added=\"" + htmlFilterCharacterCoding.unicode2xml(serverDate.formatISO8601(date), true)+"\">");
count++;
prop.put("xbel_"+count+"_elements", "");
count++;
- prop.putHTML("xbel_"+count+"_elements", bookmark.getTitle(), true);
+ prop.putXML("xbel_"+count+"_elements", bookmark.getTitle());
count++;
prop.put("xbel_"+count+"_elements", "");
count++;
prop.put("xbel_"+count+"_elements", "");
count++;
prop.put("xbel_"+count+"_elements", "");
count++;
prop.put("xbel_"+count+"_elements", "");
@@ -127,7 +127,7 @@ public class xbel {
count++;
prop.put("xbel_"+count+"_elements", "");
count++;
- prop.putHTML("xbel_"+count+"_elements", bookmark.getDescription(), true);
+ prop.putXML("xbel_"+count+"_elements", bookmark.getDescription());
count++;
prop.put("xbel_"+count+"_elements", "");
count++;
diff --git a/htroot/xml/feed.java b/htroot/xml/feed.java
index 01194a66a..48490c548 100755
--- a/htroot/xml/feed.java
+++ b/htroot/xml/feed.java
@@ -66,8 +66,8 @@ public class feed {
RSSMessage message = feed.getChannel();
if (message != null) {
- prop.putHTML("channel_title", message.getTitle(), true);
- prop.putHTML("channel_description", message.getDescription(), true);
+ prop.putXML("channel_title", message.getTitle());
+ prop.putXML("channel_description", message.getDescription());
prop.put("channel_pubDate", message.getPubDate());
}
while ((messageMaxCount > 0) && (feed.size() > 0)) {
@@ -75,9 +75,9 @@ public class feed {
if (message == null) continue;
// create RSS entry
- prop.putHTML("item_" + messageCount + "_title", channels[channelIndex] + ": " + message.getTitle(), true);
- prop.putHTML("item_" + messageCount + "_description", message.getDescription(), true);
- prop.putHTML("item_" + messageCount + "_link", message.getLink(), true);
+ prop.putXML("item_" + messageCount + "_title", channels[channelIndex] + ": " + message.getTitle());
+ prop.putXML("item_" + messageCount + "_description", message.getDescription());
+ prop.putXML("item_" + messageCount + "_link", message.getLink());
prop.put("item_" + messageCount + "_pubDate", message.getPubDate());
prop.put("item_" + messageCount + "_guid", message.getGuid());
messageCount++;
diff --git a/htroot/xml/queues_p.java b/htroot/xml/queues_p.java
index ce8ca9103..0a24f3f9a 100644
--- a/htroot/xml/queues_p.java
+++ b/htroot/xml/queues_p.java
@@ -110,8 +110,8 @@ public class queues_p {
prop.putHTML("list-indexing_"+i+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
prop.put("list-indexing_"+i+"_depth", pcentry.depth());
prop.put("list-indexing_"+i+"_modified", pcentry.getModificationDate());
- prop.putHTML("list-indexing_"+i+"_anchor", (pcentry.anchorName()==null) ? "" : pcentry.anchorName(), true);
- prop.putHTML("list-indexing_"+i+"_url", pcentry.url().toNormalform(false, true), true);
+ prop.putXML("list-indexing_"+i+"_anchor", (pcentry.anchorName()==null) ? "" : pcentry.anchorName());
+ prop.putXML("list-indexing_"+i+"_url", pcentry.url().toNormalform(false, true));
prop.putNum("list-indexing_"+i+"_size", entrySize);
prop.put("list-indexing_"+i+"_inProcess", (inProcess) ? "1" : "0");
prop.put("list-indexing_"+i+"_hash", pcentry.urlHash());
@@ -135,7 +135,7 @@ public class queues_p {
initiator = sb.webIndex.seedDB.getConnected(w[i].initiator());
prop.putHTML("list-loader_"+count+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
prop.put("list-loader_"+count+"_depth", w[i].depth());
- prop.putHTML("list-loader_"+count+"_url", w[i].url().toString(), true);
+ prop.putXML("list-loader_"+count+"_url", w[i].url().toString());
count++;
}
prop.put("list-loader", count);
@@ -181,8 +181,8 @@ public class queues_p {
prop.put(tableName + "_" + showNum + "_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
prop.put(tableName + "_" + showNum + "_depth", urle.depth());
prop.put(tableName + "_" + showNum + "_modified", daydate(urle.loaddate()));
- prop.putHTML(tableName + "_" + showNum + "_anchor", urle.name(), true);
- prop.putHTML(tableName + "_" + showNum + "_url", urle.url().toNormalform(false, true), true);
+ prop.putXML(tableName + "_" + showNum + "_anchor", urle.name());
+ prop.putXML(tableName + "_" + showNum + "_url", urle.url().toNormalform(false, true));
prop.put(tableName + "_" + showNum + "_hash", urle.url().hash());
showNum++;
}
diff --git a/htroot/xml/util/getpageinfo_p.java b/htroot/xml/util/getpageinfo_p.java
index 70118e045..8559a1f57 100644
--- a/htroot/xml/util/getpageinfo_p.java
+++ b/htroot/xml/util/getpageinfo_p.java
@@ -65,7 +65,7 @@ public class getpageinfo_p {
String url=post.get("url");
if(url.toLowerCase().startsWith("ftp://")){
prop.put("robots-allowed", "1");
- prop.putHTML("title", "FTP: "+url, true);
+ prop.putXML("title", "FTP: "+url);
return prop;
} else if (!(url.toLowerCase().startsWith("http://") || url.toLowerCase().startsWith("https://"))) {
url = "http://" + url;
@@ -86,7 +86,7 @@ public class getpageinfo_p {
writer.close();
// put the document title
- prop.putHTML("title", scraper.getTitle(), true);
+ prop.putXML("title", scraper.getTitle());
// put the favicon that belongs to the document
prop.put("favicon", (scraper.getFavicon()==null) ? "" : scraper.getFavicon().toString());
@@ -97,16 +97,16 @@ public class getpageinfo_p {
for(int i=0;i languages = scraper.getContentLanguages();
- prop.putHTML("lang", (languages == null) ? "unknown" : languages.iterator().next(), true);
+ prop.putXML("lang", (languages == null) ? "unknown" : languages.iterator().next());
} catch (final MalformedURLException e) { /* ignore this */
} catch (final IOException e) { /* ignore this */
@@ -121,7 +121,7 @@ public class getpageinfo_p {
// get the sitemap URL of the domain
final yacyURL sitemapURL = sb.robots.getSitemapURL(theURL);
- prop.putHTML("sitemap", (sitemapURL==null)?"":sitemapURL.toString(), true);
+ prop.putXML("sitemap", (sitemapURL==null)?"":sitemapURL.toString());
} catch (final MalformedURLException e) {}
}
diff --git a/htroot/yacy/user/ysearch.java b/htroot/yacy/user/ysearch.java
index d9e817b05..8f8f9f7cb 100644
--- a/htroot/yacy/user/ysearch.java
+++ b/htroot/yacy/user/ysearch.java
@@ -339,7 +339,7 @@ public class ysearch {
prop.put("input_contentdomCheckApp", (contentdomCode == plasmaSearchQuery.CONTENTDOM_APP) ? "1" : "0");
// for RSS: don't HTML encode some elements
- prop.putHTML("rss_query", querystring, true);
+ prop.putXML("rss_query", querystring);
prop.put("rss_queryenc", yacyURL.escape(querystring.replace(' ', '+')));
sb.localSearchLastAccess = System.currentTimeMillis();
diff --git a/htroot/yacy/user/ysearchitem.java b/htroot/yacy/user/ysearchitem.java
index 27ae6952e..dd68ab87d 100644
--- a/htroot/yacy/user/ysearchitem.java
+++ b/htroot/yacy/user/ysearchitem.java
@@ -96,9 +96,9 @@ public class ysearchitem {
if (rss) {
// text search for rss output
prop.put("rss", "1"); // switch on specific content
- prop.putHTML("rss_title", result.title(), true);
- prop.putHTML("rss_description", result.textSnippet().getLineRaw(), true);
- prop.putHTML("rss_link", result.urlstring(), true);
+ prop.putXML("rss_title", result.title());
+ prop.putXML("rss_description", result.textSnippet().getLineRaw());
+ prop.putXML("rss_link", result.urlstring());
prop.put("rss_urlhash", result.hash());
prop.put("rss_date", plasmaSwitchboard.dateString822(result.modified()));
return prop;
diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java
index 25b5b98b8..a4bbe8ec4 100644
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@@ -439,7 +439,7 @@ public class yacysearch {
prop.put("input_contentdomCheckApp", (contentdomCode == plasmaSearchQuery.CONTENTDOM_APP) ? "1" : "0");
// for RSS: don't HTML encode some elements
- prop.putHTML("rss_query", querystring, true);
+ prop.putXML("rss_query", querystring);
prop.put("rss_queryenc", yacyURL.escape(querystring.replace(' ', '+')));
sb.localSearchLastAccess = System.currentTimeMillis();
diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java
index f1719f7f3..00d516489 100644
--- a/htroot/yacysearchitem.java
+++ b/htroot/yacysearchitem.java
@@ -182,10 +182,10 @@ public class yacysearchitem {
if (rss) {
// text search for rss output
prop.put("rss", "1"); // switch on specific content
- prop.putHTML("rss_title", result.title(), true);
+ prop.putXML("rss_title", result.title());
final plasmaSnippetCache.TextSnippet snippet = result.textSnippet();
- prop.putHTML("rss_description", (snippet == null) ? "" : snippet.getLineRaw(), true);
- prop.putHTML("rss_link", result.urlstring(), true);
+ prop.putXML("rss_description", (snippet == null) ? "" : snippet.getLineRaw());
+ prop.putXML("rss_link", result.urlstring());
prop.put("rss_urlhash", result.hash());
prop.put("rss_date", plasmaSwitchboard.dateString822(result.modified()));
return prop;
diff --git a/source/de/anomic/data/diff.java b/source/de/anomic/data/diff.java
index 3f98805e5..1c3c327b0 100644
--- a/source/de/anomic/data/diff.java
+++ b/source/de/anomic/data/diff.java
@@ -30,6 +30,8 @@ package de.anomic.data;
import java.util.ArrayList;
+import de.anomic.htmlFilter.htmlFilterCharacterCoding;
+
/**
* This class provides a diff-functionality.
*/
@@ -253,7 +255,7 @@ public class diff {
case diff.Part.ADDED: sb.append("added"); break;
case diff.Part.DELETED: sb.append("deleted"); break;
}
- sb.append("\">").append(htmlTools.encodeUnicode2html(ps[j].getString(), true).replaceAll("\n", " "));
+ sb.append("\">").append(htmlFilterCharacterCoding.unicode2html(ps[j].getString(), true).replaceAll("\n", " "));
sb.append("");
}
sb.append("
");
diff --git a/source/de/anomic/data/wikiCode.java b/source/de/anomic/data/wikiCode.java
index a96e237f3..044b413c0 100644
--- a/source/de/anomic/data/wikiCode.java
+++ b/source/de/anomic/data/wikiCode.java
@@ -35,6 +35,7 @@ import java.util.HashMap;
import de.anomic.data.wiki.abstractWikiParser;
import de.anomic.data.wiki.wikiParser;
+import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCore;
@@ -757,7 +758,7 @@ public class wikiCode extends abstractWikiParser implements wikiParser {
public String transformLine(String result, final String publicAddress, final plasmaSwitchboard switchboard) {
//If HTML has not bee replaced yet (can happen if method gets called in recursion), replace now!
if (!replacedHTML || preformattedSpan){
- result = htmlTools.encodeUnicode2html(result, true);
+ result = htmlFilterCharacterCoding.unicode2html(result, true);
replacedHTML = true;
}
diff --git a/source/de/anomic/data/htmlTools.java b/source/de/anomic/htmlFilter/htmlFilterCharacterCoding.java
similarity index 54%
rename from source/de/anomic/data/htmlTools.java
rename to source/de/anomic/htmlFilter/htmlFilterCharacterCoding.java
index f910ab0af..d3c7b36d3 100644
--- a/source/de/anomic/data/htmlTools.java
+++ b/source/de/anomic/htmlFilter/htmlFilterCharacterCoding.java
@@ -1,13 +1,8 @@
-// htmlTools.java
-// -----------------------
-// (C) by Michael Peter Christen; mc@yacy.net,
-// (C) by Jan Sandbrink (NN), Franz Brausse (FB, karlchenofhell),
-// (C) by Bjoern 'fuchs' Krombholz (fuchsi)
-// first published on http://www.yacy.net
-//
-// $LastChangedDate: $
-// $LastChangedRevision: $
-// $LastChangedBy: $
+// htmlFilterCharacterCoding.java
+// ----------------------------------
+// (C) 22.10.2008 by Michael Peter Christen; mc@yacy.net
+// first published on http://yacy.net
+// Frankfurt, Germany, 2008
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -23,114 +18,22 @@
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-package de.anomic.data;
+package de.anomic.htmlFilter;
-public class htmlTools {
+import java.util.HashMap;
- /** Replaces characters in a string with other entities according to HTML standards.
- * @param text a string that possibly contains special characters
- * @param includingAmpersand if false ampersands are not encoded
- * @param forXML if true then only &, ", < and > will
- * be transcoded.
- * @return the string with all characters replaced by the corresponding character from array
- */
- public static String encodeUnicode2html(final String text, final boolean includingAmpersand, final boolean forXML) {
- if (text == null)
- return null;
-
- final int spos = (includingAmpersand ? 0 : 2);
- // if (forXML), then only encode ampersand, quotation mark, less than and
- // greather than which are the first 4 pairs in default mapping table
- final int epos = (forXML ? 8 : mapping.length);
+public class htmlFilterCharacterCoding {
- return encode(text, mapping, spos, epos);
- }
+    private static final char amp_unicode = "\u0026".charAt(0); // the ampersand character
+    private static final String amp_html = "&amp;"; // entity that stands for amp_unicode
- /**
- * Like {@link #encodeUnicode2html(String, boolean, boolean)} with forXML = false
- */
- public static String encodeUnicode2html(final String text, final boolean includingAmpersand) {
- return encodeUnicode2html(text, includingAmpersand, false);
- }
-
-
- /**
- * Replaces special entities ampersand, quotation marks, and less than/graiter than
- * by the escaping entities allowed in XML documents.
- *
- * Like {@link #encodeUnicode2html(String, boolean, boolean)} with
- * includingAmpersand = true and foxXML = true.
- *
- * @param text the original String
- * @return the encoded String
- */
- public static String encodeUnicode2xml(final String text) {
- return encodeUnicode2html(text, true, true);
- }
-
- /**
- * Generic method that replaces occurences of special character entities defined in map
- * array with their corresponding mapping.
- * @param text The String too process.
- * @param map An array defining the entity mapping.
- * @param spos It is possible to use a subset of the map only. This parameter defines the
- * starting point in the map array.
- * @param epos The ending point, see above.
- * @return A copy of the original String with all entities defined in map replaced.
- */
- public static String encode(final String text, final String[] map, final int spos, final int epos) {
- final StringBuffer sb = new StringBuffer(text.length());
- int textpos = 0;
- search: while (textpos < text.length()) {
- // find a (forward) mapping
- loop: for (int i = spos; i < epos; i += 2) {
- if (text.charAt(textpos) != map[i].charAt(0)) continue loop;
- // found match
- sb.append(map[i + 1]);
- textpos++;
- continue search;
- }
- // not found match
- sb.append(text.charAt(textpos));
- textpos++;
- }
-
- return sb.toString();
- }
+    private static final String[] mapping4xml = { // pairs of character, entity; the ampersand is kept apart
+        "\"","&quot;", //quotation mark
+        "\u003C","&lt;", //less than
+        "\u003E","&gt;", //greater than
+    };
- public static String decodeHtml2Unicode(final String text) {
- if (text == null) return null;
- int pos = 0;
- final StringBuffer sb = new StringBuffer(text.length());
- search: while (pos < text.length()) {
- // find a reverse mapping. TODO: replace matching with hashtable(s)
- loop: for (int i = 0; i < mapping.length; i += 2) {
- if (pos + mapping[i + 1].length() > text.length()) continue loop;
- for (int j = mapping[i + 1].length() - 1; j >= 0; j--) {
- if (text.charAt(pos + j) != mapping[i + 1].charAt(j)) continue loop;
- }
- // found match
- sb.append(mapping[i]);
- pos = pos + mapping[i + 1].length();
- continue search;
- }
- // not found match
- sb.append(text.charAt(pos));
- pos++;
- }
- return new String(sb);
- }
-
- //This array contains codes (see http://mindprod.com/jgloss/unicode.html for details)
- //that will be replaced. To add new codes or patterns, just put them at the end
- //of the list. Codes or patterns in this list can not be escaped with [= or
- private static final String[] mapping = {
- // Ampersands _have_ to be replaced first. If they were replaced later,
- // other replaced characters containing ampersands would get messed up.
- "\u0026","&", //ampersand
- "\"",""", //quotation mark
- "\u003C","<", //less than
- "\u003E",">", //greater than
+ private static final String[] mapping4html = {
"\\", "\", // Backslash
"\u005E","^", // Caret
@@ -267,15 +170,109 @@ public class htmlTools {
"\u00FF","ÿ"
};
+    // lookup tables for both directions, filled once from mapping4html and mapping4xml
+    private final static HashMap<String, Character> html2unicode4xml = new HashMap<String, Character>();
+    private final static HashMap<String, Character> html2unicode4html = new HashMap<String, Character>();
+    private final static HashMap<Character, String> unicode2html4xml = new HashMap<Character, String>();
+    private final static HashMap<Character, String> unicode2html4html = new HashMap<Character, String>();
+    static {
+        for (int i = 0; i < mapping4html.length; i += 2) {
+            final Character c = Character.valueOf(mapping4html[i].charAt(0));
+            html2unicode4html.put(mapping4html[i + 1], c);
+            unicode2html4html.put(c, mapping4html[i + 1]);
+        }
+        for (int i = 0; i < mapping4xml.length; i += 2) {
+            final Character c = Character.valueOf(mapping4xml[i].charAt(0));
+            html2unicode4xml.put(mapping4xml[i + 1], c);
+            unicode2html4xml.put(c, mapping4xml[i + 1]);
+        }
+    }
+
+ public static String unicode2xml(final String text, boolean amp) { // XML mode: the html flag is passed as false, so the mapping4html table is not consulted
+ return unicode2html(text, amp, false);
+ }
+
+ public static String unicode2html(final String text, boolean amp) { // HTML mode: the html flag is passed as true, so both mapping tables are consulted
+ return unicode2html(text, amp, true);
+ }
+
+    /**
+     * Escapes text: '&' only if amp is set, the XML table always, the HTML table only if html is set.
+     */
+    private static String unicode2html(final String text, boolean amp, boolean html) {
+        if (text == null) return null;
+        final int length = text.length();
+        final StringBuffer out = new StringBuffer(length * 12 / 10);
+        for (int i = 0; i < length; i++) {
+            final char ch = text.charAt(i);
+            String entity;
+            if (amp && ch == amp_unicode) {
+                // the ampersand is handled apart from the tables so that callers can switch it off
+                entity = amp_html;
+            } else {
+                // characters from the XML table are replaced in both modes
+                entity = unicode2html4xml.get(ch);
+                if (entity == null && html) {
+                    // the larger HTML table is consulted only in HTML mode
+                    entity = unicode2html4html.get(ch);
+                }
+            }
+            if (entity == null) {
+                out.append(ch);
+            } else {
+                out.append(entity);
+            }
+        }
+        return out.toString();
+    }
+
+    /**
+     * Decodes the entities known to this class; any other ampersand is copied unchanged.
+     * @param text the text to decode, may be null
+     * @return the decoded text, or null if text is null
+     */
+    public static String html2unicode(final String text) {
+        if (text == null) return null;
+        int p = 0, p1, q;
+        final StringBuffer sb = new StringBuffer(text.length());
+        Character r;
+        while (p < text.length()) {
+            // copy everything up to the next ampersand
+            p1 = text.indexOf('&', p);
+            if (p1 < 0) p1 = text.length();
+            sb.append(text.subSequence(p, p1));
+            p = p1;
+            if (p >= text.length()) break;
+            q = text.indexOf(';', p);
+            if (q < 0) {
+                // no terminator left, so no entity can follow: keep the rest as it is
+                sb.append(text.substring(p));
+                break;
+            }
+            final String s = text.substring(p, q + 1);
+            if (s.equals(amp_html)) r = Character.valueOf(amp_unicode);
+            else if ((r = html2unicode4xml.get(s)) == null) r = html2unicode4html.get(s);
+            if (r == null) {
+                // unknown entity: keep the ampersand; advancing here is what ends the loop
+                sb.append('&');
+                p++;
+            } else {
+                sb.append(r.charValue());
+                p = q + 1;
+            }
+        }
+        return sb.toString();
+    }
+
public static void main(final String[] args) {
final String text = "Test-Text mit & um zyklische ü & Ersetzungen auszuschliessen";
- final String txet = encodeUnicode2html(text, true);
+ final String txet = unicode2html(text, true);
System.out.println(txet);
- System.out.println(decodeHtml2Unicode(txet));
- if (decodeHtml2Unicode(txet).equals(text)) System.out.println("correct");
+ System.out.println(html2unicode(txet));
+ if (html2unicode(txet).equals(text)) System.out.println("correct");
final String text2 = "encodeUnicode2xml: & \" < >";
System.out.println(text2);
- System.out.println(encodeUnicode2xml(text2));
+ System.out.println(unicode2xml(text2, true));
}
}
diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
index d5f7bd89e..e47e7124b 100644
--- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
+++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
@@ -44,7 +44,6 @@ import java.util.Properties;
import javax.swing.event.EventListenerList;
import de.anomic.crawler.HTTPLoader;
-import de.anomic.data.htmlTools;
import de.anomic.http.HttpClient;
import de.anomic.http.httpRequestHeader;
import de.anomic.server.serverCharBuffer;
@@ -166,11 +165,11 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
if (tagname.equalsIgnoreCase("meta")) {
String name = tagopts.getProperty("name", "");
if (name.length() > 0) {
- metas.put(name.toLowerCase(), htmlTools.decodeHtml2Unicode(tagopts.getProperty("content","")));
+ metas.put(name.toLowerCase(), htmlFilterCharacterCoding.html2unicode(tagopts.getProperty("content","")));
} else {
name = tagopts.getProperty("http-equiv", "");
if (name.length() > 0) {
- metas.put(name.toLowerCase(), htmlTools.decodeHtml2Unicode(tagopts.getProperty("content","")));
+ metas.put(name.toLowerCase(), htmlFilterCharacterCoding.html2unicode(tagopts.getProperty("content","")));
}
}
}
diff --git a/source/de/anomic/http/httpd.java b/source/de/anomic/http/httpd.java
index a94bbc9ee..1c94b236f 100644
--- a/source/de/anomic/http/httpd.java
+++ b/source/de/anomic/http/httpd.java
@@ -59,8 +59,8 @@ import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.apache.commons.httpclient.ChunkedInputStream;
import org.apache.commons.httpclient.ContentLengthInputStream;
-import de.anomic.data.htmlTools;
import de.anomic.data.userDB;
+import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverByteBuffer;
@@ -850,7 +850,7 @@ public final class httpd implements serverHandler, Cloneable {
// 06.01.2007: decode HTML entities by [FB]
public static String decodeHtmlEntities(String s) {
// replace all entities defined in wikiCode.characters and htmlentities
- s = htmlTools.decodeHtml2Unicode(s);
+ s = htmlFilterCharacterCoding.html2unicode(s);
// replace all other
final CharArrayWriter b = new CharArrayWriter(s.length());
diff --git a/source/de/anomic/index/indexRepositoryReference.java b/source/de/anomic/index/indexRepositoryReference.java
index 23332d5e6..883ab1abd 100644
--- a/source/de/anomic/index/indexRepositoryReference.java
+++ b/source/de/anomic/index/indexRepositoryReference.java
@@ -39,7 +39,7 @@ import java.util.Iterator;
import java.util.Map;
import java.util.TreeSet;
-import de.anomic.data.htmlTools;
+import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.http.JakartaCommonsHttpClient;
import de.anomic.http.JakartaCommonsHttpResponse;
import de.anomic.http.httpRemoteProxyConfig;
@@ -452,14 +452,14 @@ public final class indexRepositoryReference {
pw.println(url);
}
if (format == 1) {
- pw.println("" + htmlTools.encodeUnicode2html(comp.dc_title(), true, true) + " ");
+ pw.println("" + htmlFilterCharacterCoding.unicode2xml(comp.dc_title(), true) + " ");
}
if (format == 2) {
pw.println("");
- pw.println("" + htmlTools.encodeUnicode2html(comp.dc_title(), true, true) + "");
+ pw.println("" + htmlFilterCharacterCoding.unicode2xml(comp.dc_title(), true) + "");
pw.println("" + yacyURL.escape(url) + "");
- if (comp.dc_creator().length() > 0) pw.println("" + htmlTools.encodeUnicode2html(comp.dc_creator(), true, true) + "");
- if (comp.dc_subject().length() > 0) pw.println("" + htmlTools.encodeUnicode2html(comp.dc_subject(), true, true) + "");
+ if (comp.dc_creator().length() > 0) pw.println("" + htmlFilterCharacterCoding.unicode2xml(comp.dc_creator(), true) + "");
+ if (comp.dc_subject().length() > 0) pw.println("" + htmlFilterCharacterCoding.unicode2xml(comp.dc_subject(), true) + "");
pw.println("" + entry.moddate().toString() + "");
pw.println("" + entry.hash() + "");
pw.println("");
diff --git a/source/de/anomic/plasma/plasmaSearchQuery.java b/source/de/anomic/plasma/plasmaSearchQuery.java
index d80fcbedd..a5e529f3c 100644
--- a/source/de/anomic/plasma/plasmaSearchQuery.java
+++ b/source/de/anomic/plasma/plasmaSearchQuery.java
@@ -26,8 +26,8 @@ import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
-import de.anomic.data.htmlTools;
import de.anomic.htmlFilter.htmlFilterAbstractScraper;
+import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.index.indexWord;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroBitfield;
@@ -266,7 +266,7 @@ public final class plasmaSearchQuery {
public String queryString(final boolean encodeHTML) {
if(encodeHTML){
- return htmlTools.encodeUnicode2html(this.queryString, true);
+ return htmlFilterCharacterCoding.unicode2html(this.queryString, true);
}
return this.queryString;
}
diff --git a/source/de/anomic/server/serverObjects.java b/source/de/anomic/server/serverObjects.java
index 7c1e1160c..e4083da1b 100644
--- a/source/de/anomic/server/serverObjects.java
+++ b/source/de/anomic/server/serverObjects.java
@@ -52,7 +52,7 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
-import de.anomic.data.htmlTools;
+import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.tools.yFormatter;
@@ -146,10 +146,10 @@ public class serverObjects extends HashMap implements Cloneable
* @param key key name as String.
* @param value a String that will be reencoded for HTML output.
* @return the modified String that was added to the map.
- * @see htmlTools#encodeUnicode2html(String, boolean)
+ * @see htmlFilterCharacterCoding#unicode2html(String, boolean)
*/
public String putHTML(final String key, final String value) {
- return putHTML(key, value, false);
+ return put(key, htmlFilterCharacterCoding.unicode2html(value, true));
}
/**
@@ -158,8 +158,8 @@ public class serverObjects extends HashMap implements Cloneable
* If forXML is true, then only the characters & " < > will be
* replaced in the returned String.
*/
- public String putHTML(final String key, final String value, final boolean forXML) {
- return put(key, htmlTools.encodeUnicode2html(value, true, forXML));
+ public String putXML(final String key, final String value) {
+ return put(key, htmlFilterCharacterCoding.unicode2xml(value, true));
}
/**