FULL redesign of algorithms in htmlTools to encode/decode strings from/to unicode and html.

The old process used a rather inefficient way to detect HTML-encoded strings in text.
All calling methods have been adapted to call the new class in an enhanced way with fewer parameters.

Many classes in the interfaces used XML encoding only (instead of a full conversion from Unicode to HTML); this behavior was not changed by this commit but should be reviewed again, since it points to possible XSS leaks.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5295 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent 958ec20cd0
commit 0edec2b760

@ -319,7 +319,7 @@ prop.putHTML("asd", "0");
while ((peername = hostList.firstKey()) != null) { while ((peername = hostList.firstKey()) != null) {
final String Hash = hostList.get(peername); final String Hash = hostList.get(peername);
prop.putHTML(DISABLED + "otherHosts_" + peerCount + "_hash", Hash); prop.putHTML(DISABLED + "otherHosts_" + peerCount + "_hash", Hash);
prop.putHTML(DISABLED + "otherHosts_" + peerCount + "_name", peername, true); prop.putXML(DISABLED + "otherHosts_" + peerCount + "_name", peername);
hostList.remove(peername); hostList.remove(peername);
peerCount++; peerCount++;
} }
@ -332,14 +332,14 @@ prop.putHTML("asd", "0");
int blacklistCount = 0; int blacklistCount = 0;
if (dirlist != null) { if (dirlist != null) {
for (int i = 0; i <= dirlist.length - 1; i++) { for (int i = 0; i <= dirlist.length - 1; i++) {
prop.putHTML(DISABLED + BLACKLIST + blacklistCount + "_name", dirlist[i], true); prop.putXML(DISABLED + BLACKLIST + blacklistCount + "_name", dirlist[i]);
prop.put(DISABLED + BLACKLIST + blacklistCount + "_selected", "0"); prop.put(DISABLED + BLACKLIST + blacklistCount + "_selected", "0");
if (dirlist[i].equals(blacklistToUse)) { //current List if (dirlist[i].equals(blacklistToUse)) { //current List
prop.put(DISABLED + BLACKLIST + blacklistCount + "_selected", "1"); prop.put(DISABLED + BLACKLIST + blacklistCount + "_selected", "1");
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) { for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
prop.putHTML(DISABLED + "currentActiveFor_" + blTypes + "_blTypeName",supportedBlacklistTypes[blTypes], true); prop.putXML(DISABLED + "currentActiveFor_" + blTypes + "_blTypeName",supportedBlacklistTypes[blTypes]);
prop.put(DISABLED + "currentActiveFor_" + blTypes + "_checked", prop.put(DISABLED + "currentActiveFor_" + blTypes + "_checked",
listManager.listSetContains(supportedBlacklistTypes[blTypes] + ".BlackLists",dirlist[i]) ? "0" : "1"); listManager.listSetContains(supportedBlacklistTypes[blTypes] + ".BlackLists",dirlist[i]) ? "0" : "1");
} }
@ -366,7 +366,7 @@ prop.putHTML("asd", "0");
} }
prop.put(DISABLED + "blackLists", blacklistCount); prop.put(DISABLED + "blackLists", blacklistCount);
prop.putHTML(DISABLED + "currentBlacklist", (blacklistToUse==null) ? "" : blacklistToUse, true); prop.putXML(DISABLED + "currentBlacklist", (blacklistToUse==null) ? "" : blacklistToUse);
prop.put("disabled", (blacklistToUse == null) ? "1" : "0"); prop.put("disabled", (blacklistToUse == null) ? "1" : "0");
return prop; return prop;
} }

@ -180,9 +180,9 @@ public class Blog {
try { try {
prop.put("mode", "1"); //edit prop.put("mode", "1"); //edit
prop.put("mode_commentMode", page.getCommentMode()); prop.put("mode_commentMode", page.getCommentMode());
prop.putHTML("mode_author", new String(page.getAuthor(),"UTF-8"), xml); prop.putHTML("mode_author", new String(page.getAuthor(),"UTF-8"));
prop.put("mode_pageid", page.getKey()); prop.put("mode_pageid", page.getKey());
prop.putHTML("mode_subject", new String(page.getSubject(), "UTF-8"), xml); prop.putHTML("mode_subject", new String(page.getSubject(), "UTF-8"));
prop.put("mode_page-code", new String(page.getPage(), "UTF-8")); prop.put("mode_page-code", new String(page.getPage(), "UTF-8"));
} catch (final UnsupportedEncodingException e) {} } catch (final UnsupportedEncodingException e) {}
} }
@ -195,16 +195,16 @@ public class Blog {
if(hasRights) { if(hasRights) {
prop.put("mode", "2");//preview prop.put("mode", "2");//preview
prop.put("mode_commentMode", post.getInt("commentMode", 1)); prop.put("mode_commentMode", post.getInt("commentMode", 1));
prop.putHTML("mode_pageid", pagename, xml); prop.putHTML("mode_pageid", pagename);
try { try {
prop.putHTML("mode_author", new String(author, "UTF-8"), xml); prop.putHTML("mode_author", new String(author, "UTF-8"));
} catch (final UnsupportedEncodingException e) { } catch (final UnsupportedEncodingException e) {
prop.putHTML("mode_author", new String(author), xml); prop.putHTML("mode_author", new String(author));
} }
prop.putHTML("mode_subject", post.get("subject",""), xml); prop.putHTML("mode_subject", post.get("subject",""));
prop.put("mode_date", dateString(new Date())); prop.put("mode_date", dateString(new Date()));
prop.putWiki("mode_page", post.get("content", "")); prop.putWiki("mode_page", post.get("content", ""));
prop.putHTML("mode_page-code", post.get("content", ""), xml); prop.putHTML("mode_page-code", post.get("content", ""));
} }
else { else {
prop.put("mode", "3"); //access denied (no rights) prop.put("mode", "3"); //access denied (no rights)
@ -213,16 +213,16 @@ public class Blog {
else if(post.get("delete", "").equals("try")) { else if(post.get("delete", "").equals("try")) {
if(hasRights) { if(hasRights) {
prop.put("mode", "4"); prop.put("mode", "4");
prop.putHTML("mode_pageid", pagename, xml); prop.putHTML("mode_pageid", pagename);
try { try {
prop.putHTML("mode_author",new String(page.getAuthor(), "UTF-8"), xml); prop.putHTML("mode_author",new String(page.getAuthor(), "UTF-8"));
} catch (final UnsupportedEncodingException e) { } catch (final UnsupportedEncodingException e) {
prop.putHTML("mode_author",new String(page.getAuthor()), xml); prop.putHTML("mode_author",new String(page.getAuthor()));
} }
try { try {
prop.putHTML("mode_subject",new String(page.getSubject(),"UTF-8"), xml); prop.putHTML("mode_subject",new String(page.getSubject(),"UTF-8"));
} catch (final UnsupportedEncodingException e) { } catch (final UnsupportedEncodingException e) {
prop.putHTML("mode_subject",new String(page.getSubject()), xml); prop.putHTML("mode_subject",new String(page.getSubject()));
} }
} }
else prop.put("mode", "3"); //access denied (no rights) else prop.put("mode", "3"); //access denied (no rights)
@ -246,7 +246,7 @@ public class Blog {
if(pagename.equals(DEFAULT_PAGE)) { if(pagename.equals(DEFAULT_PAGE)) {
// XXX: where are "peername" and "address" used in the template? // XXX: where are "peername" and "address" used in the template?
// XXX: "clientname" is already set to the peername, no need for a new setting // XXX: "clientname" is already set to the peername, no need for a new setting
prop.putHTML("peername", sb.webIndex.seedDB.mySeed().getName(), xml); prop.putHTML("peername", sb.webIndex.seedDB.mySeed().getName());
prop.put("address", address); prop.put("address", address);
//index all entries //index all entries
putBlogDefault(prop, sb, address, start, num, hasRights, xml); putBlogDefault(prop, sb, address, start, num, hasRights, xml);
@ -321,16 +321,16 @@ public class Blog {
{ {
// subject // subject
try { try {
prop.putHTML("mode_entries_" + number + "_subject", new String(entry.getSubject(),"UTF-8"), xml); prop.putHTML("mode_entries_" + number + "_subject", new String(entry.getSubject(),"UTF-8"));
} catch (final UnsupportedEncodingException e) { } catch (final UnsupportedEncodingException e) {
prop.putHTML("mode_entries_" + number + "_subject", new String(entry.getSubject()), xml); prop.putHTML("mode_entries_" + number + "_subject", new String(entry.getSubject()));
} }
// author // author
try { try {
prop.putHTML("mode_entries_" + number + "_author", new String(entry.getAuthor(),"UTF-8"), xml); prop.putHTML("mode_entries_" + number + "_author", new String(entry.getAuthor(),"UTF-8"));
} catch (final UnsupportedEncodingException e) { } catch (final UnsupportedEncodingException e) {
prop.putHTML("mode_entries_" + number + "_author", new String(entry.getAuthor()), xml); prop.putHTML("mode_entries_" + number + "_author", new String(entry.getAuthor()));
} }
// comments // comments

@ -212,7 +212,7 @@ public class CrawlProfileEditor_p {
prop.put("crawlProfiles_" + count + "_dark", dark ? "1" : "0"); prop.put("crawlProfiles_" + count + "_dark", dark ? "1" : "0");
prop.put("crawlProfiles_" + count + "_status", active ? "1" : "0"); prop.put("crawlProfiles_" + count + "_status", active ? "1" : "0");
prop.put("crawlProfiles_" + count + "_name", profile.name()); prop.put("crawlProfiles_" + count + "_name", profile.name());
prop.putHTML("crawlProfiles_" + count + "_startURL", profile.startURL(), true); prop.putXML("crawlProfiles_" + count + "_startURL", profile.startURL());
prop.put("crawlProfiles_" + count + "_handle", profile.handle()); prop.put("crawlProfiles_" + count + "_handle", profile.handle());
prop.put("crawlProfiles_" + count + "_depth", profile.generalDepth()); prop.put("crawlProfiles_" + count + "_depth", profile.generalDepth());
prop.put("crawlProfiles_" + count + "_filter", profile.generalFilter()); prop.put("crawlProfiles_" + count + "_filter", profile.generalFilter());

@ -81,7 +81,7 @@ public class MessageSend_p {
peerName = targetPeer.get(yacySeed.NAME,"nameless"); peerName = targetPeer.get(yacySeed.NAME,"nameless");
} }
prop.putHTML("mode_permission_peerName", peerName, true); prop.putXML("mode_permission_peerName", peerName);
final String response = (result == null) ? null : (String) result.get("response"); final String response = (result == null) ? null : (String) result.get("response");
if (response == null || result == null) { if (response == null || result == null) {
// we don't have permission or other peer does not exist // we don't have permission or other peer does not exist
@ -98,11 +98,11 @@ public class MessageSend_p {
final int messagesize = Integer.parseInt(result.get("messagesize")); final int messagesize = Integer.parseInt(result.get("messagesize"));
final int attachmentsize = Integer.parseInt(result.get("attachmentsize")); final int attachmentsize = Integer.parseInt(result.get("attachmentsize"));
prop.putHTML("mode_permission_response", response, true); prop.putXML("mode_permission_response", response);
prop.put("mode_permission_messagesize", messagesize); prop.put("mode_permission_messagesize", messagesize);
prop.put("mode_permission_attachmentsize", attachmentsize); prop.put("mode_permission_attachmentsize", attachmentsize);
prop.putHTML("mode_permission_subject", subject, true); prop.putXML("mode_permission_subject", subject);
prop.putHTML("mode_permission_message", message, true); prop.putXML("mode_permission_message", message);
prop.putHTML("mode_permission_hash", hash); prop.putHTML("mode_permission_hash", hash);
if (post.containsKey("preview")) { if (post.containsKey("preview")) {
prop.putWiki("mode_permission_previewmessage", message); prop.putWiki("mode_permission_previewmessage", message);
@ -140,7 +140,7 @@ public class MessageSend_p {
prop.put("mode_status", "1"); prop.put("mode_status", "1");
// "unresolved pattern", the remote peer is alive but had an exception // "unresolved pattern", the remote peer is alive but had an exception
prop.putHTML("mode_status_message", message, true); prop.putXML("mode_status_message", message);
} }
} }
return prop; return prop;

@ -58,7 +58,7 @@ public class Messages_p {
final String peerAddress = sb.webIndex.seedDB.mySeed().getPublicAddress(); final String peerAddress = sb.webIndex.seedDB.mySeed().getPublicAddress();
final String peerName = sb.webIndex.seedDB.mySeed().getName(); final String peerName = sb.webIndex.seedDB.mySeed().getName();
prop.put("peerAddress", peerAddress); prop.put("peerAddress", peerAddress);
prop.putHTML("peerName", peerName, true); prop.putXML("peerName", peerName);
// List known hosts for message sending (from Blacklist_p.java) // List known hosts for message sending (from Blacklist_p.java)
if (sb.webIndex.seedDB != null && sb.webIndex.seedDB.sizeConnected() > 0) { if (sb.webIndex.seedDB != null && sb.webIndex.seedDB.sizeConnected() > 0) {
@ -76,7 +76,7 @@ public class Messages_p {
while ((peername = hostList.firstKey()) != null) { while ((peername = hostList.firstKey()) != null) {
final String Hash = hostList.get(peername); final String Hash = hostList.get(peername);
prop.put(PEERSKNOWN + "peers_" + peerCount + "_hash", Hash); prop.put(PEERSKNOWN + "peers_" + peerCount + "_hash", Hash);
prop.putHTML(PEERSKNOWN + "peers_" + peerCount + "_name", peername, true); prop.putXML(PEERSKNOWN + "peers_" + peerCount + "_name", peername);
hostList.remove(peername); hostList.remove(peername);
peerCount++; peerCount++;
} }
@ -119,11 +119,11 @@ public class Messages_p {
message = sb.messageDB.read(key); message = sb.messageDB.read(key);
prop.put("mode_messages_"+count+"_dark", ((dark) ? "1" : "0") ); prop.put("mode_messages_"+count+"_dark", ((dark) ? "1" : "0") );
prop.put("mode_messages_"+count+"_date", dateString(message.date())); prop.put("mode_messages_"+count+"_date", dateString(message.date()));
prop.putHTML("mode_messages_"+count+"_from", message.author(), true); prop.putXML("mode_messages_"+count+"_from", message.author());
prop.putHTML("mode_messages_"+count+"_to", message.recipient(), true); prop.putXML("mode_messages_"+count+"_to", message.recipient());
prop.putHTML("mode_messages_"+count+"_subject", message.subject(), true); prop.putXML("mode_messages_"+count+"_subject", message.subject());
prop.putHTML("mode_messages_"+count+"_category", message.category(), true); prop.putXML("mode_messages_"+count+"_category", message.category());
prop.putHTML("mode_messages_"+count+"_key", key, true); prop.putXML("mode_messages_"+count+"_key", key);
prop.put("mode_messages_"+count+"_hash", message.authorHash()); prop.put("mode_messages_"+count+"_hash", message.authorHash());
if ((header.get(httpRequestHeader.CONNECTION_PROP_PATH)).endsWith(".rss")) { if ((header.get(httpRequestHeader.CONNECTION_PROP_PATH)).endsWith(".rss")) {
@ -135,7 +135,7 @@ public class Messages_p {
// also write out the message body (needed for the RSS feed) // also write out the message body (needed for the RSS feed)
try { try {
prop.putHTML("mode_messages_"+count+"_body",new String(message.message(), "UTF-8"), true); prop.putXML("mode_messages_"+count+"_body",new String(message.message(), "UTF-8"));
} catch (final UnsupportedEncodingException e) { } catch (final UnsupportedEncodingException e) {
// can not happen, because UTF-8 must be supported by every JVM // can not happen, because UTF-8 must be supported by every JVM
} }
@ -157,10 +157,10 @@ public class Messages_p {
message = sb.messageDB.read(key); message = sb.messageDB.read(key);
if (message == null) throw new NullPointerException("Message with ID " + key + " does not exist"); if (message == null) throw new NullPointerException("Message with ID " + key + " does not exist");
prop.putHTML("mode_from", message.author(), true); prop.putXML("mode_from", message.author());
prop.putHTML("mode_to", message.recipient(), true); prop.putXML("mode_to", message.recipient());
prop.put("mode_date", dateString(message.date())); prop.put("mode_date", dateString(message.date()));
prop.putHTML("mode_subject", message.subject(), true); prop.putXML("mode_subject", message.subject());
String theMessage = null; String theMessage = null;
try { try {
theMessage = new String(message.message(), "UTF-8"); theMessage = new String(message.message(), "UTF-8");
@ -169,7 +169,7 @@ public class Messages_p {
} }
prop.putWiki("mode_message", theMessage); prop.putWiki("mode_message", theMessage);
prop.put("mode_hash", message.authorHash()); prop.put("mode_hash", message.authorHash());
prop.putHTML("mode_key", key, true); prop.putXML("mode_key", key);
} }
// return rewrite properties // return rewrite properties

@ -111,14 +111,14 @@ public class PerformanceQueues_p {
// set values to templates // set values to templates
prop.put("table_" + c + "_threadname", threadName); prop.put("table_" + c + "_threadname", threadName);
prop.putHTML("table_" + c + "_hasurl_shortdescr", thread.getShortDescription(), xml); prop.putHTML("table_" + c + "_hasurl_shortdescr", thread.getShortDescription());
if(thread.getMonitorURL() == null) { if(thread.getMonitorURL() == null) {
prop.put("table_"+c+"_hasurl", "0"); prop.put("table_"+c+"_hasurl", "0");
}else{ }else{
prop.put("table_"+c+"_hasurl", "1"); prop.put("table_"+c+"_hasurl", "1");
prop.put("table_" + c + "_hasurl_url", thread.getMonitorURL()); prop.put("table_" + c + "_hasurl_url", thread.getMonitorURL());
} }
prop.putHTML("table_" + c + "_longdescr", thread.getLongDescription(), xml); prop.putHTML("table_" + c + "_longdescr", thread.getLongDescription());
queuesize = thread.getJobCount(); queuesize = thread.getJobCount();
prop.put("table_" + c + "_queuesize", (queuesize == Integer.MAX_VALUE) ? "unlimited" : yFormatter.number(queuesize, !xml)); prop.put("table_" + c + "_queuesize", (queuesize == Integer.MAX_VALUE) ? "unlimited" : yFormatter.number(queuesize, !xml));

@ -162,7 +162,7 @@ public final class Settings_p {
} }
// clientIP // clientIP
prop.putHTML("clientIP", (String) header.get(httpRequestHeader.CONNECTION_PROP_CLIENTIP, "<unknown>"), true); // read an artificial header addendum prop.putXML("clientIP", (String) header.get(httpRequestHeader.CONNECTION_PROP_CLIENTIP, "<unknown>")); // read an artificial header addendum
/* /*
* seed upload settings * seed upload settings
@ -239,7 +239,7 @@ public final class Settings_p {
while (availableParserIter.hasNext()) { while (availableParserIter.hasNext()) {
final ParserInfo parserInfo = availableParserIter.next(); final ParserInfo parserInfo = availableParserIter.next();
prop.put("parser_" + parserIdx + "_name", parserInfo.parserName); prop.put("parser_" + parserIdx + "_name", parserInfo.parserName);
prop.putHTML("parser_" + parserIdx + "_version", parserInfo.parserVersionNr, true); prop.putXML("parser_" + parserIdx + "_version", parserInfo.parserVersionNr);
prop.put("parser_" + parserIdx + "_usage", parserInfo.usageCount); prop.put("parser_" + parserIdx + "_usage", parserInfo.usageCount);
prop.put("parser_" + parserIdx + "_colspan", configArray.length); prop.put("parser_" + parserIdx + "_colspan", configArray.length);

@ -170,8 +170,8 @@ public class Status {
if (sb.getConfig("remoteProxyUse", "false").equals("true")) { if (sb.getConfig("remoteProxyUse", "false").equals("true")) {
prop.put("remoteProxy", "1"); prop.put("remoteProxy", "1");
prop.putHTML("remoteProxy_host", sb.getConfig("remoteProxyHost", "<unknown>"), true); prop.putXML("remoteProxy_host", sb.getConfig("remoteProxyHost", "<unknown>"));
prop.putHTML("remoteProxy_port", sb.getConfig("remoteProxyPort", "<unknown>"), true); prop.putXML("remoteProxy_port", sb.getConfig("remoteProxyPort", "<unknown>"));
prop.put("remoteProxy_4Yacy", sb.getConfig("remoteProxyUse4Yacy", "true").equalsIgnoreCase("true") ? "0" : "1"); prop.put("remoteProxy_4Yacy", sb.getConfig("remoteProxyUse4Yacy", "true").equalsIgnoreCase("true") ? "0" : "1");
} else { } else {
prop.put("remoteProxy", "0"); // not used prop.put("remoteProxy", "0"); // not used
@ -201,7 +201,7 @@ public class Status {
} else { } else {
prop.put("peerAddress", "1"); // Address prop.put("peerAddress", "1"); // Address
prop.put("peerAddress_address", sb.webIndex.seedDB.mySeed().getPublicAddress()); prop.put("peerAddress_address", sb.webIndex.seedDB.mySeed().getPublicAddress());
prop.putHTML("peerAddress_peername", sb.getConfig("peerName", "<nameless>").toLowerCase(), true); prop.putXML("peerAddress_peername", sb.getConfig("peerName", "<nameless>").toLowerCase());
} }
} }
final String peerStatus = ((sb.webIndex.seedDB.mySeed() == null) ? yacySeed.PEERTYPE_VIRGIN : sb.webIndex.seedDB.mySeed().get(yacySeed.PEERTYPE, yacySeed.PEERTYPE_VIRGIN)); final String peerStatus = ((sb.webIndex.seedDB.mySeed() == null) ? yacySeed.PEERTYPE_VIRGIN : sb.webIndex.seedDB.mySeed().get(yacySeed.PEERTYPE, yacySeed.PEERTYPE_VIRGIN));

@ -153,11 +153,11 @@ public class Surftips {
prop.put("surftips_results_" + i + "_authorized_recommend_display", display); prop.put("surftips_results_" + i + "_authorized_recommend_display", display);
prop.put("surftips_results_" + i + "_authorized_recommend_showScore", (showScore ? "1" : "0")); prop.put("surftips_results_" + i + "_authorized_recommend_showScore", (showScore ? "1" : "0"));
prop.putHTML("surftips_results_" + i + "_authorized_urlhash", urlhash, true); prop.putXML("surftips_results_" + i + "_authorized_urlhash", urlhash);
prop.putHTML("surftips_results_" + i + "_url", url, true); prop.putXML("surftips_results_" + i + "_url", url);
prop.putHTML("surftips_results_" + i + "_urlname", nxTools.shortenURLString(url, 60), true); prop.putXML("surftips_results_" + i + "_urlname", nxTools.shortenURLString(url, 60));
prop.putHTML("surftips_results_" + i + "_urlhash", urlhash, true); prop.putXML("surftips_results_" + i + "_urlhash", urlhash);
prop.putHTML("surftips_results_" + i + "_title", (showScore) ? ("(" + ranking.getScore(urlhash) + ") " + title) : title, true); prop.putXML("surftips_results_" + i + "_title", (showScore) ? ("(" + ranking.getScore(urlhash) + ") " + title) : title);
prop.putHTML("surftips_results_" + i + "_description", description); prop.putHTML("surftips_results_" + i + "_description", description);
i++; i++;

@ -35,7 +35,7 @@ import java.util.Date;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import de.anomic.data.htmlTools; import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.http.httpRequestHeader; import de.anomic.http.httpRequestHeader;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverFileUtils; import de.anomic.server.serverFileUtils;
@ -122,9 +122,9 @@ public class Threaddump_p {
line = null; line = null;
} }
if ((line != null) && (line.length() > 0)) { if ((line != null) && (line.length() > 0)) {
bufferappend(buffer, plain, tracename + "at " + htmlTools.encodeUnicode2html(ste.toString(), true) + " [" + line.trim() + "]"); bufferappend(buffer, plain, tracename + "at " + htmlFilterCharacterCoding.unicode2html(ste.toString(), true) + " [" + line.trim() + "]");
} else { } else {
bufferappend(buffer, plain, tracename + "at " + htmlTools.encodeUnicode2html(ste.toString(), true)); bufferappend(buffer, plain, tracename + "at " + htmlFilterCharacterCoding.unicode2html(ste.toString(), true));
} }
} }
bufferappend(buffer, plain, ""); bufferappend(buffer, plain, "");

@ -33,8 +33,8 @@ import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.Map; import java.util.Map;
import de.anomic.data.htmlTools;
import de.anomic.htmlFilter.htmlFilterImageEntry; import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.http.HttpClient; import de.anomic.http.HttpClient;
import de.anomic.http.httpRequestHeader; import de.anomic.http.httpRequestHeader;
import de.anomic.http.httpResponseHeader; import de.anomic.http.httpResponseHeader;
@ -352,7 +352,7 @@ public class ViewFile {
} }
private static final String markup(final String[] wordArray, String message) { private static final String markup(final String[] wordArray, String message) {
message = htmlTools.encodeUnicode2html(message, true); message = htmlFilterCharacterCoding.unicode2html(message, true);
if (wordArray != null) if (wordArray != null)
for (int j = 0; j < wordArray.length; j++) { for (int j = 0; j < wordArray.length; j++) {
final String currentWord = wordArray[j].trim(); final String currentWord = wordArray[j].trim();

@ -43,9 +43,9 @@ public class opensearchdescription {
if (thisaddress.indexOf(":") == -1) thisaddress += ":" + serverCore.getPortNr(env.getConfig("port", "8080")); if (thisaddress.indexOf(":") == -1) thisaddress += ":" + serverCore.getPortNr(env.getConfig("port", "8080"));
final serverObjects prop = new serverObjects(); final serverObjects prop = new serverObjects();
prop.putHTML("thisaddress", thisaddress, true); prop.putXML("thisaddress", thisaddress);
prop.putHTML("SearchPageGreeting", promoteSearchPageGreeting, true); prop.putXML("SearchPageGreeting", promoteSearchPageGreeting);
prop.putHTML("clientname", sb.webIndex.seedDB.mySeed().getName(), true); prop.putXML("clientname", sb.webIndex.seedDB.mySeed().getName());
// return rewrite properties // return rewrite properties
return prop; return prop;

@ -59,13 +59,13 @@ public class all {
Date date; Date date;
while(it.hasNext()){ while(it.hasNext()){
bookmark=switchboard.bookmarksDB.getBookmark(it.next()); bookmark=switchboard.bookmarksDB.getBookmark(it.next());
prop.putHTML("posts_"+count+"_url", bookmark.getUrl(), true); prop.putXML("posts_"+count+"_url", bookmark.getUrl());
prop.putHTML("posts_"+count+"_title", bookmark.getTitle(), true); prop.putXML("posts_"+count+"_title", bookmark.getTitle());
prop.putHTML("posts_"+count+"_description", bookmark.getDescription(), true); prop.putXML("posts_"+count+"_description", bookmark.getDescription());
prop.putHTML("posts_"+count+"_md5", serverCodings.encodeMD5Hex(bookmark.getUrl()), true); prop.putXML("posts_"+count+"_md5", serverCodings.encodeMD5Hex(bookmark.getUrl()));
date=new Date(bookmark.getTimeStamp()); date=new Date(bookmark.getTimeStamp());
prop.putHTML("posts_"+count+"_time", serverDate.formatISO8601(date), true); prop.putXML("posts_"+count+"_time", serverDate.formatISO8601(date));
prop.putHTML("posts_"+count+"_tags", bookmark.getTagsString().replaceAll(","," "), true); prop.putXML("posts_"+count+"_tags", bookmark.getTagsString().replaceAll(","," "));
// additional XML tags // additional XML tags
prop.put("posts_"+count+"_isExtended",extendedXML ? "1" : "0"); prop.put("posts_"+count+"_isExtended",extendedXML ? "1" : "0");

@ -88,7 +88,7 @@ public class get {
while (it.hasNext()) { while (it.hasNext()) {
tag = it.next(); tag = it.next();
if(!tag.getTagName().startsWith("/")) { // ignore folder tags if(!tag.getTagName().startsWith("/")) { // ignore folder tags
prop.putHTML("tags_"+count+"_name", tag.getTagName(), true); prop.putXML("tags_"+count+"_name", tag.getTagName());
prop.put("tags_"+count+"_count", tag.size()); prop.put("tags_"+count+"_count", tag.size());
count++; count++;
} }

@ -4,7 +4,7 @@ import java.util.Date;
import java.util.Iterator; import java.util.Iterator;
import de.anomic.data.bookmarksDB; import de.anomic.data.bookmarksDB;
import de.anomic.data.htmlTools; import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.http.httpRequestHeader; import de.anomic.http.httpRequestHeader;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverDate; import de.anomic.server.serverDate;
@ -83,7 +83,7 @@ public class xbel {
count++; count++;
final String title = fn; // just to make sure fn stays untouched final String title = fn; // just to make sure fn stays untouched
prop.put("xbel_"+count+"_elements", "<title>" + htmlTools.encodeUnicode2xml(title.replaceAll("(/.[^/]*)*/", "")) + "</title>"); prop.put("xbel_"+count+"_elements", "<title>" + htmlFilterCharacterCoding.unicode2xml(title.replaceAll("(/.[^/]*)*/", ""), true) + "</title>");
count++; count++;
final Iterator<String> bit=switchboard.bookmarksDB.getBookmarksIterator(fn, isAdmin); final Iterator<String> bit=switchboard.bookmarksDB.getBookmarksIterator(fn, isAdmin);
count = print_XBEL(bit, count); count = print_XBEL(bit, count);
@ -106,19 +106,19 @@ public class xbel {
bookmark=switchboard.bookmarksDB.getBookmark(bit.next()); bookmark=switchboard.bookmarksDB.getBookmark(bit.next());
date=new Date(bookmark.getTimeStamp()); date=new Date(bookmark.getTimeStamp());
prop.put("xbel_"+count+"_elements", "<bookmark id=\"" + bookmark.getUrlHash() prop.put("xbel_"+count+"_elements", "<bookmark id=\"" + bookmark.getUrlHash()
+ "\" href=\"" + htmlTools.encodeUnicode2xml(bookmark.getUrl()) + "\" href=\"" + htmlFilterCharacterCoding.unicode2xml(bookmark.getUrl(), true)
+ "\" added=\"" + htmlTools.encodeUnicode2xml(serverDate.formatISO8601(date))+"\">"); + "\" added=\"" + htmlFilterCharacterCoding.unicode2xml(serverDate.formatISO8601(date), true)+"\">");
count++; count++;
prop.put("xbel_"+count+"_elements", "<title>"); prop.put("xbel_"+count+"_elements", "<title>");
count++; count++;
prop.putHTML("xbel_"+count+"_elements", bookmark.getTitle(), true); prop.putXML("xbel_"+count+"_elements", bookmark.getTitle());
count++; count++;
prop.put("xbel_"+count+"_elements", "</title>"); prop.put("xbel_"+count+"_elements", "</title>");
count++; count++;
prop.put("xbel_"+count+"_elements", "<info>"); prop.put("xbel_"+count+"_elements", "<info>");
count++; count++;
prop.put("xbel_"+count+"_elements", "<metadata owner=\"Mozilla\" ShortcutURL=\"" prop.put("xbel_"+count+"_elements", "<metadata owner=\"Mozilla\" ShortcutURL=\""
+ htmlTools.encodeUnicode2xml(bookmark.getTagsString().replaceAll("/.*,", "").toLowerCase()) + htmlFilterCharacterCoding.unicode2xml(bookmark.getTagsString().replaceAll("/.*,", "").toLowerCase(), true)
+ "\"/>"); + "\"/>");
count++; count++;
prop.put("xbel_"+count+"_elements", "<metadata owner=\"YaCy\" public=\""+Boolean.toString(bookmark.getPublic())+"\"/>"); prop.put("xbel_"+count+"_elements", "<metadata owner=\"YaCy\" public=\""+Boolean.toString(bookmark.getPublic())+"\"/>");
@ -127,7 +127,7 @@ public class xbel {
count++; count++;
prop.put("xbel_"+count+"_elements", "<desc>"); prop.put("xbel_"+count+"_elements", "<desc>");
count++; count++;
prop.putHTML("xbel_"+count+"_elements", bookmark.getDescription(), true); prop.putXML("xbel_"+count+"_elements", bookmark.getDescription());
count++; count++;
prop.put("xbel_"+count+"_elements", "</desc>"); prop.put("xbel_"+count+"_elements", "</desc>");
count++; count++;

@ -66,8 +66,8 @@ public class feed {
RSSMessage message = feed.getChannel(); RSSMessage message = feed.getChannel();
if (message != null) { if (message != null) {
prop.putHTML("channel_title", message.getTitle(), true); prop.putXML("channel_title", message.getTitle());
prop.putHTML("channel_description", message.getDescription(), true); prop.putXML("channel_description", message.getDescription());
prop.put("channel_pubDate", message.getPubDate()); prop.put("channel_pubDate", message.getPubDate());
} }
while ((messageMaxCount > 0) && (feed.size() > 0)) { while ((messageMaxCount > 0) && (feed.size() > 0)) {
@ -75,9 +75,9 @@ public class feed {
if (message == null) continue; if (message == null) continue;
// create RSS entry // create RSS entry
prop.putHTML("item_" + messageCount + "_title", channels[channelIndex] + ": " + message.getTitle(), true); prop.putXML("item_" + messageCount + "_title", channels[channelIndex] + ": " + message.getTitle());
prop.putHTML("item_" + messageCount + "_description", message.getDescription(), true); prop.putXML("item_" + messageCount + "_description", message.getDescription());
prop.putHTML("item_" + messageCount + "_link", message.getLink(), true); prop.putXML("item_" + messageCount + "_link", message.getLink());
prop.put("item_" + messageCount + "_pubDate", message.getPubDate()); prop.put("item_" + messageCount + "_pubDate", message.getPubDate());
prop.put("item_" + messageCount + "_guid", message.getGuid()); prop.put("item_" + messageCount + "_guid", message.getGuid());
messageCount++; messageCount++;

@ -110,8 +110,8 @@ public class queues_p {
prop.putHTML("list-indexing_"+i+"_initiator", ((initiator == null) ? "proxy" : initiator.getName())); prop.putHTML("list-indexing_"+i+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
prop.put("list-indexing_"+i+"_depth", pcentry.depth()); prop.put("list-indexing_"+i+"_depth", pcentry.depth());
prop.put("list-indexing_"+i+"_modified", pcentry.getModificationDate()); prop.put("list-indexing_"+i+"_modified", pcentry.getModificationDate());
prop.putHTML("list-indexing_"+i+"_anchor", (pcentry.anchorName()==null) ? "" : pcentry.anchorName(), true); prop.putXML("list-indexing_"+i+"_anchor", (pcentry.anchorName()==null) ? "" : pcentry.anchorName());
prop.putHTML("list-indexing_"+i+"_url", pcentry.url().toNormalform(false, true), true); prop.putXML("list-indexing_"+i+"_url", pcentry.url().toNormalform(false, true));
prop.putNum("list-indexing_"+i+"_size", entrySize); prop.putNum("list-indexing_"+i+"_size", entrySize);
prop.put("list-indexing_"+i+"_inProcess", (inProcess) ? "1" : "0"); prop.put("list-indexing_"+i+"_inProcess", (inProcess) ? "1" : "0");
prop.put("list-indexing_"+i+"_hash", pcentry.urlHash()); prop.put("list-indexing_"+i+"_hash", pcentry.urlHash());
@ -135,7 +135,7 @@ public class queues_p {
initiator = sb.webIndex.seedDB.getConnected(w[i].initiator()); initiator = sb.webIndex.seedDB.getConnected(w[i].initiator());
prop.putHTML("list-loader_"+count+"_initiator", ((initiator == null) ? "proxy" : initiator.getName())); prop.putHTML("list-loader_"+count+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
prop.put("list-loader_"+count+"_depth", w[i].depth()); prop.put("list-loader_"+count+"_depth", w[i].depth());
prop.putHTML("list-loader_"+count+"_url", w[i].url().toString(), true); prop.putXML("list-loader_"+count+"_url", w[i].url().toString());
count++; count++;
} }
prop.put("list-loader", count); prop.put("list-loader", count);
@ -181,8 +181,8 @@ public class queues_p {
prop.put(tableName + "_" + showNum + "_initiator", ((initiator == null) ? "proxy" : initiator.getName())); prop.put(tableName + "_" + showNum + "_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
prop.put(tableName + "_" + showNum + "_depth", urle.depth()); prop.put(tableName + "_" + showNum + "_depth", urle.depth());
prop.put(tableName + "_" + showNum + "_modified", daydate(urle.loaddate())); prop.put(tableName + "_" + showNum + "_modified", daydate(urle.loaddate()));
prop.putHTML(tableName + "_" + showNum + "_anchor", urle.name(), true); prop.putXML(tableName + "_" + showNum + "_anchor", urle.name());
prop.putHTML(tableName + "_" + showNum + "_url", urle.url().toNormalform(false, true), true); prop.putXML(tableName + "_" + showNum + "_url", urle.url().toNormalform(false, true));
prop.put(tableName + "_" + showNum + "_hash", urle.url().hash()); prop.put(tableName + "_" + showNum + "_hash", urle.url().hash());
showNum++; showNum++;
} }

@ -65,7 +65,7 @@ public class getpageinfo_p {
String url=post.get("url"); String url=post.get("url");
if(url.toLowerCase().startsWith("ftp://")){ if(url.toLowerCase().startsWith("ftp://")){
prop.put("robots-allowed", "1"); prop.put("robots-allowed", "1");
prop.putHTML("title", "FTP: "+url, true); prop.putXML("title", "FTP: "+url);
return prop; return prop;
} else if (!(url.toLowerCase().startsWith("http://") || url.toLowerCase().startsWith("https://"))) { } else if (!(url.toLowerCase().startsWith("http://") || url.toLowerCase().startsWith("https://"))) {
url = "http://" + url; url = "http://" + url;
@ -86,7 +86,7 @@ public class getpageinfo_p {
writer.close(); writer.close();
// put the document title // put the document title
prop.putHTML("title", scraper.getTitle(), true); prop.putXML("title", scraper.getTitle());
// put the favicon that belongs to the document // put the favicon that belongs to the document
prop.put("favicon", (scraper.getFavicon()==null) ? "" : scraper.getFavicon().toString()); prop.put("favicon", (scraper.getFavicon()==null) ? "" : scraper.getFavicon().toString());
@ -97,16 +97,16 @@ public class getpageinfo_p {
for(int i=0;i<list.length;i++){ for(int i=0;i<list.length;i++){
String tag = list[i]; String tag = list[i];
if (!tag.equals("")) { if (!tag.equals("")) {
prop.putHTML("tags_"+count+"_tag", tag, true); prop.putXML("tags_"+count+"_tag", tag);
count++; count++;
} }
} }
prop.put("tags", count); prop.put("tags", count);
// put description // put description
prop.putHTML("desc", scraper.getDescription(), true); prop.putXML("desc", scraper.getDescription());
// put language // put language
Set<String> languages = scraper.getContentLanguages(); Set<String> languages = scraper.getContentLanguages();
prop.putHTML("lang", (languages == null) ? "unknown" : languages.iterator().next(), true); prop.putXML("lang", (languages == null) ? "unknown" : languages.iterator().next());
} catch (final MalformedURLException e) { /* ignore this */ } catch (final MalformedURLException e) { /* ignore this */
} catch (final IOException e) { /* ignore this */ } catch (final IOException e) { /* ignore this */
@ -121,7 +121,7 @@ public class getpageinfo_p {
// get the sitemap URL of the domain // get the sitemap URL of the domain
final yacyURL sitemapURL = sb.robots.getSitemapURL(theURL); final yacyURL sitemapURL = sb.robots.getSitemapURL(theURL);
prop.putHTML("sitemap", (sitemapURL==null)?"":sitemapURL.toString(), true); prop.putXML("sitemap", (sitemapURL==null)?"":sitemapURL.toString());
} catch (final MalformedURLException e) {} } catch (final MalformedURLException e) {}
} }

@ -339,7 +339,7 @@ public class ysearch {
prop.put("input_contentdomCheckApp", (contentdomCode == plasmaSearchQuery.CONTENTDOM_APP) ? "1" : "0"); prop.put("input_contentdomCheckApp", (contentdomCode == plasmaSearchQuery.CONTENTDOM_APP) ? "1" : "0");
// for RSS: don't HTML encode some elements // for RSS: don't HTML encode some elements
prop.putHTML("rss_query", querystring, true); prop.putXML("rss_query", querystring);
prop.put("rss_queryenc", yacyURL.escape(querystring.replace(' ', '+'))); prop.put("rss_queryenc", yacyURL.escape(querystring.replace(' ', '+')));
sb.localSearchLastAccess = System.currentTimeMillis(); sb.localSearchLastAccess = System.currentTimeMillis();

@ -96,9 +96,9 @@ public class ysearchitem {
if (rss) { if (rss) {
// text search for rss output // text search for rss output
prop.put("rss", "1"); // switch on specific content prop.put("rss", "1"); // switch on specific content
prop.putHTML("rss_title", result.title(), true); prop.putXML("rss_title", result.title());
prop.putHTML("rss_description", result.textSnippet().getLineRaw(), true); prop.putXML("rss_description", result.textSnippet().getLineRaw());
prop.putHTML("rss_link", result.urlstring(), true); prop.putXML("rss_link", result.urlstring());
prop.put("rss_urlhash", result.hash()); prop.put("rss_urlhash", result.hash());
prop.put("rss_date", plasmaSwitchboard.dateString822(result.modified())); prop.put("rss_date", plasmaSwitchboard.dateString822(result.modified()));
return prop; return prop;

@ -439,7 +439,7 @@ public class yacysearch {
prop.put("input_contentdomCheckApp", (contentdomCode == plasmaSearchQuery.CONTENTDOM_APP) ? "1" : "0"); prop.put("input_contentdomCheckApp", (contentdomCode == plasmaSearchQuery.CONTENTDOM_APP) ? "1" : "0");
// for RSS: don't HTML encode some elements // for RSS: don't HTML encode some elements
prop.putHTML("rss_query", querystring, true); prop.putXML("rss_query", querystring);
prop.put("rss_queryenc", yacyURL.escape(querystring.replace(' ', '+'))); prop.put("rss_queryenc", yacyURL.escape(querystring.replace(' ', '+')));
sb.localSearchLastAccess = System.currentTimeMillis(); sb.localSearchLastAccess = System.currentTimeMillis();

@ -182,10 +182,10 @@ public class yacysearchitem {
if (rss) { if (rss) {
// text search for rss output // text search for rss output
prop.put("rss", "1"); // switch on specific content prop.put("rss", "1"); // switch on specific content
prop.putHTML("rss_title", result.title(), true); prop.putXML("rss_title", result.title());
final plasmaSnippetCache.TextSnippet snippet = result.textSnippet(); final plasmaSnippetCache.TextSnippet snippet = result.textSnippet();
prop.putHTML("rss_description", (snippet == null) ? "" : snippet.getLineRaw(), true); prop.putXML("rss_description", (snippet == null) ? "" : snippet.getLineRaw());
prop.putHTML("rss_link", result.urlstring(), true); prop.putXML("rss_link", result.urlstring());
prop.put("rss_urlhash", result.hash()); prop.put("rss_urlhash", result.hash());
prop.put("rss_date", plasmaSwitchboard.dateString822(result.modified())); prop.put("rss_date", plasmaSwitchboard.dateString822(result.modified()));
return prop; return prop;

@ -30,6 +30,8 @@ package de.anomic.data;
import java.util.ArrayList; import java.util.ArrayList;
import de.anomic.htmlFilter.htmlFilterCharacterCoding;
/** /**
* This class provides a diff-functionality. * This class provides a diff-functionality.
*/ */
@ -253,7 +255,7 @@ public class diff {
case diff.Part.ADDED: sb.append("added"); break; case diff.Part.ADDED: sb.append("added"); break;
case diff.Part.DELETED: sb.append("deleted"); break; case diff.Part.DELETED: sb.append("deleted"); break;
} }
sb.append("\">").append(htmlTools.encodeUnicode2html(ps[j].getString(), true).replaceAll("\n", "<br />")); sb.append("\">").append(htmlFilterCharacterCoding.unicode2html(ps[j].getString(), true).replaceAll("\n", "<br />"));
sb.append("</span>"); sb.append("</span>");
} }
sb.append("</p>"); sb.append("</p>");

@ -35,6 +35,7 @@ import java.util.HashMap;
import de.anomic.data.wiki.abstractWikiParser; import de.anomic.data.wiki.abstractWikiParser;
import de.anomic.data.wiki.wikiParser; import de.anomic.data.wiki.wikiParser;
import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCore; import de.anomic.server.serverCore;
@ -757,7 +758,7 @@ public class wikiCode extends abstractWikiParser implements wikiParser {
public String transformLine(String result, final String publicAddress, final plasmaSwitchboard switchboard) { public String transformLine(String result, final String publicAddress, final plasmaSwitchboard switchboard) {
//If HTML has not bee replaced yet (can happen if method gets called in recursion), replace now! //If HTML has not bee replaced yet (can happen if method gets called in recursion), replace now!
if (!replacedHTML || preformattedSpan){ if (!replacedHTML || preformattedSpan){
result = htmlTools.encodeUnicode2html(result, true); result = htmlFilterCharacterCoding.unicode2html(result, true);
replacedHTML = true; replacedHTML = true;
} }

@ -1,13 +1,8 @@
// htmlTools.java // htmlFilterCharacterCoding.java
// ----------------------- // ----------------------------------
// (C) by Michael Peter Christen; mc@yacy.net, // (C) 22.10.2008 by Michael Peter Christen; mc@yacy.net
// (C) by Jan Sandbrink (NN), Franz Brausse (FB, karlchenofhell), // first published on http://yacy.net
// (C) by Bjoern 'fuchs' Krombholz (fuchsi) // Frankfurt, Germany, 2008
// first published on http://www.yacy.net
//
// $LastChangedDate: $
// $LastChangedRevision: $
// $LastChangedBy: $
// //
// This program is free software; you can redistribute it and/or modify // This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by // it under the terms of the GNU General Public License as published by
@ -23,114 +18,22 @@
// along with this program; if not, write to the Free Software // along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.data; package de.anomic.htmlFilter;
public class htmlTools {
/** Replaces characters in a string with other entities according to HTML standards.
* @param text a string that possibly contains special characters
* @param includingAmpersand if <code>false</code> ampersands are not encoded
* @param forXML if <code>true</code> then only &amp;, &quot;, &lt; and &gt; will
* be transcoded.
* @return the string with all characters replaced by the corresponding character from array
*/
public static String encodeUnicode2html(final String text, final boolean includingAmpersand, final boolean forXML) {
if (text == null)
return null;
final int spos = (includingAmpersand ? 0 : 2);
// if (forXML), then only encode ampersand, quotation mark, less than and
// greather than which are the first 4 pairs in default mapping table
final int epos = (forXML ? 8 : mapping.length);
return encode(text, mapping, spos, epos);
}
/**
* Like {@link #encodeUnicode2html(String, boolean, boolean)} with <code>forXML = false</code>
*/
public static String encodeUnicode2html(final String text, final boolean includingAmpersand) {
return encodeUnicode2html(text, includingAmpersand, false);
}
import java.util.HashMap;
/** public class htmlFilterCharacterCoding {
* Replaces special entities ampersand, quotation marks, and less than/graiter than
* by the escaping entities allowed in XML documents.
*
* Like {@link #encodeUnicode2html(String, boolean, boolean)} with
* <code>includingAmpersand = true</code> and <code>foxXML = true</code>.
*
* @param text the original String
* @return the encoded String
*/
public static String encodeUnicode2xml(final String text) {
return encodeUnicode2html(text, true, true);
}
/** private static final char amp_unicode = "\u0026".charAt(0);
* Generic method that replaces occurences of special character entities defined in map private static final String amp_html = "&amp;";
* array with their corresponding mapping.
* @param text The String too process.
* @param map An array defining the entity mapping.
* @param spos It is possible to use a subset of the map only. This parameter defines the
* starting point in the map array.
* @param epos The ending point, see above.
* @return A copy of the original String with all entities defined in map replaced.
*/
public static String encode(final String text, final String[] map, final int spos, final int epos) {
final StringBuffer sb = new StringBuffer(text.length());
int textpos = 0;
search: while (textpos < text.length()) {
// find a (forward) mapping
loop: for (int i = spos; i < epos; i += 2) {
if (text.charAt(textpos) != map[i].charAt(0)) continue loop;
// found match
sb.append(map[i + 1]);
textpos++;
continue search;
}
// not found match
sb.append(text.charAt(textpos));
textpos++;
}
return sb.toString(); private static final String[] mapping4xml = {
}
public static String decodeHtml2Unicode(final String text) {
if (text == null) return null;
int pos = 0;
final StringBuffer sb = new StringBuffer(text.length());
search: while (pos < text.length()) {
// find a reverse mapping. TODO: replace matching with hashtable(s)
loop: for (int i = 0; i < mapping.length; i += 2) {
if (pos + mapping[i + 1].length() > text.length()) continue loop;
for (int j = mapping[i + 1].length() - 1; j >= 0; j--) {
if (text.charAt(pos + j) != mapping[i + 1].charAt(j)) continue loop;
}
// found match
sb.append(mapping[i]);
pos = pos + mapping[i + 1].length();
continue search;
}
// not found match
sb.append(text.charAt(pos));
pos++;
}
return new String(sb);
}
//This array contains codes (see http://mindprod.com/jgloss/unicode.html for details)
//that will be replaced. To add new codes or patterns, just put them at the end
//of the list. Codes or patterns in this list can not be escaped with [= or <pre>
private static final String[] mapping = {
// Ampersands _have_ to be replaced first. If they were replaced later,
// other replaced characters containing ampersands would get messed up.
"\u0026","&amp;", //ampersand
"\"","&quot;", //quotation mark "\"","&quot;", //quotation mark
"\u003C","&lt;", //less than "\u003C","&lt;", //less than
"\u003E","&gt;", //greater than "\u003E","&gt;", //greater than
};
private static final String[] mapping4html = {
"\\", "&#092;", // Backslash "\\", "&#092;", // Backslash
"\u005E","&#094;", // Caret "\u005E","&#094;", // Caret
@ -267,15 +170,109 @@ public class htmlTools {
"\u00FF","&yuml;" "\u00FF","&yuml;"
}; };
// Lookup tables built once at class-load time from the pair arrays
// mapping4xml and mapping4html (layout: [character, entity, character, entity, ...]).
// html2unicode* map an entity string to its character,
// unicode2html* map a character to its entity string.
private final static HashMap<String, Character> html2unicode4xml = new HashMap<String, Character>();
private final static HashMap<String, Character> html2unicode4html = new HashMap<String, Character>();
private final static HashMap<Character, String> unicode2html4xml = new HashMap<Character, String>();
private final static HashMap<Character, String> unicode2html4html = new HashMap<Character, String>();
static {
    // html table: even index = character (first char of the string), odd index = entity
    for (int pair = 0; pair < mapping4html.length; pair += 2) {
        final Character unicode = Character.valueOf(mapping4html[pair].charAt(0));
        final String entity = mapping4html[pair + 1];
        html2unicode4html.put(entity, unicode);
        unicode2html4html.put(unicode, entity);
    }
    // xml table: same layout as the html table
    for (int pair = 0; pair < mapping4xml.length; pair += 2) {
        final Character unicode = Character.valueOf(mapping4xml[pair].charAt(0));
        final String entity = mapping4xml[pair + 1];
        html2unicode4xml.put(entity, unicode);
        unicode2html4xml.put(unicode, entity);
    }
}
/**
 * Encodes a string for XML output by delegating to the private
 * three-argument unicode2html with html = false, so only the
 * characters of the xml mapping table are replaced by their entities.
 *
 * @param text the string to encode; may be null
 * @param amp if true, ampersands are replaced by &amp;amp; as well
 * @return the encoded string, or null if text is null
 */
public static String unicode2xml(final String text, boolean amp) {
return unicode2html(text, amp, false);
}
/**
 * Encodes a string for HTML output by delegating to the private
 * three-argument unicode2html with html = true, so characters of
 * both the xml and the html mapping tables are replaced by their entities.
 *
 * @param text the string to encode; may be null
 * @param amp if true, ampersands are replaced by &amp;amp; as well
 * @return the encoded string, or null if text is null
 */
public static String unicode2html(final String text, boolean amp) {
return unicode2html(text, amp, true);
}
/**
 * Replaces characters of the given text by their entity strings.
 * For every character the first matching rule wins:
 * the ampersand (only if amp is set), then the xml table,
 * then the html table (only if html is set); any other character is copied.
 *
 * @param text the string to encode; may be null
 * @param amp if true, ampersands are replaced by amp_html
 * @param html if true, the html table is consulted in addition to the xml table
 * @return the encoded string, or null if text is null
 */
private static String unicode2html(final String text, boolean amp, boolean html) {
    if (text == null) {
        return null;
    }
    final int length = text.length();
    // entities are longer than the characters they replace, so reserve some extra room
    final StringBuilder encoded = new StringBuilder(length * 12 / 10);
    for (int pos = 0; pos < length; pos++) {
        final char ch = text.charAt(pos);
        String entity;
        if (amp && ch == amp_unicode) {
            entity = amp_html;
        } else {
            entity = unicode2html4xml.get(ch);
            if (entity == null && html) {
                entity = unicode2html4html.get(ch);
            }
        }
        if (entity == null) {
            encoded.append(ch);
        } else {
            encoded.append(entity);
        }
    }
    return encoded.toString();
}
/**
 * Decodes the entities known to this class (amp_html plus the entries of the
 * xml and html lookup tables) back into their characters.
 *
 * Text that merely looks like an entity is kept unchanged:
 * an ampersand that is not followed by any semicolon, and an
 * "&amp;...;" span that is not a known entity, are copied literally.
 * (The previous implementation never advanced past an unknown entity and
 * therefore looped forever, and it silently dropped an ampersand that had
 * no terminating semicolon.)
 *
 * @param text the string to decode; may be null
 * @return the decoded string, or null if text is null
 */
public static String html2unicode(final String text) {
    if (text == null) {
        return null;
    }
    final int length = text.length();
    final StringBuilder sb = new StringBuilder(length);
    int p = 0;
    while (p < length) {
        final int start = text.indexOf('&', p);
        if (start < 0) {
            // no further ampersand: the remainder is plain text
            sb.append(text, p, length);
            break;
        }
        // copy the plain text in front of the ampersand
        sb.append(text, p, start);
        final int end = text.indexOf(';', start);
        if (end < 0) {
            // without a terminating semicolon no entity can follow any more;
            // keep the remainder, including this ampersand, as it is
            sb.append(text, start, length);
            break;
        }
        final String entity = text.substring(start, end + 1);
        if (entity.equals(amp_html)) {
            sb.append(amp_unicode);
            p = end + 1;
            continue;
        }
        Character decoded = html2unicode4xml.get(entity);
        if (decoded == null) {
            decoded = html2unicode4html.get(entity);
        }
        if (decoded != null) {
            sb.append(decoded.charValue());
            p = end + 1;
            continue;
        }
        // unknown entity: emit the ampersand literally and step over it only,
        // so a real entity inside the span (e.g. "& x &lt;") is still decoded
        // and the loop is guaranteed to make progress
        sb.append('&');
        p = start + 1;
    }
    return sb.toString();
}
public static void main(final String[] args) { public static void main(final String[] args) {
final String text = "Test-Text mit & um zyklische &uuml; &amp; Ersetzungen auszuschliessen"; final String text = "Test-Text mit & um zyklische &uuml; &amp; Ersetzungen auszuschliessen";
final String txet = encodeUnicode2html(text, true); final String txet = unicode2html(text, true);
System.out.println(txet); System.out.println(txet);
System.out.println(decodeHtml2Unicode(txet)); System.out.println(html2unicode(txet));
if (decodeHtml2Unicode(txet).equals(text)) System.out.println("correct"); if (html2unicode(txet).equals(text)) System.out.println("correct");
final String text2 = "encodeUnicode2xml: & \" < >"; final String text2 = "encodeUnicode2xml: & \" < >";
System.out.println(text2); System.out.println(text2);
System.out.println(encodeUnicode2xml(text2)); System.out.println(unicode2xml(text2, true));
} }
} }

@ -44,7 +44,6 @@ import java.util.Properties;
import javax.swing.event.EventListenerList; import javax.swing.event.EventListenerList;
import de.anomic.crawler.HTTPLoader; import de.anomic.crawler.HTTPLoader;
import de.anomic.data.htmlTools;
import de.anomic.http.HttpClient; import de.anomic.http.HttpClient;
import de.anomic.http.httpRequestHeader; import de.anomic.http.httpRequestHeader;
import de.anomic.server.serverCharBuffer; import de.anomic.server.serverCharBuffer;
@ -166,11 +165,11 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
if (tagname.equalsIgnoreCase("meta")) { if (tagname.equalsIgnoreCase("meta")) {
String name = tagopts.getProperty("name", ""); String name = tagopts.getProperty("name", "");
if (name.length() > 0) { if (name.length() > 0) {
metas.put(name.toLowerCase(), htmlTools.decodeHtml2Unicode(tagopts.getProperty("content",""))); metas.put(name.toLowerCase(), htmlFilterCharacterCoding.html2unicode(tagopts.getProperty("content","")));
} else { } else {
name = tagopts.getProperty("http-equiv", ""); name = tagopts.getProperty("http-equiv", "");
if (name.length() > 0) { if (name.length() > 0) {
metas.put(name.toLowerCase(), htmlTools.decodeHtml2Unicode(tagopts.getProperty("content",""))); metas.put(name.toLowerCase(), htmlFilterCharacterCoding.html2unicode(tagopts.getProperty("content","")));
} }
} }
} }

@ -59,8 +59,8 @@ import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.apache.commons.httpclient.ChunkedInputStream; import org.apache.commons.httpclient.ChunkedInputStream;
import org.apache.commons.httpclient.ContentLengthInputStream; import org.apache.commons.httpclient.ContentLengthInputStream;
import de.anomic.data.htmlTools;
import de.anomic.data.userDB; import de.anomic.data.userDB;
import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverByteBuffer; import de.anomic.server.serverByteBuffer;
@ -850,7 +850,7 @@ public final class httpd implements serverHandler, Cloneable {
// 06.01.2007: decode HTML entities by [FB] // 06.01.2007: decode HTML entities by [FB]
public static String decodeHtmlEntities(String s) { public static String decodeHtmlEntities(String s) {
// replace all entities defined in wikiCode.characters and htmlentities // replace all entities defined in wikiCode.characters and htmlentities
s = htmlTools.decodeHtml2Unicode(s); s = htmlFilterCharacterCoding.html2unicode(s);
// replace all other // replace all other
final CharArrayWriter b = new CharArrayWriter(s.length()); final CharArrayWriter b = new CharArrayWriter(s.length());

@ -39,7 +39,7 @@ import java.util.Iterator;
import java.util.Map; import java.util.Map;
import java.util.TreeSet; import java.util.TreeSet;
import de.anomic.data.htmlTools; import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.http.JakartaCommonsHttpClient; import de.anomic.http.JakartaCommonsHttpClient;
import de.anomic.http.JakartaCommonsHttpResponse; import de.anomic.http.JakartaCommonsHttpResponse;
import de.anomic.http.httpRemoteProxyConfig; import de.anomic.http.httpRemoteProxyConfig;
@ -452,14 +452,14 @@ public final class indexRepositoryReference {
pw.println(url); pw.println(url);
} }
if (format == 1) { if (format == 1) {
pw.println("<a href=\"" + url + "\">" + htmlTools.encodeUnicode2html(comp.dc_title(), true, true) + "</a><br>"); pw.println("<a href=\"" + url + "\">" + htmlFilterCharacterCoding.unicode2xml(comp.dc_title(), true) + "</a><br>");
} }
if (format == 2) { if (format == 2) {
pw.println("<item>"); pw.println("<item>");
pw.println("<title>" + htmlTools.encodeUnicode2html(comp.dc_title(), true, true) + "</title>"); pw.println("<title>" + htmlFilterCharacterCoding.unicode2xml(comp.dc_title(), true) + "</title>");
pw.println("<link>" + yacyURL.escape(url) + "</link>"); pw.println("<link>" + yacyURL.escape(url) + "</link>");
if (comp.dc_creator().length() > 0) pw.println("<author>" + htmlTools.encodeUnicode2html(comp.dc_creator(), true, true) + "</author>"); if (comp.dc_creator().length() > 0) pw.println("<author>" + htmlFilterCharacterCoding.unicode2xml(comp.dc_creator(), true) + "</author>");
if (comp.dc_subject().length() > 0) pw.println("<description>" + htmlTools.encodeUnicode2html(comp.dc_subject(), true, true) + "</description>"); if (comp.dc_subject().length() > 0) pw.println("<description>" + htmlFilterCharacterCoding.unicode2xml(comp.dc_subject(), true) + "</description>");
pw.println("<pubDate>" + entry.moddate().toString() + "</pubDate>"); pw.println("<pubDate>" + entry.moddate().toString() + "</pubDate>");
pw.println("<guid isPermaLink=\"false\">" + entry.hash() + "</guid>"); pw.println("<guid isPermaLink=\"false\">" + entry.hash() + "</guid>");
pw.println("</item>"); pw.println("</item>");

@ -26,8 +26,8 @@ import java.util.Iterator;
import java.util.Set; import java.util.Set;
import java.util.TreeSet; import java.util.TreeSet;
import de.anomic.data.htmlTools;
import de.anomic.htmlFilter.htmlFilterAbstractScraper; import de.anomic.htmlFilter.htmlFilterAbstractScraper;
import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.index.indexWord; import de.anomic.index.indexWord;
import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroBitfield; import de.anomic.kelondro.kelondroBitfield;
@ -266,7 +266,7 @@ public final class plasmaSearchQuery {
public String queryString(final boolean encodeHTML) { public String queryString(final boolean encodeHTML) {
if(encodeHTML){ if(encodeHTML){
return htmlTools.encodeUnicode2html(this.queryString, true); return htmlFilterCharacterCoding.unicode2html(this.queryString, true);
} }
return this.queryString; return this.queryString;
} }

@ -52,7 +52,7 @@ import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import de.anomic.data.htmlTools; import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.tools.yFormatter; import de.anomic.tools.yFormatter;
@ -146,10 +146,10 @@ public class serverObjects extends HashMap<String, String> implements Cloneable
* @param key key name as String. * @param key key name as String.
* @param value a String that will be reencoded for HTML output. * @param value a String that will be reencoded for HTML output.
* @return the modified String that was added to the map. * @return the modified String that was added to the map.
* @see htmlTools#encodeUnicode2html(String, boolean) * @see htmlFilterCharacterCoding#unicode2html(String, boolean)
*/ */
public String putHTML(final String key, final String value) { public String putHTML(final String key, final String value) {
return putHTML(key, value, false); return put(key, htmlFilterCharacterCoding.unicode2html(value, true));
} }
/** /**
@ -158,8 +158,8 @@ public class serverObjects extends HashMap<String, String> implements Cloneable
* If forXML is <code>true</code>, then only the characters <b>&amp; &quot; &lt; &gt;</b> will be * If forXML is <code>true</code>, then only the characters <b>&amp; &quot; &lt; &gt;</b> will be
* replaced in the returned String. * replaced in the returned String.
*/ */
public String putHTML(final String key, final String value, final boolean forXML) { public String putXML(final String key, final String value) {
return put(key, htmlTools.encodeUnicode2html(value, true, forXML)); return put(key, htmlFilterCharacterCoding.unicode2xml(value, true));
} }
/** /**

Loading…
Cancel
Save