*) Second try: replaced replaceHTML again. There should be no problem this time.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1359 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
low012 19 years ago
parent d45b1162e2
commit 7d5af75d11

@ -42,7 +42,7 @@
// Contributions and changes to the program code must be marked as such.
// Contains contributions from Alexander Schier [AS]
// and Marc Nause [MN]
// Franz Brausse [FB] and Marc Nause [MN]
package de.anomic.data;
@ -109,21 +109,134 @@ public class wikiCode {
}
}
public static String replaceHTML(String result) {
if (result == null) return null;
int p0;
//The following method was submitted by [FB] (additions and a few changes by [MN])
/**
 * Replaces special characters in a string with their HTML entities, so the
 * text renders the same on every system. Escaping the markup characters is
 * also what keeps user-supplied text from being interpreted as HTML (XSS).
 *
 * <p>The (pattern, replacement) pairs in {@link #htmlentities} are applied one
 * after another in array order. Within one pass the text is scanned left to
 * right and freshly inserted replacement text is never rescanned for the same
 * pattern.
 *
 * @param text a string that possibly contains special characters; may be null
 * @return the string with all listed patterns replaced by their entities,
 *         or null if {@code text} was null
 */
public static String replaceHTML(String text) {
    if (text == null) {
        return null;
    }
    // The table is laid out as pattern, replacement, pattern, replacement, ...
    // A dangling last element without a replacement is ignored.
    for (int i = 0; i + 1 < htmlentities.length; i += 2) {
        final String pattern = htmlentities[i];
        final String entity = htmlentities[i + 1];

        // An empty pattern matches everywhere and would never terminate.
        int hit = (pattern.length() == 0) ? -1 : text.indexOf(pattern);
        if (hit < 0) {
            continue; // nothing to replace, keep the current string untouched
        }

        // Build the result once per pattern instead of re-concatenating the
        // whole string for every single match.
        final StringBuilder encoded = new StringBuilder(text.length() + 16);
        int copiedUpTo = 0;
        while (hit >= 0) {
            encoded.append(text, copiedUpTo, hit).append(entity);
            copiedUpTo = hit + pattern.length();
            hit = text.indexOf(pattern, copiedUpTo);
        }
        encoded.append(text, copiedUpTo, text.length());
        text = encoded.toString();
    }
    return text;
}
// Ampersands have to be replaced first. If they were replaced later,
//This array contains codes (see http://mindprod.com/jgloss/unicode.html for details) and
//patterns that will be replaced. To add new codes or patterns, just put them at the end
//of the list.
public static String[] htmlentities={
// Ampersands _have_ to be replaced first. If they were replaced later,
// other replaced characters containing ampersands would get messed up.
p0 = 0; while ((p0 = result.indexOf("&", p0)) >= 0) {result = result.substring(0, p0) + "&amp;" + result.substring(p0 + 1); p0++;}
p0 = 0; while ((p0 = result.indexOf('"', p0)) >= 0) result = result.substring(0, p0) + "&quot;" + result.substring(p0 + 1);
p0 = 0; while ((p0 = result.indexOf("<", p0)) >= 0) result = result.substring(0, p0) + "&lt;" + result.substring(p0 + 1);
p0 = 0; while ((p0 = result.indexOf(">", p0)) >= 0) result = result.substring(0, p0) + "&gt;" + result.substring(p0 + 1);
//p0 = 0; while ((p0 = result.indexOf("*", p0)) >= 0) result = result.substring(0, p0) + "&#149;" + result.substring(p0 + 1);
p0 = 0; while ((p0 = result.indexOf("(C)", p0)) >= 0) result = result.substring(0, p0) + "&copy;" + result.substring(p0 + 3);
return result;
}
"\u0026","&amp;", //ampersand
"\\u0022","&quot;", //quotation mark
"\u003c","&lt;", //less than
"\u003e","&gt;", //greater than
"\u00a1","&iexcl;", //inverted (spanish) exclamation mark
"\u00a2","&cent;", //cent
"\u00a3","&pound;", //pound
"\u00a4","&curren;", //currency
"\u00a5","&yen;", //yen
"\u00a6","&brvbar;", //broken vertical bar
"\u00a7","&sect;", //section sign
"\u00a8","&uml;", //diaeresis (umlaut)
"\u00a9","&copy;", //copyright sign
"\u00aa","&ordf;", //feminine ordinal indicator
"\u00ab","&laquo;", //left-pointing double angle quotation mark
"\u00ac","&not;", //not sign
"\u00ad","&shy;", //soft hyphen
"\u00ae","&reg;", //registered sign
"\u00af","&macr;", //macron
"\u00b0","&deg;", //degree sign
"\u00b1","&plusmn;", //plus-minus sign
"\u00b2","&sup2;", //superscript two
"\u00b3","&sup3;", //superscript three
"\u00b4","&acute;", //acute accent
"\u00b5","&micro;", //micro sign
"\u00b6","&para;", //paragraph sign
"\u00b7","&middot;", //middle dot
"\u00b8","&cedil;", //cedilla
"\u00b9","&sup1;", //superscript one
"\u00ba","&ordm;", //masculine ordinal indicator
"\u00bb","&raquo;", //right-pointing double angle quotation mark
"\u00bc","&frac14;", //fraction 1/4
"\u00bd","&frac12;", //fraction 1/2
"\u00be","&frac34;", //fraction 3/4
"\u00bf","&iquest;", //inverted (spanisch) questionmark
"\u00c0","&Agrave;",
"\u00c1","&Aacute;",
"\u00c2","&Acirc;",
"\u00c3","&Atilde;",
"\u00c4","&Auml;",
"\u00c5","&Aring;",
"\u00c6","&AElig;",
"\u00c7","&Ccedil;",
"\u00c8","&Egrave;",
"\u00c9","&Eacute;",
"\u00ca","&Ecirc;",
"\u00cb","&Euml;",
"\u00cc","&Igrave;",
"\u00cd","&Iacute;",
"\u00ce","&Icirc;",
"\u00cf","&Iuml;",
"\u00d0","&ETH;",
"\u00d1","&Ntilde;",
"\u00d2","&Ograve;",
"\u00d3","&Oacute;",
"\u00d4","&Ocirc;",
"\u00d5","&Otilde;",
"\u00d6","&Ouml;",
"\u00d7","&times;",
"\u00d8","&Oslash;",
"\u00d9","&Ugrave;",
"\u00da","&Uacute;",
"\u00db","&Ucirc;",
"\u00dc","&Uuml;",
"\u00dd","&Yacute;",
"\u00de","&THORN;",
"\u00df","&szlig;",
"\u00e0","&agrave;",
"\u00e1","&aacute;",
"\u00e2","&acirc;",
"\u00e3","&atilde;",
"\u00e4","&auml;",
"\u00e5","&aring;",
"\u00e6","&aelig;",
"\u00e7","&ccedil;",
"\u00e8","&egrave;",
"\u00e9","&eacute;",
"\u00ea","&ecirc;",
"\u00eb","&euml;",
"\u00ec","&igrave;",
"\u00ed","&iacute;",
"\u00ee","&icirc;",
"\u00ef","&iuml;",
"\u00f0","&eth;",
"\u00e1","&ntilde;",
"\u00e2","&ograve;",
"\u00e3","&oacute;",
"\u00e4","&ocirc;",
"\u00e5","&otilde;",
"\u00e6","&ouml;",
"\u00e7","&divide;",
"\u00e8","&oslash;",
"\u00e9","&ugrave;",
"\u00ea","&uacute;",
"\u00eb","&ucirc;",
"\u00ec","&uuml;",
"\u00ed","&yacute;",
"\u00ee","&thorn;",
"\u00ef","&yuml;",
"(C)","&copy;"
};
//end contrib [FB] and [MN]
/** Replaces wiki tags with HTML tags.
*

Loading…
Cancel
Save