package de.anomic.data; public class htmlTools { /** Replaces special characters from a string. Avoids XSS attacks and ensures correct display of * special characters in non UTF-8 capable browsers. * @param text a string that possibly contains HTML * @return the string with all special characters encoded */ //[MN] public static String replaceHTML(String text) { text = replace(text, xmlentities); text = replace(text, htmlentities); return text; } /** Replaces special characters from a string. Ensures correct display of * special characters in non UTF-8 capable browsers. * @param text a string that possibly contains special characters * @return the string with all special characters encoded */ //[MN] public static String replaceHTMLEntities(String text) { text = replace(text, htmlentities); return text; } /** Replaces special characters from a string. Avoids XSS attacks. * @param text a string that possibly contains HTML * @return the string without any HTML-tags that can be used for XSS */ //[MN] public static String replaceXMLEntities(String text) { text = replace(text, xmlentities); return text; } /** Replaces characters in a string with other characters defined in an array. * @param text a string that possibly contains special characters * @param entities array that contains characters to be replaced and characters it will be replaced by * @return the string with all characters replaced by the corresponding character from array */ //[FB], changes by [MN] public static String replace(String text, String[] entities) { if (text==null) { return null; } for (int x=0;x<=entities.length-1;x=x+2) { int p=0; while ((p=text.indexOf(entities[x],p))>=0) { text=text.substring(0,p)+entities[x+1]+text.substring(p+entities[x].length()); p+=entities[x+1].length(); } } return text; } public static String deReplaceHTML(String text) { text = deReplaceHTMLEntities(text); text = deReplaceXMLEntities(text); return text; } public static String deReplaceHTMLEntities(String text) { return deReplace(text, htmlentities); } public static String deReplaceXMLEntities(String text) { return deReplace(text, xmlentities); } public static String deReplace(String text, String[] entities) { if (text == null) return null; for (int i=entities.length-1; i>0; i-=2) { int p = 0; while ((p = text.indexOf(entities[i])) >= 0) { text = text.substring(0, p) + entities[i - 1] + text.substring(p + entities[i].length()); p += entities[i - 1].length(); } } return text; } //This array contains codes (see http://mindprod.com/jgloss/unicode.html for details) //that will be replaced. To add new codes or patterns, just put them at the end //of the list. Codes or patterns in this list can not be escaped with [= or
public static final String[] xmlentities={ // Ampersands _have_ to be replaced first. If they were replaced later, // other replaced characters containing ampersands would get messed up. "\u0026","&", //ampersand "\"",""", //quotation mark "\u003C","<", //less than "\u003E",">", //greater than }; //This array contains codes (see http://mindprod.com/jgloss/unicode.html for details) and //patterns that will be replaced. To add new codes or patterns, just put them at the end //of the list. Codes or patterns in this list can not be escaped with [= orpublic static final String[] htmlentities={ "\u005E","^", // Caret "\u0060","`", // Accent Grave ` "\u007B","{", // { "\u007C","|", // | "\u007D","}", // } "\u007E","~", // ~ "\u0082","", "\u0083","", "\u0084","", "\u0085"," ", "\u0086","", "\u0087","", "\u0088","", "\u0089","", "\u008A","", "\u008B","", "\u008C","", "\u008D","", "\u008E","", "\u0091","", "\u0092","", "\u0093","", "\u0094","", "\u0095","", "\u0096","", "\u0097","", "\u0098","", "\u0099","", "\u009A","", "\u009B","", "\u009C","", "\u009D","", "\u009E","", "\u009F","", "\u00A1","¡", //inverted (spanish) exclamation mark "\u00A2","¢", //cent "\u00A3","£", //pound "\u00A4","¤", //currency "\u00A5","¥", //yen "\u00A6","¦", //broken vertical bar "\u00A7","§", //section sign "\u00A8","¨", //diaeresis (umlaut) "\u00A9","©", //copyright sign "\u00AA","ª", //feminine ordinal indicator "\u00AB","«", //left-pointing double angle quotation mark "\u00AC","¬", //not sign "\u00AD","", //soft hyphen "\u00AE","®", //registered sign "\u00AF","¯", //macron "\u00B0","°", //degree sign "\u00B1","±", //plus-minus sign "\u00B2","²", //superscript two "\u00B3","³", //superscript three "\u00B4","´", //acute accent "\u00B5","µ", //micro sign "\u00B6","¶", //paragraph sign "\u00B7","·", //middle dot "\u00B8","¸", //cedilla "\u00B9","¹", //superscript one "\u00BA","º", //masculine ordinal indicator "\u00BB","»", //right-pointing double angle quotation mark "\u00BC","¼", //fraction 1/4 "\u00BD","½", //fraction 1/2 "\u00BE","¾", //fraction 3/4 "\u00BF","¿", //inverted (spanisch) questionmark "\u00C0","À", "\u00C1","Á", "\u00C2","Â", "\u00C3","Ã", "\u00C4","Ä", "\u00C5","Å", "\u00C6","Æ", "\u00C7","Ç", "\u00C8","È", "\u00C9","É", "\u00CA","Ê", "\u00CB","Ë", "\u00CC","Ì", "\u00CD","Í", "\u00CE","Î", "\u00CF","Ï", "\u00D0","Ð", "\u00D1","Ñ", "\u00D2","Ò", "\u00D3","Ó", "\u00D4","Ô", "\u00D5","Õ", "\u00D6","Ö", "\u00D7","×", "\u00D8","Ø", "\u00D9","Ù", "\u00DA","Ú", "\u00DB","Û", "\u00DC","Ü", "\u00DD","Ý", "\u00DE","Þ", "\u00DF","ß", "\u00E0","à", "\u00E1","á", "\u00E2","â", "\u00E3","ã", "\u00E4","ä", "\u00E5","å", "\u00E6","æ", "\u00E7","ç", "\u00E8","è", "\u00E9","é", "\u00EA","ê", "\u00EB","ë", "\u00EC","ì", "\u00ED","í", "\u00EE","î", "\u00EF","ï", "\u00F0","ð", "\u00F1","ñ", "\u00F2","ò", "\u00F3","ó", "\u00F4","ô", "\u00F5","õ", "\u00F6","ö", "\u00F7","÷", "\u00F8","ø", "\u00F9","ù", "\u00FA","ú", "\u00FB","û", "\u00FC","ü", "\u00FD","ý", "\u00FE","þ", "\u00FF","ÿ" }; }