package de.anomic.data; public class htmlTools { /** Replaces special characters from a string. Avoids XSS attacks and ensures correct display of * special characters in non UTF-8 capable browsers. * @param text a string that possibly contains HTML * @return the string with all special characters encoded */ //[MN] public static String replaceHTML(String text) { text = replace(text, xmlentities); text = replace(text, htmlentities); return text; } /** Replaces special characters from a string. Ensures correct display of * special characters in non UTF-8 capable browsers. * @param text a string that possibly contains special characters * @return the string with all special characters encoded */ //[MN] public static String replaceHTMLEntities(String text) { text = replace(text, htmlentities); return text; } /** Replaces special characters from a string. Avoids XSS attacks. * @param text a string that possibly contains HTML * @return the string without any HTML-tags that can be used for XSS */ //[MN] public static String replaceXMLEntities(String text) { text = replace(text, xmlentities); return text; } /** Replaces characters in a string with other characters defined in an array. * @param text a string that possibly contains special characters * @param entities array that contains characters to be replaced and characters it will be replaced by * @return the string with all characters replaced by the corresponding character from array */ //[FB], changes by [MN] public static String replace(String text, String[] entities) { if (text==null) { return null; } for (int x=0;x<=entities.length-1;x=x+2) { int p=0; while ((p=text.indexOf(entities[x],p))>=0) { text=text.substring(0,p)+entities[x+1]+text.substring(p+entities[x].length()); p+=entities[x+1].length(); } } return text; } public static String deReplaceHTML(String text) { text = deReplaceHTMLEntities(text); text = deReplaceXMLEntities(text); return text; } public static String deReplaceHTMLEntities(String text) { return deReplace(text, htmlentities); } public static String deReplaceXMLEntities(String text) { return deReplace(text, xmlentities); } public static String deReplace(String text, String[] entities) { if (text == null) return null; for (int i=entities.length-1; i>0; i-=2) { int p = 0; while ((p = text.indexOf(entities[i])) >= 0) { text = text.substring(0, p) + entities[i - 1] + text.substring(p + entities[i].length()); p += entities[i - 1].length(); } } return text; } //This array contains codes (see http://mindprod.com/jgloss/unicode.html for details) //that will be replaced. To add new codes or patterns, just put them at the end //of the list. Codes or patterns in this list can not be escaped with [= or
    public static final String[] xmlentities={
        // Ampersands _have_ to be replaced first. If they were replaced later,
        // other replaced characters containing ampersands would get messed up.
        "\u0026","&",      //ampersand
        "\"",""",         //quotation mark
        "\u003C","<",       //less than
        "\u003E",">",       //greater than
    };

    //This array contains codes (see http://mindprod.com/jgloss/unicode.html for details) and
    //patterns that will be replaced. To add new codes or patterns, just put them at the end
    //of the list. Codes or patterns in this list can not be escaped with [= or 
    public static final String[] htmlentities={
        "\u005E","^",  // Caret

        "\u0060","`",  // Accent Grave `
        "\u007B","{",  // {
        "\u007C","|",  // |
        "\u007D","}",  // }
        "\u007E","~",  // ~

        "\u0082","‚",
        "\u0083","ƒ",
        "\u0084","„",
        "\u0085","…",
        "\u0086","†",
        "\u0087","‡",
        "\u0088","ˆ",
        "\u0089","‰",
        "\u008A","Š",
        "\u008B","‹",
        "\u008C","Œ",
        "\u008D","",
        "\u008E","Ž",

        "\u0091","‘",
        "\u0092","’",
        "\u0093","“",
        "\u0094","”",
        "\u0095","•",
        "\u0096","–",
        "\u0097","—",
        "\u0098","˜",
        "\u0099","™",
        "\u009A","š",
        "\u009B","›",
        "\u009C","œ",
        "\u009D","",
        "\u009E","ž",
        "\u009F","Ÿ",

        "\u00A1","¡",    //inverted (spanish) exclamation mark
        "\u00A2","¢",     //cent
        "\u00A3","£",    //pound
        "\u00A4","¤",   //currency
        "\u00A5","¥",      //yen
        "\u00A6","¦",   //broken vertical bar
        "\u00A7","§",     //section sign
        "\u00A8","¨",      //diaeresis (umlaut)
        "\u00A9","©",     //copyright sign
        "\u00AA","ª",     //feminine ordinal indicator
        "\u00AB","«",    //left-pointing double angle quotation mark
        "\u00AC","¬",      //not sign
        "\u00AD","­",      //soft hyphen
        "\u00AE","®",      //registered sign
        "\u00AF","¯",     //macron
        "\u00B0","°",      //degree sign
        "\u00B1","±",   //plus-minus sign
        "\u00B2","²",     //superscript two
        "\u00B3","³",     //superscript three
        "\u00B4","´",    //acute accent
        "\u00B5","µ",    //micro sign
        "\u00B6","¶",     //paragraph sign
        "\u00B7","·",   //middle dot
        "\u00B8","¸",    //cedilla
        "\u00B9","¹",     //superscript one
        "\u00BA","º",     //masculine ordinal indicator
        "\u00BB","»",    //right-pointing double angle quotation mark
        "\u00BC","¼",   //fraction 1/4
        "\u00BD","½",   //fraction 1/2
        "\u00BE","¾",   //fraction 3/4
        "\u00BF","¿",   //inverted (spanisch) questionmark
        "\u00C0","À",
        "\u00C1","Á",
        "\u00C2","Â",
        "\u00C3","Ã",
        "\u00C4","Ä",
        "\u00C5","Å",
        "\u00C6","Æ",
        "\u00C7","Ç",
        "\u00C8","È",
        "\u00C9","É",
        "\u00CA","Ê",
        "\u00CB","Ë",
        "\u00CC","Ì",
        "\u00CD","Í",
        "\u00CE","Î",
        "\u00CF","Ï",
        "\u00D0","Ð",
        "\u00D1","Ñ",
        "\u00D2","Ò",
        "\u00D3","Ó",
        "\u00D4","Ô",
        "\u00D5","Õ",
        "\u00D6","Ö",
        "\u00D7","×",
        "\u00D8","Ø",
        "\u00D9","Ù",
        "\u00DA","Ú",
        "\u00DB","Û",
        "\u00DC","Ü",
        "\u00DD","Ý",
        "\u00DE","Þ",
        "\u00DF","ß",
        "\u00E0","à",
        "\u00E1","á",
        "\u00E2","â",
        "\u00E3","ã",
        "\u00E4","ä",
        "\u00E5","å",
        "\u00E6","æ",
        "\u00E7","ç",
        "\u00E8","è",
        "\u00E9","é",
        "\u00EA","ê",
        "\u00EB","ë",
        "\u00EC","ì",
        "\u00ED","í",
        "\u00EE","î",
        "\u00EF","ï",
        "\u00F0","ð",
        "\u00F1","ñ",
        "\u00F2","ò",
        "\u00F3","ó",
        "\u00F4","ô",
        "\u00F5","õ",
        "\u00F6","ö",
        "\u00F7","÷",
        "\u00F8","ø",
        "\u00F9","ù",
        "\u00FA","ú",
        "\u00FB","û",
        "\u00FC","ü",
        "\u00FD","ý",
        "\u00FE","þ",
        "\u00FF","ÿ"
    };
}