From c45517db466ddf46c8f6c2289a395db2226fa3e9 Mon Sep 17 00:00:00 2001 From: low012 Date: Mon, 23 Jan 2006 13:59:40 +0000 Subject: [PATCH] *) replaced code for table with better version (by kane) *) split replaceHTML into replaceCharacters and replaceHTMLonly, replaceHTML can still be used to ensure compatibility git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1416 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/data/wikiCode.java | 147 +++++++++++++++++++--------- 1 file changed, 101 insertions(+), 46 deletions(-) diff --git a/source/de/anomic/data/wikiCode.java b/source/de/anomic/data/wikiCode.java index 2b236dac3..511625421 100644 --- a/source/de/anomic/data/wikiCode.java +++ b/source/de/anomic/data/wikiCode.java @@ -62,14 +62,16 @@ public class wikiCode { private String ListLevel=""; private String defListLevel=""; private plasmaSwitchboard sb; + private boolean cellprocessing=false; // needed for prevention of double-execution of replaceHTML private boolean escape = false; //needed for escape private boolean escaped = false; //needed for
 not getting in the way
     private boolean escapeSpan = false;       //needed for escape symbols [= and =] spanning over several lines
-    private boolean newrowstart=false;      // needed for the first row not to be empty
+    private boolean newrowstart=false;        // needed for the first row not to be empty
     private boolean preformatted = false;     //needed for preformatted text
     private boolean preformattedSpan = false; //needed for 
 and 
spanning over several lines - private boolean replaced = false; //indicates if method replaceHTML has been used with line already - private boolean table=false; // needed for tables, because they reach over several lines + private boolean replacedHTML = false; //indicates if method replaceHTML has been used with line already + private boolean replacedCharacters = false; //indicates if method replaceCharacters has been used with line + private boolean table=false; // needed for tables, because they reach over several lines private int preindented = 0; //needed for indented
s
     private int escindented = 0;              //needed for indented [=s
 
@@ -112,35 +114,73 @@ public class wikiCode {
         }
     }
 
-    /** Replaces special characters from a string. Otherwise they might cause ugly output on some systems.
-      * This code is also important to avoid XSS attacks.
-      *
+    /** Replaces special characters from a string. Avoids XSS attacks and ensures correct display of
+      * special characters in non UTF-8 capable browsers.
+      * @param text a string that possibly contains HTML
+      * @return the string with all special characters encoded
+      * @author Marc Nause, replaces code by Alexander Schier
+      */
+    public static String replaceHTML(String text) {
+        text = replace(text, characters);
+        text = replace(text, htmlentities);
+        return text;
+    }
+
+    /** Replaces special characters from a string. Ensures correct display of
+      * special characters in non UTF-8 capable browsers.
       * @param text a string that possibly contains special characters
-      * @return the string with all special characters encoded so they will look right on every system
+      * @return the string with all special characters encoded
+      * @author Marc Nause, replaces code by Alexander Schier
+      */
+    public static String replaceCharacters(String text) {
+        text = replace(text, characters);
+        return text;
+    }
+
+    /** Replaces special characters from a string. Avoids XSS attacks.
+      * @param text a string that possibly contains HTML
+      * @return the string without any HTML-tags that can be used for XSS
+      * @author Marc Nause, replaces code by Alexander Schier
+      */
+    public static String replaceHTMLonly(String text) {
+        text = replace(text, htmlentities);
+        return text;
+    }
+
+    /** Replaces characters in a string with other characters defined in an array.
+      * @param text a string that possibly contains special characters
+      * @param entities array of alternating pairs: each string to be replaced is followed by its replacement
+      * @return the string with every occurrence of a listed string replaced by its replacement from the array
       * @author Franz Brausse, few changes by Marc Nause, replaces code by Alexander Schier
       */
-    public static String replaceHTML(String text) {
+    public static String replace(String text, String[] entities) {
         if (text==null) { return null; }
-        for (int x=0;x<=htmlentities.length-1;x=x+2) {
+        for (int x=0;x<=entities.length-1;x=x+2) {
             int p=0;
-            while ((p=text.indexOf(htmlentities[x],p))>=0) {
-                text=text.substring(0,p)+htmlentities[x+1]+text.substring(p+htmlentities[x].length());
-                p+=htmlentities[x+1].length();
+            while ((p=text.indexOf(entities[x],p))>=0) {
+                text=text.substring(0,p)+entities[x+1]+text.substring(p+entities[x].length());
+                p+=entities[x+1].length();
             }
         }
         return text;
     }
 
-    //This array contains codes (see http://mindprod.com/jgloss/unicode.html for details) and
-    //patterns that will be replaced. To add new codes or patterns, just put them at the end
+    //This array contains codes (see http://mindprod.com/jgloss/unicode.html for details) 
+    //that will be replaced. To add new codes or patterns, just put them at the end
     //of the list. Codes or patterns in this list can not be escaped with [= or 
     public static String[] htmlentities={
         // Ampersands _have_ to be replaced first. If they were replaced later,
         // other replaced characters containing ampersands would get messed up.
-        "\u0026","&",      //ampersand
         "\"",""",         //quotation mark
         "\u003C","<",       //less than
         "\u003E",">",       //greater than
+    };
+
+    //This array contains codes (see http://mindprod.com/jgloss/unicode.html for details) and
+    //patterns that will be replaced. To add new codes or patterns, just put them at the end
+    //of the list. Codes or patterns in this list can not be escaped with [= or 
+    public static String[] characters={
+        "\u0026","&",      //ampersand
         "\u00A1","¡",    //inverted (spanish) exclamation mark
         "\u00A2","¢",     //cent
         "\u00A3","£",    //pound
@@ -242,61 +282,74 @@ public class wikiCode {
     /** This method processes tables in the wiki code.
       * @param a string that might contain parts of a table
       * @return a string with wiki code of parts of table replaced by HTML code for table
-      * @author Franz Brausse, slight changes by Marc Nause
+      * @author Franz Brausse, changes by Marc Nause
       */
-    private String processTable(String result){
+    private String processTable(String result, plasmaSwitchboard switchboard){
         String line="";
-        if (result.startsWith("{|") && (!table)) {                // Table begin
+        if ((result.startsWith("{|")) && (!table)) {
             table=true;
             newrowstart=true;
-            line+="2) {
-                line+=result.substring(2);
+                line+=result.substring(2).replaceAll(""","\"");
             }
             line+=">";
-        } else if (result.startsWith("|-") && (table)) {          // new row
+            result=line;
+        }
+        else if (result.startsWith("|-") && (table)) {          // new row
             if (!newrowstart) {
                 line+="\t\n";
             } else {
                 newrowstart=false;
             }
             line=line+"\t";
-        } else if ((result.startsWith("| ")) && (table)) {        // new cell
+            result=line;
+        }
+        else if ((result.startsWith("||")) && (table)) {
+            result = replaceHTMLonly(result);
             line+="\t\t=0)?(result.indexOf("||")):(result.length());
-            if ((propEnd=result.indexOf(" | "))>0) {              // till result.indexOf(" | ") properties for cell
-                line+=result.substring(1,propEnd).replaceAll(""","\"");
+            int cellEnd=(result.indexOf("||",2)>0)?(result.indexOf("||",2)):(result.length());
+            int propEnd=(result.indexOf("|",2)>0)?(result.indexOf("|",2)):(cellEnd);
+            // both point at same place => new line
+            if (propEnd==cellEnd) {
+                propEnd=1;
+            } else {
+                line+=" "+result.substring(2,propEnd).trim().replaceAll(""","\"");
             }
-            // finish first cell
-            line+=">"+result.substring(propEnd+2,textEnd)+"";
-            if (textEnd0) {           // process other cells if existent
-                line+="\n"+result.substring(textEnd+1);
-            } 
-        } else if (result.startsWith("|}") && (table)) {          // Table end
+            table=false; cellprocessing=true;
+            line+=">"+processTable(result.substring(propEnd+1,cellEnd).trim(), switchboard)+"";
+            table=true; cellprocessing=false;
+            if (cellEnd wiki-tags in cells can be treated after that
+            result = this.processTable(result, switchboard);
+
             // format lines
             if (result.startsWith(" ")) result = "" + result + "";
             if (result.startsWith("----")) result = "
"; @@ -673,10 +729,9 @@ public class wikiCode { } //end contrib [MN] - result = this.processTable(result); - - replaced = false; - if ((result.endsWith(""))||(defList)||(escape)||(preformatted)||(table)) return result; + replacedHTML = false; + replacedCharacters = false; + if ((result.endsWith(""))||(defList)||(escape)||(preformatted)||(table)||(cellprocessing)) return result; return result + "
"; } }