From 39a2000d8bd70905cd2d6523e3c613618593382c Mon Sep 17 00:00:00 2001 From: karlchenofhell Date: Sat, 24 Feb 2007 21:26:48 +0000 Subject: [PATCH] - added support for [[Bookmark:$bookmarkTag|description]]-link-listings (requested by theli) to wiki-parser - added support for <pre>-tags to wiki-parser

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3393 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 .../anomic/data/wiki/WikiParserException.java |  20 +++
 .../data/wiki/tokens/AbstractToken.java       |   4 +-
 .../de/anomic/data/wiki/tokens/LinkToken.java | 139 +++++++++++++-----
 .../de/anomic/data/wiki/tokens/ListToken.java |   3 +-
 .../anomic/data/wiki/tokens/SimpleToken.java  |  13 +-
 .../anomic/data/wiki/tokens/TableToken.java   |  41 ++----
 source/de/anomic/data/wiki/wikiParser.java    | 102 +++++++------
 7 files changed, 201 insertions(+), 121 deletions(-)
 create mode 100644 source/de/anomic/data/wiki/WikiParserException.java

diff --git a/source/de/anomic/data/wiki/WikiParserException.java b/source/de/anomic/data/wiki/WikiParserException.java
new file mode 100644
index 000000000..ce2769111
--- /dev/null
+++ b/source/de/anomic/data/wiki/WikiParserException.java
@@ -0,0 +1,20 @@
+package de.anomic.data.wiki;
+
+public class WikiParserException extends RuntimeException {
+    // Unchecked exception thrown by token parse() implementations when a token cannot be turned into markup.
+    private static final long serialVersionUID = 1L;
+    
+    public WikiParserException() {  }
+    /** Creates the exception with a description of the parsing problem. */
+    public WikiParserException(String message) {
+        super(message);
+    }
+    /** Creates the exception wrapping the underlying cause. */
+    public WikiParserException(Throwable cause) {
+        super(cause);
+    }
+    /** Creates the exception with a description of the parsing problem and the underlying cause. */
+    public WikiParserException(String message, Throwable cause) {
+        super(message, cause);
+    }
+}
diff --git a/source/de/anomic/data/wiki/tokens/AbstractToken.java b/source/de/anomic/data/wiki/tokens/AbstractToken.java
index 018d9b7b6..dbffa1b6a 100644
--- a/source/de/anomic/data/wiki/tokens/AbstractToken.java
+++ b/source/de/anomic/data/wiki/tokens/AbstractToken.java
@@ -53,12 +53,12 @@ public abstract class AbstractToken implements Token {
 	protected String markup = null;
 	protected boolean parsed = false;
 	
-	protected abstract boolean parse();
+	protected abstract void parse(); // implementations shown in this patch set markup and parsed, or throw WikiParserException
 	
 	public String getMarkup() {
 		if (this.text == null)
 			throw new IllegalArgumentException();
-		if (!this.parsed && !parse()) return this.text;
+		if (!this.parsed) parse(); // parsed lazily on first access; no fallback to this.text here, an exception from parse() propagates
 		return this.markup;
 	}
 	
diff --git a/source/de/anomic/data/wiki/tokens/LinkToken.java b/source/de/anomic/data/wiki/tokens/LinkToken.java
index 9ada22ef4..74e6aa84a 100644
--- a/source/de/anomic/data/wiki/tokens/LinkToken.java
+++ b/source/de/anomic/data/wiki/tokens/LinkToken.java
@@ -47,14 +47,23 @@
 
 package de.anomic.data.wiki.tokens;
 
+import java.util.ArrayList;
+import java.util.Iterator;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import de.anomic.data.bookmarksDB;
+import de.anomic.data.bookmarksDB.Bookmark;
+import de.anomic.data.bookmarksDB.Tag;
+import de.anomic.data.wiki.WikiParserException;
+import de.anomic.plasma.plasmaSwitchboard;
+
 public class LinkToken extends AbstractToken {
 	
 	private static final int IMG = 0;
-	private static final int INT = 1;
-	private static final int EXT = 2;
+    private static final int BKM = 1; // position of bkmPattern in patterns[]
+	private static final int INT = 2; // position of intPattern in patterns[]
+	private static final int EXT = 3; // position of extPattern in patterns[]
 	
 	private static final Pattern imgPattern = Pattern.compile(
 			"\\[\\[" +											// begin
@@ -64,6 +73,12 @@ public class LinkToken extends AbstractToken {
 				"(\\|(([^\\]]|\\][^\\]])*))" +					// description
 			")?" +												// 
 			"\\]\\]");											// end
+    
+    private static final Pattern bkmPattern = Pattern.compile(  // matches [[Bookmark:tag]] and [[Bookmark:tag|description]]
+            "\\[\\[" +                                          // begin
+            "(Bookmark:((?:[^\\]|]|\\][^\\]])*))" +             // "Bookmark:" + tag; group 2 wraps the whole repetition so group(2) is the complete tag (a quantified capturing group keeps only its last iteration)
+            "(\\|(([^\\]]|\\][^\\]])*?))?" +                    // optional description (group 4)
+            "\\]\\]");                                          // end
 	
 	private static final Pattern intPattern = Pattern.compile(
 			"\\[\\[" +											// begin
@@ -78,66 +93,114 @@ public class LinkToken extends AbstractToken {
 			"\\]");												// end
 	
 	private static final Pattern[] patterns = new Pattern[] {
-		imgPattern, intPattern, extPattern };
+		imgPattern, bkmPattern, intPattern, extPattern }; // order must match the IMG, BKM, INT, EXT indices: parse() looks patterns up by patternNr
 	
 	private final String localhost;
 	private final String wikiPath;
+    private final plasmaSwitchboard sb;
 	private int patternNr = 0;
 	
-	public LinkToken(String localhost, String wikiPath) {
+	public LinkToken(String localhost, String wikiPath, plasmaSwitchboard sb) { // sb may be null: wikiParser.main() builds the parser with null
 		this.localhost = localhost;
 		this.wikiPath = wikiPath;
+        this.sb = sb; // NOTE: parse() declares a local StringBuffer also named sb, which shadows this field there
 	}
 	
-	protected boolean parse() {
+	protected void parse() { // builds this.markup from this.text using the pattern selected by patternNr; throws WikiParserException on failure
 		StringBuffer sb = new StringBuffer();
-		Matcher m;
-		switch (this.patternNr) {
+        if (this.patternNr < 0 || this.patternNr >= patterns.length)
+            throw new WikiParserException("patternNr was not set correctly: " + this.patternNr); // guards the array access below
+		Matcher m = patterns[this.patternNr].matcher(this.text); // matching is done once here instead of once per case
+        if (!m.find())
+            throw new WikiParserException("Didn't find match for: (" + this.patternNr + ") " + this.text); // replaces the former 'return false'
+        
+        switch (this.patternNr) {
 			case IMG:
-				m = imgPattern.matcher(this.text);
-				if (!m.find()) return false;
-				sb.append("\"").append(m.group(7)).append("\"");");
 				break;
+                
+            case BKM:
+                Link[] links = getLinksFromBookmarkTag(m.group(2)); // group 2 of bkmPattern is used as the bookmark tag to look up
+                if (links == null) {
+                    sb.append("Couldn't find Bookmark-Tag '").append(m.group(2)).append("'."); // a missing tag is reported inline in the markup, not as an exception
+                } else {
+                    appendLinks(links, sb);
+                }
+                break;
 				
 			case INT:
-				m = intPattern.matcher(this.text);
-				if (!m.find()) return false;
-				sb.append("");
-				if (m.group(4) != null) sb.append(m.group(4)); else sb.append(m.group(1));
-				sb.append("");
+				sb.append(new Link(
+                                "http://" + this.localhost + "/" + this.wikiPath + m.group(1),
+                                m.group(4),
+                                (m.group(4) == null) ? m.group(1) : m.group(4) // falls back to group 1 when group 4 did not match
+                        ).toString());
 				break;
 				
 			case EXT:
-				m = extPattern.matcher(this.text);
-				if (!m.find()) return false;
-				sb.append("");
-				if (m.group(3) != null) sb.append(m.group(3)); else sb.append(m.group(1));
-				sb.append("");
+				sb.append(new Link(
+                                m.group(1),
+                                m.group(3),
+                                (m.group(3) == null) ? m.group(1) : m.group(3) // falls back to group 1 when group 3 did not match
+                        ).toString());
 				break;
-				
-			default: return false;
 		}
 		this.parsed = true;
 		this.markup = new String(sb);
-		return true;
-	}
-	
-	private String formatLink(String link) {
-		if (link.indexOf("://") == -1) {		// DATA/HTDOCS-link
-			return "http://" + this.localhost + "/" + link;
-		} else {								// 'normal' link
-			return link;
-		}
 	}
+    
+    private String formatHref(String link) {
+        // a target that carries a scheme separator is a 'normal' link and is returned untouched
+        final boolean absolute = link.indexOf("://") != -1;
+        if (absolute) return link;
+        // everything else is a DATA/HTDOCS-link, resolved below /share/ on the configured host
+        return "http://" + this.localhost + "/share/" + link;
+    }
+    
+    private StringBuffer appendLinks(Link[] links, StringBuffer sb) {
+        for (int i=0; i");
+            if (this.desc == null) sb.append(this.href); else sb.append(this.desc);
+            sb.append("");
+            return new String(sb);
+        }
+    }
 	
 	public String[] getBlockElementNames() { return null; }
 	public Pattern[] getRegex() { return patterns; }
diff --git a/source/de/anomic/data/wiki/tokens/ListToken.java b/source/de/anomic/data/wiki/tokens/ListToken.java
index 2055cb9db..16b7d7113 100644
--- a/source/de/anomic/data/wiki/tokens/ListToken.java
+++ b/source/de/anomic/data/wiki/tokens/ListToken.java
@@ -104,12 +104,11 @@ public class ListToken extends AbstractToken {
 		blockElements = (String[])r.toArray(new String[r.size()]);
 	}
 	
-	protected boolean parse() {
+	protected void parse() { // renders the text line by line starting at depth 0; the old version always returned true, so no result is needed
 		StringBuffer sb = new StringBuffer(this.text.length());
 		parse(this.text.split("\n"), 0, sb);
 		this.markup = new String(sb);
 		this.parsed = true;
-		return true;
 	}
 	
 	protected StringBuffer parse(String[] t, int depth, StringBuffer sb) {
diff --git a/source/de/anomic/data/wiki/tokens/SimpleToken.java b/source/de/anomic/data/wiki/tokens/SimpleToken.java
index c2c3e4be1..2290b9d02 100644
--- a/source/de/anomic/data/wiki/tokens/SimpleToken.java
+++ b/source/de/anomic/data/wiki/tokens/SimpleToken.java
@@ -51,6 +51,8 @@ import java.util.ArrayList;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import de.anomic.data.wiki.WikiParserException;
+
 public class SimpleToken extends AbstractToken {
 	
 	protected String content = null;
@@ -94,19 +96,16 @@ public class SimpleToken extends AbstractToken {
 				setText(this.text, 0);
 			}
 		}
-		if (!this.parsed && !parse()) return this.text;
+		if (!this.parsed) try { parse(); } catch (WikiParserException e) { return this.text; }
 		return this.markup;
 	}
 	
-	protected boolean parse() {
+	protected void parse() { // throws WikiParserException for an undefined grade; this class's getMarkup() catches it and returns the raw text
 		String[] e;
-		if ((e = definitionList[this.grade]) == null || definitionList.length <= this.grade) {
-			System.err.println("token not defined for grade: " + this.grade);
-			return false;
-		}
+		if (this.grade >= this.definitionList.length || (e = this.definitionList[this.grade]) == null) // upper bound is checked before the array access
+		    throw new WikiParserException("Token not defined for grade: " + this.grade);
 		this.markup = getMarkup(e);
 		this.parsed = true;
-		return true;
 	}
 	
 	protected String getMarkup(String[] es) {
diff --git a/source/de/anomic/data/wiki/tokens/TableToken.java b/source/de/anomic/data/wiki/tokens/TableToken.java
index abf766476..a9aee689d 100644
--- a/source/de/anomic/data/wiki/tokens/TableToken.java
+++ b/source/de/anomic/data/wiki/tokens/TableToken.java
@@ -48,7 +48,6 @@
 package de.anomic.data.wiki.tokens;
 
 import java.util.HashMap;
-import java.util.Iterator;
 import java.util.regex.Pattern;
 
 public class TableToken extends AbstractToken {
@@ -61,7 +60,7 @@ public class TableToken extends AbstractToken {
 	};
 	private static final String[] blockElementNames = new String[] { "table", "tr", "td" };
 	
-	protected boolean parse() {
+	protected void parse() {
 		String[] t = text.split("\n");
 		String[] tds;
 		StringBuffer sb = new StringBuffer();
@@ -87,7 +86,6 @@ public class TableToken extends AbstractToken {
 		if (trOpen) sb.append("\t\n");
 		this.markup =  new String(sb.append(""));
 		this.parsed = true;
-		return true;
 	}
 	
     // from de.anomic.data.wikiCode.java.parseTableProperties, modified by [FB]
@@ -105,45 +103,38 @@ public class TableToken extends AbstractToken {
       * Valid in this case means if they are a property for the table, tr or td
       * tag as stated in the HTML Pocket Reference by Jennifer Niederst (1st edition)
       * The method is important to avoid XSS attacks on the wiki via table properties.
-      * @param str A string that may contain several table properties and/or junk.
+      * @param properties A string that may contain several table properties and/or junk.
       * @return A string that only contains table properties.
       */
     private static StringBuffer parseTableProperties(final String properties){
         String[] values = properties.replaceAll(""", "").split("[= ]");     //splitting the string at = and blanks
         StringBuffer sb = new StringBuffer(properties.length());
-        Iterator it;
-        String key, valkey, value;
+        String key, value;
+        String[] posVals;
         int numberofvalues = values.length;
-        main: for (int i=0; i 1) {
-						r.add(tokens[i].getBlockElementNames()[j].substring(0, k));
-					} else {
-						r.add(tokens[i].getBlockElementNames()[j]);
-					}
-				}
-		r.add("hr");
-		BEs = (String[])r.toArray(new String[r.size()]);
-	}
+	public final Token[] tokens;
+	private final String[] BEs;
+    
+    public wikiParser(plasmaSwitchboard sb) {
+        tokens = new Token[] {
+                new SimpleToken('=', '=', new String[][] { null, { "h2" }, { "h3" }, { "h4" } }, true),
+                new SimpleToken('\'', '\'', new String[][] { null, { "i" }, { "b" }, null, { "b", "i" } }, false),
+                new LinkToken("localhost:8080"/*yacyCore.seedDB.mySeed.getAddress()*/, "Wiki.html?page=", sb),
+                new ListToken('*', "ul"),
+                new ListToken('#', "ol"),
+                new ListToken(':', "blockquote", null),
+                new ListToken(' ', null, "tt", false),
+                new DefinitionListToken(),
+                new TableToken()
+        };
+        ArrayList r = new ArrayList();
+        for (int i=0, k, j; i 1) {
+                        r.add(tokens[i].getBlockElementNames()[j].substring(0, k));
+                    } else {
+                        r.add(tokens[i].getBlockElementNames()[j]);
+                    }
+                }
+        r.add("hr");
+        BEs = (String[])r.toArray(new String[r.size()]);
+    }
 	
 	public static void main(String[] args) {
-		String text = "===Title===\n" +
-				"==blubb[== was ==ein '''shice'''==...och.bla\n" +
+		String text = "===T
itle===\n" +
+				"==blubb== was ==ein '''shice'''==...och.bla\n" +
 				"* ein \n" +
-				"*==test==\n" +
+				"*==test=
=\n" + "** doppelt\n" + "* ''tess*sst''\n" + "*** xyz\n" + @@ -118,29 +121,31 @@ public class wikiParser { ":doppel-blubb[= huch =]\n" + ";hier:da\n" + ";dort:und so\n" + - ";;und:doppelt"; + ";;und:doppelt\n\n\n\n" + + "[[Image:blubb|BLA]]"; // text = "[=\n=]* bla"; String t = "[=] ein fucking [= test =]-text[=,ne?!=] joa, [=alles=]wunderbar," + "[=denk ich=] mal =]"; long l = System.currentTimeMillis(); - t = parse((args.length > 0) ? args[0] : text); + t = new wikiParser(null).parse((args.length > 0) ? args[0] : text); System.out.println("parsing time: " + (System.currentTimeMillis() - l) + " ms"); System.out.println("--- --- ---"); System.out.println(t); } - // TODO: - // - preParse: - // -
~
- - public static String parse(String text) { + public String parse(String text) { Text[] tt = Text.split2Texts(text, "[=", "=]"); for (int i=0; i", "
"); + for (int i=0; i"); } - private static String replaceBRs(String text) { + private String replaceBRs(String text) { StringBuffer sb = new StringBuffer(text.length()); String[] tt = text.split("\n"); boolean replace; @@ -175,8 +180,10 @@ public class wikiParser { for (j=0; j")) { replace = false; break; } sb.append(tt[i]); - if (replace && i < tt.length - 1) sb.append("
"); - if (i < tt.length - 1) sb.append("\n"); + if (i < tt.length - 1) { + if (replace) sb.append("
"); + sb.append("\n"); + } } return new String(sb); } @@ -193,7 +200,7 @@ public class wikiParser { this.text = text; this.escaped = escaped; this.nl = newLineBefore; - } + } public String setTextPlain(String text) { return this.text = text; } public String setText(String text) { @@ -215,22 +222,23 @@ public class wikiParser { public String toString() { return this.text; } public boolean isEscaped() { return this.escaped; } public boolean isNewLineBefore() { return this.nl; } - + private static Text[] split2Texts(String text, String escapeBegin, String escapeEnd) { if (text == null) return null; if (text.length() < 2) return new Text[] { new Text(text, false, true) }; int startLen = escapeBegin.length(); + int endLen = escapeEnd.length(); ArrayList r = new ArrayList(); boolean escaped = text.startsWith(escapeBegin); if (escaped) r.add(new Text("", false, true)); int i, j = 0; while ((i = text.indexOf((escaped) ? escapeEnd : escapeBegin, j)) > -1) { - r.add(resolve2Text(text, escaped, (j > 0) ? j + startLen : 0, i, escapeEnd)); + r.add(resolve2Text(text, escaped, (j > 0) ? j + ((escaped) ? startLen : endLen) : 0, i, escapeEnd)); j = i; escaped = !escaped; } - r.add(resolve2Text(text, escaped, (escaped) ? j : (j > 0) ? j + startLen : 0, -1, escapeEnd)); + r.add(resolve2Text(text, escaped, (escaped) ? j : (j > 0) ? j + endLen : 0, -1, escapeEnd)); return (Text[])r.toArray(new Text[r.size()]); }