diff --git a/source/de/anomic/data/wiki/WikiParserException.java b/source/de/anomic/data/wiki/WikiParserException.java new file mode 100644 index 000000000..ce2769111 --- /dev/null +++ b/source/de/anomic/data/wiki/WikiParserException.java @@ -0,0 +1,20 @@ +package de.anomic.data.wiki; + +public class WikiParserException extends RuntimeException { + + private static final long serialVersionUID = 1L; + + public WikiParserException() { } + + public WikiParserException(String message) { + super(message); + } + + public WikiParserException(Throwable cause) { + super(cause); + } + + public WikiParserException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/source/de/anomic/data/wiki/tokens/AbstractToken.java b/source/de/anomic/data/wiki/tokens/AbstractToken.java index 018d9b7b6..dbffa1b6a 100644 --- a/source/de/anomic/data/wiki/tokens/AbstractToken.java +++ b/source/de/anomic/data/wiki/tokens/AbstractToken.java @@ -53,12 +53,12 @@ public abstract class AbstractToken implements Token { protected String markup = null; protected boolean parsed = false; - protected abstract boolean parse(); + protected abstract void parse(); public String getMarkup() { if (this.text == null) throw new IllegalArgumentException(); - if (!this.parsed && !parse()) return this.text; + if (!this.parsed) parse(); return this.markup; } diff --git a/source/de/anomic/data/wiki/tokens/LinkToken.java b/source/de/anomic/data/wiki/tokens/LinkToken.java index 9ada22ef4..74e6aa84a 100644 --- a/source/de/anomic/data/wiki/tokens/LinkToken.java +++ b/source/de/anomic/data/wiki/tokens/LinkToken.java @@ -47,14 +47,23 @@ package de.anomic.data.wiki.tokens; +import java.util.ArrayList; +import java.util.Iterator; import java.util.regex.Matcher; import java.util.regex.Pattern; +import de.anomic.data.bookmarksDB; +import de.anomic.data.bookmarksDB.Bookmark; +import de.anomic.data.bookmarksDB.Tag; +import de.anomic.data.wiki.WikiParserException; +import de.anomic.plasma.plasmaSwitchboard; + public class LinkToken extends AbstractToken { private static final int IMG = 0; - private static final int INT = 1; - private static final int EXT = 2; + private static final int BKM = 1; + private static final int INT = 2; + private static final int EXT = 3; private static final Pattern imgPattern = Pattern.compile( "\\[\\[" + // begin @@ -64,6 +73,12 @@ public class LinkToken extends AbstractToken { "(\\|(([^\\]]|\\][^\\]])*))" + // description ")?" + // "\\]\\]"); // end + + private static final Pattern bkmPattern = Pattern.compile( + "\\[\\[" + // begin + "(Bookmark:([^\\]|]|\\][^\\]])*)" + // "Bookmark:" + URL + "(\\|(([^\\]]|\\][^\\]])*?))?" + // optional description + "\\]\\]"); // end private static final Pattern intPattern = Pattern.compile( "\\[\\[" + // begin @@ -78,66 +93,114 @@ public class LinkToken extends AbstractToken { "\\]"); // end private static final Pattern[] patterns = new Pattern[] { - imgPattern, intPattern, extPattern }; + imgPattern, bkmPattern, intPattern, extPattern }; private final String localhost; private final String wikiPath; + private final plasmaSwitchboard sb; private int patternNr = 0; - public LinkToken(String localhost, String wikiPath) { + public LinkToken(String localhost, String wikiPath, plasmaSwitchboard sb) { this.localhost = localhost; this.wikiPath = wikiPath; + this.sb = sb; } - protected boolean parse() { + protected void parse() { StringBuffer sb = new StringBuffer(); - Matcher m; - switch (this.patternNr) { + if (this.patternNr < 0 || this.patternNr >= patterns.length) + throw new WikiParserException("patternNr was not set correctly: " + this.patternNr); + Matcher m = patterns[this.patternNr].matcher(this.text); + if (!m.find()) + throw new WikiParserException("Didn't find match for: (" + this.patternNr + ") " + this.text); + + switch (this.patternNr) { case IMG: - m = imgPattern.matcher(this.text); - if (!m.find()) return false; - sb.append("\"").append(m.group(7)).append("\"");"); break; + + case BKM: + Link[] links = getLinksFromBookmarkTag(m.group(2)); + if (links == null) { + sb.append("Couldn't find Bookmark-Tag '").append(m.group(2)).append("'."); + } else { + appendLinks(links, sb); + } + break; case INT: - m = intPattern.matcher(this.text); - if (!m.find()) return false; - sb.append(""); - if (m.group(4) != null) sb.append(m.group(4)); else sb.append(m.group(1)); - sb.append(""); + sb.append(new Link( + "http://" + this.localhost + "/" + this.wikiPath + m.group(1), + m.group(4), + (m.group(4) == null) ? m.group(1) : m.group(4) + ).toString()); break; case EXT: - m = extPattern.matcher(this.text); - if (!m.find()) return false; - sb.append(""); - if (m.group(3) != null) sb.append(m.group(3)); else sb.append(m.group(1)); - sb.append(""); + sb.append(new Link( + m.group(1), + m.group(3), + (m.group(3) == null) ? m.group(1) : m.group(3) + ).toString()); break; - - default: return false; } this.parsed = true; this.markup = new String(sb); - return true; - } - - private String formatLink(String link) { - if (link.indexOf("://") == -1) { // DATA/HTDOCS-link - return "http://" + this.localhost + "/" + link; - } else { // 'normal' link - return link; - } } + + private String formatHref(String link) { + if (link.indexOf("://") == -1) { // DATA/HTDOCS-link + return "http://" + this.localhost + "/share/" + link; + } else { // 'normal' link + return link; + } + } + + private StringBuffer appendLinks(Link[] links, StringBuffer sb) { + for (int i=0; i"); + if (this.desc == null) sb.append(this.href); else sb.append(this.desc); + sb.append(""); + return new String(sb); + } + } public String[] getBlockElementNames() { return null; } public Pattern[] getRegex() { return patterns; } diff --git a/source/de/anomic/data/wiki/tokens/ListToken.java b/source/de/anomic/data/wiki/tokens/ListToken.java index 2055cb9db..16b7d7113 100644 --- a/source/de/anomic/data/wiki/tokens/ListToken.java +++ b/source/de/anomic/data/wiki/tokens/ListToken.java @@ -104,12 +104,11 @@ public class ListToken extends AbstractToken { blockElements = (String[])r.toArray(new String[r.size()]); } - protected boolean parse() { + protected void parse() { StringBuffer sb = new StringBuffer(this.text.length()); parse(this.text.split("\n"), 0, sb); this.markup = new String(sb); this.parsed = true; - return true; } protected StringBuffer parse(String[] t, int depth, StringBuffer sb) { diff --git a/source/de/anomic/data/wiki/tokens/SimpleToken.java b/source/de/anomic/data/wiki/tokens/SimpleToken.java index c2c3e4be1..2290b9d02 100644 --- a/source/de/anomic/data/wiki/tokens/SimpleToken.java +++ b/source/de/anomic/data/wiki/tokens/SimpleToken.java @@ -51,6 +51,8 @@ import java.util.ArrayList; import java.util.regex.Matcher; import java.util.regex.Pattern; +import de.anomic.data.wiki.WikiParserException; + public class SimpleToken extends AbstractToken { protected String content = null; @@ -94,19 +96,16 @@ public class SimpleToken extends AbstractToken { setText(this.text, 0); } } - if (!this.parsed && !parse()) return this.text; + if (!this.parsed) try { parse(); } catch (WikiParserException e) { return this.text; } return this.markup; } - protected boolean parse() { + protected void parse() { String[] e; - if ((e = definitionList[this.grade]) == null || definitionList.length <= this.grade) { - System.err.println("token not defined for grade: " + this.grade); - return false; - } + if (this.grade >= this.definitionList.length || (e = this.definitionList[this.grade]) == null) + throw new WikiParserException("Token not defined for grade: " + this.grade); this.markup = getMarkup(e); this.parsed = true; - return true; } protected String getMarkup(String[] es) { diff --git a/source/de/anomic/data/wiki/tokens/TableToken.java b/source/de/anomic/data/wiki/tokens/TableToken.java index abf766476..a9aee689d 100644 --- a/source/de/anomic/data/wiki/tokens/TableToken.java +++ b/source/de/anomic/data/wiki/tokens/TableToken.java @@ -48,7 +48,6 @@ package de.anomic.data.wiki.tokens; import java.util.HashMap; -import java.util.Iterator; import java.util.regex.Pattern; public class TableToken extends AbstractToken { @@ -61,7 +60,7 @@ public class TableToken extends AbstractToken { }; private static final String[] blockElementNames = new String[] { "table", "tr", "td" }; - protected boolean parse() { + protected void parse() { String[] t = text.split("\n"); String[] tds; StringBuffer sb = new StringBuffer(); @@ -87,7 +86,6 @@ public class TableToken extends AbstractToken { if (trOpen) sb.append("\t\n"); this.markup = new String(sb.append("")); this.parsed = true; - return true; } // from de.anomic.data.wikiCode.java.parseTableProperties, modified by [FB] @@ -105,45 +103,38 @@ public class TableToken extends AbstractToken { * Valid in this case means if they are a property for the table, tr or td * tag as stated in the HTML Pocket Reference by Jennifer Niederst (1st edition) * The method is important to avoid XSS attacks on the wiki via table properties. - * @param str A string that may contain several table properties and/or junk. + * @param properties A string that may contain several table properties and/or junk. * @return A string that only contains table properties. */ private static StringBuffer parseTableProperties(final String properties){ String[] values = properties.replaceAll(""", "").split("[= ]"); //splitting the string at = and blanks StringBuffer sb = new StringBuffer(properties.length()); - Iterator it; - String key, valkey, value; + String key, value; + String[] posVals; int numberofvalues = values.length; - main: for (int i=0; i 1) { - r.add(tokens[i].getBlockElementNames()[j].substring(0, k)); - } else { - r.add(tokens[i].getBlockElementNames()[j]); - } - } - r.add("hr"); - BEs = (String[])r.toArray(new String[r.size()]); - } + public final Token[] tokens; + private final String[] BEs; + + public wikiParser(plasmaSwitchboard sb) { + tokens = new Token[] { + new SimpleToken('=', '=', new String[][] { null, { "h2" }, { "h3" }, { "h4" } }, true), + new SimpleToken('\'', '\'', new String[][] { null, { "i" }, { "b" }, null, { "b", "i" } }, false), + new LinkToken("localhost:8080"/*yacyCore.seedDB.mySeed.getAddress()*/, "Wiki.html?page=", sb), + new ListToken('*', "ul"), + new ListToken('#', "ol"), + new ListToken(':', "blockquote", null), + new ListToken(' ', null, "tt", false), + new DefinitionListToken(), + new TableToken() + }; + ArrayList r = new ArrayList(); + for (int i=0, k, j; i 1) { + r.add(tokens[i].getBlockElementNames()[j].substring(0, k)); + } else { + r.add(tokens[i].getBlockElementNames()[j]); + } + } + r.add("hr"); + BEs = (String[])r.toArray(new String[r.size()]); + } public static void main(String[] args) { - String text = "===Title===\n" + - "==blubb[== was ==ein '''shice'''==...och.bla\n" + + String text = "===T
itle===\n" +
+				"==blubb== was ==ein '''shice'''==...och.bla\n" +
 				"* ein \n" +
-				"*==test==\n" +
+				"*==test=
=\n" + "** doppelt\n" + "* ''tess*sst''\n" + "*** xyz\n" + @@ -118,29 +121,31 @@ public class wikiParser { ":doppel-blubb[= huch =]\n" + ";hier:da\n" + ";dort:und so\n" + - ";;und:doppelt"; + ";;und:doppelt\n\n\n\n" + + "[[Image:blubb|BLA]]"; // text = "[=\n=]* bla"; String t = "[=] ein fucking [= test =]-text[=,ne?!=] joa, [=alles=]wunderbar," + "[=denk ich=] mal =]"; long l = System.currentTimeMillis(); - t = parse((args.length > 0) ? args[0] : text); + t = new wikiParser(null).parse((args.length > 0) ? args[0] : text); System.out.println("parsing time: " + (System.currentTimeMillis() - l) + " ms"); System.out.println("--- --- ---"); System.out.println(t); } - // TODO: - // - preParse: - // -
~
- - public static String parse(String text) { + public String parse(String text) { Text[] tt = Text.split2Texts(text, "[=", "=]"); for (int i=0; i", ""); + for (int i=0; i"); } - private static String replaceBRs(String text) { + private String replaceBRs(String text) { StringBuffer sb = new StringBuffer(text.length()); String[] tt = text.split("\n"); boolean replace; @@ -175,8 +180,10 @@ public class wikiParser { for (j=0; j")) { replace = false; break; } sb.append(tt[i]); - if (replace && i < tt.length - 1) sb.append("
"); - if (i < tt.length - 1) sb.append("\n"); + if (i < tt.length - 1) { + if (replace) sb.append("
"); + sb.append("\n"); + } } return new String(sb); } @@ -193,7 +200,7 @@ public class wikiParser { this.text = text; this.escaped = escaped; this.nl = newLineBefore; - } + } public String setTextPlain(String text) { return this.text = text; } public String setText(String text) { @@ -215,22 +222,23 @@ public class wikiParser { public String toString() { return this.text; } public boolean isEscaped() { return this.escaped; } public boolean isNewLineBefore() { return this.nl; } - + private static Text[] split2Texts(String text, String escapeBegin, String escapeEnd) { if (text == null) return null; if (text.length() < 2) return new Text[] { new Text(text, false, true) }; int startLen = escapeBegin.length(); + int endLen = escapeEnd.length(); ArrayList r = new ArrayList(); boolean escaped = text.startsWith(escapeBegin); if (escaped) r.add(new Text("", false, true)); int i, j = 0; while ((i = text.indexOf((escaped) ? escapeEnd : escapeBegin, j)) > -1) { - r.add(resolve2Text(text, escaped, (j > 0) ? j + startLen : 0, i, escapeEnd)); + r.add(resolve2Text(text, escaped, (j > 0) ? j + ((escaped) ? startLen : endLen) : 0, i, escapeEnd)); j = i; escaped = !escaped; } - r.add(resolve2Text(text, escaped, (escaped) ? j : (j > 0) ? j + startLen : 0, -1, escapeEnd)); + r.add(resolve2Text(text, escaped, (escaped) ? j : (j > 0) ? j + endLen : 0, -1, escapeEnd)); return (Text[])r.toArray(new Text[r.size()]); }