diff --git a/source/de/anomic/data/list/ListAccumulator.java b/source/de/anomic/data/list/ListAccumulator.java index 7e3d2ac2b..e73d7b8ad 100644 --- a/source/de/anomic/data/list/ListAccumulator.java +++ b/source/de/anomic/data/list/ListAccumulator.java @@ -4,9 +4,9 @@ // // (C) 2009 by Marc Nause // -// $LastChangedDate: $ -// $LastChangedRevision: $ -// $LastChangedBy: $ +// $LastChangedDate$ +// $LastChangedRevision$ +// $LastChangedBy$ // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by diff --git a/source/de/anomic/data/list/XMLBlacklistImporter.java b/source/de/anomic/data/list/XMLBlacklistImporter.java index 5b09eba22..cbae9f235 100644 --- a/source/de/anomic/data/list/XMLBlacklistImporter.java +++ b/source/de/anomic/data/list/XMLBlacklistImporter.java @@ -4,9 +4,9 @@ // // (C) 2009 by Marc Nause // -// $LastChangedDate: $ -// $LastChangedRevision: $ -// $LastChangedBy: $ +// $LastChangedDate$ +// $LastChangedRevision$ +// $LastChangedBy$ // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by diff --git a/source/de/anomic/data/wiki/abstractWikiParser.java b/source/de/anomic/data/wiki/abstractWikiParser.java index a7255cb1c..9ac61b036 100644 --- a/source/de/anomic/data/wiki/abstractWikiParser.java +++ b/source/de/anomic/data/wiki/abstractWikiParser.java @@ -1,3 +1,28 @@ +// abstractWikiParser.java +// --------- +// part of YaCy +// (C) by Michael Peter Christen; mc@yacy.net +// first published on http://www.anomic.de +// Frankfurt, Germany, 2007 +// +// $LastChangedDate$ +// $LastChangedRevision$ +// $LastChangedBy$ +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + package de.anomic.data.wiki; import java.io.BufferedReader; diff --git a/source/de/anomic/data/wiki/knwikiParser.java b/source/de/anomic/data/wiki/knwikiParser.java index db67786fd..d067513ed 100644 --- a/source/de/anomic/data/wiki/knwikiParser.java +++ b/source/de/anomic/data/wiki/knwikiParser.java @@ -1,4 +1,4 @@ -// wikiParser.java +// knwikiParser.java // --------- // part of YaCy // (C) by Michael Peter Christen; mc@yacy.net @@ -8,9 +8,9 @@ // // This file is contributed by Franz Brausze // -// $LastChangedDate: $ -// $LastChangedRevision: $ -// $LastChangedBy: $ +// $LastChangedDate$ +// $LastChangedRevision$ +// $LastChangedBy$ // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -42,55 +42,55 @@ import de.anomic.search.Switchboard; public class knwikiParser implements wikiParser { - public Token[] tokens; - private String[] BEs; + public Token[] tokens; + private String[] BEs; private final Switchboard sb; private knwikiParser(final Switchboard sb) { this.sb = sb; } - public static void main(final String[] args) { - final String text = "===T
itle===\n" +
-				"==blubb== was ==ein '''shice'''==...och.bla\n" +
-				"* ein \n" +
-				"*==test=
=\n" + - "** doppelt\n" + - "* ''tess*sst''\n" + - "*** xyz\n" + - "=]*** huch\n" + - "* ehehe***\n" + - "* blubb\n" + - "bliblablo\n\n\n" + - "* blubb\n" + - "{|border=-1\n" + - "|-\n" + - "||bla|| blubb\n" + - "|-\n" + - "||align center|och||huch||\n" + - "|}\n" + - "\n" + - "# bla\n" + - "# blubb\n" + - "'''''ehehehe''''', ne?!\n" + - "[http://www/index.html,ne?!] -\n" + - "[[Image:blubb|BLA]] ---- och\n" + - " blubb1\n" + - " blubb2\n" + - ":doppel-blubb[= huch =]\n" + - ";hier:da\n" + - ";dort:und so\n" + - ";;und:doppelt\n\n\n\n" + - "[[Image:blubb|BLA]]"; - // text = "[=\n=]* bla"; - String t = "[=] ein fucking [= test =]-text[=,ne?!=] joa, [=alles=]wunderbar," + - "[=denk ich=] mal =]"; - final long l = System.currentTimeMillis(); - t = new knwikiParser(null).parse((args.length > 0) ? args[0] : text, "localhost:8080"); + public static void main(final String[] args) { + final String text = "===T
itle===\n" +
+                            "==blubb== was ==ein '''shice'''==...och.bla\n" +
+                            "* ein \n" +
+                            "*==test=
=\n" + + "** doppelt\n" + + "* ''tess*sst''\n" + + "*** xyz\n" + + "=]*** huch\n" + + "* ehehe***\n" + + "* blubb\n" + + "bliblablo\n\n\n" + + "* blubb\n" + + "{|border=-1\n" + + "|-\n" + + "||bla|| blubb\n" + + "|-\n" + + "||align center|och||huch||\n" + + "|}\n" + + "\n" + + "# bla\n" + + "# blubb\n" + + "'''''ehehehe''''', ne?!\n" + + "[http://www/index.html,ne?!] -\n" + + "[[Image:blubb|BLA]] ---- och\n" + + " blubb1\n" + + " blubb2\n" + + ":doppel-blubb[= huch =]\n" + + ";hier:da\n" + + ";dort:und so\n" + + ";;und:doppelt\n\n\n\n" + + "[[Image:blubb|BLA]]"; + // text = "[=\n=]* bla"; + String t = "[=] ein fucking [= test =]-text[=,ne?!=] joa, [=alles=]wunderbar," + + "[=denk ich=] mal =]"; + final long l = System.currentTimeMillis(); + t = new knwikiParser(null).parse((args.length > 0) ? args[0] : text, "localhost:8080"); System.out.println("parsing time: " + (System.currentTimeMillis() - l) + " ms"); System.out.println("--- --- ---"); System.out.println(t); - } + } public String transform(final String content) { return parse(content, null); @@ -105,7 +105,7 @@ public class knwikiParser implements wikiParser { return parse(new String(content, encoding), null); } - private String parse(String text, final String publicAddress) { + private String parse(String text, final String publicAddress) { tokens = new Token[] { new SimpleToken('=', '=', new String[][] { null, { "h2" }, { "h3" }, { "h4" } }, true), new SimpleToken('\'', '\'', new String[][] { null, { "i" }, { "b" }, null, { "b", "i" } }, false), @@ -130,124 +130,130 @@ public class knwikiParser implements wikiParser { } r.add("hr"); BEs = r.toArray(new String[r.size()]); - + Text[] tt = Text.split2Texts(text, "[=", "=]"); for (int i=0; i", ""); for (int i=0; i"); - } + text = new String(m.appendTail(stringBuffer)); + } + } + return text.replaceAll("----", "
"); + } - private String replaceBRs(final String text) { - final StringBuilder sb = new StringBuilder(text.length()); - final String[] tt = text.split("\n"); - boolean replace; - for (int i=0, j; i")) { replace = false; break; } - sb.append(tt[i]); + private String replaceBRs(final String text) { + final StringBuilder stringBuffer = new StringBuilder(text.length()); + final String[] tt = text.split("\n"); + boolean replace; + for (int i=0, j; i")) { replace = false; break; } + stringBuffer.append(tt[i]); if (i < tt.length - 1) { - if (replace) sb.append("
"); - sb.append("\n"); + if (replace) stringBuffer.append("
"); + stringBuffer.append("\n"); } - } - return new String(sb); - } + } + return new String(stringBuffer); + } - private static class Text { - - public static final String escapeNewLine = "@"; - - private String text; - private final boolean nl; - - public Text(final String text, final boolean escaped, final boolean newLineBefore) { - this.text = text; - this.nl = newLineBefore; + private static class Text { + + public static final String escapeNewLine = "@"; + + private String text; + private final boolean nl; + + public Text(final String text, final boolean escaped, final boolean newLineBefore) { + this.text = text; + this.nl = newLineBefore; } - public String setText(final String text) { - if (this.nl) - this.text = text.substring(escapeNewLine.length()); - else - this.text = text; - return this.text; - } + public String setText(final String text) { + if (this.nl) { + this.text = text.substring(escapeNewLine.length()); + } else { + this.text = text; + } + return this.text; + } - public String getTextPlain() { return this.text; } - public String getText() { - if (this.nl) - return escapeNewLine + this.text; - return this.text; - } + public String getTextPlain() { + return this.text; + } + + public String getText() { + if (this.nl) { + return escapeNewLine + this.text; + } + return this.text; + } - public String toString() { return this.text; } + @Override + public String toString() { + return this.text; + } - static Text[] split2Texts(final String text, final String escapeBegin, final String escapeEnd) { - if (text == null) return null; - if (text.length() < 2) return new Text[] { new Text(text, false, true) }; - - final int startLen = escapeBegin.length(); + static Text[] split2Texts(final String text, final String escapeBegin, final String escapeEnd) { + + if (text == null) return null; + + if (text.length() < 2) return new Text[] {new Text(text, false, true) }; + + final int startLen = escapeBegin.length(); final int endLen = escapeEnd.length(); - final ArrayList r = new ArrayList(); - boolean escaped = text.startsWith(escapeBegin); - if (escaped) r.add(new Text("", false, true)); - int i, j = 0; - while ((i = text.indexOf((escaped) ? escapeEnd : escapeBegin, j)) > -1) { - r.add(resolve2Text(text, escaped, (j > 0) ? j + ((escaped) ? startLen : endLen) : 0, i, escapeEnd)); - j = i; - escaped = !escaped; - } - r.add(resolve2Text(text, escaped, (escaped) ? j : (j > 0) ? j + endLen : 0, -1, escapeEnd)); - return r.toArray(new Text[r.size()]); - } + final ArrayList r = new ArrayList(); + boolean escaped = text.startsWith(escapeBegin); + if (escaped) r.add(new Text("", false, true)); + int i, j = 0; + while ((i = text.indexOf((escaped) ? escapeEnd : escapeBegin, j)) > -1) { + r.add(resolve2Text(text, escaped, (j > 0) ? j + ((escaped) ? startLen : endLen) : 0, i, escapeEnd)); + j = i; + escaped = !escaped; + } + r.add(resolve2Text(text, escaped, (escaped) ? j : (j > 0) ? j + endLen : 0, -1, escapeEnd)); + return r.toArray(new Text[r.size()]); + } - private static Text resolve2Text(final String text, final boolean escaped, final int from, int to, final String escapeEnd) { - if (to == -1) to = text.length(); - return new Text( - text.substring(from, to), - escaped, - from < escapeEnd.length() + 2 || (!escaped && text.charAt(from - escapeEnd.length() - 1) == '\n')); - } + private static Text resolve2Text(final String text, final boolean escaped, final int from, int to, final String escapeEnd) { + if (to == -1) to = text.length(); + return new Text( + text.substring(from, to), + escaped, + from < escapeEnd.length() + 2 || (!escaped && text.charAt(from - escapeEnd.length() - 1) == '\n')); + } - static String mergeTexts(final Text[] texts) { - final StringBuilder sb = new StringBuilder(2000); - for (int n=0; n < texts.length; n++) - sb.append(texts[n].getTextPlain()); - return new String(sb); - } - } + static String mergeTexts(final Text[] texts) { + final StringBuilder sb = new StringBuilder(2000); + for (int n=0; n < texts.length; n++) { + sb.append(texts[n].getTextPlain()); + } + return new String(sb); + } + } } diff --git a/source/de/anomic/data/wiki/tokens/AbstractToken.java b/source/de/anomic/data/wiki/tokens/AbstractToken.java index e7b4b9013..bf5d5c97a 100644 --- a/source/de/anomic/data/wiki/tokens/AbstractToken.java +++ b/source/de/anomic/data/wiki/tokens/AbstractToken.java @@ -8,9 +8,9 @@ // // This file is contributed by Franz Brausze // -// $LastChangedDate: $ -// $LastChangedRevision: $ -// $LastChangedBy: $ +// $LastChangedDate$ +// $LastChangedRevision$ +// $LastChangedBy$ // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -32,20 +32,32 @@ import de.anomic.data.wiki.wikiParserException; public abstract class AbstractToken implements Token { - protected String text = null; - protected String markup = null; - protected boolean parsed = false; - - protected abstract void parse() throws wikiParserException; - - public String getMarkup() throws wikiParserException { - if (this.text == null) - throw new IllegalArgumentException(); - if (!this.parsed) parse(); - return this.markup; - } + protected String text = null; + protected String markup = null; + protected boolean parsed = false; + + protected abstract void parse() throws wikiParserException; + + public String getMarkup() throws wikiParserException { + if (this.text == null) { + throw new IllegalArgumentException(); + } + if (!this.parsed) { + parse(); + } + return this.markup; + } - public String getText() { return this.text; } + public String getText() { + return this.text; + } - public String toString() { try { return getMarkup(); } catch (final wikiParserException e) { return null; } } + @Override + public String toString() { + try { + return getMarkup(); + } catch (final wikiParserException e) { + return null; + } + } } diff --git a/source/de/anomic/data/wiki/tokens/DefinitionListToken.java b/source/de/anomic/data/wiki/tokens/DefinitionListToken.java index 27dd80ad4..eff7648d7 100644 --- a/source/de/anomic/data/wiki/tokens/DefinitionListToken.java +++ b/source/de/anomic/data/wiki/tokens/DefinitionListToken.java @@ -8,9 +8,9 @@ // // This file is contributed by Franz Brausze // -// $LastChangedDate: $ -// $LastChangedRevision: $ -// $LastChangedBy: $ +// $LastChangedDate$ +// $LastChangedRevision$ +// $LastChangedBy$ // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -30,39 +30,45 @@ package de.anomic.data.wiki.tokens; public class DefinitionListToken extends ListToken { - //private static final String[] blockElements = { "dl", "dt", "dd" }; + //private static final String[] blockElements = { "dl", "dt", "dd" }; - public DefinitionListToken() { - super(';', null, null); - } + public DefinitionListToken() { + super(';', null, null); + } - protected StringBuilder parse(final String[] t, final int depth, final StringBuilder sb) { - sb.append("
\n"); - while (super.aktline < t.length && getGrade(t[super.aktline]) >= depth) { - for (int j=0; j"); + @Override + protected StringBuilder parse(final String[] t, final int depth, final StringBuilder sb) { + sb.append("
\n"); + while (super.aktline < t.length && getGrade(t[super.aktline]) >= depth) { + for (int j=0; j"); - if (getGrade(t[super.aktline]) > depth) { - parse(t, depth + 1, sb); - } else { - sb.append(t[super.aktline].substring(depth + 1).replaceFirst(":", "
")); - } + if (getGrade(t[super.aktline]) > depth) { + parse(t, depth + 1, sb); + } else { + sb.append(t[super.aktline].substring(depth + 1).replaceFirst(":", "
")); + } - sb.append(" depth) - sb.append("dt"); - else - sb.append("dd"); - sb.append(">\n"); - super.aktline++; - } - for (int j=0; j"); - super.aktline--; - return sb; - } + sb.append(" depth) { + sb.append("dt"); + } else { + sb.append("dd"); + } + sb.append(">\n"); + super.aktline++; + } + for (int j=0; j"); + super.aktline--; + return sb; + } - public String[] getBlockElementNames() { - return blockElements; - } + @Override + public String[] getBlockElementNames() { + return blockElements; + } + } diff --git a/source/de/anomic/data/wiki/tokens/LinkToken.java b/source/de/anomic/data/wiki/tokens/LinkToken.java index 525ea7d44..2c65cab5a 100644 --- a/source/de/anomic/data/wiki/tokens/LinkToken.java +++ b/source/de/anomic/data/wiki/tokens/LinkToken.java @@ -8,9 +8,9 @@ // // This file is contributed by Franz Brausze // -// $LastChangedDate: $ -// $LastChangedRevision: $ -// $LastChangedBy: $ +// $LastChangedDate$ +// $LastChangedRevision$ +// $LastChangedBy$ // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -41,96 +41,102 @@ import de.anomic.search.Switchboard; public class LinkToken extends AbstractToken { - private static final int IMG = 0; + private static final int IMG = 0; private static final int BKM = 1; - private static final int INT = 2; - private static final int EXT = 3; + private static final int INT = 2; + private static final int EXT = 3; - private static final Pattern imgPattern = Pattern.compile( - "\\[\\[" + // begin - "(Image:([^\\]|]|\\][^\\]])*)" + // "Image:" + URL - "(" + // - "(\\|(bottom|left|center|right|middle|top))?" + // optional align - "(\\|(([^\\]]|\\][^\\]])*))" + // description - ")?" + // - "\\]\\]"); // end + private static final Pattern imgPattern = Pattern.compile( + "\\[\\[" + // begin + "(Image:([^\\]|]|\\][^\\]])*)" + // "Image:" + URL + "(" + // + "(\\|(bottom|left|center|right|middle|top))?" + // optional align + "(\\|(([^\\]]|\\][^\\]])*))" + // description + ")?" + // + "\\]\\]"); // end private static final Pattern bkmPattern = Pattern.compile( - "\\[\\[" + // begin - "(Bookmark:([^\\]|]|\\][^\\]])*)" + // "Bookmark:" + URL - "(\\|(([^\\]]|\\][^\\]])*?))?" + // optional description - "\\]\\]"); // end - - private static final Pattern intPattern = Pattern.compile( - "\\[\\[" + // begin - "(([^\\]|]|\\][^\\]])*?)" + // wiki-page - "(\\|(([^\\]]|\\][^\\]])*?))?" + // optional desciption - "\\]\\]"); // end + "\\[\\[" + // begin + "(Bookmark:([^\\]|]|\\][^\\]])*)" + // "Bookmark:" + URL + "(\\|(([^\\]]|\\][^\\]])*?))?" + // optional description + "\\]\\]"); // end + + private static final Pattern intPattern = Pattern.compile( + "\\[\\[" + // begin + "(([^\\]|]|\\][^\\]])*?)" + // wiki-page + "(\\|(([^\\]]|\\][^\\]])*?))?" + // optional desciption + "\\]\\]"); // end - private static final Pattern extPattern = Pattern.compile( - "\\[" + // begin - "([^\\] ]*)" + // URL - "( ([^\\]]*))?" + // optional description - "\\]"); // end + private static final Pattern extPattern = Pattern.compile( + "\\[" + // begin + "([^\\] ]*)" + // URL + "( ([^\\]]*))?" + // optional description + "\\]"); // end - private static final Pattern[] patterns = new Pattern[] { - imgPattern, bkmPattern, intPattern, extPattern }; + private static final Pattern[] patterns = new Pattern[] { imgPattern, bkmPattern, intPattern, extPattern }; - private final String localhost; - private final String wikiPath; + private final String localhost; + private final String wikiPath; private final Switchboard sb; - private int patternNr = 0; + private int patternNr = 0; - public LinkToken(final String localhost, final String wikiPath, final Switchboard sb) { - this.localhost = localhost; - this.wikiPath = wikiPath; + public LinkToken(final String localhost, final String wikiPath, final Switchboard sb) { + this.localhost = localhost; + this.wikiPath = wikiPath; this.sb = sb; - } + } - protected void parse() throws wikiParserException { - final StringBuilder sb = new StringBuilder(); - if (this.patternNr < 0 || this.patternNr >= patterns.length) + protected void parse() throws wikiParserException { + final StringBuilder stringBuilder = new StringBuilder(); + + if (this.patternNr < 0 || this.patternNr >= patterns.length) { throw new wikiParserException("patternNr was not set correctly: " + this.patternNr); - final Matcher m = patterns[this.patternNr].matcher(this.text); - if (!m.find()) + } + + final Matcher m = patterns[this.patternNr].matcher(this.text); + + if (!m.find()) { throw new wikiParserException("Didn't find match for: (" + this.patternNr + ") " + this.text); + } switch (this.patternNr) { - case IMG: - sb.append("\"").append((m.group(7)"); - break; + case IMG: + stringBuilder.append("\"").append((m.group(7)"); + break; case BKM: final Link[] links = getLinksFromBookmarkTag(m.group(2)); if (links == null) { - sb.append("Couldn't find Bookmark-Tag '").append(m.group(2)).append("'."); + stringBuilder.append("Couldn't find Bookmark-Tag '").append(m.group(2)).append("'."); } else { - appendLinks(links, sb); + appendLinks(links, stringBuilder); } break; - case INT: - sb.append(new Link( - "http://" + this.localhost + "/" + this.wikiPath + m.group(1), - m.group(4), - (m.group(4) == null) ? m.group(1) : m.group(4) - ).toString()); - break; + case INT: + stringBuilder.append(new Link( + "http://" + this.localhost + "/" + this.wikiPath + m.group(1), + m.group(4), + (m.group(4) == null) ? m.group(1) : m.group(4) + ).toString()); + break; - case EXT: - sb.append(new Link( - m.group(1), - m.group(3), - (m.group(3) == null) ? m.group(1) : m.group(3) - ).toString()); - break; - } - this.parsed = true; - this.markup = new String(sb); - } + case EXT: + stringBuilder.append(new Link( + m.group(1), + m.group(3), + (m.group(3) == null) ? m.group(1) : m.group(3) + ).toString()); + break; + } + this.parsed = true; + this.markup = new String(stringBuilder); + } private String formatHref(final String link) { if (link.indexOf("://") == -1) { // DATA/HTDOCS-link @@ -171,25 +177,35 @@ public class LinkToken extends AbstractToken { this.desc = desc; } + @Override public String toString() { - final StringBuilder sb = new StringBuilder(); - sb.append(""); - if (this.desc == null) sb.append(this.href); else sb.append(this.desc); - sb.append(""); - return new String(sb); + final StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append(""); + if (this.desc == null) stringBuilder.append(this.href); else stringBuilder.append(this.desc); + stringBuilder.append(""); + return new String(stringBuilder); } } - public String[] getBlockElementNames() { return null; } - public Pattern[] getRegex() { return patterns; } + public String[] getBlockElementNames() { + return null; + } + + public Pattern[] getRegex() { + return patterns; + } - public boolean setText(final String text, final int patternNr) { - this.text = text; - this.patternNr = patternNr; - this.parsed = false; - if (text == null) { this.markup = null; this.patternNr = -1; } - return true; - } + public boolean setText(final String text, final int patternNr) { + this.text = text; + this.patternNr = patternNr; + this.parsed = false; + if (text == null) { + this.markup = null; + this.patternNr = -1; + } + return true; + } + } diff --git a/source/de/anomic/data/wiki/tokens/ListToken.java b/source/de/anomic/data/wiki/tokens/ListToken.java index 5062f9e62..7fc6bca52 100644 --- a/source/de/anomic/data/wiki/tokens/ListToken.java +++ b/source/de/anomic/data/wiki/tokens/ListToken.java @@ -8,9 +8,9 @@ // // This file is contributed by Franz Brausze // -// $LastChangedDate: $ -// $LastChangedRevision: $ -// $LastChangedBy: $ +// $LastChangedDate$ +// $LastChangedRevision$ +// $LastChangedBy$ // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -33,111 +33,97 @@ import java.util.regex.Pattern; public class ListToken extends AbstractToken { - protected final String[] blockElements; - - protected final char firstChar; - protected final String listBlockElement; - protected final String listElement; - protected final boolean recursion; - protected final Pattern[] pattern; - - protected int aktline = 0; - - public ListToken(final char firstChar, final String listBlockElement) { - this.firstChar = firstChar; - this.listBlockElement = listBlockElement; - this.listElement = "li"; - this.recursion = true; - this.pattern = new Pattern[] { Pattern.compile("^[" + firstChar + "]([^\n]|\n[" + firstChar + "])*", Pattern.MULTILINE) }; - final ArrayList r = new ArrayList(); - if (this.listBlockElement != null) { - if (this.recursion) r.add(this.listBlockElement); - if (this.listElement != null) r.add(this.listElement); - } - blockElements = r.toArray(new String[r.size()]); - } + protected final String[] blockElements; + + protected final char firstChar; + protected final String listBlockElement; + protected final String listElement; + protected final boolean recursion; + protected final Pattern[] pattern; + + protected int aktline = 0; + + public ListToken(final char firstChar, final String listBlockElement) { + this(firstChar, listBlockElement, "li"); + } - public ListToken(final char firstChar, final String listBlockElement, final String listElement) { - this.firstChar = firstChar; - this.listBlockElement = listBlockElement; - this.listElement = listElement; - this.recursion = true; - this.pattern = new Pattern[] { Pattern.compile("^[" + firstChar + "]([^\n]|\n[" + firstChar + "])*", Pattern.MULTILINE) }; - final ArrayList r = new ArrayList(); - if (this.listBlockElement != null) { - if (this.recursion) r.add(this.listBlockElement); - if (this.listElement != null) r.add(this.listElement); - } - blockElements = r.toArray(new String[r.size()]); - } + public ListToken(final char firstChar, final String listBlockElement, final String listElement) { + this(firstChar, listBlockElement, listElement, true); + } - public ListToken(final char firstChar, final String listBlockElement, final String listElement, final boolean recursion) { - this.firstChar = firstChar; - this.listBlockElement = listBlockElement; - this.listElement = listElement; - this.recursion = recursion; - this.pattern = new Pattern[] { Pattern.compile("^[" + firstChar + "]([^\n]|\n[" + firstChar + "])*", Pattern.MULTILINE) }; - final ArrayList r = new ArrayList(); - if (this.listBlockElement != null) { - if (this.recursion) r.add(this.listBlockElement); - if (this.listElement != null) r.add(this.listElement); - } - blockElements = r.toArray(new String[r.size()]); - } + public ListToken(final char firstChar, final String listBlockElement, final String listElement, final boolean recursion) { + this.firstChar = firstChar; + this.listBlockElement = listBlockElement; + this.listElement = listElement; + this.recursion = recursion; + this.pattern = new Pattern[] { Pattern.compile("^[" + firstChar + "]([^\n]|\n[" + firstChar + "])*", Pattern.MULTILINE) }; + final ArrayList r = new ArrayList(); + if (this.listBlockElement != null) { + if (this.recursion) { + r.add(this.listBlockElement); + } + if (this.listElement != null) { + r.add(this.listElement); + } + } + blockElements = r.toArray(new String[r.size()]); + } - protected void parse() { - final StringBuilder sb = new StringBuilder(this.text.length()); - parse(this.text.split("\n"), 0, sb); - this.markup = new String(sb); - this.parsed = true; - } + protected void parse() { + final StringBuilder sb = new StringBuilder(this.text.length()); + parse(this.text.split("\n"), 0, sb); + this.markup = new String(sb); + this.parsed = true; + } - protected StringBuilder parse(final String[] t, final int depth, final StringBuilder sb) { - if (this.listBlockElement != null) sb.append("<").append(this.listBlockElement).append(">\n"); - while (this.aktline < t.length && getGrade(t[this.aktline]) >= depth) { - if (recursion) for (int j=0; j"); - - if (this.recursion && getGrade(t[this.aktline]) > depth) { - parse(t, depth + 1, sb); - } else { - sb.append(t[this.aktline].substring(depth + 1)); - } - - if (this.listElement != null) sb.append(""); - sb.append("\n"); - this.aktline++; - } - if (this.recursion) for (int j=0; j"); - this.aktline--; - return sb; - } + protected StringBuilder parse(final String[] t, final int depth, final StringBuilder sb) { + if (this.listBlockElement != null) sb.append("<").append(this.listBlockElement).append(">\n"); + while (this.aktline < t.length && getGrade(t[this.aktline]) >= depth) { + if (recursion) for (int j=0; j"); + + if (this.recursion && getGrade(t[this.aktline]) > depth) { + parse(t, depth + 1, sb); + } else { + sb.append(t[this.aktline].substring(depth + 1)); + } + + if (this.listElement != null) sb.append(""); + sb.append("\n"); + this.aktline++; + } + if (this.recursion) for (int j=0; j"); + this.aktline--; + return sb; + } - protected int getGrade(final String t) { - int i = 0; - for (i=0; i r = new ArrayList(); - int j; - for (i = 0; i < definitionList.length; i++) - if (definitionList[i] != null) - for (j = 0; j < definitionList[i].length; j++) - r.add(definitionList[i][j]); - this.blockElements = r.toArray(new String[r.size()]); - } else { - this.blockElements = null; - } - - for (i=0; i r = new ArrayList(); + int j; + for (i = 0; i < definitionList.length; i++) + if (definitionList[i] != null) + for (j = 0; j < definitionList[i].length; j++) + r.add(definitionList[i][j]); + this.blockElements = r.toArray(new String[r.size()]); + } else { + this.blockElements = null; + } + + for (i=0; i= this.definitionList.length || (e = this.definitionList[this.grade]) == null) - throw new wikiParserException("Token not defined for grade: " + this.grade); - this.markup = getMarkup(e); - this.parsed = true; - } + @Override + public String getMarkup() throws wikiParserException { + if (this.content == null) { + if (this.text == null) { + throw new IllegalArgumentException(); + } + setText(this.text, 0); + } + if (!this.parsed) parse(); + return this.markup; + } + + protected void parse() throws wikiParserException { + String[] e; + if (this.grade >= this.definitionList.length || (e = this.definitionList[this.grade]) == null) + throw new wikiParserException("Token not defined for grade: " + this.grade); + this.markup = getMarkup(e); + this.parsed = true; + } - protected String getMarkup(final String[] es) { - return getMarkup(es, false) + this.content + getMarkup(es, true); - } + protected String getMarkup(final String[] es) { + return getMarkup(es, false) + this.content + getMarkup(es, true); + } - protected String getMarkup(final String[] es, final boolean closing) { - final StringBuilder result = new StringBuilder(); - // backwards if closing - for ( - int i = (closing) ? es.length - 1 : 0, j; - (closing && i >= 0) ^ (!closing && i < es.length); - i += (closing) ? -1 : +1 - ) { - result.append("<"); - if (closing) { - result.append("/"); - if ((j = es[i].indexOf(' ')) > -1) { - result.append(es[i].substring(0, j)); - } else { - result.append(es[i]); - } - } else { - result.append(es[i]); - } - result.append(">"); - } - return new String(result); - } + protected String getMarkup(final String[] es, final boolean closing) { + final StringBuilder result = new StringBuilder(); + // backwards if closing + for ( + int i = (closing) ? es.length - 1 : 0, j; + (closing && i >= 0) ^ (!closing && i < es.length); + i += (closing) ? -1 : +1 + ) { + result.append("<"); + if (closing) { + result.append("/"); + if ((j = es[i].indexOf(' ')) > -1) { + result.append(es[i].substring(0, j)); + } else { + result.append(es[i]); + } + } else { + result.append(es[i]); + } + result.append(">"); + } + return new String(result); + } - public boolean setText(final String text, final int patternNr) { - this.text = text; - this.markup = null; - this.parsed = false; - if (text != null) { - final Matcher m = getRegex()[0].matcher(text); - if ( - (m.matches()) && - (m.group(1).length() == m.group(3).length()) && - (definitionList.length >= m.group(1).length()) && - (definitionList[m.group(1).length() - 1] != null) - ) { - this.grade = m.group(1).length() - 1; - this.content = m.group(2); - return true; - } - } - return false; - } + public boolean setText(final String text, final int patternNr) { + this.text = text; + this.markup = null; + this.parsed = false; + if (text != null) { + final Matcher m = getRegex()[0].matcher(text); + if ( + (m.matches()) && + (m.group(1).length() == m.group(3).length()) && + (definitionList.length >= m.group(1).length()) && + (definitionList[m.group(1).length() - 1] != null) + ) { + this.grade = m.group(1).length() - 1; + this.content = m.group(2); + return true; + } + } + return false; + } - public Pattern[] getRegex() { return this.pattern; } - public String[] getBlockElementNames() { return this.blockElements; } + public Pattern[] getRegex() { + return this.pattern; + } + + public String[] getBlockElementNames() { + return this.blockElements; + } + } diff --git a/source/de/anomic/data/wiki/tokens/TableToken.java b/source/de/anomic/data/wiki/tokens/TableToken.java index f917983ab..8e5cbbc87 100644 --- a/source/de/anomic/data/wiki/tokens/TableToken.java +++ b/source/de/anomic/data/wiki/tokens/TableToken.java @@ -8,9 +8,9 @@ // // This file is contributed by Franz Brausze // -// $LastChangedDate: $ -// $LastChangedRevision: $ -// $LastChangedBy: $ +// $LastChangedDate$ +// $LastChangedRevision$ +// $LastChangedBy$ // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -34,46 +34,49 @@ import java.util.regex.Pattern; public class TableToken extends AbstractToken { - private static final Pattern[] pattern = new Pattern[] { - Pattern.compile( - "\\{\\|" + // "{|" - "([^\n]|\n\\|[|-])*\n" + // new line must start with "||" or "|-" - "\\|\\}") // "|}" - }; - private static final String[] blockElementNames = new String[] { "table", "tr", "td" }; + private static final Pattern[] pattern = new Pattern[] { + Pattern.compile( + "\\{\\|" + // "{|" + "([^\n]|\n\\|[|-])*\n" + // new line must start with "||" or "|-" + "\\|\\}") // "|}" + }; + + private static final String[] blockElementNames = new String[] { "table", "tr", "td" }; - protected void parse() { - final String[] t = text.split("\n"); - String[] tds; - final StringBuilder sb = new StringBuilder(); - sb.append(" 2) sb.append(parseTableProperties(t[0].substring(2))); - sb.append(">\n"); - boolean trOpen = false; - for (int i=1, j, a; i\n"); - trOpen = (i < t.length - 2); - if (trOpen) sb.append("\t\n"); - } else if (t[i].startsWith("||")) { - tds = t[i].split("\\|\\|"); - for (j=0; j (a = tds[j].indexOf('|')) + 1) { // don't print empty td's - sb.append("\t\t -1) sb.append(parseTableProperties(tds[j].substring(0, a))); - sb.append(">").append(tds[j].substring(a + 1)).append("\n"); - } - } - } - } - if (trOpen) sb.append("\t\n"); - this.markup = new String(sb.append("")); - this.parsed = true; - } + protected void parse() { + final String[] t = text.split("\n"); + String[] tds; + final StringBuilder sb = new StringBuilder(); + sb.append(" 2) sb.append(parseTableProperties(t[0].substring(2))); + sb.append(">\n"); + boolean trOpen = false; + for (int i=1, j, a; i\n"); + trOpen = (i < t.length - 2); + if (trOpen) sb.append("\t\n"); + } else if (t[i].startsWith("||")) { + tds = t[i].split("\\|\\|"); + for (j=0; j (a = tds[j].indexOf('|')) + 1) { // don't print empty td's + sb.append("\t\t -1) sb.append(parseTableProperties(tds[j].substring(0, a))); + sb.append(">").append(tds[j].substring(a + 1)).append("\n"); + } + } + } + } + if (trOpen) sb.append("\t\n"); + this.markup = new String(sb.append("")); + this.parsed = true; + } // from de.anomic.data.wikiCode.java.parseTableProperties, modified by [FB] - private static final String[] tps = { "rowspan", "colspan", "vspace", "hspace", "cellspacing", "cellpadding", "border" }; + private static final String[] tps = { "rowspan", "colspan", "vspace", "hspace", "cellspacing", "cellpadding", "border" }; + private static final HashMap ps = new HashMap(); + static { Arrays.sort(tps); String[] array; @@ -87,7 +90,7 @@ public class TableToken extends AbstractToken { ps.put("align", array); } - // contributed by [MN] + // contributed by [MN] /** This method takes possible table properties and tests if they are valid. * Valid in this case means if they are a property for the table, tr or td * tag as stated in the HTML Pocket Reference by Jennifer Niederst (1st edition) @@ -102,21 +105,21 @@ public class TableToken extends AbstractToken { String[] posVals; final int numberofvalues = values.length; for (int i=0; i= 0) || - (Arrays.binarySearch(tps, key) >= 0 && value.matches("\\d+")) - ) { - addPair(key, value, sb); - } - } + value = values[++i].trim(); + if ( + (key.equals("summary")) || + (key.equals("bgcolor") && value.matches("#{0,1}[0-9a-fA-F]{1,6}|[a-zA-Z]{3,}")) || + ((key.equals("width") || key.equals("height")) && value.matches("\\d+%{0,1}")) || + ((posVals = ps.get(key)) != null && Arrays.binarySearch(posVals, value) >= 0) || + (Arrays.binarySearch(tps, key) >= 0 && value.matches("\\d+")) + ) { + addPair(key, value, sb); + } + } } return sb; } @@ -125,13 +128,19 @@ public class TableToken extends AbstractToken { return sb.append(" ").append(key).append("=\"").append(value).append("\""); } - public Pattern[] getRegex() { return pattern; } - public String[] getBlockElementNames() { return blockElementNames; } + public Pattern[] getRegex() { + return pattern; + } + + public String[] getBlockElementNames() { + return blockElementNames; + } - public boolean setText(final String text, final int patternNr) { - this.text = text; - this.parsed = false; - this.markup = null; - return true; - } + public boolean setText(final String text, final int patternNr) { + this.text = text; + this.parsed = false; + this.markup = null; + return true; + } + } diff --git a/source/de/anomic/data/wiki/tokens/Token.java b/source/de/anomic/data/wiki/tokens/Token.java index 89d6271b8..c12ae0cc6 100644 --- a/source/de/anomic/data/wiki/tokens/Token.java +++ b/source/de/anomic/data/wiki/tokens/Token.java @@ -8,9 +8,9 @@ // // This file is contributed by Franz Brausze // -// $LastChangedDate: $ -// $LastChangedRevision: $ -// $LastChangedBy: $ +// $LastChangedDate$ +// $LastChangedRevision$ +// $LastChangedBy$ // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -34,9 +34,9 @@ import de.anomic.data.wiki.wikiParserException; public interface Token { - public Pattern[] getRegex(); - public boolean setText(String text, int patternNr); - public String getText(); - public String getMarkup() throws wikiParserException; - public String[] getBlockElementNames(); + public Pattern[] getRegex(); + public boolean setText(String text, int patternNr); + public String getText(); + public String getMarkup() throws wikiParserException; + public String[] getBlockElementNames(); } diff --git a/source/de/anomic/data/wiki/wikiBoard.java b/source/de/anomic/data/wiki/wikiBoard.java index 563c6ea2f..ef7485d57 100644 --- a/source/de/anomic/data/wiki/wikiBoard.java +++ b/source/de/anomic/data/wiki/wikiBoard.java @@ -3,18 +3,21 @@ //(C) by Michael Peter Christen; mc@yacy.net //first published on http://www.anomic.de //Frankfurt, Germany, 2004 -//last major change: 20.07.2004 - +// +// $LastChangedDate$ +// $LastChangedRevision$ +// $LastChangedBy$ +// //This program is free software; you can redistribute it and/or modify //it under the terms of the GNU General Public License as published by //the Free Software Foundation; either version 2 of the License, or //(at your option) any later version. - +// //This program is distributed in the hope that it will be useful, //but WITHOUT ANY WARRANTY; without even the implied warranty of //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //GNU General Public License for more details. - +// //You should have received a copy of the GNU General Public License //along with this program; if not, write to the Free Software //Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA @@ -41,7 +44,7 @@ public class wikiBoard { public static final int keyLength = 64; private static final String dateFormat = "yyyyMMddHHmmss"; - static SimpleDateFormat SimpleFormatter = new SimpleDateFormat(dateFormat); + private static final SimpleDateFormat SimpleFormatter = new SimpleDateFormat(dateFormat); static { SimpleFormatter.setTimeZone(TimeZone.getTimeZone("GMT")); diff --git a/source/de/anomic/data/wiki/wikiParser.java b/source/de/anomic/data/wiki/wikiParser.java index 5fa2d0d50..f63ee13d1 100644 --- a/source/de/anomic/data/wiki/wikiParser.java +++ b/source/de/anomic/data/wiki/wikiParser.java @@ -1,3 +1,28 @@ +// wikiParser.java +// --------- +// part of YaCy +// (C) by Michael Peter Christen; mc@yacy.net +// first published on http://www.anomic.de +// Frankfurt, Germany, 2007 +// +// $LastChangedDate$ +// $LastChangedRevision$ +// $LastChangedBy$ +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + package de.anomic.data.wiki; import java.io.UnsupportedEncodingException; diff --git a/source/de/anomic/data/wiki/wikiParserException.java b/source/de/anomic/data/wiki/wikiParserException.java index 7a7a7822b..1d6627b48 100644 --- a/source/de/anomic/data/wiki/wikiParserException.java +++ b/source/de/anomic/data/wiki/wikiParserException.java @@ -1,3 +1,28 @@ +// wikiParserException.java +// --------- +// part of YaCy +// (C) by Michael Peter Christen; mc@yacy.net +// first published on http://www.anomic.de +// Frankfurt, Germany, 2007 +// +// $LastChangedDate$ +// $LastChangedRevision$ +// $LastChangedBy$ +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + package de.anomic.data.wiki; public class wikiParserException extends Exception {