diff --git a/source/de/anomic/data/wiki/knwikiParser.java b/source/de/anomic/data/wiki/knwikiParser.java deleted file mode 100644 index 61b2c44ed..000000000 --- a/source/de/anomic/data/wiki/knwikiParser.java +++ /dev/null @@ -1,258 +0,0 @@ -// knwikiParser.java -// --------- -// part of YaCy -// (C) by Michael Peter Christen; mc@yacy.net -// first published on http://www.anomic.de -// Frankfurt, Germany, 2007 -// Created 22.02.2007 -// -// This file is contributed by Franz Brausze -// -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package de.anomic.data.wiki; - -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.regex.Matcher; - -import de.anomic.data.wiki.tokens.DefinitionListToken; -import de.anomic.data.wiki.tokens.LinkToken; -import de.anomic.data.wiki.tokens.ListToken; -import de.anomic.data.wiki.tokens.SimpleToken; -import de.anomic.data.wiki.tokens.TableToken; -import de.anomic.data.wiki.tokens.Token; -import de.anomic.search.Switchboard; - -public class knwikiParser implements wikiParser { - - public Token[] tokens; - private String[] BEs; - private final Switchboard sb; - - private knwikiParser(final Switchboard sb) { - this.sb = sb; - } - - public static void main(final String[] args) { - final String text = "===T
itle===\n" +
-                            "==blubb== was ==ein '''shice'''==...och.bla\n" +
-                            "* ein \n" +
-                            "*==test=
=\n" + - "** doppelt\n" + - "* ''tess*sst''\n" + - "*** xyz\n" + - "=]*** huch\n" + - "* ehehe***\n" + - "* blubb\n" + - "bliblablo\n\n\n" + - "* blubb\n" + - "{|border=-1\n" + - "|-\n" + - "||bla|| blubb\n" + - "|-\n" + - "||align center|och||huch||\n" + - "|}\n" + - "\n" + - "# bla\n" + - "# blubb\n" + - "'''''ehehehe''''', ne?!\n" + - "[http://www/index.html,ne?!] -\n" + - "[[Image:blubb|BLA]] ---- och\n" + - " blubb1\n" + - " blubb2\n" + - ":doppel-blubb[= huch =]\n" + - ";hier:da\n" + - ";dort:und so\n" + - ";;und:doppelt\n\n\n\n" + - "[[Image:blubb|BLA]]"; - // text = "[=\n=]* bla"; - String t = "[=] ein fucking [= test =]-text[=,ne?!=] joa, [=alles=]wunderbar," + - "[=denk ich=] mal =]"; - final long l = System.currentTimeMillis(); - t = new knwikiParser(null).parse((args.length > 0) ? args[0] : text, "localhost:8080"); - System.out.println("parsing time: " + (System.currentTimeMillis() - l) + " ms"); - System.out.println("--- --- ---"); - System.out.println(t); - } - - public String transform(final String content) { - return parse(content, null); - } - - public String transform(final byte[] content) throws UnsupportedEncodingException { - return parse(new String(content, "UTF-8"), null); - } - - public String transform( - final byte[] content, final String encoding) throws UnsupportedEncodingException { - return parse(new String(content, encoding), null); - } - - private String parse(String text, final String publicAddress) { - tokens = new Token[] { - new SimpleToken('=', '=', new String[][] { null, { "h2" }, { "h3" }, { "h4" } }, true), - new SimpleToken('\'', '\'', new String[][] { null, { "i" }, { "b" }, null, { "b", "i" } }, false), - new LinkToken((publicAddress == null) ? sb.peers.mySeed().getPublicAddress() : publicAddress, "Wiki.html?page=", sb), - new ListToken('*', "ul"), - new ListToken('#', "ol"), - new ListToken(':', "blockquote", null), - new ListToken(' ', null, "tt", false), - new DefinitionListToken(), - new TableToken() - }; - final ArrayList r = new ArrayList(); - for (int i = 0, k, j; i < tokens.length; i++) - if (tokens[i].getBlockElementNames() != null) - for (j = 0; j < tokens[i].getBlockElementNames().length; j++) { - if (tokens[i].getBlockElementNames()[j] == null) continue; - if ((k = tokens[i].getBlockElementNames()[j].indexOf(' ')) > 1) { - r.add(tokens[i].getBlockElementNames()[j].substring(0, k)); - } else { - r.add(tokens[i].getBlockElementNames()[j]); - } - } - r.add("hr"); - BEs = r.toArray(new String[r.size()]); - - Text[] tt = Text.split2Texts(text, "[=", "=]"); - for (int i=0; i", ""); - for (int i=0; i"); - } - - private String replaceBRs(final String text) { - final StringBuilder stringBuffer = new StringBuilder(text.length()); - final String[] tt = text.split("\n"); - boolean replace; - for (int i=0, j; i")) { replace = false; break; } - stringBuffer.append(tt[i]); - if (i < tt.length - 1) { - if (replace) stringBuffer.append("
"); - stringBuffer.append("\n"); - } - } - return new String(stringBuffer); - } - - private static class Text { - - public static final String escapeNewLine = "@"; - - private String text; - private final boolean nl; - - public Text(final String text, final boolean newLineBefore) { - this.text = text; - this.nl = newLineBefore; - } - - public String setText(final String text) { - if (this.nl) { - this.text = text.substring(escapeNewLine.length()); - } else { - this.text = text; - } - return this.text; - } - - public String getTextPlain() { - return this.text; - } - - public String getText() { - if (this.nl) { - return escapeNewLine + this.text; - } - return this.text; - } - - @Override - public String toString() { - return this.text; - } - - static Text[] split2Texts(final String text, final String escapeBegin, final String escapeEnd) { - - if (text == null) return null; - - if (text.length() < 2) return new Text[] {new Text(text, true) }; - - final int startLen = escapeBegin.length(); - final int endLen = escapeEnd.length(); - final ArrayList r = new ArrayList(); - boolean escaped = text.startsWith(escapeBegin); - if (escaped) r.add(new Text("", true)); - int i, j = 0; - while ((i = text.indexOf((escaped) ? escapeEnd : escapeBegin, j)) > -1) { - r.add(resolve2Text(text, escaped, (j > 0) ? j + ((escaped) ? startLen : endLen) : 0, i, escapeEnd)); - j = i; - escaped = !escaped; - } - r.add(resolve2Text(text, escaped, (escaped) ? j : (j > 0) ? j + endLen : 0, -1, escapeEnd)); - return r.toArray(new Text[r.size()]); - } - - private static Text resolve2Text(final String text, final boolean escaped, final int from, int to, final String escapeEnd) { - if (to == -1) to = text.length(); - return new Text( - text.substring(from, to), - from < escapeEnd.length() + 2 || (!escaped && text.charAt(from - escapeEnd.length() - 1) == '\n')); - } - - static String mergeTexts(final Text[] texts) { - final StringBuilder sb = new StringBuilder(2000); - for (int n=0; n < texts.length; n++) { - sb.append(texts[n].getTextPlain()); - } - return new String(sb); - } - } -}