// wikiParser.java // --------- // part of YaCy // (C) by Michael Peter Christen; mc@yacy.net // first published on http://www.anomic.de // Frankfurt, Germany, 2007 // Created 22.02.2007 // // This file is contributed by Franz Brausze // // $LastChangedDate: $ // $LastChangedRevision: $ // $LastChangedBy: $ // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA package de.anomic.data.wiki; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.regex.Matcher; import de.anomic.data.wiki.tokens.DefinitionListToken; import de.anomic.data.wiki.tokens.LinkToken; import de.anomic.data.wiki.tokens.ListToken; import de.anomic.data.wiki.tokens.SimpleToken; import de.anomic.data.wiki.tokens.TableToken; import de.anomic.data.wiki.tokens.Token; import de.anomic.plasma.plasmaSwitchboard; public class knwikiParser implements wikiParser { public Token[] tokens; private String[] BEs; private final plasmaSwitchboard sb; public knwikiParser(final plasmaSwitchboard sb) { this.sb = sb; } public static void main(final String[] args) { final String text = "===T
itle===\n" + "==blubb== was ==ein '''shice'''==...och.bla\n" + "* ein \n" + "*==test==\n" + "** doppelt\n" + "* ''tess*sst''\n" + "*** xyz\n" + "=]*** huch\n" + "* ehehe***\n" + "* blubb\n" + "bliblablo\n\n\n" + "* blubb\n" + "{|border=-1\n" + "|-\n" + "||bla|| blubb\n" + "|-\n" + "||align center|och||huch||\n" + "|}\n" + "\n" + "# bla\n" + "# blubb\n" + "'''''ehehehe''''', ne?!\n" + "[http://www/index.html,ne?!] -\n" + "[[Image:blubb|BLA]] ---- och\n" + " blubb1\n" + " blubb2\n" + ":doppel-blubb[= huch =]\n" + ";hier:da\n" + ";dort:und so\n" + ";;und:doppelt\n\n\n\n" + "[[Image:blubb|BLA]]"; // text = "[=\n=]* bla"; String t = "[=] ein fucking [= test =]-text[=,ne?!=] joa, [=alles=]wunderbar," + "[=denk ich=] mal =]"; final long l = System.currentTimeMillis(); t = new knwikiParser(null).parse((args.length > 0) ? args[0] : text, "localhost:8080"); System.out.println("parsing time: " + (System.currentTimeMillis() - l) + " ms"); System.out.println("--- --- ---"); System.out.println(t); } public String transform(final String content) { return parse(content, null); } public String transform(final String content, final plasmaSwitchboard sb) { return parse(content, null); } public String transform(final byte[] content) throws UnsupportedEncodingException { return parse(new String(content, "UTF-8"), null); } public String transform( final byte[] content, final String encoding, final plasmaSwitchboard switchboard) throws UnsupportedEncodingException { return parse(new String(content, encoding), null); } public String transform(final byte[] content, final String encoding) throws UnsupportedEncodingException { return parse(new String(content, encoding), null); } public String transform(final byte[] text, final String encoding, final String publicAddress) throws UnsupportedEncodingException { return parse(new String(text, encoding), publicAddress); } public String transform(final String text, final String publicAddress) { return parse(text, publicAddress); } public String parse(String text, final String publicAddress) { tokens = new Token[] { new SimpleToken('=', '=', new String[][] { null, { "h2" }, { "h3" }, { "h4" } }, true), new SimpleToken('\'', '\'', new String[][] { null, { "i" }, { "b" }, null, { "b", "i" } }, false), new LinkToken((publicAddress == null) ? sb.webIndex.seedDB.mySeed().getPublicAddress() : publicAddress, "Wiki.html?page=", sb), new ListToken('*', "ul"), new ListToken('#', "ol"), new ListToken(':', "blockquote", null), new ListToken(' ', null, "tt", false), new DefinitionListToken(), new TableToken() }; final ArrayList