git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7104 6c8d7289-2bf4-0310-a012-ef5d649a1542pull/1/head
parent
5f391fcfa9
commit
fa2eb9676e
@ -1,258 +0,0 @@
|
||||
// knwikiParser.java
|
||||
// ---------
|
||||
// part of YaCy
|
||||
// (C) by Michael Peter Christen; mc@yacy.net
|
||||
// first published on http://www.anomic.de
|
||||
// Frankfurt, Germany, 2007
|
||||
// Created 22.02.2007
|
||||
//
|
||||
// This file is contributed by Franz Brausze
|
||||
//
|
||||
// $LastChangedDate$
|
||||
// $LastChangedRevision$
|
||||
// $LastChangedBy$
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package de.anomic.data.wiki;
|
||||
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.regex.Matcher;
|
||||
|
||||
import de.anomic.data.wiki.tokens.DefinitionListToken;
|
||||
import de.anomic.data.wiki.tokens.LinkToken;
|
||||
import de.anomic.data.wiki.tokens.ListToken;
|
||||
import de.anomic.data.wiki.tokens.SimpleToken;
|
||||
import de.anomic.data.wiki.tokens.TableToken;
|
||||
import de.anomic.data.wiki.tokens.Token;
|
||||
import de.anomic.search.Switchboard;
|
||||
|
||||
public class knwikiParser implements wikiParser {
|
||||
|
||||
public Token[] tokens;
|
||||
private String[] BEs;
|
||||
private final Switchboard sb;
|
||||
|
||||
/**
 * Creates a parser bound to the given switchboard.
 *
 * @param sb switchboard stored for later use by parse; main passes {@code null} here
 */
private knwikiParser(final Switchboard sb) {
    this.sb = sb;
}
|
||||
|
||||
public static void main(final String[] args) {
|
||||
final String text = "===T<pre>itle===\n" +
|
||||
"==blubb== was ==ein '''shice'''==...och.bla\n" +
|
||||
"* ein \n" +
|
||||
"*==test=</pre>=\n" +
|
||||
"** doppelt\n" +
|
||||
"* ''tess*sst''\n" +
|
||||
"*** xyz\n" +
|
||||
"=]*** huch\n" +
|
||||
"* ehehe***\n" +
|
||||
"* blubb\n" +
|
||||
"bliblablo\n\n\n" +
|
||||
"* blubb\n" +
|
||||
"{|border=-1\n" +
|
||||
"|-\n" +
|
||||
"||bla|| blubb\n" +
|
||||
"|-\n" +
|
||||
"||align center|och||huch||\n" +
|
||||
"|}\n" +
|
||||
"\n" +
|
||||
"# bla\n" +
|
||||
"# blubb\n" +
|
||||
"'''''ehehehe''''', ne?!\n" +
|
||||
"[http://www/index.html,ne?!] -\n" +
|
||||
"[[Image:blubb|BLA]] ---- och\n" +
|
||||
" blubb1\n" +
|
||||
" blubb2\n" +
|
||||
":doppel-blubb[= huch =]\n" +
|
||||
";hier:da\n" +
|
||||
";dort:und so\n" +
|
||||
";;und:doppelt\n\n\n\n" +
|
||||
"[[Image:blubb|BLA]]";
|
||||
// text = "[=\n=]* bla";
|
||||
String t = "[=] ein fucking [= test =]-text[=,ne?!=] joa, [=alles=]wunderbar," +
|
||||
"[=denk ich=] mal =]";
|
||||
final long l = System.currentTimeMillis();
|
||||
t = new knwikiParser(null).parse((args.length > 0) ? args[0] : text, "localhost:8080");
|
||||
System.out.println("parsing time: " + (System.currentTimeMillis() - l) + " ms");
|
||||
System.out.println("--- --- ---");
|
||||
System.out.println(t);
|
||||
}
|
||||
|
||||
public String transform(final String content) {
|
||||
return parse(content, null);
|
||||
}
|
||||
|
||||
public String transform(final byte[] content) throws UnsupportedEncodingException {
|
||||
return parse(new String(content, "UTF-8"), null);
|
||||
}
|
||||
|
||||
public String transform(
|
||||
final byte[] content, final String encoding) throws UnsupportedEncodingException {
|
||||
return parse(new String(content, encoding), null);
|
||||
}
|
||||
|
||||
private String parse(String text, final String publicAddress) {
|
||||
tokens = new Token[] {
|
||||
new SimpleToken('=', '=', new String[][] { null, { "h2" }, { "h3" }, { "h4" } }, true),
|
||||
new SimpleToken('\'', '\'', new String[][] { null, { "i" }, { "b" }, null, { "b", "i" } }, false),
|
||||
new LinkToken((publicAddress == null) ? sb.peers.mySeed().getPublicAddress() : publicAddress, "Wiki.html?page=", sb),
|
||||
new ListToken('*', "ul"),
|
||||
new ListToken('#', "ol"),
|
||||
new ListToken(':', "blockquote", null),
|
||||
new ListToken(' ', null, "tt", false),
|
||||
new DefinitionListToken(),
|
||||
new TableToken()
|
||||
};
|
||||
final ArrayList<String> r = new ArrayList<String>();
|
||||
for (int i = 0, k, j; i < tokens.length; i++)
|
||||
if (tokens[i].getBlockElementNames() != null)
|
||||
for (j = 0; j < tokens[i].getBlockElementNames().length; j++) {
|
||||
if (tokens[i].getBlockElementNames()[j] == null) continue;
|
||||
if ((k = tokens[i].getBlockElementNames()[j].indexOf(' ')) > 1) {
|
||||
r.add(tokens[i].getBlockElementNames()[j].substring(0, k));
|
||||
} else {
|
||||
r.add(tokens[i].getBlockElementNames()[j]);
|
||||
}
|
||||
}
|
||||
r.add("hr");
|
||||
BEs = r.toArray(new String[r.size()]);
|
||||
|
||||
Text[] tt = Text.split2Texts(text, "[=", "=]");
|
||||
for (int i=0; i<tt.length; i+=2)
|
||||
tt[i].setText(parseUnescaped(tt[i].getText()));
|
||||
text = Text.mergeTexts(tt);
|
||||
|
||||
tt = Text.split2Texts(text, "<pre>", "</pre>");
|
||||
for (int i=0; i<tt.length; i+=2)
|
||||
tt[i].setText(replaceBRs(tt[i].getText()));
|
||||
return Text.mergeTexts(tt);
|
||||
}
|
||||
|
||||
private String parseUnescaped(String text) {
|
||||
Token st;
|
||||
Matcher m;
|
||||
StringBuffer stringBuffer;
|
||||
for (int i=0; i<tokens.length; i++) {
|
||||
st = tokens[i];
|
||||
for (int j=0; j<st.getRegex().length; j++) {
|
||||
m = st.getRegex()[j].matcher(text);
|
||||
stringBuffer = new StringBuffer();
|
||||
while (m.find()) try {
|
||||
if (!st.setText(m.group(), j)) {
|
||||
continue;
|
||||
}
|
||||
m.appendReplacement(stringBuffer, (st.getMarkup() == null) ? m.group() : st.getMarkup());
|
||||
} catch (final wikiParserException e) {
|
||||
m.appendReplacement(stringBuffer, st.getText());
|
||||
}
|
||||
text = new String(m.appendTail(stringBuffer));
|
||||
}
|
||||
}
|
||||
return text.replaceAll("----", "<hr />");
|
||||
}
|
||||
|
||||
private String replaceBRs(final String text) {
|
||||
final StringBuilder stringBuffer = new StringBuilder(text.length());
|
||||
final String[] tt = text.split("\n");
|
||||
boolean replace;
|
||||
for (int i=0, j; i<tt.length; i++) {
|
||||
replace = true;
|
||||
for (j=0; j<BEs.length; j++)
|
||||
if (tt[i].endsWith(BEs[j] + ">")) { replace = false; break; }
|
||||
stringBuffer.append(tt[i]);
|
||||
if (i < tt.length - 1) {
|
||||
if (replace) stringBuffer.append("<br />");
|
||||
stringBuffer.append("\n");
|
||||
}
|
||||
}
|
||||
return new String(stringBuffer);
|
||||
}
|
||||
|
||||
private static class Text {
|
||||
|
||||
public static final String escapeNewLine = "@";
|
||||
|
||||
private String text;
|
||||
private final boolean nl;
|
||||
|
||||
public Text(final String text, final boolean newLineBefore) {
|
||||
this.text = text;
|
||||
this.nl = newLineBefore;
|
||||
}
|
||||
|
||||
public String setText(final String text) {
|
||||
if (this.nl) {
|
||||
this.text = text.substring(escapeNewLine.length());
|
||||
} else {
|
||||
this.text = text;
|
||||
}
|
||||
return this.text;
|
||||
}
|
||||
|
||||
public String getTextPlain() {
|
||||
return this.text;
|
||||
}
|
||||
|
||||
public String getText() {
|
||||
if (this.nl) {
|
||||
return escapeNewLine + this.text;
|
||||
}
|
||||
return this.text;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return this.text;
|
||||
}
|
||||
|
||||
static Text[] split2Texts(final String text, final String escapeBegin, final String escapeEnd) {
|
||||
|
||||
if (text == null) return null;
|
||||
|
||||
if (text.length() < 2) return new Text[] {new Text(text, true) };
|
||||
|
||||
final int startLen = escapeBegin.length();
|
||||
final int endLen = escapeEnd.length();
|
||||
final ArrayList<Text> r = new ArrayList<Text>();
|
||||
boolean escaped = text.startsWith(escapeBegin);
|
||||
if (escaped) r.add(new Text("", true));
|
||||
int i, j = 0;
|
||||
while ((i = text.indexOf((escaped) ? escapeEnd : escapeBegin, j)) > -1) {
|
||||
r.add(resolve2Text(text, escaped, (j > 0) ? j + ((escaped) ? startLen : endLen) : 0, i, escapeEnd));
|
||||
j = i;
|
||||
escaped = !escaped;
|
||||
}
|
||||
r.add(resolve2Text(text, escaped, (escaped) ? j : (j > 0) ? j + endLen : 0, -1, escapeEnd));
|
||||
return r.toArray(new Text[r.size()]);
|
||||
}
|
||||
|
||||
private static Text resolve2Text(final String text, final boolean escaped, final int from, int to, final String escapeEnd) {
|
||||
if (to == -1) to = text.length();
|
||||
return new Text(
|
||||
text.substring(from, to),
|
||||
from < escapeEnd.length() + 2 || (!escaped && text.charAt(from - escapeEnd.length() - 1) == '\n'));
|
||||
}
|
||||
|
||||
static String mergeTexts(final Text[] texts) {
|
||||
final StringBuilder sb = new StringBuilder(2000);
|
||||
for (int n=0; n < texts.length; n++) {
|
||||
sb.append(texts[n].getTextPlain());
|
||||
}
|
||||
return new String(sb);
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in new issue