- added blacklist support to manual URLFetcher stack fill
- fix for NPE: http://www.yacy-forum.de/viewtopic.php?t=3559
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3385 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
parent
f7803a6ce4
commit
0c7b8cf632
@ -0,0 +1,23 @@
|
||||
|
||||
package de.anomic.data.wiki.tokens;
|
||||
|
||||
public abstract class AbstractToken implements Token {
|
||||
|
||||
protected String text = null;
|
||||
protected String markup = null;
|
||||
protected boolean parsed = false;
|
||||
|
||||
protected abstract boolean parse();
|
||||
|
||||
public String getMarkup() {
|
||||
if (this.text == null)
|
||||
throw new IllegalArgumentException();
|
||||
if (!this.parsed && !parse()) return this.text;
|
||||
return this.markup;
|
||||
}
|
||||
|
||||
public String getText() { return this.text; }
|
||||
|
||||
@Override
|
||||
public String toString() { return getMarkup(); }
|
||||
}
|
@ -0,0 +1,42 @@
|
||||
|
||||
package de.anomic.data.wiki.tokens;
|
||||
|
||||
public class DefinitionListToken extends ListToken {
|
||||
|
||||
private static final String[] blockElements = { "dl", "dt", "dd" };
|
||||
|
||||
public DefinitionListToken() {
|
||||
super(';', null, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected StringBuffer parse(String[] t, int depth, StringBuffer sb) {
|
||||
sb.append("<dl>\n");
|
||||
while (super.aktline < t.length && getGrade(t[super.aktline]) >= depth) {
|
||||
for (int j=0; j<depth + 1; j++) sb.append("\t");
|
||||
sb.append("<dt>");
|
||||
|
||||
if (getGrade(t[super.aktline]) > depth) {
|
||||
parse(t, depth + 1, sb);
|
||||
} else {
|
||||
sb.append(t[super.aktline].substring(depth + 1).replaceFirst(":", "</dt><dd>"));
|
||||
}
|
||||
|
||||
sb.append("</");
|
||||
if (t[super.aktline].indexOf(':') == -1 || getGrade(t[super.aktline]) > depth)
|
||||
sb.append("dt");
|
||||
else
|
||||
sb.append("dd");
|
||||
sb.append(">\n");
|
||||
super.aktline++;
|
||||
}
|
||||
for (int j=0; j<depth; j++) sb.append("\t");
|
||||
sb.append("</dl>");
|
||||
super.aktline--;
|
||||
return sb;
|
||||
}
|
||||
|
||||
public String[] getBlockElementNames() {
|
||||
return blockElements;
|
||||
}
|
||||
}
|
@ -0,0 +1,106 @@
|
||||
package de.anomic.data.wiki.tokens;
|
||||
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class LinkToken extends AbstractToken {
|
||||
|
||||
private static final int IMG = 0;
|
||||
private static final int INT = 1;
|
||||
private static final int EXT = 2;
|
||||
|
||||
private static final Pattern imgPattern = Pattern.compile(
|
||||
"\\[\\[" + // begin
|
||||
"(Image:([^\\]|]|\\][^\\]])*)" + // "Image:" + URL
|
||||
"(" + // <optional>
|
||||
"(\\|(bottom|left|center|right|middle|top))?" + // optional align
|
||||
"(\\|(([^\\]]|\\][^\\]])*))" + // description
|
||||
")?" + // </optional>
|
||||
"\\]\\]"); // end
|
||||
|
||||
private static final Pattern intPattern = Pattern.compile(
|
||||
"\\[\\[" + // begin
|
||||
"(([^\\]|]|\\][^\\]])*?)" + // wiki-page
|
||||
"(\\|(([^\\]]|\\][^\\]])*?))?" + // optional desciption
|
||||
"\\]\\]"); // end
|
||||
|
||||
private static final Pattern extPattern = Pattern.compile(
|
||||
"\\[" + // begin
|
||||
"([^\\] ]*)" + // URL
|
||||
"( ([^\\]]*))?" + // optional description
|
||||
"\\]"); // end
|
||||
|
||||
private static final Pattern[] patterns = new Pattern[] {
|
||||
imgPattern, intPattern, extPattern };
|
||||
|
||||
private final String localhost;
|
||||
private final String wikiPath;
|
||||
private int patternNr = 0;
|
||||
|
||||
public LinkToken(String localhost, String wikiPath) {
|
||||
this.localhost = localhost;
|
||||
this.wikiPath = wikiPath;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean parse() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
Matcher m;
|
||||
switch (this.patternNr) {
|
||||
case IMG:
|
||||
m = imgPattern.matcher(this.text);
|
||||
if (!m.find()) return false;
|
||||
sb.append("<img src=\"").append(formatLink(m.group(1))).append("\"");
|
||||
if (m.group(5) != null) sb.append(" align=\"").append(m.group(5)).append("\"");
|
||||
if (m.group(7) != null) sb.append(" alt=\"").append(m.group(7)).append("\"");
|
||||
sb.append(" />");
|
||||
break;
|
||||
|
||||
case INT:
|
||||
m = intPattern.matcher(this.text);
|
||||
if (!m.find()) return false;
|
||||
sb.append("<a href=\"").append("http://").append(this.localhost)
|
||||
.append("/").append(this.wikiPath).append(m.group(1))
|
||||
.append("\"");
|
||||
if (m.group(4) != null) sb.append(" title=\"").append(m.group(3)).append("\"");
|
||||
sb.append(">");
|
||||
if (m.group(4) != null) sb.append(m.group(4)); else sb.append(m.group(1));
|
||||
sb.append("</a>");
|
||||
break;
|
||||
|
||||
case EXT:
|
||||
m = extPattern.matcher(this.text);
|
||||
if (!m.find()) return false;
|
||||
sb.append("<a href=\"").append(formatLink(m.group(1))).append("\"");
|
||||
if (m.group(3) != null) sb.append(" title=\"").append(m.group(3)).append("\"");
|
||||
sb.append(">");
|
||||
if (m.group(3) != null) sb.append(m.group(3)); else sb.append(m.group(1));
|
||||
sb.append("</a>");
|
||||
break;
|
||||
|
||||
default: return false;
|
||||
}
|
||||
this.parsed = true;
|
||||
this.markup = new String(sb);
|
||||
return true;
|
||||
}
|
||||
|
||||
private String formatLink(String link) {
|
||||
if (link.indexOf("://") == -1) { // DATA/HTDOCS-link
|
||||
return "http://" + this.localhost + "/" + link;
|
||||
} else { // 'normal' link
|
||||
return link;
|
||||
}
|
||||
}
|
||||
|
||||
public String[] getBlockElementNames() { return null; }
|
||||
public Pattern[] getRegex() { return patterns; }
|
||||
|
||||
public boolean setText(String text, int patternNr) {
|
||||
this.text = text;
|
||||
this.patternNr = patternNr;
|
||||
this.parsed = false;
|
||||
if (text == null) { this.markup = null; this.patternNr = -1; }
|
||||
return true;
|
||||
}
|
||||
}
|
@ -0,0 +1,118 @@
|
||||
package de.anomic.data.wiki.tokens;
|
||||
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class ListToken extends AbstractToken {
|
||||
|
||||
protected final String[] blockElements;
|
||||
|
||||
protected final char firstChar;
|
||||
protected final String listBlockElement;
|
||||
protected final String listElement;
|
||||
protected final boolean recursion;
|
||||
protected final Pattern[] pattern;
|
||||
|
||||
protected int aktline = 0;
|
||||
|
||||
public ListToken(char firstChar, String listBlockElement) {
|
||||
this.firstChar = firstChar;
|
||||
this.listBlockElement = listBlockElement;
|
||||
this.listElement = "li";
|
||||
this.recursion = true;
|
||||
this.pattern = new Pattern[] { Pattern.compile("^[" + firstChar + "]([^\n]|\n[" + firstChar + "])*", Pattern.MULTILINE) };
|
||||
ArrayList<String> r = new ArrayList<String>();
|
||||
if (this.listBlockElement != null) {
|
||||
if (this.recursion) r.add(this.listBlockElement);
|
||||
if (this.listElement != null) r.add(this.listElement);
|
||||
}
|
||||
blockElements = r.toArray(new String[r.size()]);
|
||||
}
|
||||
|
||||
public ListToken(char firstChar, String listBlockElement, String listElement) {
|
||||
this.firstChar = firstChar;
|
||||
this.listBlockElement = listBlockElement;
|
||||
this.listElement = listElement;
|
||||
this.recursion = true;
|
||||
this.pattern = new Pattern[] { Pattern.compile("^[" + firstChar + "]([^\n]|\n[" + firstChar + "])*", Pattern.MULTILINE) };
|
||||
ArrayList<String> r = new ArrayList<String>();
|
||||
if (this.listBlockElement != null) {
|
||||
if (this.recursion) r.add(this.listBlockElement);
|
||||
if (this.listElement != null) r.add(this.listElement);
|
||||
}
|
||||
blockElements = r.toArray(new String[r.size()]);
|
||||
}
|
||||
|
||||
public ListToken(char firstChar, String listBlockElement, String listElement, boolean recursion) {
|
||||
this.firstChar = firstChar;
|
||||
this.listBlockElement = listBlockElement;
|
||||
this.listElement = listElement;
|
||||
this.recursion = recursion;
|
||||
this.pattern = new Pattern[] { Pattern.compile("^[" + firstChar + "]([^\n]|\n[" + firstChar + "])*", Pattern.MULTILINE) };
|
||||
ArrayList<String> r = new ArrayList<String>();
|
||||
if (this.listBlockElement != null) {
|
||||
if (this.recursion) r.add(this.listBlockElement);
|
||||
if (this.listElement != null) r.add(this.listElement);
|
||||
}
|
||||
blockElements = r.toArray(new String[r.size()]);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean parse() {
|
||||
StringBuffer sb = new StringBuffer(this.text.length());
|
||||
parse(this.text.split("\n"), 0, sb);
|
||||
this.markup = new String(sb);
|
||||
this.parsed = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
protected StringBuffer parse(String[] t, int depth, StringBuffer sb) {
|
||||
if (this.listBlockElement != null) sb.append("<").append(this.listBlockElement).append(">\n");
|
||||
while (this.aktline < t.length && getGrade(t[this.aktline]) >= depth) {
|
||||
if (recursion) for (int j=0; j<depth + 1; j++) sb.append("\t");
|
||||
if (this.listElement != null) sb.append("<").append(this.listElement).append(">");
|
||||
|
||||
if (this.recursion && getGrade(t[this.aktline]) > depth) {
|
||||
parse(t, depth + 1, sb);
|
||||
} else {
|
||||
sb.append(t[this.aktline].substring(depth + 1));
|
||||
}
|
||||
|
||||
if (this.listElement != null) sb.append("</").append(this.listElement).append(">");
|
||||
sb.append("\n");
|
||||
this.aktline++;
|
||||
}
|
||||
if (this.recursion) for (int j=0; j<depth; j++) sb.append("\t");
|
||||
if (this.listBlockElement != null) sb.append("</").append(this.listBlockElement).append(">");
|
||||
this.aktline--;
|
||||
return sb;
|
||||
}
|
||||
|
||||
protected int getGrade(String t) {
|
||||
int i = 0;
|
||||
for (i=0; i<t.length(); i++)
|
||||
if (t.charAt(i) != this.firstChar) break;
|
||||
return i - 1;
|
||||
}
|
||||
|
||||
public String[] getBlockElementNames() {
|
||||
return blockElements;
|
||||
}
|
||||
|
||||
public Pattern[] getRegex() {
|
||||
return this.pattern;
|
||||
}
|
||||
|
||||
public char getFirstChar() {
|
||||
return this.firstChar;
|
||||
}
|
||||
|
||||
public boolean setText(String text, int patternNr) {
|
||||
this.text = text;
|
||||
this.markup = null;
|
||||
this.parsed = false;
|
||||
this.aktline = 0;
|
||||
return true;
|
||||
}
|
||||
}
|
@ -0,0 +1,120 @@
|
||||
|
||||
package de.anomic.data.wiki.tokens;
|
||||
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.regex.MatchResult;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class SimpleToken extends AbstractToken {
|
||||
|
||||
protected String content = null;
|
||||
protected int grade = 0;
|
||||
|
||||
protected final Pattern[] pattern;
|
||||
protected MatchResult mresult = null;
|
||||
private final String[][] definitionList;
|
||||
private final String[] blockElements;
|
||||
|
||||
public SimpleToken(char firstChar, char lastChar, String[][] definitionList, boolean isBlockElements) {
|
||||
this.definitionList = definitionList;
|
||||
int i;
|
||||
if (isBlockElements) {
|
||||
ArrayList<String> r = new ArrayList<String>();
|
||||
int j;
|
||||
for (i=0; i<definitionList.length; i++)
|
||||
if (definitionList[i] != null)
|
||||
for (j=0; j<definitionList[i].length; j++)
|
||||
r.add(definitionList[i][j]);
|
||||
this.blockElements = r.toArray(new String[r.size()]);
|
||||
} else {
|
||||
this.blockElements = null;
|
||||
}
|
||||
|
||||
for (i=0; i<definitionList.length; i++)
|
||||
if (definitionList[i] != null) {
|
||||
i++;
|
||||
break;
|
||||
}
|
||||
this.pattern = new Pattern[] { Pattern.compile(String.format(
|
||||
"([\\%s]{%d,%d})(.*?)([\\%s]{%d,%d})",
|
||||
firstChar, i, definitionList.length,
|
||||
lastChar, i, definitionList.length)) };
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getMarkup() {
|
||||
if (this.content == null) {
|
||||
if (this.text == null) {
|
||||
throw new IllegalArgumentException();
|
||||
} else {
|
||||
setText(this.text, 0);
|
||||
}
|
||||
}
|
||||
if (!this.parsed && !parse()) return this.text;
|
||||
return this.markup;
|
||||
}
|
||||
|
||||
protected boolean parse() {
|
||||
String[] e;
|
||||
if ((e = definitionList[this.grade]) == null || definitionList.length <= this.grade) {
|
||||
System.err.println("token not defined for grade: " + this.grade);
|
||||
return false;
|
||||
}
|
||||
this.markup = getMarkup(e);
|
||||
this.parsed = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
protected String getMarkup(String[] es) {
|
||||
return getMarkup(es, false) + this.content + getMarkup(es, true);
|
||||
}
|
||||
|
||||
protected String getMarkup(String[] es, boolean closing) {
|
||||
StringBuffer result = new StringBuffer();
|
||||
// backwards if closing
|
||||
for (
|
||||
int i = (closing) ? es.length - 1 : 0, j;
|
||||
(closing && i >= 0) ^ (!closing && i < es.length);
|
||||
i += (closing) ? -1 : +1
|
||||
) {
|
||||
result.append("<");
|
||||
if (closing) {
|
||||
result.append("/");
|
||||
if ((j = es[i].indexOf(' ')) > -1) {
|
||||
result.append(es[i].substring(0, j));
|
||||
} else {
|
||||
result.append(es[i]);
|
||||
}
|
||||
} else {
|
||||
result.append(es[i]);
|
||||
}
|
||||
result.append(">");
|
||||
}
|
||||
return new String(result);
|
||||
}
|
||||
|
||||
public boolean setText(String text, int patternNr) {
|
||||
this.text = text;
|
||||
this.markup = null;
|
||||
this.parsed = false;
|
||||
if (text != null) {
|
||||
Matcher m = getRegex()[0].matcher(text);
|
||||
if (
|
||||
(m.matches()) &&
|
||||
(m.group(1).length() == m.group(3).length()) &&
|
||||
(definitionList.length >= m.group(1).length()) &&
|
||||
(definitionList[m.group(1).length() - 1] != null)
|
||||
) {
|
||||
this.grade = m.group(1).length() - 1;
|
||||
this.content = m.group(2);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public Pattern[] getRegex() { return this.pattern; }
|
||||
public String[] getBlockElementNames() { return this.blockElements; }
|
||||
}
|
@ -0,0 +1,118 @@
|
||||
package de.anomic.data.wiki.tokens;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class TableToken extends AbstractToken {
|
||||
|
||||
private static final Pattern[] pattern = new Pattern[] {
|
||||
Pattern.compile(
|
||||
"\\{\\|" + // "{|"
|
||||
"([^\n]|\n\\|[|-])*\n" + // new line must start with "||" or "|-"
|
||||
"\\|\\}") // "|}"
|
||||
};
|
||||
private static final String[] blockElementNames = new String[] { "table", "tr", "td" };
|
||||
|
||||
@Override
|
||||
protected boolean parse() {
|
||||
String[] t = text.split("\n");
|
||||
String[] tds;
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append("<table");
|
||||
if (t[0].length() > 2) sb.append(parseTableProperties(t[0].substring(2)));
|
||||
sb.append(">\n");
|
||||
boolean trOpen = false;
|
||||
for (int i=1, j, a; i<t.length-1; i++) {
|
||||
if (t[i].startsWith("|-")) {
|
||||
if (trOpen) sb.append("\t</tr>\n");
|
||||
if (trOpen = (i < t.length - 2)) sb.append("\t<tr>\n");
|
||||
} else if (t[i].startsWith("||")) {
|
||||
tds = t[i].split("\\|\\|");
|
||||
for (j=0; j<tds.length; j++) {
|
||||
if (tds[j].length() > (a = tds[j].indexOf('|')) + 1) { // don't print empty td's
|
||||
sb.append("\t\t<td");
|
||||
if (a > -1) sb.append(parseTableProperties(tds[j].substring(0, a)));
|
||||
sb.append(">").append(tds[j].substring(a + 1)).append("</td>\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (trOpen) sb.append("\t</tr>\n");
|
||||
this.markup = new String(sb.append("</table>"));
|
||||
this.parsed = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
// from de.anomic.data.wikiCode.java.parseTableProperties, modified by [FB]
|
||||
private static final String[] tps = { "rowspan", "colspan", "vspace", "hspace", "cellspacing", "cellpadding", "border" };
|
||||
private static final HashMap<String,String[]> ps = new HashMap<String,String[]>();
|
||||
static {
|
||||
ps.put("frame", new String[] { "void", "above", "below", "hsides", "lhs", "rhs", "vsides", "box", "border" });
|
||||
ps.put("rules", new String[] { "none", "groups", "rows", "cols", "all" });
|
||||
ps.put("valign", new String[] { "top", "middle", "bottom", "baseline" });
|
||||
ps.put("align", new String[] { "left", "right", "center" });
|
||||
}
|
||||
|
||||
// contributed by [MN]
|
||||
/** This method takes possible table properties and tests if they are valid.
|
||||
* Valid in this case means if they are a property for the table, tr or td
|
||||
* tag as stated in the HTML Pocket Reference by Jennifer Niederst (1st edition)
|
||||
* The method is important to avoid XSS attacks on the wiki via table properties.
|
||||
* @param str A string that may contain several table properties and/or junk.
|
||||
* @return A string that only contains table properties.
|
||||
*/
|
||||
private static StringBuffer parseTableProperties(final String properties){
|
||||
String[] values = properties.replaceAll(""", "").split("[= ]"); //splitting the string at = and blanks
|
||||
StringBuffer sb = new StringBuffer(properties.length());
|
||||
Iterator<String> it;
|
||||
String key, valkey, value;
|
||||
int numberofvalues = values.length;
|
||||
main: for (int i=0; i<numberofvalues; i++) {
|
||||
valkey = values[i].trim();
|
||||
if (i + 1 < numberofvalues) {
|
||||
value = values[++i].trim();
|
||||
if (
|
||||
valkey.equals("summary") ||
|
||||
(valkey.equals("bgcolor") && value.matches("#{0,1}[0-9a-fA-F]{1,6}|[a-zA-Z]{3,}")) ||
|
||||
((valkey.equals("width") || valkey.equals("height")) && value.matches("\\d+%{0,1}")) ||
|
||||
(isInArray(tps, valkey) && value.matches("\\d+"))
|
||||
) {
|
||||
addPair(valkey, value, sb);
|
||||
continue;
|
||||
}
|
||||
it = ps.keySet().iterator();
|
||||
while (it.hasNext()) {
|
||||
key = it.next();
|
||||
if (valkey.equals(key) && isInArray(ps.get(key), value)) {
|
||||
addPair(valkey, value, sb);
|
||||
continue main;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (valkey.equals("nowrap"))
|
||||
sb.append(" nowrap");
|
||||
}
|
||||
return sb;
|
||||
}
|
||||
|
||||
private static StringBuffer addPair(String val1, String val2, StringBuffer sb) {
|
||||
return sb.append(" ").append(val1).append("=\"").append(val2).append("\"");
|
||||
}
|
||||
|
||||
private static boolean isInArray(Object[] array, Object find) {
|
||||
for (int i=array.length-1; i>-1; i--)
|
||||
if (array[i].equals(find)) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
public Pattern[] getRegex() { return pattern; }
|
||||
public String[] getBlockElementNames() { return blockElementNames; }
|
||||
|
||||
public boolean setText(String text, int patternNr) {
|
||||
this.text = text;
|
||||
this.parsed = false;
|
||||
this.markup = null;
|
||||
return true;
|
||||
}
|
||||
}
|
@ -0,0 +1,13 @@
|
||||
|
||||
package de.anomic.data.wiki.tokens;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
 * A piece of wiki syntax that can be recognised by regular expressions and
 * converted to markup.
 */
public interface Token {

    /** @return the patterns that recognise this token in wiki text */
    Pattern[] getRegex();

    /**
     * Assigns the matched text that is to be converted.
     *
     * @param text      the matched wiki text
     * @param patternNr index into {@link #getRegex()} of the pattern that matched
     * @return whether the token can be used with this text
     */
    boolean setText(String text, int patternNr);

    /** @return the text last assigned with {@link #setText(String, int)} */
    String getText();

    /** @return the markup generated for the current text */
    String getMarkup();

    /** @return names of the block elements this token may emit, or {@code null} */
    String[] getBlockElementNames();
}
|
@ -0,0 +1,206 @@
|
||||
package de.anomic.data.wiki;
|
||||
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.regex.Matcher;
|
||||
|
||||
import de.anomic.data.wiki.tokens.DefinitionListToken;
|
||||
import de.anomic.data.wiki.tokens.LinkToken;
|
||||
import de.anomic.data.wiki.tokens.ListToken;
|
||||
import de.anomic.data.wiki.tokens.SimpleToken;
|
||||
import de.anomic.data.wiki.tokens.TableToken;
|
||||
import de.anomic.data.wiki.tokens.Token;
|
||||
|
||||
public class wikiParser {
|
||||
|
||||
public static final Token[] tokens = {
|
||||
new SimpleToken('=', '=', new String[][] { null, { "h2" }, { "h3" }, { "h4" } }, true),
|
||||
new SimpleToken('\'', '\'', new String[][] { null, { "i" }, { "b" }, null, { "b", "i" } }, false),
|
||||
new LinkToken("localhost:8080", "Wiki.html?page="),
|
||||
new ListToken('*', "ul"),
|
||||
new ListToken('#', "ol"),
|
||||
new ListToken(':', "blockquote", null),
|
||||
new ListToken(' ', null, "tt", false),
|
||||
new DefinitionListToken(),
|
||||
new TableToken()
|
||||
};
|
||||
|
||||
private static final String[] BEs;
|
||||
static {
|
||||
ArrayList<String> r = new ArrayList<String>();
|
||||
for (int i=0, k, j; i<tokens.length; i++)
|
||||
if (tokens[i].getBlockElementNames() != null)
|
||||
for (j=0; j<tokens[i].getBlockElementNames().length; j++) {
|
||||
if (tokens[i].getBlockElementNames()[j] == null) continue;
|
||||
if ((k = tokens[i].getBlockElementNames()[j].indexOf(' ')) > 1) {
|
||||
r.add(tokens[i].getBlockElementNames()[j].substring(0, k));
|
||||
} else {
|
||||
r.add(tokens[i].getBlockElementNames()[j]);
|
||||
}
|
||||
}
|
||||
r.add("hr");
|
||||
BEs = r.toArray(new String[r.size()]);
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
String text = "===Title===\n" +
|
||||
"==blubb[== was ==ein '''shice'''==...och.bla\n" +
|
||||
"* ein \n" +
|
||||
"*==test==\n" +
|
||||
"** doppelt\n" +
|
||||
"* ''tess*sst''\n" +
|
||||
"*** xyz\n" +
|
||||
"=]*** huch\n" +
|
||||
"* ehehe***\n" +
|
||||
"* blubb\n" +
|
||||
"bliblablo\n\n\n" +
|
||||
"* blubb\n" +
|
||||
"{|border=-1\n" +
|
||||
"|-\n" +
|
||||
"||bla|| blubb\n" +
|
||||
"|-\n" +
|
||||
"||align center|och||huch||\n" +
|
||||
"|}\n" +
|
||||
"\n" +
|
||||
"# bla\n" +
|
||||
"# blubb\n" +
|
||||
"'''''ehehehe''''', ne?!\n" +
|
||||
"[http://www/index.html,ne?!] -\n" +
|
||||
"[[Image:blubb|BLA]] ---- och\n" +
|
||||
" blubb1\n" +
|
||||
" blubb2\n" +
|
||||
":doppel-blubb[= huch =]\n" +
|
||||
";hier:da\n" +
|
||||
";dort:und so\n" +
|
||||
";;und:doppelt";
|
||||
// text = "[=\n=]* bla";
|
||||
String t = "[=] ein fucking [= test =]-text[=,ne?!=] joa, [=alles=]wunderbar," +
|
||||
"[=denk ich=] mal =]";
|
||||
long l = System.currentTimeMillis();
|
||||
t = parse((args.length > 0) ? args[0] : text);
|
||||
System.out.println("parsing time: " + (System.currentTimeMillis() - l) + " ms");
|
||||
System.out.println("--- --- ---");
|
||||
System.out.println(t);
|
||||
}
|
||||
|
||||
// TODO:
|
||||
// - preParse:
|
||||
// - <pre>~</pre>
|
||||
|
||||
public static String parse(String text) {
|
||||
Text[] tt = Text.split2Texts(text, "[=", "=]");
|
||||
for (int i=0; i<tt.length; i+=2)
|
||||
tt[i].setText(parseUnescaped(tt[i].getText()));
|
||||
return replaceBRs(Text.mergeTexts(tt));
|
||||
}
|
||||
|
||||
public static String parseUnescaped(String text) {
|
||||
Token st;
|
||||
Matcher m;
|
||||
StringBuffer sb;
|
||||
for (int i=0; i<tokens.length; i++) {
|
||||
st = tokens[i];
|
||||
for (int j=0; j<st.getRegex().length; j++) {
|
||||
m = st.getRegex()[j].matcher(text);
|
||||
sb = new StringBuffer();
|
||||
while (m.find()) {
|
||||
//System.out.print("found " + st.getClass().getSimpleName() + ": " +
|
||||
// m.group().replaceAll("\n", "\\\\n").replaceAll("\t", " ") + ", ");
|
||||
if (st.setText(m.group(), j)) {
|
||||
// System.out.println("usable");
|
||||
} else {
|
||||
// System.out.println("not usable");
|
||||
continue;
|
||||
}
|
||||
m.appendReplacement(sb, (st.getMarkup() == null) ? m.group() : st.getMarkup());
|
||||
}
|
||||
text = new String(m.appendTail(sb));
|
||||
}
|
||||
}
|
||||
return text.replaceAll("----", "<hr />");
|
||||
}
|
||||
|
||||
private static String replaceBRs(String text) {
|
||||
StringBuffer sb = new StringBuffer(text.length());
|
||||
String[] tt = text.split("\n");
|
||||
boolean replace;
|
||||
for (int i=0, j; i<tt.length; i++) {
|
||||
replace = true;
|
||||
for (j=0; j<BEs.length; j++)
|
||||
if (tt[i].endsWith(BEs[j] + ">")) { replace = false; break; }
|
||||
sb.append(tt[i]);
|
||||
if (replace && i < tt.length - 1) sb.append("<br />");
|
||||
if (i < tt.length - 1) sb.append("\n");
|
||||
}
|
||||
return new String(sb);
|
||||
}
|
||||
|
||||
private static class Text {
|
||||
|
||||
public static final String escapeNewLine = "@";
|
||||
|
||||
private String text;
|
||||
private final boolean escaped;
|
||||
private final boolean nl;
|
||||
|
||||
public Text(String text, boolean escaped, boolean newLineBefore) {
|
||||
this.text = text;
|
||||
this.escaped = escaped;
|
||||
this.nl = newLineBefore;
|
||||
}
|
||||
|
||||
public String setTextPlain(String text) { return this.text = text; }
|
||||
public String setText(String text) {
|
||||
if (this.nl)
|
||||
this.text = text.substring(escapeNewLine.length());
|
||||
else
|
||||
this.text = text;
|
||||
return this.text;
|
||||
}
|
||||
|
||||
public String getTextPlain() { return this.text; }
|
||||
public String getText() {
|
||||
if (this.nl)
|
||||
return escapeNewLine + this.text;
|
||||
else
|
||||
return this.text;
|
||||
}
|
||||
|
||||
public String toString() { return this.text; }
|
||||
public boolean isEscaped() { return this.escaped; }
|
||||
public boolean isNewLineBefore() { return this.nl; }
|
||||
|
||||
private static Text[] split2Texts(String text, String escapeBegin, String escapeEnd) {
|
||||
if (text == null) return null;
|
||||
if (text.length() < 2) return new Text[] { new Text(text, false, true) };
|
||||
|
||||
int startLen = escapeBegin.length();
|
||||
ArrayList<Text> r = new ArrayList<Text>();
|
||||
boolean escaped = text.startsWith(escapeBegin);
|
||||
if (escaped) r.add(new Text("", false, true));
|
||||
int i, j = 0;
|
||||
while ((i = text.indexOf((escaped) ? escapeEnd : escapeBegin, j)) > -1) {
|
||||
r.add(resolve2Text(text, escaped, (j > 0) ? j + startLen : 0, i, escapeEnd));
|
||||
j = i;
|
||||
escaped = !escaped;
|
||||
}
|
||||
r.add(resolve2Text(text, escaped, (escaped) ? j : (j > 0) ? j + startLen : 0, -1, escapeEnd));
|
||||
return r.toArray(new Text[r.size()]);
|
||||
}
|
||||
|
||||
private static Text resolve2Text(String text, boolean escaped, int from, int to, String escapeEnd) {
|
||||
if (to == -1) to = text.length();
|
||||
return new Text(
|
||||
text.substring(from, to),
|
||||
escaped,
|
||||
from < escapeEnd.length() + 2 || (!escaped && text.charAt(from - escapeEnd.length() - 1) == '\n'));
|
||||
}
|
||||
|
||||
private static String mergeTexts(Text[] texts) {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
for (int n=0; n < texts.length; n++)
|
||||
sb.append(texts[n].getTextPlain());
|
||||
return new String(sb);
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in new issue