diff --git a/source/de/anomic/data/list/ListAccumulator.java b/source/de/anomic/data/list/ListAccumulator.java
index 7e3d2ac2b..e73d7b8ad 100644
--- a/source/de/anomic/data/list/ListAccumulator.java
+++ b/source/de/anomic/data/list/ListAccumulator.java
@@ -4,9 +4,9 @@
//
// (C) 2009 by Marc Nause
//
-// $LastChangedDate: $
-// $LastChangedRevision: $
-// $LastChangedBy: $
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
diff --git a/source/de/anomic/data/list/XMLBlacklistImporter.java b/source/de/anomic/data/list/XMLBlacklistImporter.java
index 5b09eba22..cbae9f235 100644
--- a/source/de/anomic/data/list/XMLBlacklistImporter.java
+++ b/source/de/anomic/data/list/XMLBlacklistImporter.java
@@ -4,9 +4,9 @@
//
// (C) 2009 by Marc Nause
//
-// $LastChangedDate: $
-// $LastChangedRevision: $
-// $LastChangedBy: $
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
diff --git a/source/de/anomic/data/wiki/abstractWikiParser.java b/source/de/anomic/data/wiki/abstractWikiParser.java
index a7255cb1c..9ac61b036 100644
--- a/source/de/anomic/data/wiki/abstractWikiParser.java
+++ b/source/de/anomic/data/wiki/abstractWikiParser.java
@@ -1,3 +1,28 @@
+// abstractWikiParser.java
+// ---------
+// part of YaCy
+// (C) by Michael Peter Christen; mc@yacy.net
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2007
+//
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
package de.anomic.data.wiki;
import java.io.BufferedReader;
diff --git a/source/de/anomic/data/wiki/knwikiParser.java b/source/de/anomic/data/wiki/knwikiParser.java
index db67786fd..d067513ed 100644
--- a/source/de/anomic/data/wiki/knwikiParser.java
+++ b/source/de/anomic/data/wiki/knwikiParser.java
@@ -1,4 +1,4 @@
-// wikiParser.java
+// knwikiParser.java
// ---------
// part of YaCy
// (C) by Michael Peter Christen; mc@yacy.net
@@ -8,9 +8,9 @@
//
// This file is contributed by Franz Brausze
//
-// $LastChangedDate: $
-// $LastChangedRevision: $
-// $LastChangedBy: $
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -42,55 +42,55 @@ import de.anomic.search.Switchboard;
public class knwikiParser implements wikiParser {
- public Token[] tokens;
- private String[] BEs;
+ public Token[] tokens;
+ private String[] BEs;
private final Switchboard sb;
private knwikiParser(final Switchboard sb) {
this.sb = sb;
}
- public static void main(final String[] args) {
- final String text = "===T
itle===\n" +
- "==blubb== was ==ein '''shice'''==...och.bla\n" +
- "* ein \n" +
- "*==test=
=\n" +
- "** doppelt\n" +
- "* ''tess*sst''\n" +
- "*** xyz\n" +
- "=]*** huch\n" +
- "* ehehe***\n" +
- "* blubb\n" +
- "bliblablo\n\n\n" +
- "* blubb\n" +
- "{|border=-1\n" +
- "|-\n" +
- "||bla|| blubb\n" +
- "|-\n" +
- "||align center|och||huch||\n" +
- "|}\n" +
- "\n" +
- "# bla\n" +
- "# blubb\n" +
- "'''''ehehehe''''', ne?!\n" +
- "[http://www/index.html,ne?!] -\n" +
- "[[Image:blubb|BLA]] ---- och\n" +
- " blubb1\n" +
- " blubb2\n" +
- ":doppel-blubb[= huch =]\n" +
- ";hier:da\n" +
- ";dort:und so\n" +
- ";;und:doppelt\n\n\n\n" +
- "[[Image:blubb|BLA]]";
- // text = "[=\n=]* bla";
- String t = "[=] ein fucking [= test =]-text[=,ne?!=] joa, [=alles=]wunderbar," +
- "[=denk ich=] mal =]";
- final long l = System.currentTimeMillis();
- t = new knwikiParser(null).parse((args.length > 0) ? args[0] : text, "localhost:8080");
+ public static void main(final String[] args) {
+ final String text = "===Title===\n" +
+ "==blubb== was ==ein '''shice'''==...och.bla\n" +
+ "* ein \n" +
+ "*==test=
=\n" +
+ "** doppelt\n" +
+ "* ''tess*sst''\n" +
+ "*** xyz\n" +
+ "=]*** huch\n" +
+ "* ehehe***\n" +
+ "* blubb\n" +
+ "bliblablo\n\n\n" +
+ "* blubb\n" +
+ "{|border=-1\n" +
+ "|-\n" +
+ "||bla|| blubb\n" +
+ "|-\n" +
+ "||align center|och||huch||\n" +
+ "|}\n" +
+ "\n" +
+ "# bla\n" +
+ "# blubb\n" +
+ "'''''ehehehe''''', ne?!\n" +
+ "[http://www/index.html,ne?!] -\n" +
+ "[[Image:blubb|BLA]] ---- och\n" +
+ " blubb1\n" +
+ " blubb2\n" +
+ ":doppel-blubb[= huch =]\n" +
+ ";hier:da\n" +
+ ";dort:und so\n" +
+ ";;und:doppelt\n\n\n\n" +
+ "[[Image:blubb|BLA]]";
+ // text = "[=\n=]* bla";
+ String t = "[=] ein fucking [= test =]-text[=,ne?!=] joa, [=alles=]wunderbar," +
+ "[=denk ich=] mal =]";
+ final long l = System.currentTimeMillis();
+ t = new knwikiParser(null).parse((args.length > 0) ? args[0] : text, "localhost:8080");
System.out.println("parsing time: " + (System.currentTimeMillis() - l) + " ms");
System.out.println("--- --- ---");
System.out.println(t);
- }
+ }
public String transform(final String content) {
return parse(content, null);
@@ -105,7 +105,7 @@ public class knwikiParser implements wikiParser {
return parse(new String(content, encoding), null);
}
- private String parse(String text, final String publicAddress) {
+ private String parse(String text, final String publicAddress) {
tokens = new Token[] {
new SimpleToken('=', '=', new String[][] { null, { "h2" }, { "h3" }, { "h4" } }, true),
new SimpleToken('\'', '\'', new String[][] { null, { "i" }, { "b" }, null, { "b", "i" } }, false),
@@ -130,124 +130,130 @@ public class knwikiParser implements wikiParser {
}
r.add("hr");
BEs = r.toArray(new String[r.size()]);
-
+
Text[] tt = Text.split2Texts(text, "[=", "=]");
for (int i=0; i", "");
for (int i=0; i");
- }
+ text = new String(m.appendTail(stringBuffer));
+ }
+ }
+ return text.replaceAll("----", "
");
+ }
- private String replaceBRs(final String text) {
- final StringBuilder sb = new StringBuilder(text.length());
- final String[] tt = text.split("\n");
- boolean replace;
- for (int i=0, j; i")) { replace = false; break; }
- sb.append(tt[i]);
+ private String replaceBRs(final String text) {
+ final StringBuilder stringBuffer = new StringBuilder(text.length());
+ final String[] tt = text.split("\n");
+ boolean replace;
+ for (int i=0, j; i")) { replace = false; break; }
+ stringBuffer.append(tt[i]);
if (i < tt.length - 1) {
- if (replace) sb.append("
");
- sb.append("\n");
+ if (replace) stringBuffer.append("
");
+ stringBuffer.append("\n");
}
- }
- return new String(sb);
- }
+ }
+ return new String(stringBuffer);
+ }
- private static class Text {
-
- public static final String escapeNewLine = "@";
-
- private String text;
- private final boolean nl;
-
- public Text(final String text, final boolean escaped, final boolean newLineBefore) {
- this.text = text;
- this.nl = newLineBefore;
+ private static class Text {
+
+ public static final String escapeNewLine = "@";
+
+ private String text;
+ private final boolean nl;
+
+ public Text(final String text, final boolean escaped, final boolean newLineBefore) {
+ this.text = text;
+ this.nl = newLineBefore;
}
- public String setText(final String text) {
- if (this.nl)
- this.text = text.substring(escapeNewLine.length());
- else
- this.text = text;
- return this.text;
- }
+ public String setText(final String text) { // replaces the stored text and returns the value that was actually stored
+ if (this.nl) {
+ this.text = text.substring(escapeNewLine.length()); // drops as many leading characters as the new-line escape marker is long (presumably the marker getText() prepends - confirm with callers)
+ } else {
+ this.text = text;
+ }
+ return this.text;
+ }
- public String getTextPlain() { return this.text; }
- public String getText() {
- if (this.nl)
- return escapeNewLine + this.text;
- return this.text;
- }
+ public String getTextPlain() { // returns the stored text as is, without any new-line escape marker
+ return this.text;
+ }
+
+ public String getText() { // returns the stored text, prefixed with escapeNewLine when the nl flag is set
+ if (this.nl) {
+ return escapeNewLine + this.text;
+ }
+ return this.text;
+ }
- public String toString() { return this.text; }
+ @Override
+ public String toString() { // string form is the plain stored text (no escape marker)
+ return this.text;
+ }
- static Text[] split2Texts(final String text, final String escapeBegin, final String escapeEnd) {
- if (text == null) return null;
- if (text.length() < 2) return new Text[] { new Text(text, false, true) };
-
- final int startLen = escapeBegin.length();
+ static Text[] split2Texts(final String text, final String escapeBegin, final String escapeEnd) {
+
+ if (text == null) return null;
+
+ if (text.length() < 2) return new Text[] {new Text(text, false, true) };
+
+ final int startLen = escapeBegin.length();
final int endLen = escapeEnd.length();
- final ArrayList r = new ArrayList();
- boolean escaped = text.startsWith(escapeBegin);
- if (escaped) r.add(new Text("", false, true));
- int i, j = 0;
- while ((i = text.indexOf((escaped) ? escapeEnd : escapeBegin, j)) > -1) {
- r.add(resolve2Text(text, escaped, (j > 0) ? j + ((escaped) ? startLen : endLen) : 0, i, escapeEnd));
- j = i;
- escaped = !escaped;
- }
- r.add(resolve2Text(text, escaped, (escaped) ? j : (j > 0) ? j + endLen : 0, -1, escapeEnd));
- return r.toArray(new Text[r.size()]);
- }
+ final ArrayList r = new ArrayList();
+ boolean escaped = text.startsWith(escapeBegin);
+ if (escaped) r.add(new Text("", false, true));
+ int i, j = 0;
+ while ((i = text.indexOf((escaped) ? escapeEnd : escapeBegin, j)) > -1) {
+ r.add(resolve2Text(text, escaped, (j > 0) ? j + ((escaped) ? startLen : endLen) : 0, i, escapeEnd));
+ j = i;
+ escaped = !escaped;
+ }
+ r.add(resolve2Text(text, escaped, (escaped) ? j : (j > 0) ? j + endLen : 0, -1, escapeEnd));
+ return r.toArray(new Text[r.size()]);
+ }
- private static Text resolve2Text(final String text, final boolean escaped, final int from, int to, final String escapeEnd) {
- if (to == -1) to = text.length();
- return new Text(
- text.substring(from, to),
- escaped,
- from < escapeEnd.length() + 2 || (!escaped && text.charAt(from - escapeEnd.length() - 1) == '\n'));
- }
+ private static Text resolve2Text(final String text, final boolean escaped, final int from, int to, final String escapeEnd) { // wraps text[from, to) in a Text and decides its newLineBefore flag
+ if (to == -1) to = text.length(); // -1 means "up to the end of the input"
+ return new Text(
+ text.substring(from, to),
+ escaped,
+ from < escapeEnd.length() + 2 || (!escaped && text.charAt(from - escapeEnd.length() - 1) == '\n')); // newLineBefore: true when from is within the first escapeEnd.length() + 1 characters, or for an unescaped segment when the character escapeEnd.length() + 1 positions before from is '\n'
+ }
- static String mergeTexts(final Text[] texts) {
- final StringBuilder sb = new StringBuilder(2000);
- for (int n=0; n < texts.length; n++)
- sb.append(texts[n].getTextPlain());
- return new String(sb);
- }
- }
+ static String mergeTexts(final Text[] texts) { // concatenates the plain text of every element, in array order
+ final StringBuilder sb = new StringBuilder(2000); // 2000 is only the initial capacity; the builder grows as needed
+ for (int n=0; n < texts.length; n++) {
+ sb.append(texts[n].getTextPlain()); // plain form, i.e. without the new-line escape marker
+ }
+ return new String(sb);
+ }
+ }
}
diff --git a/source/de/anomic/data/wiki/tokens/AbstractToken.java b/source/de/anomic/data/wiki/tokens/AbstractToken.java
index e7b4b9013..bf5d5c97a 100644
--- a/source/de/anomic/data/wiki/tokens/AbstractToken.java
+++ b/source/de/anomic/data/wiki/tokens/AbstractToken.java
@@ -8,9 +8,9 @@
//
// This file is contributed by Franz Brausze
//
-// $LastChangedDate: $
-// $LastChangedRevision: $
-// $LastChangedBy: $
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -32,20 +32,32 @@ import de.anomic.data.wiki.wikiParserException;
public abstract class AbstractToken implements Token {
- protected String text = null;
- protected String markup = null;
- protected boolean parsed = false;
-
- protected abstract void parse() throws wikiParserException;
-
- public String getMarkup() throws wikiParserException {
- if (this.text == null)
- throw new IllegalArgumentException();
- if (!this.parsed) parse();
- return this.markup;
- }
+ protected String text = null;
+ protected String markup = null;
+ protected boolean parsed = false;
+
+ protected abstract void parse() throws wikiParserException;
+
+ public String getMarkup() throws wikiParserException { // returns the markup field, calling parse() first while the parsed flag is false
+ if (this.text == null) {
+ throw new IllegalArgumentException(); // no text is set, so there is nothing to parse
+ }
+ if (!this.parsed) {
+ parse(); // subclasses are expected to fill markup and set parsed here
+ }
+ return this.markup;
+ }
- public String getText() { return this.text; }
+ public String getText() { // returns the raw text held by this token; may be null
+ return this.text;
+ }
- public String toString() { try { return getMarkup(); } catch (final wikiParserException e) { return null; } }
+ @Override
+ public String toString() { // string form of a token is its generated markup
+ try {
+ return getMarkup();
+ } catch (final wikiParserException e) {
+ return null; // NOTE(review): a parse failure is swallowed and surfaces as a null string, so callers must be prepared for null
+ }
+ }
}
diff --git a/source/de/anomic/data/wiki/tokens/DefinitionListToken.java b/source/de/anomic/data/wiki/tokens/DefinitionListToken.java
index 27dd80ad4..eff7648d7 100644
--- a/source/de/anomic/data/wiki/tokens/DefinitionListToken.java
+++ b/source/de/anomic/data/wiki/tokens/DefinitionListToken.java
@@ -8,9 +8,9 @@
//
// This file is contributed by Franz Brausze
//
-// $LastChangedDate: $
-// $LastChangedRevision: $
-// $LastChangedBy: $
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -30,39 +30,45 @@ package de.anomic.data.wiki.tokens;
public class DefinitionListToken extends ListToken {
- //private static final String[] blockElements = { "dl", "dt", "dd" };
+ //private static final String[] blockElements = { "dl", "dt", "dd" };
- public DefinitionListToken() {
- super(';', null, null);
- }
+ public DefinitionListToken() { // a list token whose lines start with ';'
+ super(';', null, null); // no list block element and no list element are passed to ListToken
+ }
- protected StringBuilder parse(final String[] t, final int depth, final StringBuilder sb) {
- sb.append("\n");
- while (super.aktline < t.length && getGrade(t[super.aktline]) >= depth) {
- for (int j=0; j");
+ @Override
+ protected StringBuilder parse(final String[] t, final int depth, final StringBuilder sb) {
+ sb.append("\n");
+ while (super.aktline < t.length && getGrade(t[super.aktline]) >= depth) {
+ for (int j=0; j");
- if (getGrade(t[super.aktline]) > depth) {
- parse(t, depth + 1, sb);
- } else {
- sb.append(t[super.aktline].substring(depth + 1).replaceFirst(":", "- "));
- }
+ if (getGrade(t[super.aktline]) > depth) {
+ parse(t, depth + 1, sb);
+ } else {
+ sb.append(t[super.aktline].substring(depth + 1).replaceFirst(":", "
- "));
+ }
- sb.append("");
- if (t[super.aktline].indexOf(':') == -1 || getGrade(t[super.aktline]) > depth)
- sb.append("dt");
- else
- sb.append("dd");
- sb.append(">\n");
- super.aktline++;
- }
- for (int j=0; j");
- super.aktline--;
- return sb;
- }
+ sb.append("");
+ if (t[super.aktline].indexOf(':') == -1 || getGrade(t[super.aktline]) > depth) {
+ sb.append("dt");
+ } else {
+ sb.append("dd");
+ }
+ sb.append(">\n");
+ super.aktline++;
+ }
+ for (int j=0; j");
+ super.aktline--;
+ return sb;
+ }
- public String[] getBlockElementNames() {
- return blockElements;
- }
+ @Override
+ public String[] getBlockElementNames() { // exposes the blockElements array as is (no copy is made)
+ return blockElements;
+ }
+
}
diff --git a/source/de/anomic/data/wiki/tokens/LinkToken.java b/source/de/anomic/data/wiki/tokens/LinkToken.java
index 525ea7d44..2c65cab5a 100644
--- a/source/de/anomic/data/wiki/tokens/LinkToken.java
+++ b/source/de/anomic/data/wiki/tokens/LinkToken.java
@@ -8,9 +8,9 @@
//
// This file is contributed by Franz Brausze
//
-// $LastChangedDate: $
-// $LastChangedRevision: $
-// $LastChangedBy: $
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -41,96 +41,102 @@ import de.anomic.search.Switchboard;
public class LinkToken extends AbstractToken {
- private static final int IMG = 0;
+ private static final int IMG = 0;
private static final int BKM = 1;
- private static final int INT = 2;
- private static final int EXT = 3;
+ private static final int INT = 2;
+ private static final int EXT = 3;
- private static final Pattern imgPattern = Pattern.compile(
- "\\[\\[" + // begin
- "(Image:([^\\]|]|\\][^\\]])*)" + // "Image:" + URL
- "(" + //
- "(\\|(bottom|left|center|right|middle|top))?" + // optional align
- "(\\|(([^\\]]|\\][^\\]])*))" + // description
- ")?" + //
- "\\]\\]"); // end
+ private static final Pattern imgPattern = Pattern.compile(
+ "\\[\\[" + // begin
+ "(Image:([^\\]|]|\\][^\\]])*)" + // "Image:" + URL
+ "(" + //
+ "(\\|(bottom|left|center|right|middle|top))?" + // optional align
+ "(\\|(([^\\]]|\\][^\\]])*))" + // description
+ ")?" + //
+ "\\]\\]"); // end
private static final Pattern bkmPattern = Pattern.compile(
- "\\[\\[" + // begin
- "(Bookmark:([^\\]|]|\\][^\\]])*)" + // "Bookmark:" + URL
- "(\\|(([^\\]]|\\][^\\]])*?))?" + // optional description
- "\\]\\]"); // end
-
- private static final Pattern intPattern = Pattern.compile(
- "\\[\\[" + // begin
- "(([^\\]|]|\\][^\\]])*?)" + // wiki-page
- "(\\|(([^\\]]|\\][^\\]])*?))?" + // optional desciption
- "\\]\\]"); // end
+ "\\[\\[" + // begin
+ "(Bookmark:([^\\]|]|\\][^\\]])*)" + // "Bookmark:" + URL
+ "(\\|(([^\\]]|\\][^\\]])*?))?" + // optional description
+ "\\]\\]"); // end
+
+ private static final Pattern intPattern = Pattern.compile(
+ "\\[\\[" + // begin
+ "(([^\\]|]|\\][^\\]])*?)" + // wiki-page
+ "(\\|(([^\\]]|\\][^\\]])*?))?" + // optional description
+ "\\]\\]"); // end
- private static final Pattern extPattern = Pattern.compile(
- "\\[" + // begin
- "([^\\] ]*)" + // URL
- "( ([^\\]]*))?" + // optional description
- "\\]"); // end
+ private static final Pattern extPattern = Pattern.compile(
+ "\\[" + // begin
+ "([^\\] ]*)" + // URL
+ "( ([^\\]]*))?" + // optional description
+ "\\]"); // end
- private static final Pattern[] patterns = new Pattern[] {
- imgPattern, bkmPattern, intPattern, extPattern };
+ private static final Pattern[] patterns = new Pattern[] { imgPattern, bkmPattern, intPattern, extPattern };
- private final String localhost;
- private final String wikiPath;
+ private final String localhost;
+ private final String wikiPath;
private final Switchboard sb;
- private int patternNr = 0;
+ private int patternNr = 0;
- public LinkToken(final String localhost, final String wikiPath, final Switchboard sb) {
- this.localhost = localhost;
- this.wikiPath = wikiPath;
+ public LinkToken(final String localhost, final String wikiPath, final Switchboard sb) { // stores the three values unchanged; no validation is done
+ this.localhost = localhost; // used as the host part when internal wiki links are built
+ this.wikiPath = wikiPath; // appended after the host, before the page name, for internal wiki links
this.sb = sb;
+ }
- protected void parse() throws wikiParserException {
- final StringBuilder sb = new StringBuilder();
- if (this.patternNr < 0 || this.patternNr >= patterns.length)
+ protected void parse() throws wikiParserException {
+ final StringBuilder stringBuilder = new StringBuilder();
+
+ if (this.patternNr < 0 || this.patternNr >= patterns.length) {
throw new wikiParserException("patternNr was not set correctly: " + this.patternNr);
- final Matcher m = patterns[this.patternNr].matcher(this.text);
- if (!m.find())
+ }
+
+ final Matcher m = patterns[this.patternNr].matcher(this.text);
+
+ if (!m.find()) {
throw new wikiParserException("Didn't find match for: (" + this.patternNr + ") " + this.text);
+ }
switch (this.patternNr) {
- case IMG:
- sb.append("
");
- break;
+ case IMG:
+ stringBuilder.append("
");
+ break;
case BKM:
final Link[] links = getLinksFromBookmarkTag(m.group(2));
if (links == null) {
- sb.append("Couldn't find Bookmark-Tag '").append(m.group(2)).append("'.");
+ stringBuilder.append("Couldn't find Bookmark-Tag '").append(m.group(2)).append("'.");
} else {
- appendLinks(links, sb);
+ appendLinks(links, stringBuilder);
}
break;
- case INT:
- sb.append(new Link(
- "http://" + this.localhost + "/" + this.wikiPath + m.group(1),
- m.group(4),
- (m.group(4) == null) ? m.group(1) : m.group(4)
- ).toString());
- break;
+ case INT:
+ stringBuilder.append(new Link(
+ "http://" + this.localhost + "/" + this.wikiPath + m.group(1),
+ m.group(4),
+ (m.group(4) == null) ? m.group(1) : m.group(4)
+ ).toString());
+ break;
- case EXT:
- sb.append(new Link(
- m.group(1),
- m.group(3),
- (m.group(3) == null) ? m.group(1) : m.group(3)
- ).toString());
- break;
- }
- this.parsed = true;
- this.markup = new String(sb);
- }
+ case EXT:
+ stringBuilder.append(new Link(
+ m.group(1),
+ m.group(3),
+ (m.group(3) == null) ? m.group(1) : m.group(3)
+ ).toString());
+ break;
+ }
+ this.parsed = true;
+ this.markup = new String(stringBuilder);
+ }
private String formatHref(final String link) {
if (link.indexOf("://") == -1) { // DATA/HTDOCS-link
@@ -171,25 +177,35 @@ public class LinkToken extends AbstractToken {
this.desc = desc;
}
+ @Override
public String toString() {
- final StringBuilder sb = new StringBuilder();
- sb.append("");
- if (this.desc == null) sb.append(this.href); else sb.append(this.desc);
- sb.append("");
- return new String(sb);
+ final StringBuilder stringBuilder = new StringBuilder();
+ stringBuilder.append("");
+ if (this.desc == null) stringBuilder.append(this.href); else stringBuilder.append(this.desc);
+ stringBuilder.append("");
+ return new String(stringBuilder);
}
}
- public String[] getBlockElementNames() { return null; }
- public Pattern[] getRegex() { return patterns; }
+ public String[] getBlockElementNames() { // link tokens report no block element names
+ return null;
+ }
+
+ public Pattern[] getRegex() { // returns the shared static pattern array (image, bookmark, internal, external - in that order)
+ return patterns;
+ }
- public boolean setText(final String text, final int patternNr) {
- this.text = text;
- this.patternNr = patternNr;
- this.parsed = false;
- if (text == null) { this.markup = null; this.patternNr = -1; }
- return true;
- }
+ public boolean setText(final String text, final int patternNr) { // sets the text to parse and the index of the pattern to parse it with
+ this.text = text;
+ this.patternNr = patternNr;
+ this.parsed = false; // forces a fresh parse() on the next getMarkup()
+ if (text == null) {
+ this.markup = null; // clearing the token: drop the old markup
+ this.patternNr = -1; // -1 is outside the valid pattern index range
+ }
+ return true; // always true, also for null text
+ }
+
}
diff --git a/source/de/anomic/data/wiki/tokens/ListToken.java b/source/de/anomic/data/wiki/tokens/ListToken.java
index 5062f9e62..7fc6bca52 100644
--- a/source/de/anomic/data/wiki/tokens/ListToken.java
+++ b/source/de/anomic/data/wiki/tokens/ListToken.java
@@ -8,9 +8,9 @@
//
// This file is contributed by Franz Brausze
//
-// $LastChangedDate: $
-// $LastChangedRevision: $
-// $LastChangedBy: $
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -33,111 +33,97 @@ import java.util.regex.Pattern;
public class ListToken extends AbstractToken {
- protected final String[] blockElements;
-
- protected final char firstChar;
- protected final String listBlockElement;
- protected final String listElement;
- protected final boolean recursion;
- protected final Pattern[] pattern;
-
- protected int aktline = 0;
-
- public ListToken(final char firstChar, final String listBlockElement) {
- this.firstChar = firstChar;
- this.listBlockElement = listBlockElement;
- this.listElement = "li";
- this.recursion = true;
- this.pattern = new Pattern[] { Pattern.compile("^[" + firstChar + "]([^\n]|\n[" + firstChar + "])*", Pattern.MULTILINE) };
- final ArrayList r = new ArrayList();
- if (this.listBlockElement != null) {
- if (this.recursion) r.add(this.listBlockElement);
- if (this.listElement != null) r.add(this.listElement);
- }
- blockElements = r.toArray(new String[r.size()]);
- }
+ protected final String[] blockElements;
+
+ protected final char firstChar;
+ protected final String listBlockElement;
+ protected final String listElement;
+ protected final boolean recursion;
+ protected final Pattern[] pattern;
+
+ protected int aktline = 0;
+
+ public ListToken(final char firstChar, final String listBlockElement) { // convenience overload: uses "li" as the list element
+ this(firstChar, listBlockElement, "li");
+ }
- public ListToken(final char firstChar, final String listBlockElement, final String listElement) {
- this.firstChar = firstChar;
- this.listBlockElement = listBlockElement;
- this.listElement = listElement;
- this.recursion = true;
- this.pattern = new Pattern[] { Pattern.compile("^[" + firstChar + "]([^\n]|\n[" + firstChar + "])*", Pattern.MULTILINE) };
- final ArrayList r = new ArrayList();
- if (this.listBlockElement != null) {
- if (this.recursion) r.add(this.listBlockElement);
- if (this.listElement != null) r.add(this.listElement);
- }
- blockElements = r.toArray(new String[r.size()]);
- }
+ public ListToken(final char firstChar, final String listBlockElement, final String listElement) { // convenience overload: recursion is switched on
+ this(firstChar, listBlockElement, listElement, true);
+ }
- public ListToken(final char firstChar, final String listBlockElement, final String listElement, final boolean recursion) {
- this.firstChar = firstChar;
- this.listBlockElement = listBlockElement;
- this.listElement = listElement;
- this.recursion = recursion;
- this.pattern = new Pattern[] { Pattern.compile("^[" + firstChar + "]([^\n]|\n[" + firstChar + "])*", Pattern.MULTILINE) };
- final ArrayList r = new ArrayList();
- if (this.listBlockElement != null) {
- if (this.recursion) r.add(this.listBlockElement);
- if (this.listElement != null) r.add(this.listElement);
- }
- blockElements = r.toArray(new String[r.size()]);
- }
+ public ListToken(final char firstChar, final String listBlockElement, final String listElement, final boolean recursion) { // primary constructor; the shorter overloads delegate to it
+ this.firstChar = firstChar;
+ this.listBlockElement = listBlockElement;
+ this.listElement = listElement;
+ this.recursion = recursion;
+ this.pattern = new Pattern[] { Pattern.compile("^[" + firstChar + "]([^\n]|\n[" + firstChar + "])*", Pattern.MULTILINE) }; // a line starting with firstChar, continued across every following line that also starts with firstChar
+ final ArrayList r = new ArrayList();
+ if (this.listBlockElement != null) { // without a list block element no block element names are collected at all
+ if (this.recursion) {
+ r.add(this.listBlockElement); // the block element is only listed when recursion is enabled
+ }
+ if (this.listElement != null) {
+ r.add(this.listElement);
+ }
+ }
+ blockElements = r.toArray(new String[r.size()]);
+ }
- protected void parse() {
- final StringBuilder sb = new StringBuilder(this.text.length());
- parse(this.text.split("\n"), 0, sb);
- this.markup = new String(sb);
- this.parsed = true;
- }
+ protected void parse() { // converts the whole stored text into markup
+ final StringBuilder sb = new StringBuilder(this.text.length()); // text length is only the initial capacity
+ parse(this.text.split("\n"), 0, sb); // line-wise parse, starting at depth 0
+ this.markup = new String(sb);
+ this.parsed = true;
+ }
- protected StringBuilder parse(final String[] t, final int depth, final StringBuilder sb) {
- if (this.listBlockElement != null) sb.append("<").append(this.listBlockElement).append(">\n");
- while (this.aktline < t.length && getGrade(t[this.aktline]) >= depth) {
- if (recursion) for (int j=0; j");
-
- if (this.recursion && getGrade(t[this.aktline]) > depth) {
- parse(t, depth + 1, sb);
- } else {
- sb.append(t[this.aktline].substring(depth + 1));
- }
-
- if (this.listElement != null) sb.append("").append(this.listElement).append(">");
- sb.append("\n");
- this.aktline++;
- }
- if (this.recursion) for (int j=0; j");
- this.aktline--;
- return sb;
- }
+ protected StringBuilder parse(final String[] t, final int depth, final StringBuilder sb) {
+ if (this.listBlockElement != null) sb.append("<").append(this.listBlockElement).append(">\n");
+ while (this.aktline < t.length && getGrade(t[this.aktline]) >= depth) {
+ if (recursion) for (int j=0; j");
+
+ if (this.recursion && getGrade(t[this.aktline]) > depth) {
+ parse(t, depth + 1, sb);
+ } else {
+ sb.append(t[this.aktline].substring(depth + 1));
+ }
+
+ if (this.listElement != null) sb.append("").append(this.listElement).append(">");
+ sb.append("\n");
+ this.aktline++;
+ }
+ if (this.recursion) for (int j=0; j");
+ this.aktline--;
+ return sb;
+ }
- protected int getGrade(final String t) {
- int i = 0;
- for (i=0; i r = new ArrayList();
- int j;
- for (i = 0; i < definitionList.length; i++)
- if (definitionList[i] != null)
- for (j = 0; j < definitionList[i].length; j++)
- r.add(definitionList[i][j]);
- this.blockElements = r.toArray(new String[r.size()]);
- } else {
- this.blockElements = null;
- }
-
- for (i=0; i r = new ArrayList();
+ int j;
+ for (i = 0; i < definitionList.length; i++)
+ if (definitionList[i] != null)
+ for (j = 0; j < definitionList[i].length; j++)
+ r.add(definitionList[i][j]);
+ this.blockElements = r.toArray(new String[r.size()]);
+ } else {
+ this.blockElements = null;
+ }
+
+ for (i=0; i= this.definitionList.length || (e = this.definitionList[this.grade]) == null)
- throw new wikiParserException("Token not defined for grade: " + this.grade);
- this.markup = getMarkup(e);
- this.parsed = true;
- }
+ @Override
+ public String getMarkup() throws wikiParserException { // returns the markup, deriving content and parsing first where that has not happened yet
+ if (this.content == null) {
+ if (this.text == null) {
+ throw new IllegalArgumentException(); // neither content nor text is available
+ }
+ setText(this.text, 0); // re-runs the match on the stored text to fill grade and content; its boolean result is ignored here
+ }
+ if (!this.parsed) parse();
+ return this.markup;
+ }
+
+ protected void parse() throws wikiParserException { // builds the markup from the definition registered for the current grade
+ String[] e;
+ if (this.grade >= this.definitionList.length || (e = this.definitionList[this.grade]) == null) // grade beyond the list, or no definition stored at that index
+ throw new wikiParserException("Token not defined for grade: " + this.grade);
+ this.markup = getMarkup(e);
+ this.parsed = true;
+ }
- protected String getMarkup(final String[] es) {
- return getMarkup(es, false) + this.content + getMarkup(es, true);
- }
+ protected String getMarkup(final String[] es) { // wraps the content: opening tags for es, then content, then closing tags for es
+ return getMarkup(es, false) + this.content + getMarkup(es, true);
+ }
- protected String getMarkup(final String[] es, final boolean closing) {
- final StringBuilder result = new StringBuilder();
- // backwards if closing
- for (
- int i = (closing) ? es.length - 1 : 0, j;
- (closing && i >= 0) ^ (!closing && i < es.length);
- i += (closing) ? -1 : +1
- ) {
- result.append("<");
- if (closing) {
- result.append("/");
- if ((j = es[i].indexOf(' ')) > -1) {
- result.append(es[i].substring(0, j));
- } else {
- result.append(es[i]);
- }
- } else {
- result.append(es[i]);
- }
- result.append(">");
- }
- return new String(result);
- }
+ protected String getMarkup(final String[] es, final boolean closing) { // renders es as a run of opening tags, or as closing tags in reverse order when closing is true
+ final StringBuilder result = new StringBuilder();
+ // backwards if closing
+ for (
+ int i = (closing) ? es.length - 1 : 0, j; // start at the last entry when closing, at the first otherwise
+ (closing && i >= 0) ^ (!closing && i < es.length); // only one operand can be true at a time, so this is the bounds check for the active direction
+ i += (closing) ? -1 : +1
+ ) {
+ result.append("<");
+ if (closing) {
+ result.append("/");
+ if ((j = es[i].indexOf(' ')) > -1) {
+ result.append(es[i].substring(0, j)); // a closing tag keeps only the part before the first space (presumably the element name without attributes)
+ } else {
+ result.append(es[i]);
+ }
+ } else {
+ result.append(es[i]); // an opening tag emits the entry verbatim
+ }
+ result.append(">");
+ }
+ return new String(result);
+ }
- public boolean setText(final String text, final int patternNr) {
- this.text = text;
- this.markup = null;
- this.parsed = false;
- if (text != null) {
- final Matcher m = getRegex()[0].matcher(text);
- if (
- (m.matches()) &&
- (m.group(1).length() == m.group(3).length()) &&
- (definitionList.length >= m.group(1).length()) &&
- (definitionList[m.group(1).length() - 1] != null)
- ) {
- this.grade = m.group(1).length() - 1;
- this.content = m.group(2);
- return true;
- }
- }
- return false;
- }
+ public boolean setText(final String text, final int patternNr) { // stores text and tries to derive grade and content from it; patternNr is not used in this body
+ this.text = text;
+ this.markup = null;
+ this.parsed = false; // text, markup and parsed are reset even when the method ends up returning false
+ if (text != null) {
+ final Matcher m = getRegex()[0].matcher(text);
+ if (
+ (m.matches()) &&
+ (m.group(1).length() == m.group(3).length()) && // groups 1 and 3 must be equally long (presumably the opening and closing delimiter runs - confirm against the pattern)
+ (definitionList.length >= m.group(1).length()) && // that length must map to an index inside definitionList
+ (definitionList[m.group(1).length() - 1] != null) // and a definition must exist at that index
+ ) {
+ this.grade = m.group(1).length() - 1; // grade is the zero-based index into definitionList
+ this.content = m.group(2);
+ return true;
+ }
+ }
+ return false; // null text or no qualifying match; grade and content keep their previous values
+ }
- public Pattern[] getRegex() { return this.pattern; }
- public String[] getBlockElementNames() { return this.blockElements; }
+ public Pattern[] getRegex() { // returns this token's pattern array as is (no copy is made)
+ return this.pattern;
+ }
+
+ public String[] getBlockElementNames() { // returns the blockElements array as is; the visible constructor fragment can set it to null
+ return this.blockElements;
+ }
+
}
diff --git a/source/de/anomic/data/wiki/tokens/TableToken.java b/source/de/anomic/data/wiki/tokens/TableToken.java
index f917983ab..8e5cbbc87 100644
--- a/source/de/anomic/data/wiki/tokens/TableToken.java
+++ b/source/de/anomic/data/wiki/tokens/TableToken.java
@@ -8,9 +8,9 @@
//
// This file is contributed by Franz Brausze
//
-// $LastChangedDate: $
-// $LastChangedRevision: $
-// $LastChangedBy: $
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -34,46 +34,49 @@ import java.util.regex.Pattern;
public class TableToken extends AbstractToken {
- private static final Pattern[] pattern = new Pattern[] {
- Pattern.compile(
- "\\{\\|" + // "{|"
- "([^\n]|\n\\|[|-])*\n" + // new line must start with "||" or "|-"
- "\\|\\}") // "|}"
- };
- private static final String[] blockElementNames = new String[] { "table", "tr", "td" };
+ /**
+ * The single pattern of this token: an opening "{|", followed by any number of
+ * characters where every line break must be directly followed by "||" or "|-",
+ * followed by a line break and the closing "|}".
+ */
+ private static final Pattern[] pattern = new Pattern[] {
+ Pattern.compile(
+ "\\{\\|" + // "{|"
+ "([^\n]|\n\\|[|-])*\n" + // new line must start with "||" or "|-"
+ "\\|\\}") // "|}"
+ };
+
+ private static final String[] blockElementNames = new String[] { "table", "tr", "td" };
- protected void parse() {
- final String[] t = text.split("\n");
- String[] tds;
- final StringBuilder sb = new StringBuilder();
- sb.append(" 2) sb.append(parseTableProperties(t[0].substring(2)));
- sb.append(">\n");
- boolean trOpen = false;
- for (int i=1, j, a; i\n");
- trOpen = (i < t.length - 2);
- if (trOpen) sb.append("\t\n");
- } else if (t[i].startsWith("||")) {
- tds = t[i].split("\\|\\|");
- for (j=0; j (a = tds[j].indexOf('|')) + 1) { // don't print empty td's
- sb.append("\t\t -1) sb.append(parseTableProperties(tds[j].substring(0, a)));
- sb.append(">").append(tds[j].substring(a + 1)).append(" | \n");
- }
- }
- }
- }
- if (trOpen) sb.append("\t
\n");
- this.markup = new String(sb.append("
"));
- this.parsed = true;
- }
+ /**
+ * Builds the markup for the stored text: splits the text into lines, appends the
+ * generated output to a StringBuilder, stores the result in this.markup and sets
+ * this.parsed to true.
+ * Lines starting with "||" are split on "||" into cells; the part of a cell before its
+ * first '|' is passed to parseTableProperties, as is the first line from offset 2 on.
+ * NOTE(review): several statements in this hunk are syntactically incomplete as shown
+ * (unbalanced string literals, truncated for-loop headers, stray unprefixed lines).
+ * It looks like the tag literals were lost when this diff was extracted —
+ * verify against the repository before relying on this text.
+ */
+ protected void parse() {
+ final String[] t = text.split("\n");
+ String[] tds;
+ final StringBuilder sb = new StringBuilder();
+ sb.append(" 2) sb.append(parseTableProperties(t[0].substring(2)));
+ sb.append(">\n");
+ boolean trOpen = false;
+ for (int i=1, j, a; i\n");
+ trOpen = (i < t.length - 2);
+ if (trOpen) sb.append("\t\n");
+ } else if (t[i].startsWith("||")) {
+ tds = t[i].split("\\|\\|");
+ for (j=0; j (a = tds[j].indexOf('|')) + 1) { // don't print empty td's
+ sb.append("\t\t -1) sb.append(parseTableProperties(tds[j].substring(0, a)));
+ sb.append(">").append(tds[j].substring(a + 1)).append(" | \n");
+ }
+ }
+ }
+ }
+ if (trOpen) sb.append("\t
\n");
+ this.markup = new String(sb.append("
"));
+ this.parsed = true;
+ }
// from de.anomic.data.wikiCode.java.parseTableProperties, modified by [FB]
- private static final String[] tps = { "rowspan", "colspan", "vspace", "hspace", "cellspacing", "cellpadding", "border" };
+ private static final String[] tps = { "rowspan", "colspan", "vspace", "hspace", "cellspacing", "cellpadding", "border" };
+
private static final HashMap ps = new HashMap();
+
static {
Arrays.sort(tps);
String[] array;
@@ -87,7 +90,7 @@ public class TableToken extends AbstractToken {
ps.put("align", array);
}
- // contributed by [MN]
+ // contributed by [MN]
/** This method takes possible table properties and tests if they are valid.
* Valid in this case means if they are a property for the table, tr or td
* tag as stated in the HTML Pocket Reference by Jennifer Niederst (1st edition)
@@ -102,21 +105,21 @@ public class TableToken extends AbstractToken {
String[] posVals;
final int numberofvalues = values.length;
for (int i=0; i= 0) ||
- (Arrays.binarySearch(tps, key) >= 0 && value.matches("\\d+"))
- ) {
- addPair(key, value, sb);
- }
- }
+ value = values[++i].trim();
+ if (
+ (key.equals("summary")) ||
+ (key.equals("bgcolor") && value.matches("#{0,1}[0-9a-fA-F]{1,6}|[a-zA-Z]{3,}")) ||
+ ((key.equals("width") || key.equals("height")) && value.matches("\\d+%{0,1}")) ||
+ ((posVals = ps.get(key)) != null && Arrays.binarySearch(posVals, value) >= 0) ||
+ (Arrays.binarySearch(tps, key) >= 0 && value.matches("\\d+"))
+ ) {
+ addPair(key, value, sb);
+ }
+ }
}
return sb;
}
@@ -125,13 +128,19 @@ public class TableToken extends AbstractToken {
return sb.append(" ").append(key).append("=\"").append(value).append("\"");
}
- public Pattern[] getRegex() { return pattern; }
- public String[] getBlockElementNames() { return blockElementNames; }
+ /**
+ * Returns the static pattern array of this class; no copy is made.
+ * @return the array containing the table pattern
+ */
+ public Pattern[] getRegex() {
+ return pattern;
+ }
+
+ /**
+ * Returns the static blockElementNames array ("table", "tr", "td"); no copy is made.
+ * @return the block element names of this token
+ */
+ public String[] getBlockElementNames() {
+ return blockElementNames;
+ }
- public boolean setText(final String text, final int patternNr) {
- this.text = text;
- this.parsed = false;
- this.markup = null;
- return true;
- }
+ /**
+ * Stores the given text and resets the parsed flag and the cached markup.
+ * No matching or validation is performed here.
+ * @param text the text to store
+ * @param patternNr not read by this method
+ * @return always true
+ */
+ public boolean setText(final String text, final int patternNr) {
+ this.text = text;
+ this.parsed = false;
+ this.markup = null;
+ return true;
+ }
+
}
diff --git a/source/de/anomic/data/wiki/tokens/Token.java b/source/de/anomic/data/wiki/tokens/Token.java
index 89d6271b8..c12ae0cc6 100644
--- a/source/de/anomic/data/wiki/tokens/Token.java
+++ b/source/de/anomic/data/wiki/tokens/Token.java
@@ -8,9 +8,9 @@
//
// This file is contributed by Franz Brausze
//
-// $LastChangedDate: $
-// $LastChangedRevision: $
-// $LastChangedBy: $
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -34,9 +34,9 @@ import de.anomic.data.wiki.wikiParserException;
public interface Token {
- public Pattern[] getRegex();
- public boolean setText(String text, int patternNr);
- public String getText();
- public String getMarkup() throws wikiParserException;
- public String[] getBlockElementNames();
+ /** Returns the regular expressions of this token. */
+ public Pattern[] getRegex();
+ /**
+ * Hands a piece of text to the token.
+ * @param text the text to store
+ * @param patternNr presumably the index into getRegex() of the pattern that matched — TODO confirm against callers
+ * @return implementation-defined flag; TableToken always returns true — presumably "text accepted", confirm
+ */
+ public boolean setText(String text, int patternNr);
+ /** Presumably returns the text last passed to setText — confirm in the implementing classes. */
+ public String getText();
+ /**
+ * Returns the markup generated for this token.
+ * @throws wikiParserException declared for implementations; the conditions are not visible in this interface
+ */
+ public String getMarkup() throws wikiParserException;
+ /** Returns the names of the block elements of this token; TableToken returns "table", "tr" and "td". */
+ public String[] getBlockElementNames();
}
diff --git a/source/de/anomic/data/wiki/wikiBoard.java b/source/de/anomic/data/wiki/wikiBoard.java
index 563c6ea2f..ef7485d57 100644
--- a/source/de/anomic/data/wiki/wikiBoard.java
+++ b/source/de/anomic/data/wiki/wikiBoard.java
@@ -3,18 +3,21 @@
//(C) by Michael Peter Christen; mc@yacy.net
//first published on http://www.anomic.de
//Frankfurt, Germany, 2004
-//last major change: 20.07.2004
-
+//
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
+//
//This program is free software; you can redistribute it and/or modify
//it under the terms of the GNU General Public License as published by
//the Free Software Foundation; either version 2 of the License, or
//(at your option) any later version.
-
+//
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU General Public License for more details.
-
+//
//You should have received a copy of the GNU General Public License
//along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
@@ -41,7 +44,7 @@ public class wikiBoard {
public static final int keyLength = 64;
private static final String dateFormat = "yyyyMMddHHmmss";
- static SimpleDateFormat SimpleFormatter = new SimpleDateFormat(dateFormat);
+ // Formatter for the dateFormat pattern; its time zone is set to GMT in the static initializer.
+ // NOTE(review): java.text.SimpleDateFormat is documented as not synchronized; this single shared
+ // instance needs external synchronization if it is used from several threads — confirm the callers.
+ private static final SimpleDateFormat SimpleFormatter = new SimpleDateFormat(dateFormat);
static {
SimpleFormatter.setTimeZone(TimeZone.getTimeZone("GMT"));
diff --git a/source/de/anomic/data/wiki/wikiParser.java b/source/de/anomic/data/wiki/wikiParser.java
index 5fa2d0d50..f63ee13d1 100644
--- a/source/de/anomic/data/wiki/wikiParser.java
+++ b/source/de/anomic/data/wiki/wikiParser.java
@@ -1,3 +1,28 @@
+// wikiParser.java
+// ---------
+// part of YaCy
+// (C) by Michael Peter Christen; mc@yacy.net
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2007
+//
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
package de.anomic.data.wiki;
import java.io.UnsupportedEncodingException;
diff --git a/source/de/anomic/data/wiki/wikiParserException.java b/source/de/anomic/data/wiki/wikiParserException.java
index 7a7a7822b..1d6627b48 100644
--- a/source/de/anomic/data/wiki/wikiParserException.java
+++ b/source/de/anomic/data/wiki/wikiParserException.java
@@ -1,3 +1,28 @@
+// wikiParserException.java
+// ---------
+// part of YaCy
+// (C) by Michael Peter Christen; mc@yacy.net
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2007
+//
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
package de.anomic.data.wiki;
public class wikiParserException extends Exception {