git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7327 6c8d7289-2bf4-0310-a012-ef5d649a1542pull/1/head
parent
db3db0fdb9
commit
3b9aa0504e
@ -1,63 +0,0 @@
|
||||
// AbstractToken.java
|
||||
// ---------
|
||||
// part of YaCy
|
||||
// (C) by Michael Peter Christen; mc@yacy.net
|
||||
// first published on http://www.anomic.de
|
||||
// Frankfurt, Germany, 2007
|
||||
// Created 22.02.2007
|
||||
//
|
||||
// This file is contributed by Franz Brausze
|
||||
//
|
||||
// $LastChangedDate$
|
||||
// $LastChangedRevision$
|
||||
// $LastChangedBy$
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package de.anomic.data.wiki.tokens;
|
||||
|
||||
import de.anomic.data.wiki.wikiParserException;
|
||||
|
||||
public abstract class AbstractToken implements Token {
|
||||
|
||||
protected String text = null;
|
||||
protected String markup = null;
|
||||
protected boolean parsed = false;
|
||||
|
||||
protected abstract void parse() throws wikiParserException;
|
||||
|
||||
public String getMarkup() throws wikiParserException {
|
||||
if (this.text == null) {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
if (!this.parsed) {
|
||||
parse();
|
||||
}
|
||||
return this.markup;
|
||||
}
|
||||
|
||||
public String getText() {
|
||||
return this.text;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
try {
|
||||
return getMarkup();
|
||||
} catch (final wikiParserException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
@ -1,74 +0,0 @@
|
||||
// DefinitionListToken.java
|
||||
// ---------
|
||||
// part of YaCy
|
||||
// (C) by Michael Peter Christen; mc@yacy.net
|
||||
// first published on http://www.anomic.de
|
||||
// Frankfurt, Germany, 2007
|
||||
// Created 22.02.2007
|
||||
//
|
||||
// This file is contributed by Franz Brausze
|
||||
//
|
||||
// $LastChangedDate$
|
||||
// $LastChangedRevision$
|
||||
// $LastChangedBy$
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package de.anomic.data.wiki.tokens;
|
||||
|
||||
public class DefinitionListToken extends ListToken {
|
||||
|
||||
//private static final String[] blockElements = { "dl", "dt", "dd" };
|
||||
|
||||
public DefinitionListToken() {
|
||||
super(';', null, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected StringBuilder parse(final String[] t, final int depth, final StringBuilder sb) {
|
||||
sb.append("<dl>\n");
|
||||
while (super.aktline < t.length && getGrade(t[super.aktline]) >= depth) {
|
||||
for (int j=0; j<depth + 1; j++) sb.append("\t");
|
||||
sb.append("<dt>");
|
||||
|
||||
if (getGrade(t[super.aktline]) > depth) {
|
||||
parse(t, depth + 1, sb);
|
||||
} else {
|
||||
sb.append(t[super.aktline].substring(depth + 1).replaceFirst(":", "</dt><dd>"));
|
||||
}
|
||||
|
||||
sb.append("</");
|
||||
if (t[super.aktline].indexOf(':') == -1 || getGrade(t[super.aktline]) > depth) {
|
||||
sb.append("dt");
|
||||
} else {
|
||||
sb.append("dd");
|
||||
}
|
||||
sb.append(">\n");
|
||||
super.aktline++;
|
||||
}
|
||||
for (int j=0; j<depth; j++) {
|
||||
sb.append("\t");
|
||||
}
|
||||
sb.append("</dl>");
|
||||
super.aktline--;
|
||||
return sb;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String[] getBlockElementNames() {
|
||||
return blockElements;
|
||||
}
|
||||
|
||||
}
|
@ -1,211 +0,0 @@
|
||||
// LinkToken.java
|
||||
// ---------
|
||||
// part of YaCy
|
||||
// (C) by Michael Peter Christen; mc@yacy.net
|
||||
// first published on http://www.anomic.de
|
||||
// Frankfurt, Germany, 2007
|
||||
// Created 22.02.2007
|
||||
//
|
||||
// This file is contributed by Franz Brausze
|
||||
//
|
||||
// $LastChangedDate$
|
||||
// $LastChangedRevision$
|
||||
// $LastChangedBy$
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package de.anomic.data.wiki.tokens;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import de.anomic.data.BookmarkHelper;
|
||||
import de.anomic.data.bookmarksDB;
|
||||
import de.anomic.data.bookmarksDB.Bookmark;
|
||||
import de.anomic.data.wiki.wikiParserException;
|
||||
import de.anomic.search.Switchboard;
|
||||
|
||||
public class LinkToken extends AbstractToken {
|
||||
|
||||
private static final int IMG = 0;
|
||||
private static final int BKM = 1;
|
||||
private static final int INT = 2;
|
||||
private static final int EXT = 3;
|
||||
|
||||
private static final Pattern imgPattern = Pattern.compile(
|
||||
"\\[\\[" + // begin
|
||||
"(Image:([^\\]|]|\\][^\\]])*)" + // "Image:" + URL
|
||||
"(" + // <optional>
|
||||
"(\\|(bottom|left|center|right|middle|top))?" + // optional align
|
||||
"(\\|(([^\\]]|\\][^\\]])*))" + // description
|
||||
")?" + // </optional>
|
||||
"\\]\\]"); // end
|
||||
|
||||
private static final Pattern bkmPattern = Pattern.compile(
|
||||
"\\[\\[" + // begin
|
||||
"(Bookmark:([^\\]|]|\\][^\\]])*)" + // "Bookmark:" + URL
|
||||
"(\\|(([^\\]]|\\][^\\]])*?))?" + // optional description
|
||||
"\\]\\]"); // end
|
||||
|
||||
private static final Pattern intPattern = Pattern.compile(
|
||||
"\\[\\[" + // begin
|
||||
"(([^\\]|]|\\][^\\]])*?)" + // wiki-page
|
||||
"(\\|(([^\\]]|\\][^\\]])*?))?" + // optional desciption
|
||||
"\\]\\]"); // end
|
||||
|
||||
private static final Pattern extPattern = Pattern.compile(
|
||||
"\\[" + // begin
|
||||
"([^\\] ]*)" + // URL
|
||||
"( ([^\\]]*))?" + // optional description
|
||||
"\\]"); // end
|
||||
|
||||
private static final Pattern[] patterns = new Pattern[] { imgPattern, bkmPattern, intPattern, extPattern };
|
||||
|
||||
private final String localhost;
|
||||
private final String wikiPath;
|
||||
private final Switchboard sb;
|
||||
private int patternNr = 0;
|
||||
|
||||
public LinkToken(final String localhost, final String wikiPath, final Switchboard sb) {
|
||||
this.localhost = localhost;
|
||||
this.wikiPath = wikiPath;
|
||||
this.sb = sb;
|
||||
}
|
||||
|
||||
protected void parse() throws wikiParserException {
|
||||
final StringBuilder stringBuilder = new StringBuilder(6000);
|
||||
|
||||
if (this.patternNr < 0 || this.patternNr >= patterns.length) {
|
||||
throw new wikiParserException("patternNr was not set correctly: " + this.patternNr);
|
||||
}
|
||||
|
||||
final Matcher m = patterns[this.patternNr].matcher(this.text);
|
||||
|
||||
if (!m.find()) {
|
||||
throw new wikiParserException("Didn't find match for: (" + this.patternNr + ") " + this.text);
|
||||
}
|
||||
|
||||
switch (this.patternNr) {
|
||||
case IMG:
|
||||
stringBuilder.append("<img src=\"").append(formatHref(m.group(1).substring(6))).append("\"");
|
||||
if (m.group(5) != null) {
|
||||
stringBuilder.append(" align=\"").append(m.group(5)).append("\"");
|
||||
}
|
||||
stringBuilder.append(" alt=\"").append((m.group(7) == null) ? formatHref(m.group(1).substring(6)) : m.group(7)).append("\"");
|
||||
stringBuilder.append(" />");
|
||||
break;
|
||||
|
||||
case BKM:
|
||||
final Link[] links = getLinksFromBookmarkTag(m.group(2));
|
||||
if (links == null) {
|
||||
stringBuilder.append("<span class=\"error\">Couldn't find Bookmark-Tag '").append(m.group(2)).append("'.</span>");
|
||||
} else {
|
||||
appendLinks(links, stringBuilder);
|
||||
}
|
||||
break;
|
||||
|
||||
case INT:
|
||||
stringBuilder.append(new Link(
|
||||
"http://" + this.localhost + "/" + this.wikiPath + m.group(1),
|
||||
m.group(4),
|
||||
(m.group(4) == null) ? m.group(1) : m.group(4)
|
||||
).toString());
|
||||
break;
|
||||
|
||||
case EXT:
|
||||
stringBuilder.append(new Link(
|
||||
m.group(1),
|
||||
m.group(3),
|
||||
(m.group(3) == null) ? m.group(1) : m.group(3)
|
||||
).toString());
|
||||
break;
|
||||
}
|
||||
this.parsed = true;
|
||||
this.markup = new String(stringBuilder);
|
||||
}
|
||||
|
||||
private String formatHref(final String link) {
|
||||
if (link.indexOf("://") == -1) { // DATA/HTDOCS-link
|
||||
return "http://" + this.localhost + "/share/" + link;
|
||||
}
|
||||
return link;
|
||||
}
|
||||
|
||||
private StringBuilder appendLinks(final Link[] links, final StringBuilder sb) {
|
||||
for (int i=0; i<links.length; i++)
|
||||
sb.append(links[i].toString());
|
||||
return sb;
|
||||
}
|
||||
|
||||
private Link[] getLinksFromBookmarkTag(final String tagName) {
|
||||
final bookmarksDB.Tag tag = this.sb.bookmarksDB.getTag(BookmarkHelper.tagHash(tagName));
|
||||
if (tag == null) return null;
|
||||
final ArrayList<Link> r = new ArrayList<Link>();
|
||||
final Iterator<String> it = tag.getUrlHashes().iterator();
|
||||
String hash;
|
||||
Bookmark bm;
|
||||
while (it.hasNext())
|
||||
if ((hash = it.next()) != null)
|
||||
if ((bm = this.sb.bookmarksDB.getBookmark(hash)) != null)
|
||||
r.add(new Link(bm.getUrl(), bm.getTitle(), bm.getDescription()));
|
||||
return r.toArray(new Link[r.size()]);
|
||||
}
|
||||
|
||||
private static class Link {
|
||||
|
||||
private final String href;
|
||||
private final String title;
|
||||
private final String desc;
|
||||
|
||||
public Link(final String href, final String title, final String desc) {
|
||||
this.href = href;
|
||||
this.title = title;
|
||||
this.desc = desc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder stringBuilder = new StringBuilder(300);
|
||||
stringBuilder.append("<a href=\"").append(this.href).append("\"");
|
||||
if (this.title != null) stringBuilder.append(" title=\"").append(this.title).append("\"");
|
||||
stringBuilder.append(">");
|
||||
if (this.desc == null) stringBuilder.append(this.href); else stringBuilder.append(this.desc);
|
||||
stringBuilder.append("</a>");
|
||||
return new String(stringBuilder);
|
||||
}
|
||||
}
|
||||
|
||||
public String[] getBlockElementNames() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public Pattern[] getRegex() {
|
||||
return patterns;
|
||||
}
|
||||
|
||||
public boolean setText(final String text, final int patternNr) {
|
||||
this.text = text;
|
||||
this.patternNr = patternNr;
|
||||
this.parsed = false;
|
||||
if (text == null) {
|
||||
this.markup = null;
|
||||
this.patternNr = -1;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
@ -1,129 +0,0 @@
|
||||
// ListToken.java
|
||||
// ---------
|
||||
// part of YaCy
|
||||
// (C) by Michael Peter Christen; mc@yacy.net
|
||||
// first published on http://www.anomic.de
|
||||
// Frankfurt, Germany, 2007
|
||||
// Created 22.02.2007
|
||||
//
|
||||
// This file is contributed by Franz Brausze
|
||||
//
|
||||
// $LastChangedDate$
|
||||
// $LastChangedRevision$
|
||||
// $LastChangedBy$
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package de.anomic.data.wiki.tokens;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class ListToken extends AbstractToken {
|
||||
|
||||
protected final String[] blockElements;
|
||||
|
||||
protected final char firstChar;
|
||||
protected final String listBlockElement;
|
||||
protected final String listElement;
|
||||
protected final boolean recursion;
|
||||
protected final Pattern[] pattern;
|
||||
|
||||
protected int aktline = 0;
|
||||
|
||||
public ListToken(final char firstChar, final String listBlockElement) {
|
||||
this(firstChar, listBlockElement, "li");
|
||||
}
|
||||
|
||||
public ListToken(final char firstChar, final String listBlockElement, final String listElement) {
|
||||
this(firstChar, listBlockElement, listElement, true);
|
||||
}
|
||||
|
||||
public ListToken(final char firstChar, final String listBlockElement, final String listElement, final boolean recursion) {
|
||||
this.firstChar = firstChar;
|
||||
this.listBlockElement = listBlockElement;
|
||||
this.listElement = listElement;
|
||||
this.recursion = recursion;
|
||||
this.pattern = new Pattern[] { Pattern.compile("^[" + firstChar + "]([^\n]|\n[" + firstChar + "])*", Pattern.MULTILINE) };
|
||||
final ArrayList<String> r = new ArrayList<String>();
|
||||
if (this.listBlockElement != null) {
|
||||
if (this.recursion) {
|
||||
r.add(this.listBlockElement);
|
||||
}
|
||||
if (this.listElement != null) {
|
||||
r.add(this.listElement);
|
||||
}
|
||||
}
|
||||
blockElements = r.toArray(new String[r.size()]);
|
||||
}
|
||||
|
||||
protected void parse() {
|
||||
final StringBuilder sb = new StringBuilder(this.text.length());
|
||||
parse(this.text.split("\n"), 0, sb);
|
||||
this.markup = new String(sb);
|
||||
this.parsed = true;
|
||||
}
|
||||
|
||||
protected StringBuilder parse(final String[] t, final int depth, final StringBuilder sb) {
|
||||
if (this.listBlockElement != null) sb.append("<").append(this.listBlockElement).append(">\n");
|
||||
while (this.aktline < t.length && getGrade(t[this.aktline]) >= depth) {
|
||||
if (recursion) for (int j=0; j<depth + 1; j++) sb.append("\t");
|
||||
if (this.listElement != null) sb.append("<").append(this.listElement).append(">");
|
||||
|
||||
if (this.recursion && getGrade(t[this.aktline]) > depth) {
|
||||
parse(t, depth + 1, sb);
|
||||
} else {
|
||||
sb.append(t[this.aktline].substring(depth + 1));
|
||||
}
|
||||
|
||||
if (this.listElement != null) sb.append("</").append(this.listElement).append(">");
|
||||
sb.append("\n");
|
||||
this.aktline++;
|
||||
}
|
||||
if (this.recursion) for (int j=0; j<depth; j++) sb.append("\t");
|
||||
if (this.listBlockElement != null) sb.append("</").append(this.listBlockElement).append(">");
|
||||
this.aktline--;
|
||||
return sb;
|
||||
}
|
||||
|
||||
protected int getGrade(final String t) {
|
||||
int i = 0;
|
||||
for (i=0; i<t.length(); i++) {
|
||||
if (t.charAt(i) != this.firstChar) break;
|
||||
}
|
||||
return i - 1;
|
||||
}
|
||||
|
||||
public String[] getBlockElementNames() {
|
||||
return blockElements;
|
||||
}
|
||||
|
||||
public Pattern[] getRegex() {
|
||||
return this.pattern;
|
||||
}
|
||||
|
||||
public char getFirstChar() {
|
||||
return this.firstChar;
|
||||
}
|
||||
|
||||
public boolean setText(final String text, final int patternNr) {
|
||||
this.text = text;
|
||||
this.markup = null;
|
||||
this.parsed = false;
|
||||
this.aktline = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
@ -1,152 +0,0 @@
|
||||
// SimpleToken.java
|
||||
// ---------
|
||||
// part of YaCy
|
||||
// (C) by Michael Peter Christen; mc@yacy.net
|
||||
// first published on http://www.anomic.de
|
||||
// Frankfurt, Germany, 2007
|
||||
// Created 22.02.2007
|
||||
//
|
||||
// This file is contributed by Franz Brausze
|
||||
//
|
||||
// $LastChangedDate$
|
||||
// $LastChangedRevision$
|
||||
// $LastChangedBy$
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package de.anomic.data.wiki.tokens;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import de.anomic.data.wiki.wikiParserException;
|
||||
|
||||
public class SimpleToken extends AbstractToken {
|
||||
|
||||
protected String content = null;
|
||||
protected int grade = 0;
|
||||
|
||||
protected final Pattern[] pattern;
|
||||
private final String[][] definitionList;
|
||||
private final String[] blockElements;
|
||||
|
||||
public SimpleToken(final char firstChar, final char lastChar, final String[][] definitionList, final boolean isBlockElements) {
|
||||
this.definitionList = definitionList;
|
||||
int i;
|
||||
if (isBlockElements) {
|
||||
final ArrayList<String> r = new ArrayList<String>();
|
||||
int j;
|
||||
for (i = 0; i < definitionList.length; i++)
|
||||
if (definitionList[i] != null)
|
||||
for (j = 0; j < definitionList[i].length; j++)
|
||||
r.add(definitionList[i][j]);
|
||||
this.blockElements = r.toArray(new String[r.size()]);
|
||||
} else {
|
||||
this.blockElements = null;
|
||||
}
|
||||
|
||||
for (i=0; i<definitionList.length; i++) {
|
||||
if (definitionList[i] != null) {
|
||||
i++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
this.pattern = new Pattern[] {
|
||||
Pattern.compile(
|
||||
"([\\" + firstChar + "]{" + i + "," + definitionList.length + "})" +
|
||||
"(.*?)" +
|
||||
"([\\" + lastChar + "]{" + i + "," + definitionList.length + "})")
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getMarkup() throws wikiParserException {
|
||||
if (this.content == null) {
|
||||
if (this.text == null) {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
setText(this.text, 0);
|
||||
}
|
||||
if (!this.parsed) parse();
|
||||
return this.markup;
|
||||
}
|
||||
|
||||
protected void parse() throws wikiParserException {
|
||||
String[] e;
|
||||
if (this.grade >= this.definitionList.length || (e = this.definitionList[this.grade]) == null)
|
||||
throw new wikiParserException("Token not defined for grade: " + this.grade);
|
||||
this.markup = getMarkup(e);
|
||||
this.parsed = true;
|
||||
}
|
||||
|
||||
protected String getMarkup(final String[] es) {
|
||||
return getMarkup(es, false) + this.content + getMarkup(es, true);
|
||||
}
|
||||
|
||||
protected String getMarkup(final String[] es, final boolean closing) {
|
||||
final StringBuilder result = new StringBuilder();
|
||||
// backwards if closing
|
||||
for (
|
||||
int i = (closing) ? es.length - 1 : 0, j;
|
||||
(closing && i >= 0) ^ (!closing && i < es.length);
|
||||
i += (closing) ? -1 : +1
|
||||
) {
|
||||
result.append("<");
|
||||
if (closing) {
|
||||
result.append("/");
|
||||
if ((j = es[i].indexOf(' ')) > -1) {
|
||||
result.append(es[i].substring(0, j));
|
||||
} else {
|
||||
result.append(es[i]);
|
||||
}
|
||||
} else {
|
||||
result.append(es[i]);
|
||||
}
|
||||
result.append(">");
|
||||
}
|
||||
return new String(result);
|
||||
}
|
||||
|
||||
public boolean setText(final String text, final int patternNr) {
|
||||
this.text = text;
|
||||
this.markup = null;
|
||||
this.parsed = false;
|
||||
if (text != null) {
|
||||
final Matcher m = getRegex()[0].matcher(text);
|
||||
if (
|
||||
(m.matches()) &&
|
||||
(m.group(1).length() == m.group(3).length()) &&
|
||||
(definitionList.length >= m.group(1).length()) &&
|
||||
(definitionList[m.group(1).length() - 1] != null)
|
||||
) {
|
||||
this.grade = m.group(1).length() - 1;
|
||||
this.content = m.group(2);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public Pattern[] getRegex() {
|
||||
return this.pattern;
|
||||
}
|
||||
|
||||
public String[] getBlockElementNames() {
|
||||
return this.blockElements;
|
||||
}
|
||||
|
||||
}
|
@ -1,146 +0,0 @@
|
||||
// TableToken.java
|
||||
// ---------
|
||||
// part of YaCy
|
||||
// (C) by Michael Peter Christen; mc@yacy.net
|
||||
// first published on http://www.anomic.de
|
||||
// Frankfurt, Germany, 2007
|
||||
// Created 22.02.2007
|
||||
//
|
||||
// This file is contributed by Franz Brausze
|
||||
//
|
||||
// $LastChangedDate$
|
||||
// $LastChangedRevision$
|
||||
// $LastChangedBy$
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package de.anomic.data.wiki.tokens;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class TableToken extends AbstractToken {
|
||||
|
||||
private static final Pattern[] pattern = new Pattern[] {
|
||||
Pattern.compile(
|
||||
"\\{\\|" + // "{|"
|
||||
"([^\n]|\n\\|[|-])*\n" + // new line must start with "||" or "|-"
|
||||
"\\|\\}") // "|}"
|
||||
};
|
||||
|
||||
private static final String[] blockElementNames = new String[] { "table", "tr", "td" };
|
||||
|
||||
protected void parse() {
|
||||
final String[] t = text.split("\n");
|
||||
String[] tds;
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
sb.append("<table");
|
||||
if (t[0].length() > 2) sb.append(parseTableProperties(t[0].substring(2)));
|
||||
sb.append(">\n");
|
||||
boolean trOpen = false;
|
||||
for (int i=1, j, a; i<t.length-1; i++) {
|
||||
if (t[i].startsWith("|-")) {
|
||||
if (trOpen) sb.append("\t</tr>\n");
|
||||
trOpen = (i < t.length - 2);
|
||||
if (trOpen) sb.append("\t<tr>\n");
|
||||
} else if (t[i].startsWith("||")) {
|
||||
tds = t[i].split("\\|\\|");
|
||||
for (j=0; j<tds.length; j++) {
|
||||
if (tds[j].length() > (a = tds[j].indexOf('|')) + 1) { // don't print empty td's
|
||||
sb.append("\t\t<td");
|
||||
if (a > -1) sb.append(parseTableProperties(tds[j].substring(0, a)));
|
||||
sb.append(">").append(tds[j].substring(a + 1)).append("</td>\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (trOpen) sb.append("\t</tr>\n");
|
||||
this.markup = new String(sb.append("</table>"));
|
||||
this.parsed = true;
|
||||
}
|
||||
|
||||
// from de.anomic.data.wikiCode.java.parseTableProperties, modified by [FB]
|
||||
private static final String[] tps = { "rowspan", "colspan", "vspace", "hspace", "cellspacing", "cellpadding", "border" };
|
||||
|
||||
private static final HashMap<String, String[]> ps = new HashMap<String, String[]>();
|
||||
|
||||
static {
|
||||
Arrays.sort(tps);
|
||||
String[] array;
|
||||
Arrays.sort(array = new String[] { "void", "above", "below", "hsides", "lhs", "rhs", "vsides", "box", "border" });
|
||||
ps.put("frame", array);
|
||||
Arrays.sort(array = new String[] { "none", "groups", "rows", "cols", "all" });
|
||||
ps.put("rules", array);
|
||||
Arrays.sort(array = new String[] { "top", "middle", "bottom", "baseline" });
|
||||
ps.put("valign", array);
|
||||
Arrays.sort(array = new String[] { "left", "right", "center" });
|
||||
ps.put("align", array);
|
||||
}
|
||||
|
||||
// contributed by [MN]
|
||||
/** This method takes possible table properties and tests if they are valid.
|
||||
* Valid in this case means if they are a property for the table, tr or td
|
||||
* tag as stated in the HTML Pocket Reference by Jennifer Niederst (1st edition)
|
||||
* The method is important to avoid XSS attacks on the wiki via table properties.
|
||||
* @param properties A string that may contain several table properties and/or junk.
|
||||
* @return A string that only contains table properties.
|
||||
*/
|
||||
private static StringBuilder parseTableProperties(final String properties) {
|
||||
final String[] values = properties.replaceAll(""", "").split("[= ]"); //splitting the string at = and blanks
|
||||
final StringBuilder sb = new StringBuilder(properties.length());
|
||||
String key, value;
|
||||
String[] posVals;
|
||||
final int numberofvalues = values.length;
|
||||
for (int i=0; i<numberofvalues; i++) {
|
||||
key = values[i].trim();
|
||||
if (key.equals("nowrap")) {
|
||||
addPair("nowrap", "nowrap", sb);
|
||||
} else if (i + 1 < numberofvalues) {
|
||||
value = values[++i].trim();
|
||||
if (
|
||||
(key.equals("summary")) ||
|
||||
(key.equals("bgcolor") && value.matches("#{0,1}[0-9a-fA-F]{1,6}|[a-zA-Z]{3,}")) ||
|
||||
((key.equals("width") || key.equals("height")) && value.matches("\\d+%{0,1}")) ||
|
||||
((posVals = ps.get(key)) != null && Arrays.binarySearch(posVals, value) >= 0) ||
|
||||
(Arrays.binarySearch(tps, key) >= 0 && value.matches("\\d+"))
|
||||
) {
|
||||
addPair(key, value, sb);
|
||||
}
|
||||
}
|
||||
}
|
||||
return sb;
|
||||
}
|
||||
|
||||
private static StringBuilder addPair(final String key, final String value, final StringBuilder sb) {
|
||||
return sb.append(" ").append(key).append("=\"").append(value).append("\"");
|
||||
}
|
||||
|
||||
public Pattern[] getRegex() {
|
||||
return pattern;
|
||||
}
|
||||
|
||||
public String[] getBlockElementNames() {
|
||||
return blockElementNames;
|
||||
}
|
||||
|
||||
public boolean setText(final String text, final int patternNr) {
|
||||
this.text = text;
|
||||
this.parsed = false;
|
||||
this.markup = null;
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
@ -1,42 +0,0 @@
|
||||
// Token.java
|
||||
// ---------
|
||||
// part of YaCy
|
||||
// (C) by Michael Peter Christen; mc@yacy.net
|
||||
// first published on http://www.anomic.de
|
||||
// Frankfurt, Germany, 2007
|
||||
// Created 22.02.2007
|
||||
//
|
||||
// This file is contributed by Franz Brausze
|
||||
//
|
||||
// $LastChangedDate$
|
||||
// $LastChangedRevision$
|
||||
// $LastChangedBy$
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package de.anomic.data.wiki.tokens;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import de.anomic.data.wiki.wikiParserException;
|
||||
|
||||
public interface Token {
|
||||
|
||||
public Pattern[] getRegex();
|
||||
public boolean setText(String text, int patternNr);
|
||||
public String getText();
|
||||
public String getMarkup() throws wikiParserException;
|
||||
public String[] getBlockElementNames();
|
||||
}
|
Loading…
Reference in new issue