You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1209 lines
61 KiB
1209 lines
61 KiB
// WikiCode.java
|
|
// -------------------------------------
|
|
// part of YACY
|
|
//
|
|
// (C) 2005, 2006 by Alexander Schier, Marc Nause, Franz Brausze
|
|
//
|
|
// $LastChangedDate$
|
|
// $LastChangedRevision$
|
|
// $LastChangedBy$
|
|
//
|
|
// This program is free software; you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation; either version 2 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program; if not, write to the Free Software
|
|
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
package net.yacy.data.wiki;
|
|
|
|
import java.io.BufferedReader;
|
|
import java.io.IOException;
|
|
import java.util.ArrayList;
|
|
import java.util.Arrays;
|
|
import java.util.HashMap;
|
|
import java.util.Iterator;
|
|
import java.util.List;
|
|
import java.util.Locale;
|
|
import java.util.Map;
|
|
import java.util.regex.Pattern;
|
|
|
|
import net.yacy.document.parser.html.CharacterCoding;
|
|
import net.yacy.server.serverCore;
|
|
|
|
/** Provides methods to handle texts that have been posted in the yacyWiki or other
|
|
* parts of YaCy which use wiki code, like the blog or the profile.
|
|
*
|
|
* @author Alexander Schier [AS], Franz Brausze [FB], Marc Nause [MN]
|
|
*/
|
|
public class WikiCode extends AbstractWikiParser implements WikiParser {
|
|
|
|
private static final String EMPTY = "";
|
|
private static final String PIPE_ESCAPED = "|";
|
|
private static final Pattern REGEX_NOT_CHAR_NUM_OR_UNDERSCORE_PATTERN = Pattern.compile("[^a-zA-Z0-9_]");
|
|
private static final Pattern SPACE_PATTERN = Pattern.compile(" ");
|
|
|
|
private static enum Tags {
|
|
HEADLINE_1("=", "<h1>", "</h1>"),
|
|
HEADLINE_2("==", "<h2>", "</h2>"),
|
|
HEADLINE_3("===", "<h3>", "</h3>"),
|
|
HEADLINE_4("====", "<h4>", "</h4>"),
|
|
HEADLINE_5("=====", "<h5>", "</h5>"),
|
|
HEADLINE_6("======", "<h6>", "</h6>"),
|
|
|
|
EMPHASIZE_1("\'\'", "<i>", "</i>"),
|
|
EMPHASIZE_2("\'\'\'", "<b>", "</b>"),
|
|
EMPHASIZE_3("\'\'\'\'\'", "<b><i>", "</i></b>"),
|
|
|
|
STRIKE("<s>", "</s>", "<span class=\"strike\">", "</span>"),
|
|
UNDERLINE("<u>", "</u>", "<span class=\"underline\">", "</span>");
|
|
|
|
final String openHTML;
|
|
final String closeHTML;
|
|
final String openWiki;
|
|
final String closeWiki;
|
|
|
|
final int openWikiLength;
|
|
final int closeWikiLength;
|
|
|
|
Tags(final String openWiki, final String closeWiki, final String openHTML, final String closeHTML) {
|
|
if (openHTML == null || closeHTML == null || openWiki == null || closeWiki == null) {
|
|
throw new IllegalArgumentException("Parameter may not be null.");
|
|
}
|
|
|
|
this.openHTML = openHTML;
|
|
this.closeHTML = closeHTML;
|
|
this.openWiki = openWiki;
|
|
this.closeWiki = closeWiki;
|
|
|
|
this.openWikiLength = openWiki.length();
|
|
this.closeWikiLength = closeWiki.length();
|
|
}
|
|
|
|
Tags(final String wiki, final String openHTML, final String closeHTML) {
|
|
this(wiki, wiki, openHTML, closeHTML);
|
|
}
|
|
}
|
|
|
|
private static final String HTML_OPEN_DEFINITION_DESCRIPTION = "<dd>";
|
|
private static final String HTML_CLOSE_DEFINITION_DESCRIPTION = "</dd>";
|
|
private static final String HTML_OPEN_DEFINITION_ITEM = "<dt>";
|
|
private static final String HTML_CLOSE_DEFINITION_ITEM = "</dt>";
|
|
private static final String HTML_OPEN_DEFINITION_LIST = "<dl>";
|
|
private static final String HTML_CLOSE_DEFINITION_LIST = "</dl>";
|
|
private static final String HTML_OPEN_UNORDERED_LIST = "<ul>";
|
|
private static final String HTML_CLOSE_UNORDERED_LIST = "</ul>";
|
|
private static final String HTML_CLOSE_BLOCKQUOTE = "</blockquote>";
|
|
private static final String HTML_CLOSE_LIST_ELEMENT = "</li>";
|
|
private static final String HTML_CLOSE_ORDERED_LIST = "</ol>";
|
|
private static final String HTML_OPEN_BLOCKQUOTE = "<blockquote>";
|
|
private static final String HTML_OPEN_LIST_ELEMENT = "<li>";
|
|
private static final String HTML_OPEN_ORDERED_LIST = "<ol>";
|
|
|
|
private static final String WIKI_CLOSE_LINK = "]]";
|
|
private static final String WIKI_OPEN_LINK = "[[";
|
|
/** Wiki template inclusion closing tag */
|
|
private static final String WIKI_CLOSE_METADATA = "}}";
|
|
/** Wiki template inclusion opening tag */
|
|
private static final String WIKI_OPEN_METADATA = "{{";
|
|
private static final String WIKI_CLOSE_EXTERNAL_LINK = "]";
|
|
private static final String WIKI_OPEN_EXTERNAL_LINK = "[";
|
|
private static final String WIKI_CLOSE_PRE_ESCAPED = "</pre>";
|
|
private static final String WIKI_HR_LINE = "----";
|
|
private static final String WIKI_IMAGE = "Image:";
|
|
private static final String WIKI_VIDEO_YOUTUBE = "Youtube:";
|
|
private static final String WIKI_VIDEO_VIMEO = "Vimeo:";
|
|
private static final String WIKI_OPEN_PRE_ESCAPED = "<pre>";
|
|
|
|
private static final char ASTERISK = '*';
|
|
private static final char ONE = '1';
|
|
private static final char TWO = '2';
|
|
private static final char THREE = '3';
|
|
private static final char FOUR = '4';
|
|
private static final char FIVE = '5';
|
|
private static final char SIX = '6';
|
|
private static final char WIKI_FORMATTED = ' ';
|
|
private static final char WIKI_INDENTION = ':';
|
|
/** Wiki template parameter separator */
|
|
private static final char WIKI_METADATA_PARAMETER_SEPARATOR = '|';
|
|
|
|
private static final int LEN_WIKI_CLOSE_PRE_ESCAPED = WIKI_CLOSE_PRE_ESCAPED.length();
|
|
private static final int LEN_WIKI_OPEN_PRE_ESCAPED = WIKI_OPEN_PRE_ESCAPED.length();
|
|
private static final int LEN_WIKI_OPEN_LINK = WIKI_OPEN_LINK.length();
|
|
private static final int LEN_WIKI_CLOSE_LINK = WIKI_CLOSE_LINK.length();
|
|
private static final int LEN_WIKI_IMAGE = WIKI_IMAGE.length();
|
|
private static final int LEN_WIKI_VIDEO_YOUTUBE = WIKI_VIDEO_YOUTUBE.length();
|
|
private static final int LEN_WIKI_VIDEO_VIMEO = WIKI_VIDEO_VIMEO.length();
|
|
private static final int LEN_WIKI_OPEN_EXTERNAL_LINK = WIKI_OPEN_EXTERNAL_LINK.length();
|
|
private static final int LEN_WIKI_CLOSE_EXTERNAL_LINK = WIKI_CLOSE_EXTERNAL_LINK.length();
|
|
private static final int LEN_WIKI_HR_LINE = WIKI_HR_LINE.length();
|
|
private static final int LEN_PIPE_ESCAPED = PIPE_ESCAPED.length();
|
|
private static final int LEN_WIKI_OPEN_METADATA = WIKI_OPEN_METADATA.length();
|
|
private static final int LEN_WIKI_CLOSE_METADATA = WIKI_CLOSE_METADATA.length();
|
|
|
|
/** List of properties which can be used in tables. */
private static final String[] TABLE_PROPERTIES = {"rowspan", "colspan", "vspace", "hspace", "cellspacing", "cellpadding", "border"};

/** Map which contains possible values for several parameters. */
private static final Map<String, String[]> PROPERTY_VALUES = new HashMap<String, String[]>();

/** Tags for different types of headlines in wikiCode. */
private static final String[] HEADLINE_TAGS = {
    Tags.HEADLINE_6.openWiki,
    Tags.HEADLINE_5.openWiki,
    Tags.HEADLINE_4.openWiki,
    Tags.HEADLINE_3.openWiki,
    Tags.HEADLINE_2.openWiki,
    Tags.HEADLINE_1.openWiki
};

private static final char[] HEADLINE_LEVEL = {ONE, TWO, THREE, FOUR, FIVE, SIX};

static {
    /* All arrays are sorted because they are searched with Arrays.binarySearch() later,
     * which requires sorted input. For more info go to
     * http://java.sun.com/javase/6/docs/api/java/util/Arrays.html#binarySearch(T[], T, java.util.Comparator)
     */
    Arrays.sort(HEADLINE_LEVEL);
    Arrays.sort(HEADLINE_TAGS);
    Arrays.sort(TABLE_PROPERTIES);

    final String[] frameValues = {"void", "above", "below", "hsides", "lhs", "rhs", "vsides", "box", "border"};
    Arrays.sort(frameValues);
    PROPERTY_VALUES.put("frame", frameValues);

    final String[] rulesValues = {"none", "groups", "rows", "cols", "all"};
    Arrays.sort(rulesValues);
    PROPERTY_VALUES.put("rules", rulesValues);

    final String[] valignValues = {"top", "middle", "bottom", "baseline"};
    Arrays.sort(valignValues);
    PROPERTY_VALUES.put("valign", valignValues);

    final String[] alignValues = {"left", "right", "center"};
    Arrays.sort(alignValues);
    PROPERTY_VALUES.put("align", alignValues);
}
|
|
|
|
private final String tableStart = "{" + PIPE_ESCAPED; // {|
|
|
private final String newLine = PIPE_ESCAPED + "-"; // |-
|
|
private final String cellDivider = PIPE_ESCAPED + PIPE_ESCAPED; // ||
|
|
private final String tableEnd = PIPE_ESCAPED + "}"; // |}
|
|
private final String attribDivider = PIPE_ESCAPED; // |
|
|
private final int lenTableStart = this.tableStart.length();
|
|
private final int lenCellDivider = this.cellDivider.length();
|
|
private final int lenTableEnd = this.tableEnd.length();
|
|
private final int lenAttribDivider = this.attribDivider.length();
|
|
|
|
private enum ListType {
|
|
ORDERED, UNORDERED;
|
|
}
|
|
|
|
private String orderedListLevel = EMPTY;
|
|
private String unorderedListLevel = EMPTY;
|
|
private String defListLevel = EMPTY;
|
|
private boolean processingCell = false; //needed for prevention of double-execution of replaceHTML
|
|
private boolean processingDefList = false; //needed for definition lists
|
|
private final boolean escape = false; //needed for escape
|
|
private final boolean escaped = false; //needed for <pre> not getting in the way
|
|
private boolean newRowStart = false; //needed for the first row not to be empty
|
|
private boolean noList = false; //needed for handling of [= and <pre> in lists
|
|
private boolean processingPreformattedText = false; //needed for preformatted text
|
|
private boolean preformattedSpanning = false; //needed for <pre> and </pre> spanning over several lines
|
|
private boolean replacedHtmlAlready = false; //indicates if method replaceHTML has been used with line already
|
|
private boolean processingTable = false; //needed for tables, because they reach over several lines
|
|
private int preindented = 0; //needed for indented <pre>s
|
|
|
|
private final TableOfContent tableOfContents = new TableOfContent();
|
|
|
|
/**
|
|
* Transforms a text which contains wiki code to HTML fragment.
|
|
* @param hostport
|
|
* @param reader contains the text to be transformed.
|
|
* @param length expected length of text, used to create buffer with right size.
|
|
* @return HTML fragment.
|
|
* @throws IOException in case input from reader can not be read.
|
|
*/
|
|
@Override
|
|
protected String transform(final String hostport, final BufferedReader reader, final int length)
|
|
throws IOException {
|
|
final StringBuilder out = new StringBuilder(length);
|
|
String line;
|
|
while ((line = reader.readLine()) != null) {
|
|
out.append(processLineOfWikiCode(hostport, line)).append(serverCore.CRLF_STRING);
|
|
}
|
|
out.insert(0, createTableOfContents());
|
|
this.tableOfContents.clear();
|
|
return out.toString();
|
|
}
|
|
|
|
private void processHeadline(
|
|
final StringBuilder input,
|
|
final int firstPosition,
|
|
final Tags tags,
|
|
final int secondPosition,
|
|
String direlem)
|
|
{
|
|
//add anchor and create headline
|
|
if ((direlem = input.substring(firstPosition + tags.openWikiLength, secondPosition)) != null) {
|
|
//counting double headlines
|
|
int doubles = 0;
|
|
final Iterator<String> iterator = this.tableOfContents.iterator();
|
|
String element;
|
|
while (iterator.hasNext()) {
|
|
element = iterator.next();
|
|
// no element with null value should ever be in directory
|
|
assert (element != null);
|
|
if (element.substring(1).equals(direlem)) {
|
|
doubles++;
|
|
}
|
|
}
|
|
String anchor = REGEX_NOT_CHAR_NUM_OR_UNDERSCORE_PATTERN.matcher(SPACE_PATTERN.matcher(direlem).replaceAll("_")).replaceAll(EMPTY); //replace blanks with underscores and delete everything thats not a regular character, a number or _
|
|
//if there are doubles, add underscore and number of doubles plus one
|
|
if (doubles > 0) {
|
|
anchor = anchor + "_" + (doubles + 1);
|
|
}
|
|
final StringBuilder link = new StringBuilder();
|
|
link.append("<a name=\"");
|
|
link.append(anchor);
|
|
link.append("\"></a>");
|
|
link.append(tags.openHTML);
|
|
link.append(direlem);
|
|
link.append(tags.closeHTML);
|
|
|
|
input.replace(firstPosition, secondPosition + tags.closeWikiLength, link.toString());
|
|
|
|
//add headlines to list of headlines (so TOC can be created)
|
|
if (Arrays.binarySearch(HEADLINE_TAGS, tags.openWiki) >= 0) {
|
|
this.tableOfContents.add((tags.openWikiLength - 1) + direlem);
|
|
}
|
|
}
|
|
}
|
|
|
|
// contributed by [FB], changes by [MN]
|
|
/**
|
|
* Processes tags which are connected to tables.
|
|
* @param line line of text to be transformed from wiki code to HTML
|
|
* @return HTML fragment
|
|
*/
|
|
private String processTable(final String line) {
|
|
|
|
final StringBuilder out = new StringBuilder();
|
|
|
|
if (line.startsWith(this.tableStart) && !this.processingTable) {
|
|
this.processingTable = true;
|
|
this.newRowStart = true;
|
|
out.append("<table");
|
|
if (line.trim().length() > this.lenTableStart) {
|
|
out.append(filterTableProperties(line.substring(this.lenTableStart).trim()));
|
|
}
|
|
out.append(">");
|
|
} else if (line.startsWith(this.newLine) && this.processingTable) { // new row
|
|
if (!this.newRowStart) {
|
|
out.append("\t</tr>\n");
|
|
} else {
|
|
this.newRowStart = false;
|
|
}
|
|
out.append("\t<tr>");
|
|
} else if (line.startsWith(this.cellDivider) && this.processingTable) {
|
|
out.append("\t\t<td");
|
|
final int cellEnd = (line.indexOf(this.cellDivider, this.lenCellDivider) > 0) ? (line.indexOf(this.cellDivider, this.lenCellDivider)) : (line.length());
|
|
int propEnd = line.indexOf(this.attribDivider, this.lenCellDivider);
|
|
final int occImage = line.indexOf("[[Image:", this.lenCellDivider);
|
|
final int occEscape = line.indexOf("[=", this.lenCellDivider);
|
|
//If resultOf("[[Image:") is less than propEnd, that means that there is no
|
|
//property for this cell, only an image. Without this, YaCy could get confused
|
|
//by a | in [[Image:picture.png|alt-text]] or [[Image:picture.png|alt-text]]
|
|
//Same for [= (part of [= =])
|
|
if ((propEnd > this.lenCellDivider) && ((occImage > propEnd) || (occImage < 0)) && ((occEscape > propEnd) || (occEscape < 0))) {
|
|
propEnd = line.indexOf(this.attribDivider, this.lenCellDivider) + this.lenAttribDivider;
|
|
} else {
|
|
propEnd = cellEnd;
|
|
}
|
|
// both point at same place => new line
|
|
if (propEnd == cellEnd) {
|
|
propEnd = this.lenCellDivider;
|
|
} else {
|
|
out.append(filterTableProperties(line.substring(this.lenCellDivider, propEnd - this.lenAttribDivider).trim()));
|
|
}
|
|
// quick&dirty fix [MN]
|
|
if (propEnd > cellEnd) {
|
|
propEnd = this.lenCellDivider;
|
|
}
|
|
this.processingTable = false;
|
|
this.processingCell = true;
|
|
out.append(">");
|
|
out.append(processTable(line.substring(propEnd, cellEnd).trim()));
|
|
out.append("</td>");
|
|
this.processingTable = true;
|
|
this.processingCell = false;
|
|
if (cellEnd < line.length()) {
|
|
out.append("\n");
|
|
out.append(processTable(line.substring(cellEnd)));
|
|
}
|
|
} else if (line.startsWith(this.tableEnd) && (this.processingTable)) { // Table end
|
|
this.processingTable = false;
|
|
out.append("\t</tr>\n</table>");
|
|
out.append(line.substring(this.lenTableEnd));
|
|
} else {
|
|
out.append(line);
|
|
}
|
|
return out.toString();
|
|
}
|
|
|
|
// contributed by [MN], changes by [FB]
|
|
/** Takes possible table properties and tests if they are valid.
|
|
* Valid in this case means if they are a property for the table, tr or td
|
|
* tag as stated in the HTML Pocket Reference by Jennifer Niederst (1st edition)
|
|
* The method is important to avoid XSS attacks on the wiki via table properties.
|
|
* @param properties String which may contain several table properties and/or junk.
|
|
* @return String containing only table properties.
|
|
*/
|
|
private static StringBuilder filterTableProperties(final String properties) {
|
|
final String[] values = properties.replaceAll(""", EMPTY).split("[= ]"); //splitting the string at = and blanks
|
|
final StringBuilder stringBuilder = new StringBuilder(properties.length());
|
|
String key, value;
|
|
String[] posVals;
|
|
final int numberOfValues = values.length;
|
|
for (int i = 0; i < numberOfValues; i++) {
|
|
key = values[i].trim();
|
|
if ("nowrap".equals(key)) {
|
|
appendKeyValuePair("nowrap", "nowrap", stringBuilder);
|
|
} else if (i + 1 < numberOfValues) {
|
|
value = values[++i].trim();
|
|
if (("summary".equals(key))
|
|
|| ("bgcolor".equals(key) && value.matches("#{0,1}[0-9a-fA-F]{1,6}|[a-zA-Z]{3,}"))
|
|
|| (("width".equals(key) || "height".equals(key)) && value.matches("\\d+%{0,1}"))
|
|
|| ((posVals = PROPERTY_VALUES.get(key)) != null && Arrays.binarySearch(posVals, value) >= 0)
|
|
|| (Arrays.binarySearch(TABLE_PROPERTIES, key) >= 0 && value.matches("\\d+"))) {
|
|
appendKeyValuePair(key, value, stringBuilder);
|
|
}
|
|
}
|
|
}
|
|
return stringBuilder;
|
|
}
|
|
|
|
/**
|
|
* Appends a key/value pair in HTML syntax to a given StringBuilder.
|
|
* @param key key to be appended.
|
|
* @param value value of key.
|
|
* @param stringBuilder this is what key/value are appended to.
|
|
* @return
|
|
*/
|
|
private static StringBuilder appendKeyValuePair(final String key, final String value, final StringBuilder stringBuilder) {
|
|
return stringBuilder.append(" ").append(key).append("=\"").append(value).append("\"");
|
|
}
|
|
|
|
/**
|
|
* Processes tags which are connected to ordered lists.
|
|
* @param line line of text to be transformed from wiki code to HTML
|
|
* @return HTML fragment
|
|
*/
|
|
private String processOrderedList(final String line) {
|
|
return processList(line, ListType.ORDERED);
|
|
}
|
|
|
|
/**
|
|
* Processes tags which are connected to unordered lists.
|
|
* @param line line of text to be transformed from wiki code to HTML
|
|
* @return HTML fragment
|
|
*/
|
|
private String processUnorderedList(final String line) {
|
|
return processList(line, ListType.UNORDERED);
|
|
}
|
|
|
|
/**
|
|
* Processes tags which are connected to ordered or unordered lists.
|
|
* @author contains code by [AS]
|
|
* @param line line of text to be transformed from wiki code to HTML
|
|
* @param listType type of tags to be processed
|
|
* @return HTML fragment
|
|
*/
|
|
private String processList(final String line, final ListType listType) {
|
|
|
|
final String ret;
|
|
|
|
if (!this.noList) { //lists only get processed if not forbidden (see code for [= and <pre>).
|
|
|
|
String listLevel;
|
|
final String htmlOpenList;
|
|
final String htmlCloseList;
|
|
final char symbol;
|
|
|
|
if (ListType.ORDERED.equals(listType)) {
|
|
listLevel = this.orderedListLevel;
|
|
symbol = '#';
|
|
htmlOpenList = HTML_OPEN_ORDERED_LIST;
|
|
htmlCloseList = HTML_CLOSE_ORDERED_LIST;
|
|
} else if (ListType.UNORDERED.equals(listType)) {
|
|
listLevel = this.unorderedListLevel;
|
|
symbol = ASTERISK;
|
|
htmlOpenList = HTML_OPEN_UNORDERED_LIST;
|
|
htmlCloseList = HTML_CLOSE_UNORDERED_LIST;
|
|
} else {
|
|
throw new IllegalArgumentException("Unknown list type " + listType);
|
|
}
|
|
|
|
if (line.startsWith(listLevel + symbol)) { //more #
|
|
final StringBuilder stringBuilder = new StringBuilder();
|
|
stringBuilder.append(htmlOpenList);
|
|
stringBuilder.append(serverCore.CRLF_STRING);
|
|
stringBuilder.append(HTML_OPEN_LIST_ELEMENT);
|
|
stringBuilder.append(line.substring(listLevel.length() + 1).trim());
|
|
stringBuilder.append(HTML_CLOSE_LIST_ELEMENT);
|
|
ret = stringBuilder.toString();
|
|
listLevel += symbol;
|
|
} else if (!listLevel.isEmpty() && line.startsWith(listLevel)) { //equal number of #
|
|
final StringBuilder stringBuilder = new StringBuilder();
|
|
stringBuilder.append(HTML_OPEN_LIST_ELEMENT);
|
|
stringBuilder.append(line.substring(listLevel.length()).trim());
|
|
stringBuilder.append(HTML_CLOSE_LIST_ELEMENT);
|
|
ret = stringBuilder.toString();
|
|
} else if (!listLevel.isEmpty()) { //less #
|
|
final StringBuilder stringBuilder = new StringBuilder();
|
|
final StringBuilder tmp = new StringBuilder();
|
|
|
|
int i = listLevel.length();
|
|
while (!line.startsWith(listLevel.substring(0, i))) {
|
|
tmp.append(htmlCloseList);
|
|
i--;
|
|
}
|
|
listLevel = listLevel.substring(0, i);
|
|
|
|
final int startOfContent = listLevel.length();
|
|
|
|
if (startOfContent > 0) {
|
|
stringBuilder.append(tmp);
|
|
stringBuilder.append(HTML_OPEN_LIST_ELEMENT);
|
|
stringBuilder.append(line.substring(startOfContent).trim());
|
|
stringBuilder.append(HTML_CLOSE_LIST_ELEMENT);
|
|
} else {
|
|
stringBuilder.append(tmp);
|
|
stringBuilder.append(line.substring(startOfContent).trim());
|
|
}
|
|
ret = stringBuilder.toString();
|
|
} else {
|
|
ret = line;
|
|
}
|
|
|
|
if (ListType.ORDERED.equals(listType)) {
|
|
this.orderedListLevel = listLevel;
|
|
} else if (ListType.UNORDERED.equals(listType)) {
|
|
this.unorderedListLevel = listLevel;
|
|
}
|
|
} else {
|
|
ret = line;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* Processes tags which are connected to definition lists.
|
|
* @param line line of text to be transformed from wiki code to HTML
|
|
* @return HTML fragment
|
|
*/
|
|
private String processDefinitionList(final String line) {
|
|
final String ret;
|
|
|
|
if (!this.noList) { //lists only get processed if not forbidden (see code for [= and <pre>). [MN]
|
|
|
|
if (line.startsWith(this.defListLevel + ";")) { //more semicolons
|
|
final String copyOfLine = line.substring(this.defListLevel.length() + 1);
|
|
final int positionOfOpeningTag;
|
|
if ((positionOfOpeningTag = copyOfLine.indexOf(':',0)) > 0) {
|
|
final String definitionItem = copyOfLine.substring(0, positionOfOpeningTag);
|
|
final String definitionDescription = copyOfLine.substring(positionOfOpeningTag + 1);
|
|
final StringBuilder stringBuilder = new StringBuilder();
|
|
stringBuilder.append(HTML_OPEN_DEFINITION_LIST);
|
|
stringBuilder.append(HTML_OPEN_DEFINITION_ITEM);
|
|
stringBuilder.append(definitionItem);
|
|
stringBuilder.append(HTML_CLOSE_DEFINITION_ITEM);
|
|
stringBuilder.append(HTML_OPEN_DEFINITION_DESCRIPTION);
|
|
stringBuilder.append(definitionDescription);
|
|
this.processingDefList = true;
|
|
ret = stringBuilder.toString();
|
|
} else {
|
|
ret = line;
|
|
}
|
|
this.defListLevel += ";";
|
|
} else if (!this.defListLevel.isEmpty() && line.startsWith(this.defListLevel)) { //equal number of semicolons
|
|
final String copyOfLine = line.substring(this.defListLevel.length());
|
|
final int positionOfOpeningTag;
|
|
if ((positionOfOpeningTag = copyOfLine.indexOf(':',0)) > 0) {
|
|
final String definitionItem = copyOfLine.substring(0, positionOfOpeningTag);
|
|
final String definitionDescription = copyOfLine.substring(positionOfOpeningTag + 1);
|
|
final StringBuilder stringBuilder = new StringBuilder();
|
|
stringBuilder.append(HTML_OPEN_DEFINITION_ITEM);
|
|
stringBuilder.append(definitionItem);
|
|
stringBuilder.append(HTML_CLOSE_DEFINITION_ITEM);
|
|
stringBuilder.append(HTML_OPEN_DEFINITION_DESCRIPTION);
|
|
stringBuilder.append(definitionDescription);
|
|
this.processingDefList = true;
|
|
ret = stringBuilder.toString();
|
|
} else {
|
|
ret = line;
|
|
}
|
|
} else if (!this.defListLevel.isEmpty()) { //less semicolons
|
|
int i = this.defListLevel.length();
|
|
String tmp = EMPTY;
|
|
while (!line.startsWith(this.defListLevel.substring(0, i))) {
|
|
tmp = HTML_CLOSE_DEFINITION_DESCRIPTION + HTML_CLOSE_DEFINITION_LIST;
|
|
i--;
|
|
}
|
|
this.defListLevel = this.defListLevel.substring(0, i);
|
|
int positionOfOpeningTag = this.defListLevel.length();
|
|
if (!this.defListLevel.isEmpty()) {
|
|
final String copyOfLine = line.substring(positionOfOpeningTag);
|
|
if ((positionOfOpeningTag = copyOfLine.indexOf(':',0)) > 0) {
|
|
final String definitionItem = copyOfLine.substring(0, positionOfOpeningTag);
|
|
final String definitionDescription = copyOfLine.substring(positionOfOpeningTag + 1);
|
|
final StringBuilder stringBuilder = new StringBuilder();
|
|
stringBuilder.append(tmp);
|
|
stringBuilder.append(HTML_OPEN_DEFINITION_ITEM);
|
|
stringBuilder.append(definitionItem);
|
|
stringBuilder.append(HTML_CLOSE_DEFINITION_ITEM);
|
|
stringBuilder.append(HTML_OPEN_DEFINITION_DESCRIPTION);
|
|
stringBuilder.append(definitionDescription);
|
|
this.processingDefList = true;
|
|
ret = stringBuilder.toString();
|
|
} else {
|
|
ret = line;
|
|
}
|
|
} else {
|
|
final StringBuilder stringBuilder = new StringBuilder();
|
|
stringBuilder.append(tmp);
|
|
stringBuilder.append(line.substring(positionOfOpeningTag));
|
|
ret = stringBuilder.toString();
|
|
}
|
|
} else {
|
|
ret = line;
|
|
}
|
|
} else {
|
|
ret = line;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* Processes tags which are connected to links and images.
|
|
* @author [AS], [MN]
|
|
* @param hostport (optional) host and port, added when not empty as the base of relative Wiki link URLs.
|
|
* @param line line of text to be transformed from wiki code to HTML
|
|
* @return HTML fragment
|
|
*/
|
|
private static String processLinksAndImages(final String hostport, String line) {
|
|
|
|
// create links
|
|
String kl, kv, alt, align;
|
|
int p;
|
|
int positionOfOpeningTag;
|
|
int positionOfClosingTag;
|
|
int fromIndex = 0;
|
|
// internal links and images
|
|
while ((positionOfOpeningTag = line.indexOf(WIKI_OPEN_LINK, fromIndex)) >= 0) {
|
|
positionOfClosingTag = line.indexOf(WIKI_CLOSE_LINK, positionOfOpeningTag + LEN_WIKI_OPEN_LINK);
|
|
if (positionOfClosingTag <= positionOfOpeningTag) {
|
|
break;
|
|
}
|
|
kl = line.substring(positionOfOpeningTag + LEN_WIKI_OPEN_LINK, positionOfClosingTag);
|
|
|
|
// this is the part of the code that's responsible for images
|
|
if (kl.startsWith(WIKI_IMAGE)) {
|
|
alt = EMPTY;
|
|
align = EMPTY;
|
|
kv = EMPTY;
|
|
kl = kl.substring(LEN_WIKI_IMAGE);
|
|
|
|
// are there any arguments for the image?
|
|
if ((p = kl.indexOf(PIPE_ESCAPED)) > 0) {
|
|
kv = kl.substring(p + LEN_WIKI_IMAGE);
|
|
kl = kl.substring(0, p);
|
|
// if there are 2 arguments, write them into ALIGN and ALT
|
|
if ((p = kv.indexOf(PIPE_ESCAPED)) > 0) {
|
|
align = kv.substring(0, p);
|
|
//checking validity of value for align. Only non browser specific
|
|
//values get supported. Not supported: absmiddle, baseline, texttop
|
|
if (("bottom".equals(align))
|
|
|| ("center".equals(align))
|
|
|| ("left".equals(align))
|
|
|| ("middle".equals(align))
|
|
|| ("right".equals(align))
|
|
|| ("top".equals(align))) {
|
|
align = " align=\"" + align + "\"";
|
|
} else {
|
|
align = EMPTY;
|
|
}
|
|
alt = " alt=\"" + kv.substring(p + LEN_WIKI_IMAGE) + "\"";
|
|
} // if there is just one, put it into ALT
|
|
else {
|
|
alt = " alt=\"" + kv + "\"";
|
|
}
|
|
}
|
|
|
|
// eventually replace incomplete URLs and make them point to http://peerip:port/...
|
|
// with this feature you can access an image in DATA/HTDOCS/share/yacy.gif
|
|
// using the wikicode [[Image:share/yacy.gif]]
|
|
// or an image DATA/HTDOCS/grafics/kaskelix.jpg with [[Image:grafics/kaskelix.jpg]]
|
|
// you are free to use other sub-paths of DATA/HTDOCS
|
|
if (kl.indexOf("://",0) < 1 && hostport != null && !hostport.isEmpty()) {
|
|
kl = "http://" + hostport + "/" + kl;
|
|
}
|
|
|
|
line = line.substring(0, positionOfOpeningTag) + "<img src=\"" + kl + "\"" + align + alt + ">" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_LINK);
|
|
fromIndex = positionOfClosingTag + LEN_WIKI_CLOSE_LINK;
|
|
}
|
|
// this is the part of the code that is responsible for Youtube video links supporting only the video ID as parameter
|
|
else if (kl.startsWith(WIKI_VIDEO_YOUTUBE)) {
|
|
kl = kl.substring(LEN_WIKI_VIDEO_YOUTUBE);
|
|
line = line.substring(0, positionOfOpeningTag) + "" + "<object width=\"425\" height=\"350\"><param name=\"movie\" value=\"http://www.youtube.com/v/" + kl + "\"></param><param name=\"wmode\" value=\"transparent\"></param><embed src=\"http://www.youtube.com/v/" + kl + "\" type=\"application/x-shockwave-flash\" wmode=\"transparent\" width=\"425\" height=\"350\"></embed></object>";
|
|
break;
|
|
}
|
|
// this is the part of the code that is responsible for Vimeo video links supporting only the video ID as parameter
|
|
else if (kl.startsWith(WIKI_VIDEO_VIMEO)) {
|
|
kl = kl.substring(LEN_WIKI_VIDEO_VIMEO);
|
|
line = line.substring(0, positionOfOpeningTag) + "" + "<iframe src=\"http://player.vimeo.com/video/" + kl + "\" width=\"425\" height=\"350\" frameborder=\"0\" webkitAllowFullScreen mozallowfullscreen allowFullScreen></iframe>";
|
|
break;
|
|
}
|
|
// if it's no image, it might be an internal link
|
|
else {
|
|
if ((p = kl.indexOf(PIPE_ESCAPED)) > 0) {
|
|
kv = kl.substring(p + LEN_PIPE_ESCAPED);
|
|
kl = kl.substring(0, p);
|
|
} else {
|
|
kv = kl;
|
|
}
|
|
line = line.substring(0, positionOfOpeningTag) + "<a class=\"known\" href=\"Wiki.html?page=" + kl + "\">" + kv + "</a>" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_LINK); // oob exception in append() !
|
|
fromIndex = positionOfClosingTag + LEN_WIKI_CLOSE_LINK;
|
|
}
|
|
}
|
|
|
|
fromIndex = 0;
|
|
// external links
|
|
while ((positionOfOpeningTag = line.indexOf(WIKI_OPEN_EXTERNAL_LINK, fromIndex)) >= 0) {
|
|
positionOfClosingTag = line.indexOf(WIKI_CLOSE_EXTERNAL_LINK, positionOfOpeningTag + LEN_WIKI_OPEN_EXTERNAL_LINK);
|
|
if (positionOfClosingTag <= positionOfOpeningTag) {
|
|
break;
|
|
}
|
|
kl = line.substring(positionOfOpeningTag + LEN_WIKI_OPEN_EXTERNAL_LINK, positionOfClosingTag);
|
|
if ((p = kl.indexOf(' ',0)) > 0) {
|
|
kv = kl.substring(p + 1);
|
|
kl = kl.substring(0, p);
|
|
} // No text for the link? -> <a href="http://www.url.com/">http://www.url.com/</a>
|
|
else {
|
|
kv = kl;
|
|
}
|
|
// eventually replace incomplete URLs and make them point to http://peerip:port/...
|
|
// with this feature you can access a file at DATA/HTDOCS/share/page.html
|
|
// using the wikicode [share/page.html]
|
|
// or a file DATA/HTDOCS/www/page.html with [www/page.html]
|
|
// you are free to use other sub-paths of DATA/HTDOCS
|
|
if (kl.indexOf("://",0) < 1 && hostport != null && !hostport.isEmpty()) {
|
|
kl = "http://" + hostport + "/" + kl;
|
|
}
|
|
line = line.substring(0, positionOfOpeningTag) + "<a class=\"extern\" href=\"" + kl + "\">" + kv + "</a>" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_EXTERNAL_LINK);
|
|
fromIndex = positionOfClosingTag + LEN_WIKI_CLOSE_EXTERNAL_LINK;
|
|
}
|
|
return line;
|
|
}
|
|
|
|
/**
|
|
* Processes tags which are connected preformatted text (<pre> </pre>).
|
|
* @param hostport
|
|
* @param line line of text to be transformed from wiki code to HTML
|
|
* @return HTML fragment
|
|
*/
|
|
private String processPreformattedText(final String hostport, String line) {
|
|
if (!this.escaped) {
|
|
final int positionOfOpeningTag = line.indexOf(WIKI_OPEN_PRE_ESCAPED);
|
|
final int positionOfClosingTag = line.indexOf(WIKI_CLOSE_PRE_ESCAPED);
|
|
//both <pre> and </pre> in the same line
|
|
if (positionOfOpeningTag >= 0 && positionOfClosingTag > 0) {
|
|
if (positionOfOpeningTag < positionOfClosingTag) {
|
|
final StringBuilder preformattedText = new StringBuilder();
|
|
preformattedText.append("<pre style=\"border:dotted;border-width:thin;\">");
|
|
preformattedText.append(line.substring(positionOfOpeningTag + LEN_WIKI_OPEN_PRE_ESCAPED, positionOfClosingTag));
|
|
preformattedText.append("</pre>");
|
|
|
|
line = processLineOfWikiCode(hostport, line.substring(0, positionOfOpeningTag).replaceAll("!pre!", "!pre!!") + "!pre!txt!" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_PRE_ESCAPED).replaceAll("!pre!", "!pre!!"));
|
|
line = line.replace("!pre!txt!", preformattedText.toString().replaceAll("!pre!", "!pre!!"));
|
|
line = line.replaceAll("!pre!!", "!pre!");
|
|
} //handles cases like <pre><pre> </pre></pre> <pre> </pre> that would cause an exception otherwise
|
|
else {
|
|
this.processingPreformattedText = true;
|
|
final String temp1 = processLineOfWikiCode(hostport, line.substring(0, positionOfOpeningTag - 1).replaceAll("!tmp!", "!tmp!!") + "!tmp!txt!");
|
|
this.noList = true;
|
|
final String temp2 = processLineOfWikiCode(hostport, line.substring(positionOfOpeningTag));
|
|
this.noList = false;
|
|
line = temp1.replaceAll("!tmp!txt!", temp2);
|
|
line = line.replaceAll("!tmp!!", "!tmp!");
|
|
this.processingPreformattedText = false;
|
|
}
|
|
} //start <pre>
|
|
else if (positionOfOpeningTag >= 0 && !this.preformattedSpanning) {
|
|
this.processingPreformattedText = true; //prevent surplus line breaks
|
|
final StringBuilder openBlockQuoteTags = new StringBuilder(); //gets filled with <blockquote>s as needed
|
|
String preformattedText = "<pre style=\"border:dotted;border-width:thin;\">" + line.substring(positionOfOpeningTag + LEN_WIKI_OPEN_PRE_ESCAPED);
|
|
preformattedText = preformattedText.replaceAll("!pre!", "!pre!!");
|
|
//taking care of indented lines
|
|
while (this.preindented < positionOfOpeningTag && positionOfOpeningTag < line.length() &&
|
|
line.substring(this.preindented, positionOfOpeningTag).charAt(0) == WIKI_INDENTION) {
|
|
this.preindented++;
|
|
openBlockQuoteTags.append(HTML_OPEN_BLOCKQUOTE);
|
|
}
|
|
line = processLineOfWikiCode(hostport, line.substring(this.preindented, positionOfOpeningTag).replaceAll("!pre!", "!pre!!") + "!pre!txt!");
|
|
line = openBlockQuoteTags + line.replace("!pre!txt!", preformattedText);
|
|
line = line.replaceAll("!pre!!", "!pre!");
|
|
this.preformattedSpanning = true;
|
|
} //end </pre>
|
|
else if (positionOfClosingTag >= 0 && this.preformattedSpanning) {
|
|
this.preformattedSpanning = false;
|
|
final StringBuilder endBlockQuoteTags = new StringBuilder(); //gets filled with </blockquote>s as needed
|
|
String preformattedText = line.substring(0, positionOfClosingTag) + "</pre>";
|
|
preformattedText = preformattedText.replaceAll("!pre!", "!pre!!");
|
|
//taking care of indented lines
|
|
while (this.preindented > 0) {
|
|
endBlockQuoteTags.append(HTML_CLOSE_BLOCKQUOTE);
|
|
this.preindented--;
|
|
}
|
|
line = processLineOfWikiCode(hostport, "!pre!txt!" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_PRE_ESCAPED).replaceAll("!pre!", "!pre!!"));
|
|
line = line.replace("!pre!txt!", preformattedText) + endBlockQuoteTags;
|
|
line = line.replaceAll("!pre!!", "!pre!");
|
|
this.processingPreformattedText = false;
|
|
} //Getting rid of surplus </pre>
|
|
else if (positionOfOpeningTag >= 0 && !this.preformattedSpanning) {
|
|
int posTag;
|
|
while ((posTag = line.indexOf(WIKI_CLOSE_PRE_ESCAPED)) >= 0) {
|
|
line = line.substring(0, posTag) + line.substring(posTag + LEN_WIKI_CLOSE_PRE_ESCAPED);
|
|
}
|
|
line = processLineOfWikiCode(hostport, line);
|
|
}
|
|
}
|
|
return line;
|
|
}
|
|
|
|
/** Creates table of contents for a wiki page.
|
|
* @return HTML fragment
|
|
*/
|
|
private StringBuilder createTableOfContents() {
|
|
final StringBuilder directory = new StringBuilder();
|
|
String element;
|
|
int s = 0;
|
|
int level = 1;
|
|
int level1 = 0;
|
|
int level2 = 0;
|
|
int level3 = 0;
|
|
int level4 = 0;
|
|
int level5 = 0;
|
|
int level6 = 0;
|
|
int doubles = 0;
|
|
String anchorext = EMPTY;
|
|
if ((s = this.tableOfContents.size()) > 2) {
|
|
directory.append("<table><tr><td><div class=\"WikiTOCBox\">\n");
|
|
for (int i = 0; i < s; i++) {
|
|
if (i >= this.tableOfContents.size()) {
|
|
break;
|
|
}
|
|
element = this.tableOfContents.get(i);
|
|
if (element == null) {
|
|
continue;
|
|
}
|
|
//counting double headlines
|
|
doubles = 0;
|
|
for (int j = 0; j < i; j++) {
|
|
if (j >= this.tableOfContents.size()) {
|
|
break;
|
|
}
|
|
final String d = this.tableOfContents.get(j);
|
|
if (d == null || d.isEmpty()) {
|
|
continue;
|
|
}
|
|
final String a = REGEX_NOT_CHAR_NUM_OR_UNDERSCORE_PATTERN.matcher(SPACE_PATTERN.matcher(d.substring(1)).replaceAll("_")).replaceAll(EMPTY);
|
|
final String b = REGEX_NOT_CHAR_NUM_OR_UNDERSCORE_PATTERN.matcher(SPACE_PATTERN.matcher(element.substring(1)).replaceAll("_")).replaceAll(EMPTY);
|
|
if (a.equals(b)) {
|
|
doubles++;
|
|
}
|
|
}
|
|
//if there are doubles, create anchor extension
|
|
if (doubles > 0) {
|
|
anchorext = "_" + (doubles + 1);
|
|
}
|
|
|
|
final char l = element.charAt(0);
|
|
String temp = "";
|
|
if (Arrays.binarySearch(HEADLINE_LEVEL, l) >= 0 && !element.isEmpty()) {
|
|
|
|
switch (l) {
|
|
|
|
case SIX: {
|
|
if (level < 6) {
|
|
level = 6;
|
|
level6 = 0;
|
|
}
|
|
level6++;
|
|
temp = element.substring(1);
|
|
element = level1 + "." + level2 + "." + level3 + "." + level4 + "." + level5 + "." + level6 + " " + temp;
|
|
directory.append(" <a href=\"#");
|
|
break;
|
|
}
|
|
case FIVE: {
|
|
if (level == 1) {
|
|
level2 = 0;
|
|
level = 2;
|
|
}
|
|
if (level == 3) {
|
|
level = 2;
|
|
}
|
|
level5++;
|
|
temp = element.substring(1);
|
|
element = level1 + "." + level2 + "." + level3 + "." + level4 + "." + level5 + " " + temp;
|
|
directory.append(" <a href=\"#");
|
|
break;
|
|
}
|
|
case FOUR: {
|
|
if (level == 1) {
|
|
level2 = 0;
|
|
level = 2;
|
|
}
|
|
if (level == 3) {
|
|
level = 2;
|
|
}
|
|
level4++;
|
|
temp = element.substring(1);
|
|
element = level1 + "." + level2 + "." + level3 + "." + level4 + " " + temp;
|
|
directory.append(" <a href=\"#");
|
|
break;
|
|
}
|
|
case THREE: {
|
|
if (level == 1) {
|
|
level2 = 0;
|
|
level = 2;
|
|
}
|
|
if (level == 3) {
|
|
level = 2;
|
|
}
|
|
level3++;
|
|
temp = element.substring(1);
|
|
element = level1 + "." + level2 + "." + level3 + " " + temp;
|
|
directory.append(" <a href=\"#");
|
|
break;
|
|
}
|
|
case TWO: {
|
|
if (level == 1) {
|
|
level2 = 0;
|
|
level = 2;
|
|
}
|
|
if (level == 3) {
|
|
level = 2;
|
|
}
|
|
level2++;
|
|
temp = element.substring(1);
|
|
element = level1 + "." + level2 + " " + temp;
|
|
directory.append(" <a href=\"#");
|
|
break;
|
|
}
|
|
case ONE: {
|
|
if (level > 1) {
|
|
level = 1;
|
|
level2 = 0;
|
|
level3 = 0;
|
|
level4 = 0;
|
|
level5 = 0;
|
|
level6 = 0;
|
|
}
|
|
level1++;
|
|
temp = element.substring(1);
|
|
element = level1 + ". " + temp;
|
|
directory.append("<a href=\"#");
|
|
break;
|
|
}
|
|
default: {
|
|
throw new IllegalArgumentException("illegal headline level: " + l);
|
|
}
|
|
}
|
|
directory.append(REGEX_NOT_CHAR_NUM_OR_UNDERSCORE_PATTERN.matcher(SPACE_PATTERN.matcher(temp).replaceAll("_")).replaceAll(EMPTY));
|
|
directory.append(anchorext);
|
|
directory.append("\" class=\"WikiTOC\">");
|
|
directory.append(element);
|
|
directory.append("</a><br />\n");
|
|
}
|
|
anchorext = EMPTY;
|
|
}
|
|
directory.append("</div></td></tr></table>\n");
|
|
}
|
|
|
|
return directory;
|
|
}
|
|
|
|
/**
|
|
* Replaces the wiki representation of tags with the HTML representation.
|
|
* @param input String which potentially contains tags to be replaced.
|
|
* @param tags tags to be replaced.
|
|
* @return String with replaced tags.
|
|
*/
|
|
private String tagReplace(final String input, final Tags tags) {
|
|
final String direlem = null; //string to keep headlines until they get added to List dirElements
|
|
|
|
final StringBuilder stringBuilder = new StringBuilder(input);
|
|
|
|
int firstPosition = 0;
|
|
int secondPosition = 0;
|
|
//replace pattern if a pair of the pattern can be found in the line
|
|
while (((firstPosition = stringBuilder.indexOf(tags.openWiki, secondPosition)) >= 0) &&
|
|
((secondPosition = stringBuilder.indexOf(tags.closeWiki, firstPosition + tags.openWikiLength)) >= 0)) {
|
|
|
|
//extra treatment for headlines
|
|
if (Arrays.binarySearch(HEADLINE_TAGS, tags.openWiki) >= 0) {
|
|
// require line starts with headline markup (hdr e.g. " == Title == " but not "Seven = six plus one" )
|
|
int i = 0;
|
|
boolean beginsWith = true;
|
|
while (i < firstPosition) {
|
|
if (stringBuilder.charAt(i) > ' ') {
|
|
beginsWith = false;
|
|
break;
|
|
}
|
|
i++;
|
|
}
|
|
if (beginsWith) processHeadline(stringBuilder, firstPosition, tags, secondPosition, direlem);
|
|
} else {
|
|
final int oldLength = stringBuilder.length();
|
|
stringBuilder.replace(firstPosition, firstPosition + tags.openWikiLength, tags.openHTML);
|
|
secondPosition += stringBuilder.length() - oldLength;
|
|
stringBuilder.replace(secondPosition, secondPosition + tags.closeWikiLength, tags.closeHTML);
|
|
}
|
|
}
|
|
|
|
return stringBuilder.toString();
|
|
}
|
|
|
|
/** Replaces wiki tags with HTML tags in one line of text.
|
|
* @param hostport
|
|
* @param line line of text to be transformed from wiki code to HTML
|
|
* @return HTML fragment
|
|
*/
|
|
private String processLineOfWikiCode(final String hostport, String line) {
|
|
//If HTML has not been replaced yet (can happen if method gets called in recursion), replace now!
|
|
line = processMetadata(line);
|
|
if ((!this.replacedHtmlAlready || this.preformattedSpanning) && line.indexOf(WIKI_CLOSE_PRE_ESCAPED) < 0) {
|
|
line = CharacterCoding.unicode2html(line, true);
|
|
this.replacedHtmlAlready = true;
|
|
}
|
|
|
|
//check if line contains preformatted symbols or if we are in a preformatted sequence already.
|
|
if ((line.indexOf(WIKI_OPEN_PRE_ESCAPED) >= 0) ||
|
|
(line.indexOf(WIKI_CLOSE_PRE_ESCAPED) >= 0) ||
|
|
this.preformattedSpanning) {
|
|
line = processPreformattedText(hostport, line);
|
|
} else {
|
|
|
|
//tables first -> wiki-tags in cells can be treated after that
|
|
line = processTable(line);
|
|
|
|
// format lines
|
|
if (!line.isEmpty() && line.charAt(0) == WIKI_FORMATTED) {
|
|
line = "<tt>" + line.substring(1) + "</tt>";
|
|
}
|
|
if (line.startsWith(WIKI_HR_LINE)) {
|
|
line = "<hr />" + line.substring(LEN_WIKI_HR_LINE);
|
|
}
|
|
|
|
if (!line.isEmpty() && line.charAt(0) == WIKI_INDENTION) {
|
|
final StringBuilder head = new StringBuilder();
|
|
final StringBuilder tail = new StringBuilder();
|
|
while (!line.isEmpty() && line.charAt(0) == WIKI_INDENTION) {
|
|
head.append(HTML_OPEN_BLOCKQUOTE);
|
|
tail.append(HTML_CLOSE_BLOCKQUOTE);
|
|
line = line.substring(1);
|
|
}
|
|
line = head + line + tail;
|
|
}
|
|
|
|
// format headers
|
|
line = tagReplace(line, Tags.HEADLINE_6);
|
|
line = tagReplace(line, Tags.HEADLINE_5);
|
|
line = tagReplace(line, Tags.HEADLINE_4);
|
|
line = tagReplace(line, Tags.HEADLINE_3);
|
|
line = tagReplace(line, Tags.HEADLINE_2);
|
|
line = tagReplace(line, Tags.HEADLINE_1);
|
|
|
|
line = tagReplace(line, Tags.EMPHASIZE_3);
|
|
line = tagReplace(line, Tags.EMPHASIZE_2);
|
|
line = tagReplace(line, Tags.EMPHASIZE_1);
|
|
|
|
line = tagReplace(line, Tags.STRIKE);
|
|
line = tagReplace(line, Tags.UNDERLINE);
|
|
|
|
line = processUnorderedList(line);
|
|
line = processOrderedList(line);
|
|
line = processDefinitionList(line);
|
|
|
|
line = processLinksAndImages(hostport, line);
|
|
|
|
}
|
|
|
|
if (!this.processingPreformattedText) {
|
|
this.replacedHtmlAlready = false;
|
|
}
|
|
if (!(line.endsWith(HTML_CLOSE_LIST_ELEMENT) || this.processingDefList || this.escape || this.processingPreformattedText || this.processingTable || this.processingCell)) {
|
|
line += "<br />";
|
|
}
|
|
return line;
|
|
}
|
|
|
|
|
|
/**
|
|
* Process template inclusions in line, eventually with geo coordinate metadata
|
|
* @param line line of wiki text
|
|
* @return cleaned text with eventual geo coordinates formatted to be recognizable by parser
|
|
* @see <a href="https://en.wikipedia.org/wiki/Wikipedia:Transclusion">Wikipedia:Transclusion</a>
|
|
*/
|
|
protected static String processMetadata(final String line) {
|
|
StringBuilder processedLine = new StringBuilder(line);
|
|
int openIndex, closeIndex, fromIndex = 0;
|
|
while ((openIndex = processedLine.indexOf(WIKI_OPEN_METADATA, fromIndex)) >= 0) {
|
|
closeIndex = processedLine.indexOf(WIKI_CLOSE_METADATA, openIndex + LEN_WIKI_OPEN_METADATA);
|
|
/* Closing tag position : handle eventually nested tags */
|
|
int nextOpenIndex = processedLine.indexOf(WIKI_OPEN_METADATA, openIndex + LEN_WIKI_OPEN_METADATA);
|
|
while(nextOpenIndex >= 0 && nextOpenIndex < closeIndex) {
|
|
closeIndex = processedLine.indexOf(WIKI_CLOSE_METADATA, closeIndex + LEN_WIKI_CLOSE_METADATA);
|
|
if(closeIndex < 0) {
|
|
/* Parent closing mark is missing: likely a multi-line template inclusion */
|
|
break;
|
|
}
|
|
nextOpenIndex = processedLine.indexOf(WIKI_OPEN_METADATA, nextOpenIndex + LEN_WIKI_OPEN_METADATA);
|
|
}
|
|
if(closeIndex > 0) {
|
|
final String content = processedLine.substring(openIndex + LEN_WIKI_OPEN_METADATA, closeIndex);
|
|
if (content.toLowerCase(Locale.ROOT).startsWith("coordinate")) {
|
|
// parse Geographical Coordinates as described in
|
|
// http://en.wikipedia.org/wiki/Wikipedia:Manual_of_Style_%28dates_and_numbers%29#Geographical_coordinates
|
|
// looks like:
|
|
// {{Coord|57|18|22.5|N|4|27|32.7|W|display=title}}
|
|
// however, such information does not appear as defined above but as:
|
|
// {{coordinate|NS=52.205944|EW=0.117593|region=GB-CAM|type=landmark}}
|
|
// {{coordinate|NS=43/50/29/N|EW=73/23/17/W|type=landmark|region=US-NY}}
|
|
// and if passed through this parser:
|
|
// {{Coordinate |NS 45/37/43.0/N |EW. 07/58/41.0/E |type=landmark |region=IT-BI}} ## means: degree/minute/second
|
|
// {{Coordinate |NS 51.48994 |EW. 7.33249 |type=landmark |region=DE-NW}}
|
|
final String b[] = content.split("\\|");
|
|
float lon = Float.NaN, lat = Float.NaN; // degree
|
|
float lonm = 0.0f, latm = 0.0f; // minutes (including sec as fraction)
|
|
String lono = "E", lato = "N";
|
|
String name = "";
|
|
try {
|
|
for (final String c : b) {
|
|
if (c.toLowerCase(Locale.ROOT).startsWith("name=")) {
|
|
name = c.substring(5);
|
|
}
|
|
if (c.toUpperCase().startsWith("NS=")) {
|
|
final String d[] = c.substring(3).split("/");
|
|
if (d.length == 1) {float l = Float.parseFloat(d[0]); if (l < 0) {lato = "S"; l = -l;} lat = (float) Math.floor(l); latm = 60.0f * (l - lat);}
|
|
else if (d.length > 1) { //format: NS deg/min/sec/N
|
|
lat = Float.parseFloat(d[0]); // degree
|
|
if (!d[1].isEmpty()) latm = Float.parseFloat(d[1]); // minutes
|
|
if (d.length >= 3 && !d[2].isEmpty()) {latm += (Float.parseFloat(d[2]) / 60.0f);} // sec (check empty because format found "45/10//N" )
|
|
if (d[d.length - 1].toUpperCase().equals("S")) lato = "S";
|
|
}
|
|
}
|
|
if (c.toUpperCase().startsWith("EW=")) {
|
|
final String d[] = c.substring(3).split("/");
|
|
if (d.length == 1) {float l = Float.parseFloat(d[0]); if (l < 0) {lono = "W"; l = -l;} lon = (float) Math.floor(l); lonm = 60.0f * (l - lon);}
|
|
else if (d.length > 1) {
|
|
lon = Float.parseFloat(d[0]);
|
|
if (!d[1].isEmpty()) lonm = Float.parseFloat(d[1]);
|
|
if (d.length >= 3 && !d[2].isEmpty()) {lonm += (Float.parseFloat(d[2]) / 60.0f);}
|
|
if (d[d.length-1].toUpperCase().equals("W")) {lono = "W";}
|
|
}
|
|
}
|
|
}
|
|
} catch (NumberFormatException nsExcept) {
|
|
// catch parseFloat exception (may still happen if wiki code contains expressions)
|
|
processedLine.delete(closeIndex, closeIndex + LEN_WIKI_CLOSE_METADATA);
|
|
processedLine.delete(openIndex, openIndex + LEN_WIKI_OPEN_METADATA);
|
|
fromIndex = openIndex;
|
|
continue;
|
|
}
|
|
if (!Float.isNaN(lon) && !Float.isNaN(lat)) {
|
|
// replace this with a format that the html parser can understand
|
|
final String htmlCoord = (name.length() > 0 ? (" " + name) : "") +
|
|
WIKI_FORMATTED +"<nobr> " + lato + " " + lat + "\u00B0 " + latm + "'</nobr><nobr>" + lono + " " + lon + "\u00B0 " + lonm + "'</nobr>" + WIKI_FORMATTED;
|
|
processedLine.replace(openIndex, closeIndex + LEN_WIKI_CLOSE_METADATA, htmlCoord);
|
|
|
|
/* Set next position to openIndex as some parameters can still contain nested template inclusion tags */
|
|
fromIndex = openIndex;
|
|
continue;
|
|
}
|
|
fromIndex = closeIndex; // continue with next position
|
|
} else {
|
|
String processedContent;
|
|
/* Any other template inclusion : only remove opening and closing tag and parameter separators */
|
|
int nestedOpenTagIndex = content.indexOf(WIKI_OPEN_METADATA);
|
|
int lastNestedCloseTagIndex = content.lastIndexOf(WIKI_CLOSE_METADATA);
|
|
if(nestedOpenTagIndex >= 0 && lastNestedCloseTagIndex > 0) {
|
|
processedContent = WIKI_FORMATTED + content.substring(0, nestedOpenTagIndex).replace(WIKI_METADATA_PARAMETER_SEPARATOR, ' ').replace('=', ' ')
|
|
+ content.substring(nestedOpenTagIndex, lastNestedCloseTagIndex)
|
|
+ content.substring(lastNestedCloseTagIndex).replace(WIKI_METADATA_PARAMETER_SEPARATOR, ' ').replace('=', ' ') + WIKI_FORMATTED;
|
|
fromIndex = openIndex; // continue with next nested position
|
|
} else {
|
|
/* No nested tag : we can now replace parameter separators with spaces in all remaining content */
|
|
processedContent = WIKI_FORMATTED + content.replace(WIKI_METADATA_PARAMETER_SEPARATOR, ' ').replace('=', ' ') + WIKI_FORMATTED;
|
|
fromIndex = openIndex + processedContent.length(); // continue with next position
|
|
}
|
|
processedLine.replace(openIndex, closeIndex + LEN_WIKI_CLOSE_METADATA, processedContent);
|
|
}
|
|
} else {
|
|
/* Multi-line template inclusion : only remove opening tag and parameter separators until eventually first nested tag */
|
|
int nestedOpenTagIndex = processedLine.indexOf(WIKI_OPEN_METADATA, openIndex + LEN_WIKI_OPEN_METADATA);
|
|
if(nestedOpenTagIndex >= 0) {
|
|
processedLine.replace(openIndex, nestedOpenTagIndex,
|
|
WIKI_FORMATTED
|
|
+ processedLine.substring(openIndex + LEN_WIKI_OPEN_METADATA, nestedOpenTagIndex)
|
|
.replace(WIKI_METADATA_PARAMETER_SEPARATOR, ' ').replace('=', ' '));
|
|
fromIndex = openIndex;
|
|
} else {
|
|
processedLine.replace(openIndex, processedLine.length(), WIKI_FORMATTED
|
|
+ processedLine.substring(openIndex + LEN_WIKI_OPEN_METADATA).replace(WIKI_METADATA_PARAMETER_SEPARATOR, ' ').replace('=', ' '));
|
|
break;
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
/* Handle any eventual multi-line template remaining closing tags */
|
|
fromIndex = 0;
|
|
while ((closeIndex = processedLine.indexOf(WIKI_CLOSE_METADATA, fromIndex)) >= 0) {
|
|
processedLine.replace(fromIndex, closeIndex, processedLine.substring(fromIndex, closeIndex).replace(WIKI_METADATA_PARAMETER_SEPARATOR, ' ').replace('=', ' '));
|
|
processedLine.delete(closeIndex, closeIndex + LEN_WIKI_CLOSE_METADATA);
|
|
fromIndex = closeIndex;
|
|
}
|
|
|
|
/* Handle any eventual multi-line template remaining parameter lines */
|
|
String result = processedLine.toString();
|
|
if(result.matches("^\\s*\\" + WIKI_METADATA_PARAMETER_SEPARATOR + "\\s*[^\\-\\}\\|].*")) {
|
|
result = result.replace(WIKI_METADATA_PARAMETER_SEPARATOR, ' ').replace('=', ' ');
|
|
}
|
|
return result;
|
|
}
|
|
|
|
private class TableOfContent {
|
|
|
|
private final List<String> toc = new ArrayList<String>(); // needs to be list which ensures order
|
|
|
|
int size() {
|
|
return this.toc.size();
|
|
}
|
|
|
|
String get(final int index) {
|
|
return this.toc.get(index);
|
|
}
|
|
|
|
synchronized boolean add(final String element) {
|
|
return this.toc.add(element);
|
|
}
|
|
|
|
Iterator<String> iterator() {
|
|
return this.toc.iterator();
|
|
}
|
|
|
|
void clear() {
|
|
this.toc.clear();
|
|
}
|
|
}
|
|
}
|