- fixed missing save operation for peer name change

- fixed import of mediawiki dump files
- added script to import mediawiki dump files

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7609 6c8d7289-2bf4-0310-a012-ef5d649a1542
commit a50f28e6e7, parent 2b5f8585bf, by orbiter, 14 years ago

@@ -0,0 +1,3 @@
+#!/bin/bash
+cd "`dirname $0`"
+./apicall.sh /IndexImportWikimedia_p.html?file=$1 > /dev/null
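
The script changes into its own directory before running, so it can be called from anywhere; it simply hands the given dump file path to the local peer's IndexImportWikimedia_p servlet via apicall.sh. A hypothetical invocation (the script's file name is not visible in this view; importDump.sh is assumed):

    ./importDump.sh DATA/HTCACHE/dewiki-20090311-pages-articles.xml.bz2

Note that the path is interpreted by the peer process, not by the calling shell, so it should be absolute or relative to the YaCy application directory.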

@@ -191,7 +191,7 @@ public class Blog {
             prop.putHTML("mode_author", UTF8.String(author));
             prop.putHTML("mode_subject", post.get("subject",""));
             prop.put("mode_date", dateString(new Date()));
-            prop.putWiki("mode_page", post.get("content", ""));
+            prop.putWiki(sb.peers.mySeed().getClusterAddress(), "mode_page", post.get("content", ""));
             prop.putHTML("mode_page-code", post.get("content", ""));
         }
         else {
@@ -234,7 +234,7 @@ public class Blog {
         else {
             //only show 1 entry
             prop.put("mode_entries", "1");
-            putBlogEntry(prop, page, address, 0, hasRights, xml);
+            putBlogEntry(sb, prop, page, address, 0, hasRights, xml);
         }
     }
@@ -263,6 +263,7 @@ public class Blog {
         while (i.hasNext() && (num == 0 || num > count)) {
             if(0 < start--) continue;
             putBlogEntry(
+                    switchboard,
                     prop,
                     switchboard.blogDB.readBlogEntry(i.next()),
                     address,
@@ -293,6 +294,7 @@ public class Blog {
     }

     private static serverObjects putBlogEntry(
+            final Switchboard sb,
             final serverObjects prop,
             final BlogBoard.BlogEntry entry,
             final String address,
@@ -324,7 +326,7 @@ public class Blog {
             prop.put("mode_entries_" + number + "_page", entry.getPage());
             prop.put("mode_entries_" + number + "_timestamp", entry.getTimestamp());
         } else {
-            prop.putWiki("mode_entries_" + number + "_page", entry.getPage());
+            prop.putWiki(sb.peers.mySeed().getClusterAddress(), "mode_entries_" + number + "_page", entry.getPage());
         }
         if (hasRights) {

@@ -175,7 +175,7 @@ public class BlogComments {
             prop.putHTML("mode_allow_author", UTF8.String(author));
             prop.putHTML("mode_subject", post.get("subject",""));
             prop.put("mode_date", dateString(new Date()));
-            prop.putWiki("mode_page", post.get("content", ""));
+            prop.putWiki(sb.peers.mySeed().getClusterAddress(), "mode_page", post.get("content", ""));
             prop.put("mode_page-code", post.get("content", ""));
         } else {
             // show blog-entry/entries
@@ -191,7 +191,7 @@ public class BlogComments {
             prop.putHTML("mode_allow_author", UTF8.String(author));
             prop.put("mode_comments", page.getCommentsSize());
             prop.put("mode_date", dateString(page.getDate()));
-            prop.putWiki("mode_page", page.getPage());
+            prop.putWiki(sb.peers.mySeed().getClusterAddress(), "mode_page", page.getPage());
             if (hasRights) {
                 prop.put("mode_admin", "1");
                 prop.put("mode_admin_pageid", page.getKey());
@@ -234,7 +234,7 @@ public class BlogComments {
                 if (!xml) {
                     prop.putHTML("mode_entries_"+count+"_subject", UTF8.String(entry.getSubject()));
                     prop.putHTML("mode_entries_"+count+"_author", UTF8.String(entry.getAuthor()));
-                    prop.putWiki("mode_entries_"+count+"_page", entry.getPage());
+                    prop.putWiki(sb.peers.mySeed().getClusterAddress(), "mode_entries_"+count+"_page", entry.getPage());
                 } else {
                     prop.putHTML("mode_entries_"+count+"_subject", UTF8.String(entry.getSubject()));
                     prop.putHTML("mode_entries_"+count+"_author", UTF8.String(entry.getAuthor()));

@@ -103,11 +103,11 @@ public class ConfigBasic {
         // check if peer name already exists
         final yacySeed oldSeed = sb.peers.lookupByName(peerName);
-        if (oldSeed == null && !peerName.equals(sb.peers.mySeed().getName())) {
-            // the name is new
-            if (Pattern.compile("[A-Za-z0-9\\-_]{3,80}").matcher(peerName).matches()) {
-                sb.peers.mySeed().setName(peerName);
-            }
+        if (oldSeed == null &&
+            !peerName.equals(sb.peers.mySeed().getName()) &&
+            Pattern.compile("[A-Za-z0-9\\-_]{3,80}").matcher(peerName).matches()) {
+            sb.peers.mySeed().setName(peerName);
+            sb.peers.saveMySeed();
         }

         // UPnP config
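
This is the "missing save operation" fix from the commit message: a valid rename previously only updated the in-memory seed, so the new peer name was lost on restart. The name-is-new and pattern checks are folded into a single condition, and saveMySeed(), made public in the yacySeedDB hunk below, persists the change immediately.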

@@ -15,14 +15,14 @@
     <form action="IndexImportWikimedia_p.html" method="get" accept-charset="UTF-8">
     <!-- no post method here, we don't want to transmit the whole file, only the path-->
       <fieldset>
-        <legend>Wikimedia Dump File Selection: select a 'bz2' file</legend>
+        <legend>Wikimedia Dump File Selection: select a xml file (which may be bz2- or gz-encoded)</legend>
         You can import Wikipedia dumps here. An example is the file
         <a href="http://download.wikimedia.org/dewiki/20090311/dewiki-20090311-pages-articles.xml.bz2">
         http://download.wikimedia.org/dewiki/20090311/dewiki-20090311-pages-articles.xml.bz2</a>.
         <br />
-        Dumps must be in XML format and must be encoded in bz2. Do not decompress the file after downloading!
+        Dumps must be in XML format and may be compressed in gz or bz2. Uncompressed XML is also ok.
         <br />
-        <input name="file" type="text" value="DATA/HTCACHE/dewiki-20090311-pages-articles.xml.bz2" size="80" />
+        <input name="file" type="text" value="" size="80" />
         <input name="submit" type="submit" value="Import Wikimedia Dump" />
       </fieldset>
     </form>

@@ -57,16 +57,17 @@ public class IndexImportWikimedia_p {
         } else {
             if (post.containsKey("file")) {
                 final File sourcefile = new File(post.get("file"));
-                final String name = sourcefile.getName(); // i.e. dewiki-20090311-pages-articles.xml.bz2
+                //final String name = sourcefile.getName(); // i.e. dewiki-20090311-pages-articles.xml.bz2
+                /*
                 if (!name.endsWith("pages-articles.xml.bz2")) {
                     prop.put("import", 0);
                     prop.put("import_status", 1);
                     prop.put("import_status_message", "file name must end with 'pages-articles.xml.bz2'");
                     return prop;
                 }
-                final String lang = name.substring(0, 2);
+                */
                 try {
-                    MediawikiImporter.job = new MediawikiImporter(sourcefile, sb.surrogatesInPath, "http://" + lang + ".wikipedia.org/wiki/");
+                    MediawikiImporter.job = new MediawikiImporter(sourcefile, sb.surrogatesInPath);
                     MediawikiImporter.job.start();
                     prop.put("import", 1);
                     prop.put("import_thread", "started");

@@ -107,7 +107,7 @@ public class MessageSend_p {
             prop.putXML("mode_permission_message", message);
             prop.putHTML("mode_permission_hash", hash);
             if (post.containsKey("preview")) {
-                prop.putWiki("mode_permission_previewmessage", message);
+                prop.putWiki(sb.peers.mySeed().getClusterAddress(), "mode_permission_previewmessage", message);
             }

@@ -160,7 +160,7 @@ public class Messages_p {
             prop.putXML("mode_subject", message.subject());
             String theMessage = null;
             theMessage = UTF8.String(message.message());
-            prop.putWiki("mode_message", theMessage);
+            prop.putWiki(sb.peers.mySeed().getClusterAddress(), "mode_message", theMessage);
             prop.put("mode_hash", message.authorHash());
             prop.putXML("mode_key", key);
         }

@@ -162,7 +162,7 @@ public class ViewProfile {
                 prop.put("success_" + key, "1");
                 // only comments get "wikified"
                 if(key.equals("comment")){
-                    prop.putWiki(
+                    prop.putWiki(sb.peers.mySeed().getClusterAddress(),
                             "success_" + key + "_value",
                             entry.getValue().replaceAll("\r", "").replaceAll("\\\\n", "\n"));
                     prop.put("success_" + key + "_b64value", Base64Order.standardCoder.encodeString(entry.getValue()));

@@ -152,7 +152,7 @@ public class Wiki {
             prop.put("mode_display", display);
             prop.putHTML("mode_author", author);
             prop.put("mode_date", dateString(new Date()));
-            prop.putWiki("mode_page", post.get("content", ""));
+            prop.putWiki(sb.peers.mySeed().getClusterAddress(), "mode_page", post.get("content", ""));
             prop.putHTML("mode_page-code", post.get("content", ""));
         }
         //end contrib of [MN]
@@ -247,7 +247,7 @@ public class Wiki {
                     prop.put("mode_versioning_display", display);
                     prop.putHTML("mode_versioning_author", oentry.author());
                     prop.put("mode_versioning_date", dateString(oentry.date()));
-                    prop.putWiki("mode_versioning_page", oentry.page());
+                    prop.putWiki(sb.peers.mySeed().getClusterAddress(), "mode_versioning_page", oentry.page());
                     prop.putHTML("mode_versioning_page-code", UTF8.String(oentry.page()));
                 }
             } catch (final IOException e) {
@@ -263,7 +263,7 @@ public class Wiki {
             prop.put("mode_display", display);
             prop.putHTML("mode_author", page.author());
             prop.put("mode_date", dateString(page.date()));
-            prop.putWiki("mode_page", page.page());
+            prop.putWiki(sb.peers.mySeed().getClusterAddress(), "mode_page", page.page());
             prop.put("controls", "0");
             prop.putHTML("controls_pagename", pagename);

@@ -71,7 +71,7 @@ public class mediawiki_p {
         page = page.substring(p, q);

         prop.putHTML("title", title);
-        prop.putWiki("page", page);
+        prop.putWiki(sb.peers.mySeed().getClusterAddress(), "page", page);

         return prop;
     }

@@ -34,17 +34,12 @@ import java.io.UnsupportedEncodingException;
 abstract class AbstractWikiParser implements WikiParser {

-    final String address;
-
-    public AbstractWikiParser(final String address) {
-        this.address = address;
-    }
-
-    protected abstract String transform(BufferedReader reader, int length) throws IOException;
+    protected abstract String transform(String hostport, BufferedReader reader, int length) throws IOException;

-    public String transform(final String content) {
+    public String transform(String hostport, final String content) {
         try {
             return transform(
+                    hostport,
                     new BufferedReader(new StringReader(content)),
                     content.length());
         } catch (final IOException e) {
@@ -52,9 +47,10 @@ abstract class AbstractWikiParser implements WikiParser {
         }
     }

-    public String transform(final String content, final String publicAddress) {
+    public String transform(String hostport, final String content, final String publicAddress) {
         try {
             return transform(
+                    hostport,
                     new BufferedReader(new StringReader(content)),
                     content.length());
         } catch (final IOException e) {
@@ -62,14 +58,15 @@ abstract class AbstractWikiParser implements WikiParser {
         }
     }

-    public String transform(final byte[] content) throws UnsupportedEncodingException {
-        return transform(content, "UTF-8");
+    public String transform(String hostport, final byte[] content) throws UnsupportedEncodingException {
+        return transform(hostport, content, "UTF-8");
     }

-    public String transform(final byte[] content, final String encoding, final String publicAddress) {
+    public String transform(String hostport, final byte[] content, final String encoding, final String publicAddress) {
         final ByteArrayInputStream bais = new ByteArrayInputStream(content);
         try {
             return transform(
+                    hostport,
                     new BufferedReader(new InputStreamReader(bais, encoding)),
                     content.length);
         } catch (final IOException e) {
@@ -77,10 +74,11 @@ abstract class AbstractWikiParser implements WikiParser {
         }
     }

-    public String transform(final byte[] content, final String encoding) throws UnsupportedEncodingException {
+    public String transform(String hostport, final byte[] content, final String encoding) throws UnsupportedEncodingException {
         final ByteArrayInputStream bais = new ByteArrayInputStream(content);
         try {
             return transform(
+                    hostport,
                     new BufferedReader(new InputStreamReader(bais, encoding)),
                     content.length);
         } catch (final IOException e) {

@@ -190,8 +190,8 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
      * Constructor
      * @param address
      */
-    public WikiCode(final String address) {
-        super(address);
+    public WikiCode() {
+        super();
     }

     /**
@@ -201,12 +201,12 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
      * @return HTML fragment.
      * @throws IOException in case input from reader can not be read.
      */
-    protected String transform(final BufferedReader reader, final int length)
+    protected String transform(String hostport, final BufferedReader reader, final int length)
             throws IOException {
         final StringBuilder out = new StringBuilder(length);
         String line;
         while ((line = reader.readLine()) != null) {
-            out.append(processLineOfWikiCode(line)).append(serverCore.CRLF_STRING);
+            out.append(processLineOfWikiCode(hostport, line)).append(serverCore.CRLF_STRING);
         }
         return out.insert(0, createTableOfContents()).toString();
     }
@@ -531,7 +531,7 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
      * @param line line of text to be transformed from wiki code to HTML
      * @return HTML fragment
      */
-    private String processLinksAndImages(String line) {
+    private String processLinksAndImages(String hostport, String line) {

         // create links
         String kl, kv, alt, align;
@@ -586,7 +586,7 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
                 // or an image DATA/HTDOCS/grafics/kaskelix.jpg with [[Image:grafics/kaskelix.jpg]]
                 // you are free to use other sub-paths of DATA/HTDOCS
                 if (kl.indexOf("://") < 1) {
-                    kl = "http://" + super.address + "/" + kl;
+                    kl = "http://" + hostport + "/" + kl;
                 }
                 line = line.substring(0, positionOfOpeningTag) + "<img src=\"" + kl + "\"" + align + alt + ">" + line.substring(positionOfClosingTag + 2);
@@ -623,7 +623,7 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
             // or a file DATA/HTDOCS/www/page.html with [www/page.html]
             // you are free to use other sub-paths of DATA/HTDOCS
             if (kl.indexOf("://") < 1) {
-                kl = "http://" + super.address + "/" + kl;
+                kl = "http://" + hostport + "/" + kl;
             }
             line = line.substring(0, positionOfOpeningTag) + "<a class=\"extern\" href=\"" + kl + "\">" + kv + "</a>" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_EXTERNAL_LINK);
         }
@@ -635,7 +635,7 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
      * @param line line of text to be transformed from wiki code to HTML
      * @return HTML fragment
      */
-    private String processPreformattedText(String line) {
+    private String processPreformattedText(String hostport, String line) {
         if (!escaped) {
             final int positionOfOpeningTag = line.indexOf(WIKI_OPEN_PRE_ESCAPED);
             final int positionOfClosingTag = line.indexOf(WIKI_CLOSE_PRE_ESCAPED);
@@ -647,15 +647,15 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
                 preformattedText.append(line.substring(positionOfOpeningTag + LEN_WIKI_OPEN_PRE_ESCAPED, positionOfClosingTag));
                 preformattedText.append("</pre>");
-                line = processLineOfWikiCode(line.substring(0, positionOfOpeningTag).replaceAll("!pre!", "!pre!!") + "!pre!txt!" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_PRE_ESCAPED).replaceAll("!pre!", "!pre!!"));
+                line = processLineOfWikiCode(hostport, line.substring(0, positionOfOpeningTag).replaceAll("!pre!", "!pre!!") + "!pre!txt!" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_PRE_ESCAPED).replaceAll("!pre!", "!pre!!"));
                 line = line.replaceAll("!pre!txt!", preformattedText.toString().replaceAll("!pre!", "!pre!!"));
                 line = line.replaceAll("!pre!!", "!pre!");
             } //handles cases like <pre><pre> </pre></pre> <pre> </pre> that would cause an exception otherwise
             else {
                 processingPreformattedText = true;
-                final String temp1 = processLineOfWikiCode(line.substring(0, positionOfOpeningTag - 1).replaceAll("!tmp!", "!tmp!!") + "!tmp!txt!");
+                final String temp1 = processLineOfWikiCode(hostport, line.substring(0, positionOfOpeningTag - 1).replaceAll("!tmp!", "!tmp!!") + "!tmp!txt!");
                 noList = true;
-                final String temp2 = processLineOfWikiCode(line.substring(positionOfOpeningTag));
+                final String temp2 = processLineOfWikiCode(hostport, line.substring(positionOfOpeningTag));
                 noList = false;
                 line = temp1.replaceAll("!tmp!txt!", temp2);
                 line = line.replaceAll("!tmp!!", "!tmp!");
@@ -673,7 +673,7 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
                     preindented++;
                     openBlockQuoteTags.append(HTML_OPEN_BLOCKQUOTE);
                 }
-                line = processLineOfWikiCode(line.substring(preindented, positionOfOpeningTag).replaceAll("!pre!", "!pre!!") + "!pre!txt!");
+                line = processLineOfWikiCode(hostport, line.substring(preindented, positionOfOpeningTag).replaceAll("!pre!", "!pre!!") + "!pre!txt!");
                 line = openBlockQuoteTags + line.replaceAll("!pre!txt!", preformattedText);
                 line = line.replaceAll("!pre!!", "!pre!");
                 preformattedSpanning = true;
@@ -688,7 +688,7 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
                     endBlockQuoteTags.append(HTML_CLOSE_BLOCKQUOTE);
                     preindented--;
                 }
-                line = processLineOfWikiCode("!pre!txt!" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_PRE_ESCAPED).replaceAll("!pre!", "!pre!!"));
+                line = processLineOfWikiCode(hostport, "!pre!txt!" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_PRE_ESCAPED).replaceAll("!pre!", "!pre!!"));
                 line = line.replaceAll("!pre!txt!", preformattedText) + endBlockQuoteTags;
                 line = line.replaceAll("!pre!!", "!pre!");
                 processingPreformattedText = false;
@@ -698,7 +698,7 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
                 while ((posTag = line.indexOf(WIKI_CLOSE_PRE_ESCAPED)) >= 0) {
                     line = line.substring(0, posTag) + line.substring(posTag + LEN_WIKI_CLOSE_PRE_ESCAPED);
                 }
-                line = processLineOfWikiCode(line);
+                line = processLineOfWikiCode(hostport, line);
             }
         }
         return line;
@@ -914,7 +914,7 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
     * @param line line of text to be transformed from wiki code to HTML
     * @return HTML fragment
     */
-    public String processLineOfWikiCode(String line) {
+    public String processLineOfWikiCode(String hostport, String line) {
         //If HTML has not been replaced yet (can happen if method gets called in recursion), replace now!
         if ((!replacedHtmlAlready || preformattedSpanning) && line.indexOf(WIKI_CLOSE_PRE_ESCAPED) < 0) {
             line = CharacterCoding.unicode2html(line, true);
@@ -925,7 +925,7 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
         if ((line.indexOf(WIKI_OPEN_PRE_ESCAPED) >= 0) ||
                 (line.indexOf(WIKI_CLOSE_PRE_ESCAPED) >= 0) ||
                 preformattedSpanning) {
-            line = processPreformattedText(line);
+            line = processPreformattedText(hostport, line);
         } else {

             //tables first -> wiki-tags in cells can be treated after that
@@ -970,7 +970,7 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
             line = processOrderedList(line);
             line = processDefinitionList(line);
-            line = processLinksAndImages(line);
+            line = processLinksAndImages(hostport, line);
         }

@@ -29,8 +29,8 @@ import java.io.UnsupportedEncodingException;

 public interface WikiParser {

-    public String transform(String text);
-    public String transform(byte[] text) throws UnsupportedEncodingException;
-    public String transform(byte[] text, String encoding) throws UnsupportedEncodingException;
+    public String transform(String hostport, String text);
+    public String transform(String hostport, byte[] text) throws UnsupportedEncodingException;
+    public String transform(String hostport, byte[] text, String encoding) throws UnsupportedEncodingException;
 }
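
Seen together with the AbstractWikiParser and WikiCode hunks above, the effect of this interface change is that the target host is supplied per call instead of being bound at construction time, which is what lets Switchboard (below) keep a single shared WikiCode instance. A minimal caller sketch under that reading (sb stands for a Switchboard reference as in the servlet hunks; the address value is illustrative):

    WikiParser parser = new WikiCode();                       // no address bound at construction any more
    String hostport = sb.peers.mySeed().getClusterAddress();  // resolved per request, e.g. "192.168.1.5:8090"
    String html = parser.transform(hostport, "[[Image:grafics/kaskelix.jpg]]");
    // relative links and images now resolve against "http://" + hostport + "/"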

@@ -543,7 +543,7 @@ public final class Switchboard extends serverSwitch {
         log.logConfig("Initializing Snippet Cache");

         // init the wiki
-        wikiParser = new WikiCode(this.peers.mySeed().getClusterAddress());
+        wikiParser = new WikiCode();

         // initializing the resourceObserver
         InstantBusyThread.oneTimeJob(ResourceObserver.class, "initThread", ResourceObserver.log, 0);
@@ -822,7 +822,8 @@ public final class Switchboard extends serverSwitch {
         SearchEventCache.cleanupEvents(true);

         // switch the networks
         synchronized (this) {
+
             // shut down
             this.crawler.close();
             this.dhtDispatcher.close();
@@ -859,10 +860,8 @@ public final class Switchboard extends serverSwitch {

             // relocate
             this.crawlQueues.relocate(this.queuesRoot); // cannot be closed because the busy threads are working with that object
-            final File mySeedFile = new File(this.networkRoot, yacySeedDB.DBFILE_OWN_SEED);
             peers.relocate(
                     this.networkRoot,
-                    mySeedFile,
                     redundancy,
                     partitionExponent,
                     this.useTailCache,

@@ -229,13 +229,13 @@ public class serverObjects extends HashMap<String, String> implements Cloneable
     }

-    public String putWiki(final String key, final String wikiCode){
-        return this.put(key, Switchboard.wikiParser.transform(wikiCode));
+    public String putWiki(String hostport, final String key, final String wikiCode){
+        return this.put(key, Switchboard.wikiParser.transform(hostport, wikiCode));
     }

-    public String putWiki(final String key, final byte[] wikiCode) {
+    public String putWiki(String hostport, final String key, final byte[] wikiCode) {
         try {
-            return this.put(key, Switchboard.wikiParser.transform(wikiCode));
+            return this.put(key, Switchboard.wikiParser.transform(hostport, wikiCode));
         } catch (final UnsupportedEncodingException e) {
             return this.put(key, "Internal error pasting wiki-code: " + e.getMessage());
         }

@@ -145,11 +145,11 @@ public final class yacySeedDB implements AlternativeDomainNames {

     public void relocate(
             File newNetworkRoot,
-            final File myOwnSeedFile,
             final int redundancy,
             final int partitionExponent,
             final boolean useTailCache,
             final boolean exceed134217727) {

         // close old databases
         this.seedActiveDB.close();
         this.seedPassiveDB.close();
@@ -161,8 +161,13 @@ public final class yacySeedDB implements AlternativeDomainNames {
         this.seedActiveDBFile = new File(newNetworkRoot, seedActiveDBFile.getName());
         this.seedPassiveDBFile = new File(newNetworkRoot, seedPassiveDBFile.getName());
         this.seedPotentialDBFile = new File(newNetworkRoot, seedPotentialDBFile.getName());
+
+        // read current peer name
+        String peername = this.myName();
+
         this.mySeed = null; // my own seed
-        this.myOwnSeedFile = myOwnSeedFile;
+        this.myOwnSeedFile = new File(newNetworkRoot, yacySeedDB.DBFILE_OWN_SEED);
         this.netRedundancy = redundancy;
         this.scheme = new VerticalWordPartitionScheme(partitionExponent);
@@ -275,7 +280,7 @@ public final class yacySeedDB implements AlternativeDomainNames {
         } catch (final IOException e) { Log.logWarning("yacySeedDB", "could not remove hash ("+ e.getClass() +"): "+ e.getMessage()); }
     }

-    protected void saveMySeed() {
+    public void saveMySeed() {
         try {
             this.mySeed().save(myOwnSeedFile);
         } catch (final IOException e) { Log.logWarning("yacySeedDB", "could not save mySeed '"+ myOwnSeedFile +"': "+ e.getMessage()); }
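
The two halves of the relocation change belong together: relocate() now derives the own-seed file from the new network root itself, which is why the Switchboard hunk above no longer builds and passes a mySeedFile, and saveMySeed() is widened from protected to public so that ConfigBasic can persist a peer rename directly.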

@@ -48,7 +48,6 @@ import java.io.PrintWriter;
 import java.io.RandomAccessFile;
 import java.io.UnsupportedEncodingException;
 import java.net.MalformedURLException;
-import java.net.URL;
 import java.util.Date;
 import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.BlockingQueue;
@@ -59,6 +58,7 @@ import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
+import java.util.zip.GZIPInputStream;

 import de.anomic.data.wiki.WikiCode;
 import de.anomic.data.wiki.WikiParser;
@@ -81,24 +81,25 @@ public class MediawikiImporter extends Thread implements Importer {
     public static Importer job; // if started from a servlet, this object is used to store the thread

     protected WikiParser wparser;
-    protected String urlStub;
     public File sourcefile;
     public File targetdir;
     public int count;
     private long start;
     private final long docsize;
     private final int approxdocs;
+    private String hostport, urlStub;

-    public MediawikiImporter(File sourcefile, File targetdir, String baseURL) throws MalformedURLException {
+    public MediawikiImporter(File sourcefile, File targetdir) throws MalformedURLException {
         this.sourcefile = sourcefile;
         this.docsize = sourcefile.length();
         this.approxdocs = (int) (this.docsize * (long) docspermbinxmlbz2 / 1024L / 1024L);
         this.targetdir = targetdir;
-        this.urlStub = baseURL;
-        this.wparser = new WikiCode(new URL(baseURL).getHost());
+        this.wparser = new WikiCode();
         this.count = 0;
         this.start = 0;
+        this.hostport = null;
+        this.urlStub = null;
     }

     public int count() {
@@ -138,14 +139,17 @@ public class MediawikiImporter extends Thread implements Importer {
         this.start = System.currentTimeMillis();
         try {
             String targetstub = sourcefile.getName();
-            targetstub = targetstub.substring(0, targetstub.length() - 8);
-            InputStream is = new BufferedInputStream(new FileInputStream(sourcefile), 1 * 1024 * 1024);
+            int p = targetstub.lastIndexOf("\\.");
+            if (p > 0) targetstub = targetstub.substring(0, p);
+            InputStream is = new BufferedInputStream(new FileInputStream(sourcefile), 1024 * 1024);
             if (sourcefile.getName().endsWith(".bz2")) {
                 int b = is.read();
                 if (b != 'B') throw new IOException("Invalid bz2 content.");
                 b = is.read();
                 if (b != 'Z') throw new IOException("Invalid bz2 content.");
                 is = new CBZip2InputStream(is);
+            } else if (sourcefile.getName().endsWith(".gz")) {
+                is = new GZIPInputStream(is);
             }
             BufferedReader r = new BufferedReader(new java.io.InputStreamReader(is, "UTF-8"), 4 * 1024 * 1024);
             String t;
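
This hunk removes the hard bz2 assumption named in the commit message: the file extension now selects the decompressor, and plain XML falls through unchanged. The 'B' and 'Z' magic bytes are consumed first because CBZip2InputStream expects its caller to have read them, whereas java.util.zip.GZIPInputStream parses its own header. One caveat as committed: String.lastIndexOf takes a literal string, not a regex, so lastIndexOf("\\.") looks for a backslash-dot sequence; for ordinary dump names it returns -1 and targetstub keeps its full file name including extensions.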
@@ -167,15 +171,27 @@ public class MediawikiImporter extends Thread implements Importer {
             Future<Integer> writerResult = service.submit(writer);

             wikiparserrecord record;
-            int p;
+            int q;
             while ((t = r.readLine()) != null) {
+                if ((p = t.indexOf("<base>")) >= 0 && (q = t.indexOf("</base>", p)) > 0) {
+                    //urlStub = "http://" + lang + ".wikipedia.org/wiki/";
+                    urlStub = t.substring(p + 6, q);
+                    if (!urlStub.endsWith("/")) {
+                        q = urlStub.lastIndexOf('/');
+                        if (q > 0) urlStub = urlStub.substring(0, q + 1);
+                    }
+                    DigestURI uri = new DigestURI(urlStub);
+                    hostport = uri.getHost();
+                    if (uri.getPort() != 80) hostport += ":" + uri.getPort();
+                    continue;
+                }
                 if (t.indexOf(pagestart) >= 0) {
                     page = true;
                     continue;
                 }
                 if ((p = t.indexOf(textstart)) >= 0) {
                     text = page;
-                    int q = t.indexOf('>', p + textstart.length());
+                    q = t.indexOf('>', p + textstart.length());
                     if (q > 0) {
                         int u = t.indexOf(textend, q + 1);
                         if (u > q) {
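
Instead of guessing the language from the file name (the lang = name.substring(0, 2) heuristic dropped in the servlet hunk above), the importer now reads the <base> element that MediaWiki dumps carry in their <siteinfo> header. A dewiki dump, for example, contains a line like <base>http://de.wikipedia.org/wiki/Wikipedia:Hauptseite</base>, from which this code derives urlStub "http://de.wikipedia.org/wiki/" and hostport "de.wikipedia.org"; a :port suffix is only appended for non-standard ports.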
@@ -185,7 +201,7 @@ public class MediawikiImporter extends Thread implements Importer {
                                 Log.logInfo("WIKITRANSLATION", "ERROR: " + title + " has empty content");
                                 continue;
                             }
-                            record = newRecord(title, sb);
+                            record = newRecord(hostport, urlStub, title, sb);
                             try {
                                 in.put(record);
                                 this.count++;
@@ -207,7 +223,7 @@ public class MediawikiImporter extends Thread implements Importer {
                             Log.logInfo("WIKITRANSLATION", "ERROR: " + title + " has empty content");
                             continue;
                         }
-                        record = newRecord(title, sb);
+                        record = newRecord(hostport, urlStub, title, sb);
                         try {
                             in.put(record);
                             this.count++;
@@ -223,7 +239,7 @@ public class MediawikiImporter extends Thread implements Importer {
                 }
                 if ((p = t.indexOf("<title>")) >= 0) {
                     title = t.substring(p + 7);
-                    int q = title.indexOf("</title>");
+                    q = title.indexOf("</title>");
                     if (q >= 0) title = title.substring(0, q);
                     continue;
                 }
@@ -461,25 +477,26 @@ public class MediawikiImporter extends Thread implements Importer {
         }
     }

     public wikiparserrecord newRecord() {
-        return new wikiparserrecord(null, null);
+        return new wikiparserrecord(null, null, null, null);
     }
-    public wikiparserrecord newRecord(String title, StringBuilder sb) {
-        return new wikiparserrecord(title, sb);
+    public wikiparserrecord newRecord(String hostport, String urlStub, String title, StringBuilder sb) {
+        return new wikiparserrecord(hostport, urlStub, title, sb);
     }

     public class wikiparserrecord {
         public String title;
-        String source;
-        String html;
+        String source, html, hostport, urlStub;
         DigestURI url;
         Document document;
-        public wikiparserrecord(String title, StringBuilder sb) {
+        public wikiparserrecord(String hostport, String urlStub, String title, StringBuilder sb) {
             this.title = title;
+            this.hostport = hostport;
+            this.urlStub = urlStub;
             this.source = (sb == null) ? null : sb.toString();
         }
         public void genHTML() throws IOException {
             try {
-                html = wparser.transform(source);
+                html = wparser.transform(hostport, source);
             } catch (Exception e) {
                 Log.logException(e);
                 throw new IOException(e.getMessage());
@@ -734,13 +751,13 @@ public class MediawikiImporter extends Thread implements Importer {
             // example:
             // java -Xmx2000m -cp classes:lib/bzip2.jar de.anomic.tools.mediawikiIndex -convert DATA/HTCACHE/dewiki-20090311-pages-articles.xml.bz2 DATA/SURROGATES/in/ http://de.wikipedia.org/wiki/
-            if (s[0].equals("-convert") && s.length > 2 && s[1].endsWith(".xml.bz2") && s[3].startsWith("http://")) {
+            if (s[0].equals("-convert") && s.length > 2) {
                 File sourcefile = new File(s[1]);
                 File targetdir = new File(s[2]);
-                String urlStub = s[3]; // i.e. http://de.wikipedia.org/wiki/
+                //String urlStub = s[3]; // i.e. http://de.wikipedia.org/wiki/
                 //String language = urlStub.substring(7,9);
                 try {
-                    MediawikiImporter mi = new MediawikiImporter(sourcefile, targetdir, urlStub);
+                    MediawikiImporter mi = new MediawikiImporter(sourcefile, targetdir);
                     mi.start();
                     mi.join();
                 } catch (InterruptedException e) {
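
With the base URL now read from the dump itself, -convert no longer needs (or validates) a URL argument. Adapting the example in the comment above, an invocation would now be:

    java -Xmx2000m -cp classes:lib/bzip2.jar de.anomic.tools.mediawikiIndex -convert DATA/HTCACHE/dewiki-20090311-pages-articles.xml.bz2 DATA/SURROGATES/in/

(The comment in the source still shows the old three-argument form.)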
