- fixed missing save operation for peer name change

- fixed import of mediawiki dump files
- added script to import mediawiki dump files

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7609 6c8d7289-2bf4-0310-a012-ef5d649a1542
commit a50f28e6e7, parent 2b5f8585bf, by orbiter, 14 years ago

@@ -0,0 +1,3 @@
+#!/bin/bash
+cd "`dirname $0`"
+./apicall.sh /IndexImportWikimedia_p.html?file=$1 > /dev/null
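
The script changes into its own directory before running, so it can be called from anywhere; it simply hands the given dump file path to the local peer's IndexImportWikimedia_p servlet via apicall.sh. A hypothetical invocation (the script's file name is not visible in this view; importDump.sh is assumed):

    ./importDump.sh DATA/HTCACHE/dewiki-20090311-pages-articles.xml.bz2

Note that the path is interpreted by the peer process, not by the calling shell, so it should be absolute or relative to the YaCy application directory.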

@@ -191,7 +191,7 @@ public class Blog {
             prop.putHTML("mode_author", UTF8.String(author));
             prop.putHTML("mode_subject", post.get("subject",""));
             prop.put("mode_date", dateString(new Date()));
-            prop.putWiki("mode_page", post.get("content", ""));
+            prop.putWiki(sb.peers.mySeed().getClusterAddress(), "mode_page", post.get("content", ""));
             prop.putHTML("mode_page-code", post.get("content", ""));
         }
         else {
@@ -234,7 +234,7 @@ public class Blog {
         else {
             //only show 1 entry
             prop.put("mode_entries", "1");
-            putBlogEntry(prop, page, address, 0, hasRights, xml);
+            putBlogEntry(sb, prop, page, address, 0, hasRights, xml);
         }
     }
@@ -263,6 +263,7 @@ public class Blog {
         while (i.hasNext() && (num == 0 || num > count)) {
             if(0 < start--) continue;
             putBlogEntry(
+                    switchboard,
                     prop,
                     switchboard.blogDB.readBlogEntry(i.next()),
                     address,
@@ -293,6 +294,7 @@ public class Blog {
     }

     private static serverObjects putBlogEntry(
+            final Switchboard sb,
             final serverObjects prop,
             final BlogBoard.BlogEntry entry,
             final String address,
@@ -324,7 +326,7 @@ public class Blog {
             prop.put("mode_entries_" + number + "_page", entry.getPage());
             prop.put("mode_entries_" + number + "_timestamp", entry.getTimestamp());
         } else {
-            prop.putWiki("mode_entries_" + number + "_page", entry.getPage());
+            prop.putWiki(sb.peers.mySeed().getClusterAddress(), "mode_entries_" + number + "_page", entry.getPage());
         }
         if (hasRights) {

@@ -175,7 +175,7 @@ public class BlogComments {
             prop.putHTML("mode_allow_author", UTF8.String(author));
             prop.putHTML("mode_subject", post.get("subject",""));
             prop.put("mode_date", dateString(new Date()));
-            prop.putWiki("mode_page", post.get("content", ""));
+            prop.putWiki(sb.peers.mySeed().getClusterAddress(), "mode_page", post.get("content", ""));
             prop.put("mode_page-code", post.get("content", ""));
         } else {
             // show blog-entry/entries
@@ -191,7 +191,7 @@ public class BlogComments {
             prop.putHTML("mode_allow_author", UTF8.String(author));
             prop.put("mode_comments", page.getCommentsSize());
             prop.put("mode_date", dateString(page.getDate()));
-            prop.putWiki("mode_page", page.getPage());
+            prop.putWiki(sb.peers.mySeed().getClusterAddress(), "mode_page", page.getPage());
             if (hasRights) {
                 prop.put("mode_admin", "1");
                 prop.put("mode_admin_pageid", page.getKey());
@@ -234,7 +234,7 @@ public class BlogComments {
                 if (!xml) {
                     prop.putHTML("mode_entries_"+count+"_subject", UTF8.String(entry.getSubject()));
                     prop.putHTML("mode_entries_"+count+"_author", UTF8.String(entry.getAuthor()));
-                    prop.putWiki("mode_entries_"+count+"_page", entry.getPage());
+                    prop.putWiki(sb.peers.mySeed().getClusterAddress(), "mode_entries_"+count+"_page", entry.getPage());
                 } else {
                     prop.putHTML("mode_entries_"+count+"_subject", UTF8.String(entry.getSubject()));
                     prop.putHTML("mode_entries_"+count+"_author", UTF8.String(entry.getAuthor()));

@@ -103,11 +103,11 @@ public class ConfigBasic {
         // check if peer name already exists
         final yacySeed oldSeed = sb.peers.lookupByName(peerName);
-        if (oldSeed == null && !peerName.equals(sb.peers.mySeed().getName())) {
-            // the name is new
-            if (Pattern.compile("[A-Za-z0-9\\-_]{3,80}").matcher(peerName).matches()) {
-                sb.peers.mySeed().setName(peerName);
-            }
+        if (oldSeed == null &&
+            !peerName.equals(sb.peers.mySeed().getName()) &&
+            Pattern.compile("[A-Za-z0-9\\-_]{3,80}").matcher(peerName).matches()) {
+            sb.peers.mySeed().setName(peerName);
+            sb.peers.saveMySeed();
         }

         // UPnP config
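
This is the "missing save operation" fix from the commit message: a valid rename previously only updated the in-memory seed, so the new peer name was lost on restart. The name-is-new and pattern checks are folded into a single condition, and saveMySeed(), made public in the yacySeedDB hunk below, persists the change immediately.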

@@ -15,14 +15,14 @@
     <form action="IndexImportWikimedia_p.html" method="get" accept-charset="UTF-8">
     <!-- no post method here, we don't want to transmit the whole file, only the path-->
       <fieldset>
-        <legend>Wikimedia Dump File Selection: select a 'bz2' file</legend>
+        <legend>Wikimedia Dump File Selection: select a xml file (which may be bz2- or gz-encoded)</legend>
         You can import Wikipedia dumps here. An example is the file
         <a href="http://download.wikimedia.org/dewiki/20090311/dewiki-20090311-pages-articles.xml.bz2">
         http://download.wikimedia.org/dewiki/20090311/dewiki-20090311-pages-articles.xml.bz2</a>.
         <br />
-        Dumps must be in XML format and must be encoded in bz2. Do not decompress the file after downloading!
+        Dumps must be in XML format and may be compressed in gz or bz2. Uncompressed XML is also ok.
         <br />
-        <input name="file" type="text" value="DATA/HTCACHE/dewiki-20090311-pages-articles.xml.bz2" size="80" />
+        <input name="file" type="text" value="" size="80" />
         <input name="submit" type="submit" value="Import Wikimedia Dump" />
       </fieldset>
     </form>

@@ -57,16 +57,17 @@ public class IndexImportWikimedia_p {
         } else {
             if (post.containsKey("file")) {
                 final File sourcefile = new File(post.get("file"));
-                final String name = sourcefile.getName(); // i.e. dewiki-20090311-pages-articles.xml.bz2
+                //final String name = sourcefile.getName(); // i.e. dewiki-20090311-pages-articles.xml.bz2
+                /*
                 if (!name.endsWith("pages-articles.xml.bz2")) {
                     prop.put("import", 0);
                     prop.put("import_status", 1);
                     prop.put("import_status_message", "file name must end with 'pages-articles.xml.bz2'");
                     return prop;
                 }
-                final String lang = name.substring(0, 2);
+                */
                 try {
-                    MediawikiImporter.job = new MediawikiImporter(sourcefile, sb.surrogatesInPath, "http://" + lang + ".wikipedia.org/wiki/");
+                    MediawikiImporter.job = new MediawikiImporter(sourcefile, sb.surrogatesInPath);
                     MediawikiImporter.job.start();
                     prop.put("import", 1);
                     prop.put("import_thread", "started");

@@ -107,7 +107,7 @@ public class MessageSend_p {
             prop.putXML("mode_permission_message", message);
             prop.putHTML("mode_permission_hash", hash);
             if (post.containsKey("preview")) {
-                prop.putWiki("mode_permission_previewmessage", message);
+                prop.putWiki(sb.peers.mySeed().getClusterAddress(), "mode_permission_previewmessage", message);
             }

@@ -160,7 +160,7 @@ public class Messages_p {
             prop.putXML("mode_subject", message.subject());
             String theMessage = null;
             theMessage = UTF8.String(message.message());
-            prop.putWiki("mode_message", theMessage);
+            prop.putWiki(sb.peers.mySeed().getClusterAddress(), "mode_message", theMessage);
             prop.put("mode_hash", message.authorHash());
             prop.putXML("mode_key", key);
         }

@@ -162,7 +162,7 @@ public class ViewProfile {
                 prop.put("success_" + key, "1");
                 // only comments get "wikified"
                 if(key.equals("comment")){
-                    prop.putWiki(
+                    prop.putWiki(sb.peers.mySeed().getClusterAddress(),
                             "success_" + key + "_value",
                             entry.getValue().replaceAll("\r", "").replaceAll("\\\\n", "\n"));
                     prop.put("success_" + key + "_b64value", Base64Order.standardCoder.encodeString(entry.getValue()));

@@ -152,7 +152,7 @@ public class Wiki {
             prop.put("mode_display", display);
             prop.putHTML("mode_author", author);
             prop.put("mode_date", dateString(new Date()));
-            prop.putWiki("mode_page", post.get("content", ""));
+            prop.putWiki(sb.peers.mySeed().getClusterAddress(), "mode_page", post.get("content", ""));
             prop.putHTML("mode_page-code", post.get("content", ""));
         }
         //end contrib of [MN]
@@ -247,7 +247,7 @@ public class Wiki {
                     prop.put("mode_versioning_display", display);
                     prop.putHTML("mode_versioning_author", oentry.author());
                     prop.put("mode_versioning_date", dateString(oentry.date()));
-                    prop.putWiki("mode_versioning_page", oentry.page());
+                    prop.putWiki(sb.peers.mySeed().getClusterAddress(), "mode_versioning_page", oentry.page());
                     prop.putHTML("mode_versioning_page-code", UTF8.String(oentry.page()));
                 }
             } catch (final IOException e) {
@@ -263,7 +263,7 @@ public class Wiki {
             prop.put("mode_display", display);
             prop.putHTML("mode_author", page.author());
             prop.put("mode_date", dateString(page.date()));
-            prop.putWiki("mode_page", page.page());
+            prop.putWiki(sb.peers.mySeed().getClusterAddress(), "mode_page", page.page());
             prop.put("controls", "0");
             prop.putHTML("controls_pagename", pagename);

@@ -71,7 +71,7 @@ public class mediawiki_p {
         page = page.substring(p, q);

         prop.putHTML("title", title);
-        prop.putWiki("page", page);
+        prop.putWiki(sb.peers.mySeed().getClusterAddress(), "page", page);

         return prop;
     }

@@ -34,17 +34,12 @@ import java.io.UnsupportedEncodingException;
 abstract class AbstractWikiParser implements WikiParser {

-    final String address;
-
-    public AbstractWikiParser(final String address) {
-        this.address = address;
-    }
-
-    protected abstract String transform(BufferedReader reader, int length) throws IOException;
+    protected abstract String transform(String hostport, BufferedReader reader, int length) throws IOException;

-    public String transform(final String content) {
+    public String transform(String hostport, final String content) {
         try {
             return transform(
+                    hostport,
                     new BufferedReader(new StringReader(content)),
                     content.length());
         } catch (final IOException e) {
@@ -52,9 +47,10 @@ abstract class AbstractWikiParser implements WikiParser {
         }
     }

-    public String transform(final String content, final String publicAddress) {
+    public String transform(String hostport, final String content, final String publicAddress) {
         try {
             return transform(
+                    hostport,
                     new BufferedReader(new StringReader(content)),
                     content.length());
         } catch (final IOException e) {
@@ -62,14 +58,15 @@ abstract class AbstractWikiParser implements WikiParser {
         }
     }

-    public String transform(final byte[] content) throws UnsupportedEncodingException {
-        return transform(content, "UTF-8");
+    public String transform(String hostport, final byte[] content) throws UnsupportedEncodingException {
+        return transform(hostport, content, "UTF-8");
     }

-    public String transform(final byte[] content, final String encoding, final String publicAddress) {
+    public String transform(String hostport, final byte[] content, final String encoding, final String publicAddress) {
         final ByteArrayInputStream bais = new ByteArrayInputStream(content);
         try {
             return transform(
+                    hostport,
                     new BufferedReader(new InputStreamReader(bais, encoding)),
                     content.length);
         } catch (final IOException e) {
@@ -77,10 +74,11 @@ abstract class AbstractWikiParser implements WikiParser {
         }
     }

-    public String transform(final byte[] content, final String encoding) throws UnsupportedEncodingException {
+    public String transform(String hostport, final byte[] content, final String encoding) throws UnsupportedEncodingException {
         final ByteArrayInputStream bais = new ByteArrayInputStream(content);
         try {
             return transform(
+                    hostport,
                     new BufferedReader(new InputStreamReader(bais, encoding)),
                     content.length);
         } catch (final IOException e) {

@@ -190,8 +190,8 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
      * Constructor
      * @param address
      */
-    public WikiCode(final String address) {
-        super(address);
+    public WikiCode() {
+        super();
     }

     /**
@@ -201,12 +201,12 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
      * @return HTML fragment.
      * @throws IOException in case input from reader can not be read.
      */
-    protected String transform(final BufferedReader reader, final int length)
+    protected String transform(String hostport, final BufferedReader reader, final int length)
             throws IOException {
         final StringBuilder out = new StringBuilder(length);
         String line;
         while ((line = reader.readLine()) != null) {
-            out.append(processLineOfWikiCode(line)).append(serverCore.CRLF_STRING);
+            out.append(processLineOfWikiCode(hostport, line)).append(serverCore.CRLF_STRING);
         }
         return out.insert(0, createTableOfContents()).toString();
     }
@@ -531,7 +531,7 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
      * @param line line of text to be transformed from wiki code to HTML
      * @return HTML fragment
      */
-    private String processLinksAndImages(String line) {
+    private String processLinksAndImages(String hostport, String line) {

         // create links
         String kl, kv, alt, align;
@@ -586,7 +586,7 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
                 // or an image DATA/HTDOCS/grafics/kaskelix.jpg with [[Image:grafics/kaskelix.jpg]]
                 // you are free to use other sub-paths of DATA/HTDOCS
                 if (kl.indexOf("://") < 1) {
-                    kl = "http://" + super.address + "/" + kl;
+                    kl = "http://" + hostport + "/" + kl;
                 }
                 line = line.substring(0, positionOfOpeningTag) + "<img src=\"" + kl + "\"" + align + alt + ">" + line.substring(positionOfClosingTag + 2);
@@ -623,7 +623,7 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
             // or a file DATA/HTDOCS/www/page.html with [www/page.html]
             // you are free to use other sub-paths of DATA/HTDOCS
             if (kl.indexOf("://") < 1) {
-                kl = "http://" + super.address + "/" + kl;
+                kl = "http://" + hostport + "/" + kl;
             }
             line = line.substring(0, positionOfOpeningTag) + "<a class=\"extern\" href=\"" + kl + "\">" + kv + "</a>" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_EXTERNAL_LINK);
         }
@@ -635,7 +635,7 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
      * @param line line of text to be transformed from wiki code to HTML
      * @return HTML fragment
      */
-    private String processPreformattedText(String line) {
+    private String processPreformattedText(String hostport, String line) {
         if (!escaped) {
             final int positionOfOpeningTag = line.indexOf(WIKI_OPEN_PRE_ESCAPED);
             final int positionOfClosingTag = line.indexOf(WIKI_CLOSE_PRE_ESCAPED);
@@ -647,15 +647,15 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
                 preformattedText.append(line.substring(positionOfOpeningTag + LEN_WIKI_OPEN_PRE_ESCAPED, positionOfClosingTag));
                 preformattedText.append("</pre>");
-                line = processLineOfWikiCode(line.substring(0, positionOfOpeningTag).replaceAll("!pre!", "!pre!!") + "!pre!txt!" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_PRE_ESCAPED).replaceAll("!pre!", "!pre!!"));
+                line = processLineOfWikiCode(hostport, line.substring(0, positionOfOpeningTag).replaceAll("!pre!", "!pre!!") + "!pre!txt!" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_PRE_ESCAPED).replaceAll("!pre!", "!pre!!"));
                 line = line.replaceAll("!pre!txt!", preformattedText.toString().replaceAll("!pre!", "!pre!!"));
                 line = line.replaceAll("!pre!!", "!pre!");
             } //handles cases like <pre><pre> </pre></pre> <pre> </pre> that would cause an exception otherwise
             else {
                 processingPreformattedText = true;
-                final String temp1 = processLineOfWikiCode(line.substring(0, positionOfOpeningTag - 1).replaceAll("!tmp!", "!tmp!!") + "!tmp!txt!");
+                final String temp1 = processLineOfWikiCode(hostport, line.substring(0, positionOfOpeningTag - 1).replaceAll("!tmp!", "!tmp!!") + "!tmp!txt!");
                 noList = true;
-                final String temp2 = processLineOfWikiCode(line.substring(positionOfOpeningTag));
+                final String temp2 = processLineOfWikiCode(hostport, line.substring(positionOfOpeningTag));
                 noList = false;
                 line = temp1.replaceAll("!tmp!txt!", temp2);
                 line = line.replaceAll("!tmp!!", "!tmp!");
@@ -673,7 +673,7 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
                     preindented++;
                     openBlockQuoteTags.append(HTML_OPEN_BLOCKQUOTE);
                 }
-                line = processLineOfWikiCode(line.substring(preindented, positionOfOpeningTag).replaceAll("!pre!", "!pre!!") + "!pre!txt!");
+                line = processLineOfWikiCode(hostport, line.substring(preindented, positionOfOpeningTag).replaceAll("!pre!", "!pre!!") + "!pre!txt!");
                 line = openBlockQuoteTags + line.replaceAll("!pre!txt!", preformattedText);
                 line = line.replaceAll("!pre!!", "!pre!");
                 preformattedSpanning = true;
@@ -688,7 +688,7 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
                     endBlockQuoteTags.append(HTML_CLOSE_BLOCKQUOTE);
                     preindented--;
                 }
-                line = processLineOfWikiCode("!pre!txt!" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_PRE_ESCAPED).replaceAll("!pre!", "!pre!!"));
+                line = processLineOfWikiCode(hostport, "!pre!txt!" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_PRE_ESCAPED).replaceAll("!pre!", "!pre!!"));
                 line = line.replaceAll("!pre!txt!", preformattedText) + endBlockQuoteTags;
                 line = line.replaceAll("!pre!!", "!pre!");
                 processingPreformattedText = false;
@@ -698,7 +698,7 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
                 while ((posTag = line.indexOf(WIKI_CLOSE_PRE_ESCAPED)) >= 0) {
                     line = line.substring(0, posTag) + line.substring(posTag + LEN_WIKI_CLOSE_PRE_ESCAPED);
                 }
-                line = processLineOfWikiCode(line);
+                line = processLineOfWikiCode(hostport, line);
             }
         }
         return line;
@@ -914,7 +914,7 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
     * @param line line of text to be transformed from wiki code to HTML
     * @return HTML fragment
     */
-    public String processLineOfWikiCode(String line) {
+    public String processLineOfWikiCode(String hostport, String line) {
         //If HTML has not been replaced yet (can happen if method gets called in recursion), replace now!
         if ((!replacedHtmlAlready || preformattedSpanning) && line.indexOf(WIKI_CLOSE_PRE_ESCAPED) < 0) {
             line = CharacterCoding.unicode2html(line, true);
@@ -925,7 +925,7 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
         if ((line.indexOf(WIKI_OPEN_PRE_ESCAPED) >= 0) ||
                 (line.indexOf(WIKI_CLOSE_PRE_ESCAPED) >= 0) ||
                 preformattedSpanning) {
-            line = processPreformattedText(line);
+            line = processPreformattedText(hostport, line);
         } else {

             //tables first -> wiki-tags in cells can be treated after that
@@ -970,7 +970,7 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
             line = processOrderedList(line);
             line = processDefinitionList(line);
-            line = processLinksAndImages(line);
+            line = processLinksAndImages(hostport, line);
         }

@@ -29,8 +29,8 @@ import java.io.UnsupportedEncodingException;

 public interface WikiParser {

-    public String transform(String text);
-    public String transform(byte[] text) throws UnsupportedEncodingException;
-    public String transform(byte[] text, String encoding) throws UnsupportedEncodingException;
+    public String transform(String hostport, String text);
+    public String transform(String hostport, byte[] text) throws UnsupportedEncodingException;
+    public String transform(String hostport, byte[] text, String encoding) throws UnsupportedEncodingException;
 }
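
Seen together with the AbstractWikiParser and WikiCode hunks above, the effect of this interface change is that the target host is supplied per call instead of being bound at construction time, which is what lets Switchboard (below) keep a single shared WikiCode instance. A minimal caller sketch under that reading (sb stands for a Switchboard reference as in the servlet hunks; the address value is illustrative):

    WikiParser parser = new WikiCode();                       // no address bound at construction any more
    String hostport = sb.peers.mySeed().getClusterAddress();  // resolved per request, e.g. "192.168.1.5:8090"
    String html = parser.transform(hostport, "[[Image:grafics/kaskelix.jpg]]");
    // relative links and images now resolve against "http://" + hostport + "/"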

@@ -543,7 +543,7 @@ public final class Switchboard extends serverSwitch {
         log.logConfig("Initializing Snippet Cache");

         // init the wiki
-        wikiParser = new WikiCode(this.peers.mySeed().getClusterAddress());
+        wikiParser = new WikiCode();

         // initializing the resourceObserver
         InstantBusyThread.oneTimeJob(ResourceObserver.class, "initThread", ResourceObserver.log, 0);
@@ -822,7 +822,8 @@ public final class Switchboard extends serverSwitch {
         SearchEventCache.cleanupEvents(true);

         // switch the networks
         synchronized (this) {
+
             // shut down
             this.crawler.close();
             this.dhtDispatcher.close();
@@ -859,10 +860,8 @@ public final class Switchboard extends serverSwitch {

             // relocate
             this.crawlQueues.relocate(this.queuesRoot); // cannot be closed because the busy threads are working with that object
-            final File mySeedFile = new File(this.networkRoot, yacySeedDB.DBFILE_OWN_SEED);
             peers.relocate(
                     this.networkRoot,
-                    mySeedFile,
                     redundancy,
                     partitionExponent,
                     this.useTailCache,

@@ -229,13 +229,13 @@ public class serverObjects extends HashMap<String, String> implements Cloneable
     }

-    public String putWiki(final String key, final String wikiCode){
-        return this.put(key, Switchboard.wikiParser.transform(wikiCode));
+    public String putWiki(String hostport, final String key, final String wikiCode){
+        return this.put(key, Switchboard.wikiParser.transform(hostport, wikiCode));
     }

-    public String putWiki(final String key, final byte[] wikiCode) {
+    public String putWiki(String hostport, final String key, final byte[] wikiCode) {
         try {
-            return this.put(key, Switchboard.wikiParser.transform(wikiCode));
+            return this.put(key, Switchboard.wikiParser.transform(hostport, wikiCode));
         } catch (final UnsupportedEncodingException e) {
             return this.put(key, "Internal error pasting wiki-code: " + e.getMessage());
         }

@@ -145,11 +145,11 @@ public final class yacySeedDB implements AlternativeDomainNames {

     public void relocate(
             File newNetworkRoot,
-            final File myOwnSeedFile,
             final int redundancy,
             final int partitionExponent,
             final boolean useTailCache,
             final boolean exceed134217727) {

         // close old databases
         this.seedActiveDB.close();
         this.seedPassiveDB.close();
@@ -161,8 +161,13 @@ public final class yacySeedDB implements AlternativeDomainNames {
         this.seedActiveDBFile = new File(newNetworkRoot, seedActiveDBFile.getName());
         this.seedPassiveDBFile = new File(newNetworkRoot, seedPassiveDBFile.getName());
         this.seedPotentialDBFile = new File(newNetworkRoot, seedPotentialDBFile.getName());
+
+        // read current peer name
+        String peername = this.myName();
+
         this.mySeed = null; // my own seed
-        this.myOwnSeedFile = myOwnSeedFile;
+        this.myOwnSeedFile = new File(newNetworkRoot, yacySeedDB.DBFILE_OWN_SEED);
         this.netRedundancy = redundancy;
         this.scheme = new VerticalWordPartitionScheme(partitionExponent);
@@ -275,7 +280,7 @@ public final class yacySeedDB implements AlternativeDomainNames {
         } catch (final IOException e) { Log.logWarning("yacySeedDB", "could not remove hash ("+ e.getClass() +"): "+ e.getMessage()); }
     }

-    protected void saveMySeed() {
+    public void saveMySeed() {
         try {
             this.mySeed().save(myOwnSeedFile);
         } catch (final IOException e) { Log.logWarning("yacySeedDB", "could not save mySeed '"+ myOwnSeedFile +"': "+ e.getMessage()); }
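
The two halves of the relocation change belong together: relocate() now derives the own-seed file from the new network root itself, which is why the Switchboard hunk above no longer builds and passes a mySeedFile, and saveMySeed() is widened from protected to public so that ConfigBasic can persist a peer rename directly.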

@@ -48,7 +48,6 @@ import java.io.PrintWriter;
 import java.io.RandomAccessFile;
 import java.io.UnsupportedEncodingException;
 import java.net.MalformedURLException;
-import java.net.URL;
 import java.util.Date;
 import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.BlockingQueue;
@@ -59,6 +58,7 @@ import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
+import java.util.zip.GZIPInputStream;

 import de.anomic.data.wiki.WikiCode;
 import de.anomic.data.wiki.WikiParser;
@@ -81,24 +81,25 @@ public class MediawikiImporter extends Thread implements Importer {
     public static Importer job; // if started from a servlet, this object is used to store the thread

     protected WikiParser wparser;
-    protected String urlStub;
     public File sourcefile;
     public File targetdir;
     public int count;
     private long start;
     private final long docsize;
     private final int approxdocs;
+    private String hostport, urlStub;

-    public MediawikiImporter(File sourcefile, File targetdir, String baseURL) throws MalformedURLException {
+    public MediawikiImporter(File sourcefile, File targetdir) throws MalformedURLException {
         this.sourcefile = sourcefile;
         this.docsize = sourcefile.length();
         this.approxdocs = (int) (this.docsize * (long) docspermbinxmlbz2 / 1024L / 1024L);
         this.targetdir = targetdir;
-        this.urlStub = baseURL;
-        this.wparser = new WikiCode(new URL(baseURL).getHost());
+        this.wparser = new WikiCode();
         this.count = 0;
         this.start = 0;
+        this.hostport = null;
+        this.urlStub = null;
     }

     public int count() {
@@ -138,14 +139,17 @@ public class MediawikiImporter extends Thread implements Importer {
         this.start = System.currentTimeMillis();
         try {
             String targetstub = sourcefile.getName();
-            targetstub = targetstub.substring(0, targetstub.length() - 8);
-            InputStream is = new BufferedInputStream(new FileInputStream(sourcefile), 1 * 1024 * 1024);
+            int p = targetstub.lastIndexOf("\\.");
+            if (p > 0) targetstub = targetstub.substring(0, p);
+            InputStream is = new BufferedInputStream(new FileInputStream(sourcefile), 1024 * 1024);
             if (sourcefile.getName().endsWith(".bz2")) {
                 int b = is.read();
                 if (b != 'B') throw new IOException("Invalid bz2 content.");
                 b = is.read();
                 if (b != 'Z') throw new IOException("Invalid bz2 content.");
                 is = new CBZip2InputStream(is);
+            } else if (sourcefile.getName().endsWith(".gz")) {
+                is = new GZIPInputStream(is);
             }
             BufferedReader r = new BufferedReader(new java.io.InputStreamReader(is, "UTF-8"), 4 * 1024 * 1024);
             String t;
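
This hunk removes the hard bz2 assumption named in the commit message: the file extension now selects the decompressor, and plain XML falls through unchanged. The 'B' and 'Z' magic bytes are consumed first because CBZip2InputStream expects its caller to have read them, whereas java.util.zip.GZIPInputStream parses its own header. One caveat as committed: String.lastIndexOf takes a literal string, not a regex, so lastIndexOf("\\.") looks for a backslash-dot sequence; for ordinary dump names it returns -1 and targetstub keeps its full file name including extensions.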
@@ -167,15 +171,27 @@ public class MediawikiImporter extends Thread implements Importer {
             Future<Integer> writerResult = service.submit(writer);

             wikiparserrecord record;
-            int p;
+            int q;
             while ((t = r.readLine()) != null) {
+                if ((p = t.indexOf("<base>")) >= 0 && (q = t.indexOf("</base>", p)) > 0) {
+                    //urlStub = "http://" + lang + ".wikipedia.org/wiki/";
+                    urlStub = t.substring(p + 6, q);
+                    if (!urlStub.endsWith("/")) {
+                        q = urlStub.lastIndexOf('/');
+                        if (q > 0) urlStub = urlStub.substring(0, q + 1);
+                    }
+                    DigestURI uri = new DigestURI(urlStub);
+                    hostport = uri.getHost();
+                    if (uri.getPort() != 80) hostport += ":" + uri.getPort();
+                    continue;
+                }
                 if (t.indexOf(pagestart) >= 0) {
                     page = true;
                     continue;
                 }
                 if ((p = t.indexOf(textstart)) >= 0) {
                     text = page;
-                    int q = t.indexOf('>', p + textstart.length());
+                    q = t.indexOf('>', p + textstart.length());
                     if (q > 0) {
                         int u = t.indexOf(textend, q + 1);
                         if (u > q) {
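
Instead of guessing the language from the file name (the lang = name.substring(0, 2) heuristic dropped in the servlet hunk above), the importer now reads the <base> element that MediaWiki dumps carry in their <siteinfo> header. A dewiki dump, for example, contains a line like <base>http://de.wikipedia.org/wiki/Wikipedia:Hauptseite</base>, from which this code derives urlStub "http://de.wikipedia.org/wiki/" and hostport "de.wikipedia.org"; a :port suffix is only appended for non-standard ports.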
@@ -185,7 +201,7 @@ public class MediawikiImporter extends Thread implements Importer {
                                 Log.logInfo("WIKITRANSLATION", "ERROR: " + title + " has empty content");
                                 continue;
                             }
-                            record = newRecord(title, sb);
+                            record = newRecord(hostport, urlStub, title, sb);
                             try {
                                 in.put(record);
                                 this.count++;
@@ -207,7 +223,7 @@ public class MediawikiImporter extends Thread implements Importer {
                             Log.logInfo("WIKITRANSLATION", "ERROR: " + title + " has empty content");
                             continue;
                         }
-                        record = newRecord(title, sb);
+                        record = newRecord(hostport, urlStub, title, sb);
                         try {
                             in.put(record);
                             this.count++;
@@ -223,7 +239,7 @@ public class MediawikiImporter extends Thread implements Importer {
                 }
                 if ((p = t.indexOf("<title>")) >= 0) {
                     title = t.substring(p + 7);
-                    int q = title.indexOf("</title>");
+                    q = title.indexOf("</title>");
                     if (q >= 0) title = title.substring(0, q);
                     continue;
                 }
@@ -461,25 +477,26 @@ public class MediawikiImporter extends Thread implements Importer {
         }
     }

     public wikiparserrecord newRecord() {
-        return new wikiparserrecord(null, null);
+        return new wikiparserrecord(null, null, null, null);
     }
-    public wikiparserrecord newRecord(String title, StringBuilder sb) {
-        return new wikiparserrecord(title, sb);
+    public wikiparserrecord newRecord(String hostport, String urlStub, String title, StringBuilder sb) {
+        return new wikiparserrecord(hostport, urlStub, title, sb);
     }

     public class wikiparserrecord {
         public String title;
-        String source;
-        String html;
+        String source, html, hostport, urlStub;
         DigestURI url;
         Document document;
-        public wikiparserrecord(String title, StringBuilder sb) {
+        public wikiparserrecord(String hostport, String urlStub, String title, StringBuilder sb) {
             this.title = title;
+            this.hostport = hostport;
+            this.urlStub = urlStub;
             this.source = (sb == null) ? null : sb.toString();
         }
         public void genHTML() throws IOException {
             try {
-                html = wparser.transform(source);
+                html = wparser.transform(hostport, source);
             } catch (Exception e) {
                 Log.logException(e);
                 throw new IOException(e.getMessage());
@@ -734,13 +751,13 @@ public class MediawikiImporter extends Thread implements Importer {
             // example:
             // java -Xmx2000m -cp classes:lib/bzip2.jar de.anomic.tools.mediawikiIndex -convert DATA/HTCACHE/dewiki-20090311-pages-articles.xml.bz2 DATA/SURROGATES/in/ http://de.wikipedia.org/wiki/
-            if (s[0].equals("-convert") && s.length > 2 && s[1].endsWith(".xml.bz2") && s[3].startsWith("http://")) {
+            if (s[0].equals("-convert") && s.length > 2) {
                 File sourcefile = new File(s[1]);
                 File targetdir = new File(s[2]);
-                String urlStub = s[3]; // i.e. http://de.wikipedia.org/wiki/
+                //String urlStub = s[3]; // i.e. http://de.wikipedia.org/wiki/
                 //String language = urlStub.substring(7,9);
                 try {
-                    MediawikiImporter mi = new MediawikiImporter(sourcefile, targetdir, urlStub);
+                    MediawikiImporter mi = new MediawikiImporter(sourcefile, targetdir);
                     mi.start();
                     mi.join();
                 } catch (InterruptedException e) {
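
With the base URL now read from the dump itself, -convert no longer needs (or validates) a URL argument. Adapting the example in the comment above, an invocation would now be:

    java -Xmx2000m -cp classes:lib/bzip2.jar de.anomic.tools.mediawikiIndex -convert DATA/HTCACHE/dewiki-20090311-pages-articles.xml.bz2 DATA/SURROGATES/in/

(The comment in the source still shows the old three-argument form.)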
