#(status)#No import thread is running, you can start a new thread here::Bad input data: #[message]# #(/status)#
+
+
+ When the import is started, the following happens:
+
+
The dump is extracted on the fly and wiki entries are translated into Dublin Core data format. The output looks like this:
+
+ <?xml version="1.0" encoding="utf-8"?>
+<surrogates xmlns:dc="http://purl.org/dc/elements/1.1/">
+ <record>
+ <dc:Title><![CDATA[Alan Smithee]]></dc:Title>
+ <dc:Identifier>http://de.wikipedia.org/wiki/Alan%20Smithee</dc:Identifier>
+ <dc:Description><![CDATA[Der als Filmregisseur oft genannte Alan Smithee ist ein Anagramm]]></dc:Description>
+ <dc:Language>de</dc:Language>
+ <dc:Date>2009-05-07T06:03:48Z</dc:Date>
+ </record>
+ <record>
+ ...
+ </record>
+</surrogates>
+
+
+
Every 10000 wiki records are combined into one output file, which is written to /DATA/SURROGATES/in as a temporary file.
+
When a generated output file is finished, it is renamed to a .xml file
+
Each time an xml surrogate file appears in /DATA/SURROGATES/in, the YaCy indexer fetches the file and indexes the record entries.
+
When a surrogate file is finished with indexing, it is moved to /DATA/SURROGATES/out
+
You can recycle processed surrogate files by moving them from /DATA/SURROGATES/out to /DATA/SURROGATES/in
+
+
+ ::
+
+ #(/import)#
+
+ #%env/templates/footer.template%#
+
+
\ No newline at end of file
diff --git a/htroot/IndexImportWikimedia_p.java b/htroot/IndexImportWikimedia_p.java
new file mode 100644
index 000000000..3da6cdeed
--- /dev/null
+++ b/htroot/IndexImportWikimedia_p.java
@@ -0,0 +1,78 @@
+// IndexImportWikimedia.java
+// -------------------------
+// (C) 2009 by Michael Peter Christen; mc@yacy.net
+// first published 04.05.2009 on http://yacy.net
+// Frankfurt, Germany
+//
+// $LastChangedDate: 2009-04-16 17:29:00 +0200 (Do, 16 Apr 2009) $
+// $LastChangedRevision: 5812 $
+// $LastChangedBy: orbiter $
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+import java.io.File;
+import java.net.MalformedURLException;
+
+import de.anomic.http.httpRequestHeader;
+import de.anomic.plasma.plasmaSwitchboard;
+import de.anomic.server.serverObjects;
+import de.anomic.server.serverSwitch;
+import de.anomic.tools.mediawikiIndex;
+
+/** Page handler for the Wikimedia dump import: reports a running import job or starts a new one. */
+public class IndexImportWikimedia_p {
+    /**
+     * Fills the template properties for the import page.
+     * @param header request header (not read by this method)
+     * @param post   submitted form values; null is answered with "import_status" 0
+     * @param env    must be a plasmaSwitchboard, its surrogatesInPath is handed to the import job
+     * @return properties where "import" is 1 while a job runs or was just started, otherwise 0
+     */
+    public static serverObjects respond(final httpRequestHeader header, final serverObjects post, final serverSwitch<?> env) {
+        final serverObjects prop = new serverObjects();
+        final plasmaSwitchboard sb = (plasmaSwitchboard) env;
+        if (mediawikiIndex.job != null && mediawikiIndex.job.isAlive()) { // one import is running, no option to insert anything
+            prop.put("import", 1);
+            prop.put("import_thread", "running");
+            prop.put("import_count", mediawikiIndex.job.count);
+            return prop;
+        }
+        prop.put("import", 0); // stays 0 on every path below unless a job is started
+        if (post == null) {
+            prop.put("import_status", 0);
+        } else if (post.containsKey("file")) {
+            final File sourcefile = new File(post.get("file"));
+            final String name = sourcefile.getName(); // i.e. dewiki-20090311-pages-articles.xml.bz2
+            if (!name.endsWith("pages-articles.xml.bz2")) {
+                prop.put("import_status", 1);
+                prop.put("import_status_message", "file name must end with 'pages-articles.xml.bz2'");
+                return prop;
+            }
+            final String lang = name.substring(0, 2); // leading two characters of the dump name, e.g. "de"
+            try {
+                mediawikiIndex.job = new mediawikiIndex(sourcefile, sb.surrogatesInPath, "http://" + lang + ".wikipedia.org/wiki/");
+                mediawikiIndex.job.start();
+                prop.put("import", 1);
+                prop.put("import_thread", "started");
+                prop.put("import_count", 0);
+            } catch (MalformedURLException e) {
+                e.printStackTrace();
+                prop.put("import_status", 1);
+                prop.put("import_status_message", e.getMessage());
+            }
+        }
+        return prop;
+    }
+}
diff --git a/htroot/env/templates/submenuIndexControl.template b/htroot/env/templates/submenuIndexControl.template
index 599f91846..7b0759dad 100644
--- a/htroot/env/templates/submenuIndexControl.template
+++ b/htroot/env/templates/submenuIndexControl.template
@@ -6,5 +6,6 @@