diff --git a/htroot/IndexImportOAIPMH_p.html b/htroot/IndexImportOAIPMH_p.html new file mode 100644 index 000000000..0bcf4624c --- /dev/null +++ b/htroot/IndexImportOAIPMH_p.html @@ -0,0 +1,37 @@ + + + + YaCy '#[clientname]#': OAI-PMH Import + #%env/templates/metas.template%# + #(import)#::#(/import)# + + + #%env/templates/header.template%# + #%env/templates/submenuIntegration.template%# +

OAI-PMH Import

+ + #(import)# +

#(status)#No import thread is running, you can start a new thread here::Bad input data: #[message]# #(/status)#

+
+
+ OAI-PMH Import: set an OAI-PMH URL + + +
+
+ :: +
Import Process +
+
Thread:
#[thread]#
+
Source:
#[source]#
+
Processed:
#[count]# Records
+
Speed:
#[speed]# articles per second
+
Running Time:
#[runningHours]# hours, #[runningMinutes]# minutes
+
Remaining Time:
#[remainingHours]# hours, #[remainingMinutes]# minutes
+
+
+ #(/import)# + + #%env/templates/footer.template%# + + \ No newline at end of file diff --git a/htroot/IndexImportOAIPMH_p.java b/htroot/IndexImportOAIPMH_p.java new file mode 100644 index 000000000..c8e334eef --- /dev/null +++ b/htroot/IndexImportOAIPMH_p.java @@ -0,0 +1,86 @@ +// IndexImportOAIPMH.java +// ------------------------- +// (C) 2009 by Michael Peter Christen; mc@yacy.net +// first published 04.05.2009 on http://yacy.net +// Frankfurt, Germany +// +// $LastChangedDate: 2009-10-11 23:29:18 +0200 (So, 11 Okt 2009) $ +// $LastChangedRevision: 6400 $ +// $LastChangedBy: orbiter $ +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+import java.io.File;
+import java.net.MalformedURLException;
+
+import net.yacy.document.importer.OAIPMHImporter;
+import net.yacy.kelondro.data.meta.DigestURI;
+
+import de.anomic.http.server.RequestHeader;
+import de.anomic.search.Switchboard;
+import de.anomic.server.serverObjects;
+import de.anomic.server.serverSwitch;
+/** Backing class for IndexImportOAIPMH_p.html: reports the state of the single static OAI-PMH import job or starts a new one. */
+public class IndexImportOAIPMH_p {
+    /** Fills the template properties. header is not read; post may be null (plain page view) and otherwise may carry "oaipmhurl"; env is cast to Switchboard for its loader. Returns "import"=1 plus the progress fields when a job is alive or was just started, otherwise "import"=0. */
+    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
+        final serverObjects prop = new serverObjects();
+        final Switchboard sb = (Switchboard) env;
+
+        if (OAIPMHImporter.job != null && OAIPMHImporter.job.isAlive()) {
+            // one import is running, no option to insert anything
+            prop.put("import", 1);
+            prop.put("import_thread", "running");
+            prop.put("import_source", OAIPMHImporter.job.source());
+            prop.put("import_count", OAIPMHImporter.job.count());
+            prop.put("import_speed", OAIPMHImporter.job.speed());
+            prop.put("import_runningHours", (OAIPMHImporter.job.runningTime() / 60) / 60); // time values are divided as seconds
+            prop.put("import_runningMinutes", (OAIPMHImporter.job.runningTime() / 60) % 60);
+            prop.put("import_remainingHours", (OAIPMHImporter.job.remainingTime() / 60) / 60);
+            prop.put("import_remainingMinutes", (OAIPMHImporter.job.remainingTime() / 60) % 60);
+        } else {
+            prop.put("import", 0);
+            if (post == null) {
+                prop.put("import_status", 0); // no request data: show the start form
+            } else {
+                if (post.containsKey("oaipmhurl")) { // guard on the parameter that is actually read on the next line
+                    String oaipmhurl = post.get("oaipmhurl");
+                    DigestURI url = null;
+                    try {
+                        url = new DigestURI(oaipmhurl, null);
+                        OAIPMHImporter.job = new OAIPMHImporter(sb.loader, url);
+                        OAIPMHImporter.job.start();
+                        prop.put("import", 1);
+                        prop.put("import_thread", "started");
+                        prop.put("import_source", OAIPMHImporter.job.source()); // same key as the running branch; the template shows #[source]#
+                        prop.put("import_count", 0);
+                        prop.put("import_speed", 0);
+                        prop.put("import_runningHours", 0);
+                        prop.put("import_runningMinutes", 0);
+                        prop.put("import_remainingHours", 0);
+                        prop.put("import_remainingMinutes", 0);
+                    } catch (MalformedURLException e) {
+                        e.printStackTrace();
+                        prop.put("import", 0);
+                        prop.put("import_status", 1); // template alternative "Bad input data"
+                        prop.put("import_status_message", e.getMessage());
+                    }
+                }
+                return prop;
+            }
+        }
+        return prop;
+    }
+}
diff --git a/htroot/IndexImportWikimedia_p.java b/htroot/IndexImportWikimedia_p.java index a6b4d5039..45940dc15 100644 --- a/htroot/IndexImportWikimedia_p.java +++ b/htroot/IndexImportWikimedia_p.java @@ -25,11 +25,12 @@ import java.io.File; import java.net.MalformedURLException; +import net.yacy.document.importer.MediawikiImporter; + import de.anomic.http.server.RequestHeader; import de.anomic.search.Switchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; -import de.anomic.tools.mediawikiIndex; public class IndexImportWikimedia_p { @@ -37,17 +38,17 @@ public class IndexImportWikimedia_p { final serverObjects prop = new serverObjects(); final Switchboard sb = (Switchboard) env; - if (mediawikiIndex.job != null && mediawikiIndex.job.isAlive()) { + if (MediawikiImporter.job != null && MediawikiImporter.job.isAlive()) { // one import is running, no option to insert anything prop.put("import", 1); prop.put("import_thread", "running"); - prop.put("import_dump", mediawikiIndex.job.sourcefile.getName()); - prop.put("import_count", mediawikiIndex.job.count); - prop.put("import_speed", mediawikiIndex.job.speed()); - prop.put("import_runningHours", (mediawikiIndex.job.runningTime() / 60) / 60); - prop.put("import_runningMinutes", (mediawikiIndex.job.runningTime() / 60) % 60); - prop.put("import_remainingHours", (mediawikiIndex.job.remainingTime() / 60) / 60); - prop.put("import_remainingMinutes", (mediawikiIndex.job.remainingTime() / 60) % 60); + prop.put("import_dump", MediawikiImporter.job.source()); + prop.put("import_count", 
MediawikiImporter.job.count()); + prop.put("import_speed", MediawikiImporter.job.speed()); + prop.put("import_runningHours", (MediawikiImporter.job.runningTime() / 60) / 60); + prop.put("import_runningMinutes", (MediawikiImporter.job.runningTime() / 60) % 60); + prop.put("import_remainingHours", (MediawikiImporter.job.remainingTime() / 60) / 60); + prop.put("import_remainingMinutes", (MediawikiImporter.job.remainingTime() / 60) % 60); } else { prop.put("import", 0); if (post == null) { @@ -64,11 +65,11 @@ public class IndexImportWikimedia_p { } String lang = name.substring(0, 2); try { - mediawikiIndex.job = new mediawikiIndex(sourcefile, sb.surrogatesInPath, "http://" + lang + ".wikipedia.org/wiki/"); - mediawikiIndex.job.start(); + MediawikiImporter.job = new MediawikiImporter(sourcefile, sb.surrogatesInPath, "http://" + lang + ".wikipedia.org/wiki/"); + MediawikiImporter.job.start(); prop.put("import", 1); prop.put("import_thread", "started"); - prop.put("import_dump", mediawikiIndex.job.sourcefile.getName()); + prop.put("import_dump", MediawikiImporter.job.source()); prop.put("import_count", 0); prop.put("import_speed", 0); prop.put("import_runningHours", 0); diff --git a/htroot/mediawiki_p.java b/htroot/mediawiki_p.java index 06ea8ab52..14690c280 100644 --- a/htroot/mediawiki_p.java +++ b/htroot/mediawiki_p.java @@ -27,11 +27,12 @@ import java.io.File; import java.io.IOException; +import net.yacy.document.importer.MediawikiImporter; + import de.anomic.http.server.RequestHeader; import de.anomic.search.Switchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; -import de.anomic.tools.mediawikiIndex; public class mediawiki_p { @@ -53,12 +54,12 @@ public class mediawiki_p { File dumpFile = new File(sb.getRootPath(), "DATA/HTCACHE/mediawiki/" + dump); if (!dumpFile.exists()) return post; - mediawikiIndex.checkIndex(dumpFile); - mediawikiIndex.wikisourcerecord w = mediawikiIndex.find(title.replaceAll(" ", "_"), 
mediawikiIndex.idxFromWikimediaXML(dumpFile)); + MediawikiImporter.checkIndex(dumpFile); + MediawikiImporter.wikisourcerecord w = MediawikiImporter.find(title.replaceAll(" ", "_"), MediawikiImporter.idxFromWikimediaXML(dumpFile)); if (w == null) { return post; } - String page = new String(mediawikiIndex.read(dumpFile, w.start, (int) (w.end - w.start)), "UTF-8"); + String page = new String(MediawikiImporter.read(dumpFile, w.start, (int) (w.end - w.start)), "UTF-8"); int p = page.indexOf("', p);
diff --git a/source/net/yacy/document/importer/Importer.java b/source/net/yacy/document/importer/Importer.java
new file mode 100644
index 000000000..b66281dbe
--- /dev/null
+++ b/source/net/yacy/document/importer/Importer.java
@@ -0,0 +1,40 @@
+package net.yacy.document.importer;
+/** Contract of an import job as used by the import pages: progress figures (source, count, speed, timing) plus the isAlive/start/run lifecycle methods. */
+public interface Importer extends Runnable {
+
+    /** @return a string naming the source of the import; the import pages display it as the job's source */
+    public String source();
+    /** @return the number of records processed so far (the import pages show it under "Processed") */
+    public int count();
+
+    /**
+     * return the number of articles per second
+     * @return the processing speed in articles per second
+     */
+    public int speed();
+
+    /**
+     * return the time this import is already running
+     * @return the running time; the import pages divide it by 60 to obtain minutes, i.e. they treat it as seconds
+     */
+    public long runningTime();
+
+
+    /**
+     * return the estimated remaining time for the completion of all records
+     * @return the remaining time; NOTE(review): the import pages treat it as seconds, not milliseconds - confirm against the implementations
+     */
+    public long remainingTime();
+
+
+    /** @return the liveness flag; the import pages treat true as "an import is running" and then do not offer to start another one */
+    public boolean isAlive();
+    /** starts the import; the import pages call this right after constructing the job */
+    public void start();
+
+    /**
+     * the run method from Runnable
+     */
+    public void run();
+
+}
diff --git a/source/de/anomic/tools/mediawikiIndex.java b/source/net/yacy/document/importer/MediawikiImporter.java similarity index 97% rename from source/de/anomic/tools/mediawikiIndex.java rename to source/net/yacy/document/importer/MediawikiImporter.java index 4b49cffb9..ac75528fe 100644 --- a/source/de/anomic/tools/mediawikiIndex.java +++ b/source/net/yacy/document/importer/MediawikiImporter.java @@ -24,7 +24,7 @@ // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -package de.anomic.tools; +package net.yacy.document.importer; import 
net.yacy.document.Document; import net.yacy.document.TextParser; @@ -71,7 +71,7 @@ import de.anomic.data.wiki.wikiParser; * as referenced with xmlns="http://www.mediawiki.org/xml/export-0.3/" */ -public class mediawikiIndex extends Thread { +public class MediawikiImporter extends Thread implements Importer { private static final String textstart = "