Apply Importer interface to WarcImporterpull/60/head^2
parent
1d81b8f102
commit
ba339a2a45
@ -0,0 +1,51 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<title>YaCy '#[clientname]#': Warc Import</title>
|
||||
#%env/templates/metas.template%#
|
||||
#(import)#::<meta http-equiv="REFRESH" content="10;url=IndexImportWarc_p.html" />
|
||||
<!-- the url= removes http get parameters on refresh, preventing restart of import -->
|
||||
#(/import)#
|
||||
</head>
|
||||
<body id="IndexImportWarc">
|
||||
#%env/templates/header.template%#
|
||||
#%env/templates/submenuIndexImport.template%#
|
||||
<h2>Web Archive File Import</h2>
|
||||
|
||||
#(import)#
|
||||
<p>No import thread is running, you can start a new thread here</p>
|
||||
<form action="IndexImportWarc_p.html" method="get" accept-charset="UTF-8">
|
||||
<!-- no post method here, we don't want to transmit the whole file, only the path-->
|
||||
<fieldset>
|
||||
<legend>Warc File Selection: select a warc file (which may be gz compressed)</legend>
|
||||
<p>
|
||||
You can download warc archives for example here
|
||||
<a href="https://archive.org/search.php?query=subject%3A%22warcarchives%22&amp;and[]=subject%3A%22warcarchives%22" target="_blank">Internet Archive</a>.
|
||||
</p>
|
||||
<div class="input-group">
|
||||
<span style="display: inline-block">
|
||||
<input name="file" type="file" value="" size="75" /></span>
|
||||
<div class="btn-group">
|
||||
<input name="submit" class="btn btn-primary" type="submit" value="Import Warc File" />
|
||||
</div>
|
||||
</div>
|
||||
</fieldset>
|
||||
</form>
|
||||
|
||||
<br />
|
||||
::
|
||||
<form><fieldset><legend>Import Process</legend>
|
||||
<dl>
|
||||
<dt>Thread:</dt><dd>#[thread]#</dd>
|
||||
<dt>Warc File:</dt><dd>#[warcfile]#</dd>
|
||||
<dt>Processed:</dt><dd>#[count]# Entries</dd>
|
||||
<dt>Speed:</dt><dd>#[speed]# pages per second</dd>
|
||||
<dt>Running Time:</dt><dd>#[runningHours]# hours, #[runningMinutes]# minutes</dd>
|
||||
<dt>Remaining Time:</dt><dd>#[remainingHours]# hours, #[remainingMinutes]# minutes</dd>
|
||||
</dl>
|
||||
</fieldset></form>
|
||||
#(/import)#
|
||||
|
||||
#%env/templates/footer.template%#
|
||||
</body>
|
||||
</html>
|
@ -0,0 +1,76 @@
|
||||
// IndexImportWarc_p.java
|
||||
// -------------------------
|
||||
// (c) 2017 by reger24; https://github.com/reger24
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.document.importer.WarcImporter;
|
||||
import net.yacy.search.Switchboard;
|
||||
import net.yacy.server.serverObjects;
|
||||
import net.yacy.server.serverSwitch;
|
||||
|
||||
public class IndexImportWarc_p {
|
||||
|
||||
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {
|
||||
final serverObjects prop = new serverObjects();
|
||||
final Switchboard sb = (Switchboard) env;
|
||||
|
||||
if (WarcImporter.job != null && WarcImporter.job.isAlive()) {
|
||||
// one import is running, no option to insert anything
|
||||
prop.put("import", 1);
|
||||
prop.put("import_thread", "running");
|
||||
prop.put("import_warcfile", WarcImporter.job.source());
|
||||
prop.put("import_count", WarcImporter.job.count());
|
||||
prop.put("import_speed", WarcImporter.job.speed());
|
||||
prop.put("import_runningHours", (WarcImporter.job.runningTime() / 60) / 60);
|
||||
prop.put("import_runningMinutes", (WarcImporter.job.runningTime() / 60) % 60);
|
||||
prop.put("import_remainingHours", (WarcImporter.job.remainingTime() / 60) / 60);
|
||||
prop.put("import_remainingMinutes", (WarcImporter.job.remainingTime() / 60) % 60);
|
||||
} else {
|
||||
prop.put("import", 0);
|
||||
if (post != null) {
|
||||
if (post.containsKey("file")) {
|
||||
String file = post.get("file");
|
||||
final File sourcefile = new File(file);
|
||||
if (sourcefile.exists()) {
|
||||
try {
|
||||
WarcImporter wi = new WarcImporter(sourcefile);
|
||||
wi.start();
|
||||
prop.put("import_thread", "started");
|
||||
} catch (FileNotFoundException ex) {
|
||||
prop.put("import_thread", "Error: file not found [" + file + "]");
|
||||
}
|
||||
prop.put("import_warcfile", file);
|
||||
} else {
|
||||
prop.put("import_warcfile", "");
|
||||
prop.put("import_thread", "Error: file not found [" + file + "]");
|
||||
}
|
||||
prop.put("import", 1);
|
||||
prop.put("import_count", 0);
|
||||
prop.put("import_speed", 0);
|
||||
prop.put("import_runningHours", 0);
|
||||
prop.put("import_runningMinutes", 0);
|
||||
prop.put("import_remainingHours", 0);
|
||||
prop.put("import_remainingMinutes", 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
return prop;
|
||||
}
|
||||
}
|
Loading…
Reference in new issue