fixed the jsonlist import from searchlab.eu using a direct URL

pull/533/head
Michael Peter Christen 2 years ago
parent 5ddc794bb9
commit 309adb814e

@ -14,6 +14,7 @@
<li><a href="Load_RSS_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">RSS Feed Importer</a></li> <li><a href="Load_RSS_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">RSS Feed Importer</a></li>
<li><a href="IndexImportOAIPMH_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">OAI-PMH Importer</a></li> <li><a href="IndexImportOAIPMH_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">OAI-PMH Importer</a></li>
<li><a href="IndexImportWarc_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Warc Importer</a></li> <li><a href="IndexImportWarc_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Warc Importer</a></li>
<li><a href="IndexImportJsonList_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">JsonList Importer</a></li>
</ul> </ul>
</div> </div>

@ -73,7 +73,7 @@ public class JsonListImporter extends Thread implements Importer {
private boolean abort; private boolean abort;
private final boolean deletewhendone; private final boolean deletewhendone;
public JsonListImporter(final File inputFile, final boolean deletewhendone) throws IOException { public JsonListImporter(final File inputFile, final boolean gz, final boolean deletewhendone) throws IOException {
super("JsonListImporter - from file " + inputFile.getName()); super("JsonListImporter - from file " + inputFile.getName());
this.lineCount = 0; this.lineCount = 0;
this.consumed = 0; this.consumed = 0;
@ -83,7 +83,7 @@ public class JsonListImporter extends Thread implements Importer {
this.abort = false; this.abort = false;
this.deletewhendone = deletewhendone; this.deletewhendone = deletewhendone;
this.source = new FileInputStream(inputFile); this.source = new FileInputStream(inputFile);
if (this.name.endsWith(".gz")) this.source = new GZIPInputStream(this.source); if (this.name.endsWith(".gz") || gz) this.source = new GZIPInputStream(this.source);
} }
@Override @Override
@ -97,6 +97,7 @@ public class JsonListImporter extends Thread implements Importer {
public void processSurrogateJson() throws IOException { public void processSurrogateJson() throws IOException {
this.startTime = System.currentTimeMillis(); this.startTime = System.currentTimeMillis();
job = this;
// start indexer threads which mostly care about tokenization and facet + synonym enrichment // start indexer threads which mostly care about tokenization and facet + synonym enrichment
final int concurrency = Runtime.getRuntime().availableProcessors(); final int concurrency = Runtime.getRuntime().availableProcessors();

@ -58,7 +58,7 @@ public class IndexImportJsonList_p {
final File sourcefile = new File(filename); final File sourcefile = new File(filename);
if (sourcefile.exists()) { if (sourcefile.exists()) {
try { try {
final JsonListImporter wi = new JsonListImporter(sourcefile, false); final JsonListImporter wi = new JsonListImporter(sourcefile, false, false);
wi.start(); wi.start();
prop.put("import_thread", "started"); prop.put("import_thread", "started");
} catch (final IOException ex) { } catch (final IOException ex) {
@ -72,14 +72,25 @@ public class IndexImportJsonList_p {
} }
} else { } else {
final String urlstr = post.get("url"); final String urlstr = post.get("url");
/*
final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);
final byte[] b = client.GETbytes(urlstr, null, null, true);
final File tempfile = File.createTempFile("jsonlistimporter", "");
final FileOutputStream fos = new FileOutputStream(tempfile);
fos.write(b);
fos.close();
client.close();
*/
if (urlstr != null && urlstr.length() > 0) { if (urlstr != null && urlstr.length() > 0) {
try { try {
final URL url = new URL(urlstr); final URL url = new URL(urlstr);
final File tempfile = File.createTempFile("jsonlistimporter", ""); final String tempfilename = "jsonlistimporter";
final boolean gz = urlstr.endsWith(".gz");
final File tempfile = File.createTempFile(tempfilename, "");
final FileOutputStream fos = new FileOutputStream(tempfile); final FileOutputStream fos = new FileOutputStream(tempfile);
fos.getChannel().transferFrom(Channels.newChannel(url.openStream()), 0, Long.MAX_VALUE); fos.getChannel().transferFrom(Channels.newChannel(url.openStream()), 0, Long.MAX_VALUE);
fos.close(); fos.close();
final JsonListImporter wi = new JsonListImporter(tempfile, true); final JsonListImporter wi = new JsonListImporter(tempfile, gz, true);
wi.start(); wi.start();
prop.put("import_thread", "started"); prop.put("import_thread", "started");
} catch (final IOException ex) { } catch (final IOException ex) {

@ -2206,7 +2206,7 @@ public final class Switchboard extends serverSwitch {
// see https://github.com/yacy/yacy_grid_parser/blob/master/README.md // see https://github.com/yacy/yacy_grid_parser/blob/master/README.md
this.log.info("processing json surrogate " + infile); this.log.info("processing json surrogate " + infile);
try { try {
final JsonListImporter importer = new JsonListImporter(infile, false); final JsonListImporter importer = new JsonListImporter(infile, false, false);
importer.run(); importer.run();
} catch (final IOException e) { } catch (final IOException e) {
this.log.warn(e); this.log.warn(e);

Loading…
Cancel
Save