fixed the jsonlist import from searchlab.eu using a direct URL

pull/533/head
Michael Peter Christen 2 years ago
parent 5ddc794bb9
commit 309adb814e

@ -14,6 +14,7 @@
<li><a href="Load_RSS_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">RSS Feed Importer</a></li>
<li><a href="IndexImportOAIPMH_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">OAI-PMH Importer</a></li>
<li><a href="IndexImportWarc_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Warc Importer</a></li>
<li><a href="IndexImportJsonList_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">JsonList Importer</a></li>
</ul>
</div>

@ -73,7 +73,7 @@ public class JsonListImporter extends Thread implements Importer {
private boolean abort;
private final boolean deletewhendone;
public JsonListImporter(final File inputFile, final boolean deletewhendone) throws IOException {
public JsonListImporter(final File inputFile, final boolean gz, final boolean deletewhendone) throws IOException {
super("JsonListImporter - from file " + inputFile.getName());
this.lineCount = 0;
this.consumed = 0;
@ -83,7 +83,7 @@ public class JsonListImporter extends Thread implements Importer {
this.abort = false;
this.deletewhendone = deletewhendone;
this.source = new FileInputStream(inputFile);
if (this.name.endsWith(".gz")) this.source = new GZIPInputStream(this.source);
if (this.name.endsWith(".gz") || gz) this.source = new GZIPInputStream(this.source);
}
@Override
@ -97,6 +97,7 @@ public class JsonListImporter extends Thread implements Importer {
public void processSurrogateJson() throws IOException {
this.startTime = System.currentTimeMillis();
job = this;
// start indexer threads which mostly care about tokenization and facet + synonym enrichment
final int concurrency = Runtime.getRuntime().availableProcessors();

@ -58,7 +58,7 @@ public class IndexImportJsonList_p {
final File sourcefile = new File(filename);
if (sourcefile.exists()) {
try {
final JsonListImporter wi = new JsonListImporter(sourcefile, false);
final JsonListImporter wi = new JsonListImporter(sourcefile, false, false);
wi.start();
prop.put("import_thread", "started");
} catch (final IOException ex) {
@ -72,14 +72,25 @@ public class IndexImportJsonList_p {
}
} else {
final String urlstr = post.get("url");
/*
final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);
final byte[] b = client.GETbytes(urlstr, null, null, true);
final File tempfile = File.createTempFile("jsonlistimporter", "");
final FileOutputStream fos = new FileOutputStream(tempfile);
fos.write(b);
fos.close();
client.close();
*/
if (urlstr != null && urlstr.length() > 0) {
try {
final URL url = new URL(urlstr);
final File tempfile = File.createTempFile("jsonlistimporter", "");
final String tempfilename = "jsonlistimporter";
final boolean gz = urlstr.endsWith(".gz");
final File tempfile = File.createTempFile(tempfilename, "");
final FileOutputStream fos = new FileOutputStream(tempfile);
fos.getChannel().transferFrom(Channels.newChannel(url.openStream()), 0, Long.MAX_VALUE);
fos.close();
final JsonListImporter wi = new JsonListImporter(tempfile, true);
final JsonListImporter wi = new JsonListImporter(tempfile, gz, true);
wi.start();
prop.put("import_thread", "started");
} catch (final IOException ex) {

@ -2206,7 +2206,7 @@ public final class Switchboard extends serverSwitch {
// see https://github.com/yacy/yacy_grid_parser/blob/master/README.md
this.log.info("processing json surrogate " + infile);
try {
final JsonListImporter importer = new JsonListImporter(infile, false);
final JsonListImporter importer = new JsonListImporter(infile, false, false);
importer.run();
} catch (final IOException e) {
this.log.warn(e);

Loading…
Cancel
Save