You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
yacy_search_server/source/net/yacy/contentcontrol/SMWListSyncThread.java

202 lines
5.7 KiB

package net.yacy.contentcontrol;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.search.Switchboard;
/**
 * Runnable that incrementally copies rows from a remote SMW list export
 * (SMW presumably being Semantic MediaWiki; confirm against the deployment)
 * into a local table of the {@link Switchboard}.
 *
 * <p>Each invocation of {@link #run()} performs exactly one step:
 * <ul>
 *   <li>If no import job is in progress, it asks the remote side how many
 *       elements changed since the last sync and, if there are any, starts a
 *       job.</li>
 *   <li>If a job is in progress, it fetches the next page of at most
 *       {@code limit} rows and inserts them into the target table.</li>
 * </ul>
 *
 * <p>The {@code locked} flag only skips a run while a previous one is still in
 * progress on the same instance. It is a plain field, so it is not a strict
 * mutual-exclusion guarantee across threads.
 */
public class SMWListSyncThread implements Runnable {

    private static final String LOG_TAG = "SMWLISTSYNC";
    private static final String CFG_ENABLED = "contentcontrol.smwimport.enabled";
    private static final String CFG_BASEURL = "contentcontrol.smwimport.baseurl";

    private final Switchboard sb;

    /** True while a {@link #run()} invocation is in progress. */
    private boolean locked = false;

    /** Timestamp reported by the remote side on the most recent count request. */
    private String lastsync = "1900-01-01T01:00:00";

    /** Lower time bound used by the import requests of the current job. */
    private String currenttimestamp = "1900-01-01T01:00:00";

    /** Offset of the next page to request. */
    private long offset = 0;

    /** Page size of one import request. */
    private final long limit = 500;

    /** Number of elements the remote side reported for the current job. */
    private long currentmax = 0;

    /** True while there are still pages to fetch for the current job. */
    private boolean runningjob = false;

    private final String targetList;
    private final String parameters;
    private final String query;

    /** Set to true whenever a row has been inserted into a target table. */
    public static Boolean dirty = false;

    /**
     * @param sb           switchboard providing configuration and tables
     * @param targetList   name of the table that receives the imported rows
     * @param query        query condition placed inside {@code [[...]]}
     * @param parameters   additional request path appended after the query
     * @param purgeOnInit  when true, the target table is cleared immediately;
     *                     {@code null} is treated as false
     */
    public SMWListSyncThread(final Switchboard sb, final String targetList, final String query, final String parameters, final Boolean purgeOnInit) {
        this.sb = sb;
        this.targetList = targetList;
        this.parameters = parameters;
        this.query = query;
        // Boolean.TRUE.equals avoids an unboxing NullPointerException for null.
        if (Boolean.TRUE.equals(purgeOnInit)) {
            this.sb.tables.clear(targetList);
        }
    }

    /**
     * Escapes the characters that are special in the remote URL scheme using
     * a {@code -XX} hexadecimal notation.
     *
     * <p>The dash itself must be replaced first, because every later
     * replacement introduces new dashes.
     */
    private static String wikiurlify(final String s) {
        String ret = s;
        ret = ret.replace("-", "-2D");
        ret = ret.replace("+", "-2B");
        ret = ret.replace(" ", "-20");
        ret = ret.replace("[", "-5B");
        ret = ret.replace("]", "-5D");
        ret = ret.replace(":", "-3A");
        ret = ret.replace(">", "-3E");
        ret = ret.replace("?", "-3F");
        return ret;
    }

    /**
     * Performs one synchronisation step (count or import), unless another
     * step is still in progress or the feature is disabled in the
     * configuration.
     */
    @Override
    public final void run() {
        if (this.locked) {
            return;
        }
        this.locked = true;
        try {
            if (!this.sb.getConfigBool(CFG_ENABLED, false)) {
                return;
            }
            if (this.runningjob) {
                importNextBatch();
            } else {
                countNewElements();
            }
        } finally {
            // Always release the flag: previously it stayed set forever when
            // the feature was disabled or when an unchecked exception escaped.
            this.locked = false;
        }
    }

    /** Asks the remote side how many elements changed and starts a job if any did. */
    private void countNewElements() {
        final String baseUrl = this.sb.getConfig(CFG_BASEURL, "");
        if (baseUrl.equals("")) {
            ConcurrentLog.warn(LOG_TAG, "No SMWimport URL defined");
            return;
        }
        try {
            final URL urlCount = new URL(
                    baseUrl
                    + wikiurlify("/[[" + this.query + "]] [[Modification date::>" + this.lastsync + "]]")
                    + wikiurlify(this.parameters)
                    + "/mainlabel%3D"
                    + "/offset%3D0"
                    + "/limit%3D200000"
                    + "/format%3Dystat");
            final String reply = UTF8.String(new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent).GETbytes(urlCount.toString(), null, null, false));
            if (reply == null) {
                ConcurrentLog.warn(LOG_TAG, "empty reply for count request " + urlCount);
                return;
            }

            // The reply is read as "<count>,<timestamp>[,...]".
            final String[] fields = CommonPattern.COMMA.split(reply);
            if (fields.length < 2) {
                ConcurrentLog.warn(LOG_TAG, "unexpected reply for count request: " + reply);
                return;
            }
            final long counted;
            try {
                counted = Long.parseLong(fields[0].trim());
            } catch (final NumberFormatException e) {
                ConcurrentLog.warn(LOG_TAG, "count in reply is not a number: " + fields[0]);
                return;
            }
            final String lastsyncstring = fields[1];

            this.currentmax = counted;
            if (this.currentmax > 0) {
                ConcurrentLog.info(LOG_TAG,
                        "import job counts "
                        + this.currentmax
                        + " new elements between "
                        + this.lastsync + " and "
                        + this.currenttimestamp);
                // The job imports everything newer than the previous sync
                // point; the timestamp just received becomes the next one.
                this.currenttimestamp = this.lastsync;
                this.runningjob = true;
                this.lastsync = lastsyncstring;
                this.offset = 0;
            }
        } catch (final MalformedURLException e) {
            ConcurrentLog.logException(e);
        } catch (final IOException e) {
            ConcurrentLog.logException(e);
        }
    }

    /** Fetches the next page of the running job and inserts its rows into the target table. */
    private void importNextBatch() {
        ConcurrentLog.info(LOG_TAG,
                "importing max. " + this.limit
                + " elements at " + this.offset + " of "
                + this.currentmax + ", since "
                + this.currenttimestamp);

        final String baseUrl = this.sb.getConfig(CFG_BASEURL, "");
        if (baseUrl.equals("")) {
            ConcurrentLog.warn(LOG_TAG, "No SMWimport URL defined");
            return;
        }

        final URL urlImport;
        try {
            urlImport = new URL(
                    baseUrl
                    + wikiurlify("/[[" + this.query + "]] [[Modification date::>" + this.currenttimestamp + "]]")
                    + wikiurlify(this.parameters)
                    + "/mainlabel%3D"
                    + "/syntax%3Dobsolete"
                    + "/offset%3D" + this.offset
                    + "/limit%3D" + this.limit
                    + "/format%3Djson");
        } catch (final MalformedURLException e) {
            ConcurrentLog.logException(e);
            return;
        }

        // Advance the paging state before fetching, so a failing page is not
        // requested again.
        this.offset += this.limit;
        if (this.offset > this.currentmax) {
            this.runningjob = false;
        }

        final InputStreamReader reader;
        try {
            reader = new InputStreamReader(urlImport.openStream(), StandardCharsets.UTF_8);
        } catch (final Exception e) {
            ConcurrentLog.logException(e);
            this.runningjob = false;
            return;
        }

        try {
            final SMWListImporterFormatObsolete smwListImporter;
            try {
                smwListImporter = new SMWListImporterFormatObsolete(reader, 200);
            } catch (final Exception e) {
                // TODO: display an error message
                ConcurrentLog.logException(e);
                this.runningjob = false;
                // Without an importer there is nothing to drain; continuing
                // would dereference null.
                return;
            }

            final Thread importerThread = new Thread(smwListImporter, "SMW List Importer");
            importerThread.start();

            SMWListRow row;
            while ((row = smwListImporter.take()) != SMWListRow.POISON) {
                if (row == SMWListRow.EMPTY) {
                    this.runningjob = false;
                    continue;
                }
                try {
                    this.sb.tables.insert(this.targetList, row.getData());
                    dirty = true;
                } catch (final Exception e) {
                    ConcurrentLog.logException(e);
                }
            }
        } finally {
            // NOTE(review): assumes POISON marks the end of the importer's
            // output, so the reader is no longer needed at this point.
            try {
                reader.close();
            } catch (final IOException e) {
                ConcurrentLog.logException(e);
            }
        }
    }
}