You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
239 lines
5.5 KiB
239 lines
5.5 KiB
package net.yacy.interaction.contentcontrol;
|
|
|
|
import java.io.IOException;
|
|
import java.io.InputStreamReader;
|
|
import java.net.MalformedURLException;
|
|
import java.net.URL;
|
|
|
|
import net.yacy.cora.document.UTF8;
|
|
import net.yacy.cora.protocol.http.HTTPClient;
|
|
import net.yacy.data.ymark.YMarkEntry;
|
|
import net.yacy.data.ymark.YMarkSMWJSONImporter;
|
|
import net.yacy.kelondro.logging.Log;
|
|
import net.yacy.search.Switchboard;
|
|
|
|
public class ContentControlImportThread {
|
|
|
|
private final Switchboard sb;
|
|
|
|
private Boolean locked = false;
|
|
|
|
private String lastsync = "1900-01-01T01:00:00";
|
|
|
|
private String currenttimestamp = "1900-01-01T01:00:00";
|
|
|
|
private long offset = 0;
|
|
|
|
private final long limit = 500;
|
|
|
|
private long currentmax = 0;
|
|
|
|
private boolean runningjob = false;
|
|
|
|
public ContentControlImportThread(final Switchboard sb) {
|
|
//final long time = System.currentTimeMillis();
|
|
|
|
this.sb = sb;
|
|
if (this.sb.getConfigBool("contentcontrol.smwimport.purgelistoninit",
|
|
false)) {
|
|
this.sb.tables.clear(this.sb.getConfig(
|
|
"contentcontrol.smwimport.targetlist", "contentcontrol"));
|
|
|
|
}
|
|
}
|
|
|
|
private final String wikiurlify (String s) {
|
|
|
|
String ret = s;
|
|
|
|
ret = ret.replace("-", "-2D");
|
|
ret = ret.replace("+", "-2B");
|
|
ret = ret.replace(" ", "-20");
|
|
|
|
ret = ret.replace("[", "-5B");
|
|
ret = ret.replace("]", "-5D");
|
|
|
|
ret = ret.replace(":", "-3A");
|
|
ret = ret.replace(">", "-3E");
|
|
|
|
ret = ret.replace("?", "-3F");
|
|
|
|
return ret;
|
|
}
|
|
|
|
public final void run() {
|
|
|
|
if (!this.locked) {
|
|
|
|
this.locked = true;
|
|
|
|
if (this.sb.getConfigBool("contentcontrol.smwimport.enabled", false) == true) {
|
|
|
|
if (this.runningjob) {
|
|
|
|
Log.logInfo("CONTENTCONTROL",
|
|
"CONTENTCONTROL importing max. " + this.limit
|
|
+ " elements at " + this.offset + " of "
|
|
+ this.currentmax + ", since "
|
|
+ this.currenttimestamp);
|
|
|
|
URL bmks_json;
|
|
|
|
//String currenttimestampurl = wikiurlify (this.currenttimestamp);
|
|
|
|
try {
|
|
|
|
if (!this.sb.getConfig("contentcontrol.smwimport.baseurl",
|
|
"").equals("")) {
|
|
|
|
|
|
|
|
bmks_json = new URL(
|
|
this.sb.getConfig(
|
|
"contentcontrol.smwimport.baseurl",
|
|
"")
|
|
+ wikiurlify ("/[[Category:Web Page]] [[Modification date::>" +this.currenttimestamp+ "]]")
|
|
|
|
+ wikiurlify ("/?Url/?Filter/?Article has average rating/?Category")
|
|
+ "/mainlabel%3D"
|
|
+ "/offset%3D" + this.offset
|
|
+ "/limit%3D" + this.limit
|
|
+ "/format%3Djson");
|
|
|
|
this.offset += this.limit;
|
|
|
|
if (this.offset > this.currentmax) {
|
|
this.runningjob = false;
|
|
}
|
|
|
|
InputStreamReader reader = null;
|
|
try {
|
|
reader = new InputStreamReader(
|
|
bmks_json.openStream(), "UTF-8");
|
|
} catch (Exception e) {
|
|
|
|
Log.logException(e);
|
|
this.runningjob = false;
|
|
}
|
|
|
|
if (reader != null) {
|
|
YMarkSMWJSONImporter bookmarkImporter = null;
|
|
try {
|
|
bookmarkImporter = new YMarkSMWJSONImporter(
|
|
reader, 200, "");
|
|
} catch (final Exception e) {
|
|
// TODO: display an error message
|
|
Log.logException(e);
|
|
this.runningjob = false;
|
|
}
|
|
|
|
Thread t;
|
|
YMarkEntry bmk;
|
|
|
|
t = new Thread(bookmarkImporter,
|
|
"YMarks - Network bookmark importer");
|
|
t.start();
|
|
|
|
while ((bmk = bookmarkImporter.take()) != YMarkEntry.POISON) {
|
|
|
|
if (bmk == YMarkEntry.EMPTY) {
|
|
|
|
this.runningjob = false;
|
|
|
|
} else {
|
|
|
|
try {
|
|
this.sb.tables.bookmarks.addBookmark(
|
|
this.sb.getConfig("contentcontrol.smwimport.targetlist", "contentcontrol"), bmk,
|
|
true, true);
|
|
|
|
} catch (Exception e) {
|
|
// TODO Auto-generated catch block
|
|
e.printStackTrace();
|
|
}
|
|
}
|
|
}
|
|
|
|
} else {
|
|
|
|
}
|
|
}
|
|
|
|
else {
|
|
|
|
}
|
|
|
|
} catch (MalformedURLException e2) {
|
|
// TODO Auto-generated catch block
|
|
e2.printStackTrace();
|
|
}
|
|
|
|
} else {
|
|
|
|
try {
|
|
|
|
if (!this.sb.getConfig("contentcontrol.smwimport.baseurl",
|
|
"").equals("")) {
|
|
|
|
URL bmks_count;
|
|
|
|
bmks_count = new URL(
|
|
this.sb.getConfig(
|
|
"contentcontrol.smwimport.baseurl",
|
|
"")
|
|
+ wikiurlify ("/[[Category:Web Page]] [[Modification date::>" +this.lastsync+ "]]")
|
|
|
|
|
|
+ wikiurlify ("/?Url/?Filter/?Article has average rating/?Category")
|
|
+ "/mainlabel%3D"
|
|
+ "/format%3Dystat");
|
|
|
|
String reply = UTF8.String(new HTTPClient()
|
|
.GETbytes(bmks_count.toString()));
|
|
|
|
String overallcount = reply.split(",")[0];
|
|
|
|
String lastsyncstring = reply.split(",")[1];
|
|
|
|
this.currentmax = Integer.parseInt(overallcount);
|
|
|
|
if (this.currentmax > 0) {
|
|
|
|
Log.logInfo("CONTENTCONTROL",
|
|
"CONTENTCONTROL import job counts "
|
|
+ this.currentmax
|
|
+ " new elements between "
|
|
+ this.lastsync + " and "
|
|
+ this.currenttimestamp);
|
|
|
|
this.currenttimestamp = this.lastsync;
|
|
|
|
this.runningjob = true;
|
|
this.lastsync = lastsyncstring;
|
|
this.offset = 0;
|
|
}
|
|
} else {
|
|
Log.logWarning("CONTENTCONTROL",
|
|
"No SMWimport URL defined");
|
|
}
|
|
|
|
} catch (MalformedURLException e) {
|
|
// TODO Auto-generated catch block
|
|
e.printStackTrace();
|
|
} catch (IOException e) {
|
|
// TODO Auto-generated catch block
|
|
e.printStackTrace();
|
|
}
|
|
|
|
}
|
|
|
|
this.locked = false;
|
|
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
}
|