You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
yacy_search_server/source/net/yacy/interaction/contentcontrol/ContentControlImportThread....

239 lines
5.5 KiB

package net.yacy.interaction.contentcontrol;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.data.ymark.YMarkEntry;
import net.yacy.data.ymark.YMarkSMWJSONImporter;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
/**
 * Job that imports bookmark entries from a remote SMW endpoint (presumably a
 * Semantic MediaWiki "ask" query interface - confirm against the deployment)
 * into the table named by {@code contentcontrol.smwimport.targetlist}.
 *
 * <p>Each call to {@link #run()} performs exactly one step:</p>
 * <ul>
 *   <li>if no import job is active, it asks the endpoint (format
 *       {@code ystat}) how many elements changed since the last sync and,
 *       if there are any, arms a new job;</li>
 *   <li>if a job is active, it fetches the next page of at most
 *       {@code limit} elements (format {@code json}) and stores every
 *       received entry as a bookmark.</li>
 * </ul>
 *
 * <p>Overlapping invocations of {@link #run()} are skipped: a call that finds
 * another call still in progress returns immediately.</p>
 */
public class ContentControlImportThread {

    private final Switchboard sb;

    /** True while a call to {@link #run()} is in progress; guarded by {@code this}. */
    private boolean locked = false;

    /** Timestamp used as lower bound of the next count query. */
    private String lastsync = "1900-01-01T01:00:00";

    /** Timestamp used as lower bound of the currently running page queries. */
    private String currenttimestamp = "1900-01-01T01:00:00";

    /** Offset of the next page to request within the running job. */
    private long offset = 0;

    /** Maximum number of elements requested per page. */
    private final long limit = 500;

    /** Number of elements reported by the last count query. */
    private long currentmax = 0;

    /** True while a paged import job is active. */
    private boolean runningjob = false;

    /**
     * Creates the import job and, if
     * {@code contentcontrol.smwimport.purgelistoninit} is enabled, clears the
     * configured target list.
     *
     * @param sb the switchboard providing configuration and tables
     */
    public ContentControlImportThread(final Switchboard sb) {
        this.sb = sb;

        if (this.sb.getConfigBool("contentcontrol.smwimport.purgelistoninit",
                false)) {
            this.sb.tables.clear(this.sb.getConfig(
                    "contentcontrol.smwimport.targetlist", "contentcontrol"));
        }
    }

    /**
     * Escapes a query fragment for use in the request path by replacing a
     * fixed set of characters with {@code -XX} hex sequences.
     *
     * <p>The dash is replaced first because it is the escape character itself;
     * doing it later would re-escape the sequences produced for the other
     * characters.</p>
     *
     * @param s the raw query fragment
     * @return the escaped fragment
     */
    private static String wikiurlify(final String s) {
        return s.replace("-", "-2D")
                .replace("+", "-2B")
                .replace(" ", "-20")
                .replace("[", "-5B")
                .replace("]", "-5D")
                .replace(":", "-3A")
                .replace(">", "-3E")
                .replace("?", "-3F");
    }

    /**
     * Performs one import step (either a count query or one page import).
     * Does nothing if another call is still in progress or if
     * {@code contentcontrol.smwimport.enabled} is not set.
     */
    public final void run() {
        if (!acquireLock()) {
            return;
        }
        try {
            if (!this.sb.getConfigBool("contentcontrol.smwimport.enabled", false)) {
                return;
            }
            if (this.runningjob) {
                importNextBatch();
            } else {
                checkForNewElements();
            }
        } finally {
            // Always release, otherwise a disabled import or an unexpected
            // exception would block every later invocation.
            releaseLock();
        }
    }

    /** Atomically marks the job as busy; returns false if it already was. */
    private synchronized boolean acquireLock() {
        if (this.locked) {
            return false;
        }
        this.locked = true;
        return true;
    }

    /** Marks the job as idle again. */
    private synchronized void releaseLock() {
        this.locked = false;
    }

    /** Fetches the next page of the running job and stores its entries as bookmarks. */
    private void importNextBatch() {
        Log.logInfo("CONTENTCONTROL",
                "CONTENTCONTROL importing max. " + this.limit
                        + " elements at " + this.offset + " of "
                        + this.currentmax + ", since "
                        + this.currenttimestamp);

        final String baseurl = this.sb.getConfig("contentcontrol.smwimport.baseurl", "");
        if (baseurl.equals("")) {
            Log.logWarning("CONTENTCONTROL", "No SMWimport URL defined");
            return;
        }

        final URL bmks_json;
        try {
            bmks_json = new URL(
                    baseurl
                            + wikiurlify("/[[Category:Web Page]] [[Modification date::>" + this.currenttimestamp + "]]")
                            + wikiurlify("/?Url/?Filter/?Article has average rating/?Category")
                            + "/mainlabel%3D"
                            + "/offset%3D" + this.offset
                            + "/limit%3D" + this.limit
                            + "/format%3Djson");
        } catch (final MalformedURLException e) {
            Log.logException(e);
            return;
        }

        // Advance the paging window before fetching; the job ends once the
        // window has moved past the number of elements reported by the count.
        this.offset += this.limit;
        if (this.offset > this.currentmax) {
            this.runningjob = false;
        }

        final InputStreamReader reader;
        try {
            reader = new InputStreamReader(bmks_json.openStream(), "UTF-8");
        } catch (final Exception e) {
            Log.logException(e);
            this.runningjob = false;
            return;
        }

        try {
            final YMarkSMWJSONImporter bookmarkImporter;
            try {
                bookmarkImporter = new YMarkSMWJSONImporter(reader, 200, "");
            } catch (final Exception e) {
                // Without an importer there is nothing to consume; abort the
                // job instead of dereferencing a null importer below.
                Log.logException(e);
                this.runningjob = false;
                return;
            }

            final String targetlist = this.sb.getConfig(
                    "contentcontrol.smwimport.targetlist", "contentcontrol");

            final Thread t = new Thread(bookmarkImporter,
                    "YMarks - Network bookmark importer");
            t.start();

            YMarkEntry bmk;
            while ((bmk = bookmarkImporter.take()) != YMarkEntry.POISON) {
                if (bmk == YMarkEntry.EMPTY) {
                    this.runningjob = false;
                } else {
                    try {
                        this.sb.tables.bookmarks.addBookmark(targetlist, bmk,
                                true, true);
                    } catch (final Exception e) {
                        // A single bad entry must not abort the whole page.
                        Log.logException(e);
                    }
                }
            }
        } finally {
            try {
                reader.close();
            } catch (final IOException e) {
                Log.logException(e);
            }
        }
    }

    /**
     * Queries the number of elements modified since {@code lastsync} and, if
     * there are any, arms a new paged import job starting at offset 0.
     * A missing or malformed reply is logged and leaves the state untouched.
     */
    private void checkForNewElements() {
        final String baseurl = this.sb.getConfig("contentcontrol.smwimport.baseurl", "");
        if (baseurl.equals("")) {
            Log.logWarning("CONTENTCONTROL", "No SMWimport URL defined");
            return;
        }

        try {
            final URL bmks_count = new URL(
                    baseurl
                            + wikiurlify("/[[Category:Web Page]] [[Modification date::>" + this.lastsync + "]]")
                            + wikiurlify("/?Url/?Filter/?Article has average rating/?Category")
                            + "/mainlabel%3D"
                            + "/format%3Dystat");

            final byte[] raw = new HTTPClient().GETbytes(bmks_count.toString());
            final String reply = (raw == null) ? null : UTF8.String(raw);
            if (reply == null) {
                Log.logWarning("CONTENTCONTROL",
                        "CONTENTCONTROL received no reply for the SMWimport count request");
                return;
            }

            // Expected reply: "<count>,<timestamp>[,...]"
            final String[] parts = reply.split(",");
            if (parts.length < 2) {
                Log.logWarning("CONTENTCONTROL",
                        "CONTENTCONTROL received an unexpected SMWimport count reply: " + reply);
                return;
            }

            final long overallcount;
            try {
                overallcount = Long.parseLong(parts[0].trim());
            } catch (final NumberFormatException e) {
                Log.logWarning("CONTENTCONTROL",
                        "CONTENTCONTROL received a non-numeric SMWimport count: " + parts[0]);
                return;
            }
            final String lastsyncstring = parts[1].trim();

            this.currentmax = overallcount;

            if (this.currentmax > 0) {
                Log.logInfo("CONTENTCONTROL",
                        "CONTENTCONTROL import job counts "
                                + this.currentmax
                                + " new elements between "
                                + this.lastsync + " and "
                                + this.currenttimestamp);

                // The job pages over everything newer than the previous sync
                // point; the timestamp from the reply becomes the next one.
                this.currenttimestamp = this.lastsync;
                this.runningjob = true;
                this.lastsync = lastsyncstring;
                this.offset = 0;
            }
        } catch (final MalformedURLException e) {
            Log.logException(e);
        } catch (final IOException e) {
            Log.logException(e);
        }
    }
}