You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
yacy_search_server/source/net/yacy/interaction/contentcontrol/ContentControlImportThread....

254 lines
5.7 KiB

package net.yacy.interaction.contentcontrol;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.kelondro.blob.Tables.Row;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import de.anomic.data.ymark.YMarkEntry;
import de.anomic.data.ymark.YMarkSMWJSONImporter;
import de.anomic.data.ymark.YMarkUtil;
/**
 * Incrementally imports bookmarks from the remote endpoint configured under
 * {@code contentcontrol.smwimport.baseurl} into the bookmark table named by
 * {@code contentcontrol.smwimport.targetlist}.
 *
 * <p>Each call to {@link #run()} performs exactly one step:
 * <ul>
 * <li>if no job is running, it asks the endpoint how many entries changed
 * since the last sync and, if there are any, starts a new job;</li>
 * <li>if a job is running, it fetches and stores the next page of at most
 * {@code limit} entries.</li>
 * </ul>
 *
 * <p>NOTE(review): the {@code locked} flag is a plain field, so it only
 * prevents re-entrant calls reliably when {@code run()} is invoked from a
 * single thread — confirm against the scheduler that drives this class.
 */
public class ContentControlImportThread {

    private final Switchboard sb;

    /** True while a call to {@link #run()} is in progress. */
    private boolean locked = false;

    /** Timestamp reported by the endpoint on the most recent count request. */
    private String lastsync = "1900-01-01T01:00:00";

    /** Lower modification-date bound used by the job that is currently paging. */
    private String currenttimestamp = "1900-01-01T01:00:00";

    /** Offset of the next page to request. */
    private long offset = 0;

    /** Maximum number of entries requested per page. */
    private long limit = 500;

    /** Number of entries the endpoint reported for the current job. */
    private long currentmax = 0;

    /** True while a paging job is in progress. */
    private boolean runningjob = false;

    /**
     * Creates the importer and, when
     * {@code contentcontrol.smwimport.purgelistoninit} is set, clears the
     * target list.
     *
     * @param sb the switchboard providing configuration and tables
     */
    public ContentControlImportThread(final Switchboard sb) {
        this.sb = sb;

        if (this.sb.getConfigBool("contentcontrol.smwimport.purgelistoninit", false)) {
            this.sb.tables.clear(this.sb.getConfig(
                    "contentcontrol.smwimport.targetlist", "contentcontrol"));
        }
    }

    /**
     * Escapes a query fragment by replacing selected characters with
     * {@code -XX} hex codes.
     *
     * @param s the raw query fragment
     * @return the escaped fragment
     */
    private final String wikiurlify (String s) {
        // '-' is the escape character itself, so it has to be replaced first;
        // otherwise the dashes introduced by the later steps would be escaped again.
        return s.replace("-", "-2D")
                .replace("+", "-2B")
                .replace(" ", "-20")
                .replace("[", "-5B")
                .replace("]", "-5D")
                .replace(":", "-3A")
                .replace(">", "-3E")
                .replace("?", "-3F");
    }

    /**
     * Performs one import step (see the class description). Does nothing when
     * another call is still in progress or when
     * {@code contentcontrol.smwimport.enabled} is false.
     */
    @SuppressWarnings("deprecation")
    public final void run() {
        if (locked) {
            return;
        }
        locked = true;
        try {
            if (!sb.getConfigBool("contentcontrol.smwimport.enabled", false)) {
                return;
            }
            if (runningjob) {
                importNextPage();
            } else {
                checkForNewJob();
            }
        } finally {
            // Always release the flag: previously a disabled import or any
            // unchecked exception left it set and blocked all later calls.
            locked = false;
        }
    }

    /** Fetches the next page of the running job and stores its bookmarks. */
    @SuppressWarnings("deprecation")
    private void importNextPage() {
        Log.logInfo("CONTENTCONTROL",
                "CONTENTCONTROL importing max. " + limit
                        + " elements at " + offset + " of "
                        + currentmax + ", since "
                        + currenttimestamp);

        final String baseurl = sb.getConfig("contentcontrol.smwimport.baseurl", "");
        if (baseurl.equals("")) {
            return;
        }

        final URL bmks_json;
        try {
            bmks_json = new URL(
                    baseurl
                            + wikiurlify ("/[[Category:Web Page]] [[Modification date::>" + currenttimestamp + "]]")
                            + wikiurlify ("/?Url/?Filter/?Article has average rating/?Category")
                            + "/mainlabel%3D"
                            + "/offset%3D" + offset
                            + "/limit%3D" + limit
                            + "/format%3Djson");
        } catch (final MalformedURLException e) {
            Log.logException(e);
            return;
        }

        // Advance before fetching; the job ends once the offset passes the reported count.
        offset += limit;
        if (offset > currentmax) {
            runningjob = false;
        }

        InputStreamReader reader = null;
        try {
            reader = new InputStreamReader(bmks_json.openStream(), "UTF-8");
        } catch (final Exception e) {
            Log.logException(e);
            runningjob = false;
        }
        if (reader == null) {
            return;
        }

        try {
            final YMarkSMWJSONImporter bookmarkImporter;
            try {
                bookmarkImporter = new YMarkSMWJSONImporter(reader, 200, "");
            } catch (final Exception e) {
                // TODO: display an error message
                Log.logException(e);
                runningjob = false;
                // Without an importer there is nothing to consume.
                return;
            }

            final String targetlist = sb.getConfig(
                    "contentcontrol.smwimport.targetlist", "contentcontrol");

            final Thread t = new Thread(bookmarkImporter,
                    "YMarks - Network bookmark importer");
            t.start();

            YMarkEntry bmk;
            while ((bmk = bookmarkImporter.take()) != YMarkEntry.POISON) {
                if (bmk == YMarkEntry.EMPTY) {
                    runningjob = false;
                } else {
                    try {
                        sb.tables.bookmarks.addBookmark(targetlist, bmk, true, true);
                    } catch (final Exception e) {
                        Log.logException(e);
                    }
                }
            }
        } finally {
            // NOTE(review): assumes the importer has finished reading once it
            // delivered POISON — confirm in YMarkSMWJSONImporter.
            try {
                reader.close();
            } catch (final IOException e) {
                Log.logException(e);
            }
        }
    }

    /**
     * Asks the endpoint how many entries changed since {@code lastsync} and
     * starts a new paging job when the count is positive. The reply is
     * expected to have the form {@code <count>,<timestamp>}.
     */
    @SuppressWarnings("deprecation")
    private void checkForNewJob() {
        final String baseurl = sb.getConfig("contentcontrol.smwimport.baseurl", "");
        if (baseurl.equals("")) {
            Log.logWarning("CONTENTCONTROL", "No SMWimport URL defined");
            return;
        }

        try {
            final URL bmks_count = new URL(
                    baseurl
                            + wikiurlify ("/[[Category:Web Page]] [[Modification date::>" + lastsync + "]]")
                            + wikiurlify ("/?Url/?Filter/?Article has average rating/?Category")
                            + "/mainlabel%3D"
                            + "/format%3Dsupercount");

            final String reply = UTF8.String(new HTTPClient()
                    .GETbytes(bmks_count.toString()));

            final String[] parts = reply.split(",");
            if (parts.length < 2) {
                Log.logWarning("CONTENTCONTROL",
                        "CONTENTCONTROL unexpected count reply: " + reply);
                return;
            }

            final long count;
            try {
                count = Long.parseLong(parts[0].trim());
            } catch (final NumberFormatException e) {
                Log.logWarning("CONTENTCONTROL",
                        "CONTENTCONTROL unexpected count reply: " + reply);
                return;
            }
            final String lastsyncstring = parts[1].trim();

            currentmax = count;
            if (currentmax > 0) {
                // NOTE(review): currenttimestamp still holds the previous
                // job's bound when this message is built — confirm that is
                // the intended upper bound to report.
                Log.logInfo("CONTENTCONTROL",
                        "CONTENTCONTROL import job counts "
                                + currentmax
                                + " new elements between "
                                + lastsync + " and "
                                + currenttimestamp);

                // The new job pages from the old sync point; the next count
                // request starts from the timestamp the endpoint just reported.
                currenttimestamp = lastsync;
                runningjob = true;
                lastsync = lastsyncstring;
                offset = 0;
            }
        } catch (final MalformedURLException e) {
            Log.logException(e);
        } catch (final IOException e) {
            Log.logException(e);
        }
    }
}