|
|
|
@ -18,6 +18,8 @@
|
|
|
|
|
* If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
package net.yacy.htroot.api;
|
|
|
|
|
|
|
|
|
|
import java.awt.Container;
|
|
|
|
|
import java.awt.Image;
|
|
|
|
|
import java.awt.MediaTracker;
|
|
|
|
@ -89,30 +91,30 @@ public class snapshot {
|
|
|
|
|
defaultResponse.authenticationRequired();
|
|
|
|
|
return defaultResponse;
|
|
|
|
|
}
|
|
|
|
|
int maxcount = post == null ? 10 : post.getInt("maxcount", 10);
|
|
|
|
|
int depthx = post == null ? -1 : post.getInt("depth", -1);
|
|
|
|
|
Integer depth = depthx == -1 ? null : depthx;
|
|
|
|
|
String orderx = post == null ? "ANY" : post.get("order", "ANY");
|
|
|
|
|
Snapshots.Order order = Snapshots.Order.valueOf(orderx);
|
|
|
|
|
String statex = post == null ? Transactions.State.INVENTORY.name() : post.get("state", Transactions.State.INVENTORY.name());
|
|
|
|
|
Transactions.State state = Transactions.State.valueOf(statex);
|
|
|
|
|
String host = post == null ? null : post.get("host");
|
|
|
|
|
Map<String, Revisions> iddate = Transactions.select(host, depth, order, maxcount, state);
|
|
|
|
|
final int maxcount = post == null ? 10 : post.getInt("maxcount", 10);
|
|
|
|
|
final int depthx = post == null ? -1 : post.getInt("depth", -1);
|
|
|
|
|
final Integer depth = depthx == -1 ? null : depthx;
|
|
|
|
|
final String orderx = post == null ? "ANY" : post.get("order", "ANY");
|
|
|
|
|
final Snapshots.Order order = Snapshots.Order.valueOf(orderx);
|
|
|
|
|
final String statex = post == null ? Transactions.State.INVENTORY.name() : post.get("state", Transactions.State.INVENTORY.name());
|
|
|
|
|
final Transactions.State state = Transactions.State.valueOf(statex);
|
|
|
|
|
final String host = post == null ? null : post.get("host");
|
|
|
|
|
final Map<String, Revisions> iddate = Transactions.select(host, depth, order, maxcount, state);
|
|
|
|
|
// now select the URL from the index for these ids in iddate and make an RSS feed
|
|
|
|
|
RSSFeed rssfeed = new RSSFeed(Integer.MAX_VALUE);
|
|
|
|
|
final RSSFeed rssfeed = new RSSFeed(Integer.MAX_VALUE);
|
|
|
|
|
rssfeed.setChannel(new RSSMessage("Snapshot list for host = " + host + ", depth = " + depth + ", order = " + order + ", maxcount = " + maxcount, "", ""));
|
|
|
|
|
for (Map.Entry<String, Revisions> e: iddate.entrySet()) {
|
|
|
|
|
for (final Map.Entry<String, Revisions> e: iddate.entrySet()) {
|
|
|
|
|
try {
|
|
|
|
|
String u = e.getValue().url == null ? sb.index.fulltext().getURL(e.getKey()) : e.getValue().url;
|
|
|
|
|
final String u = e.getValue().url == null ? sb.index.fulltext().getURL(e.getKey()) : e.getValue().url;
|
|
|
|
|
if (u == null) continue;
|
|
|
|
|
RSSMessage message = new RSSMessage(u, "", new DigestURL(u), e.getKey());
|
|
|
|
|
final RSSMessage message = new RSSMessage(u, "", new DigestURL(u), e.getKey());
|
|
|
|
|
message.setPubDate(e.getValue().dates[0]);
|
|
|
|
|
rssfeed.addMessage(message);
|
|
|
|
|
} catch (IOException ee) {
|
|
|
|
|
} catch (final IOException ee) {
|
|
|
|
|
ConcurrentLog.logException(ee);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
byte[] rssBinary = UTF8.getBytes(rssfeed.toString());
|
|
|
|
|
final byte[] rssBinary = UTF8.getBytes(rssfeed.toString());
|
|
|
|
|
return new ByteArrayInputStream(rssBinary);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -126,34 +128,34 @@ public class snapshot {
|
|
|
|
|
}
|
|
|
|
|
final boolean pngjpg = ext.equals("png") || ext.equals(DEFAULT_EXT);
|
|
|
|
|
String urlhash = post.get("urlhash", "");
|
|
|
|
|
String url = post.get("url", "");
|
|
|
|
|
final String url = post.get("url", "");
|
|
|
|
|
DigestURL durl = null;
|
|
|
|
|
if (urlhash.length() == 0 && url.length() > 0) {
|
|
|
|
|
try {
|
|
|
|
|
durl = new DigestURL(url);
|
|
|
|
|
urlhash = ASCII.String(durl.hash());
|
|
|
|
|
} catch (MalformedURLException e) {
|
|
|
|
|
} catch (final MalformedURLException e) {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (durl == null && urlhash.length() > 0) {
|
|
|
|
|
try {
|
|
|
|
|
String u = sb.index.fulltext().getURL(urlhash);
|
|
|
|
|
final String u = sb.index.fulltext().getURL(urlhash);
|
|
|
|
|
durl = u == null ? null : new DigestURL(u);
|
|
|
|
|
} catch (IOException e) {
|
|
|
|
|
} catch (final IOException e) {
|
|
|
|
|
ConcurrentLog.logException(e);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (ext.equals("json")) {
|
|
|
|
|
// command interface: view and change a transaction state, get metadata about transactions in the past
|
|
|
|
|
String command = post.get("command", "metadata");
|
|
|
|
|
String statename = post.get("state");
|
|
|
|
|
JSONObject result = new JSONObject();
|
|
|
|
|
final String command = post.get("command", "metadata");
|
|
|
|
|
final String statename = post.get("state");
|
|
|
|
|
final JSONObject result = new JSONObject();
|
|
|
|
|
try {
|
|
|
|
|
if (command.equals("status")) {
|
|
|
|
|
// return a status of the transaction archive
|
|
|
|
|
JSONObject sizes = new JSONObject();
|
|
|
|
|
for (Map.Entry<String, Integer> state: Transactions.sizes().entrySet()) sizes.put(state.getKey(), state.getValue());
|
|
|
|
|
final JSONObject sizes = new JSONObject();
|
|
|
|
|
for (final Map.Entry<String, Integer> state: Transactions.sizes().entrySet()) sizes.put(state.getKey(), state.getValue());
|
|
|
|
|
result.put("size", sizes);
|
|
|
|
|
} else if (command.equals("list")) {
|
|
|
|
|
if (!authenticated) {
|
|
|
|
@ -161,36 +163,36 @@ public class snapshot {
|
|
|
|
|
return defaultResponse;
|
|
|
|
|
}
|
|
|
|
|
// list the transaction archive: per-host snapshot counts if no host is given, otherwise the revisions stored for that host
|
|
|
|
|
String host = post.get("host");
|
|
|
|
|
String depth = post.get("depth");
|
|
|
|
|
int depthi = depth == null ? -1 : Integer.parseInt(depth);
|
|
|
|
|
for (Transactions.State state: statename == null ?
|
|
|
|
|
final String host = post.get("host");
|
|
|
|
|
final String depth = post.get("depth");
|
|
|
|
|
final int depthi = depth == null ? -1 : Integer.parseInt(depth);
|
|
|
|
|
for (final Transactions.State state: statename == null ?
|
|
|
|
|
new Transactions.State[]{Transactions.State.INVENTORY, Transactions.State.ARCHIVE} :
|
|
|
|
|
new Transactions.State[]{Transactions.State.valueOf(statename)}) {
|
|
|
|
|
if (host == null) {
|
|
|
|
|
JSONObject hostCountInventory = new JSONObject();
|
|
|
|
|
for (String h: Transactions.listHosts(state)) {
|
|
|
|
|
int size = Transactions.listIDsSize(h, depthi, state);
|
|
|
|
|
final JSONObject hostCountInventory = new JSONObject();
|
|
|
|
|
for (final String h: Transactions.listHosts(state)) {
|
|
|
|
|
final int size = Transactions.listIDsSize(h, depthi, state);
|
|
|
|
|
if (size > 0) hostCountInventory.put(h, size);
|
|
|
|
|
}
|
|
|
|
|
result.put("count." + state.name(), hostCountInventory);
|
|
|
|
|
} else {
|
|
|
|
|
TreeMap<Integer, Collection<Revisions>> ids = Transactions.listIDs(host, depthi, state);
|
|
|
|
|
final TreeMap<Integer, Collection<Revisions>> ids = Transactions.listIDs(host, depthi, state);
|
|
|
|
|
if (ids == null) {
|
|
|
|
|
result.put("result", "fail");
|
|
|
|
|
result.put("comment", "no entries for host " + host + " found");
|
|
|
|
|
} else {
|
|
|
|
|
for (Map.Entry<Integer, Collection<Revisions>> entry: ids.entrySet()) {
|
|
|
|
|
for (Revisions r: entry.getValue()) {
|
|
|
|
|
for (final Map.Entry<Integer, Collection<Revisions>> entry: ids.entrySet()) {
|
|
|
|
|
for (final Revisions r: entry.getValue()) {
|
|
|
|
|
try {
|
|
|
|
|
JSONObject metadata = new JSONObject();
|
|
|
|
|
String u = r.url != null ? r.url : sb.index.fulltext().getURL(r.urlhash);
|
|
|
|
|
final JSONObject metadata = new JSONObject();
|
|
|
|
|
final String u = r.url != null ? r.url : sb.index.fulltext().getURL(r.urlhash);
|
|
|
|
|
metadata.put("url", u == null ? "unknown" : u);
|
|
|
|
|
metadata.put("dates", r.dates);
|
|
|
|
|
assert r.depth == entry.getKey().intValue();
|
|
|
|
|
metadata.put("depth", entry.getKey().intValue());
|
|
|
|
|
result.put(r.urlhash, metadata);
|
|
|
|
|
} catch (IOException e) {}
|
|
|
|
|
} catch (final IOException e) {}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
@ -201,7 +203,7 @@ public class snapshot {
|
|
|
|
|
defaultResponse.authenticationRequired();
|
|
|
|
|
return defaultResponse;
|
|
|
|
|
}
|
|
|
|
|
Revisions r = Transactions.commit(urlhash);
|
|
|
|
|
final Revisions r = Transactions.commit(urlhash);
|
|
|
|
|
if (r != null) {
|
|
|
|
|
result.put("result", "success");
|
|
|
|
|
result.put("depth", r.depth);
|
|
|
|
@ -216,7 +218,7 @@ public class snapshot {
|
|
|
|
|
defaultResponse.authenticationRequired();
|
|
|
|
|
return defaultResponse;
|
|
|
|
|
}
|
|
|
|
|
Revisions r = Transactions.rollback(urlhash);
|
|
|
|
|
final Revisions r = Transactions.rollback(urlhash);
|
|
|
|
|
if (r != null) {
|
|
|
|
|
result.put("result", "success");
|
|
|
|
|
result.put("depth", r.depth);
|
|
|
|
@ -239,8 +241,8 @@ public class snapshot {
|
|
|
|
|
r = Transactions.getRevisions(state, urlhash);
|
|
|
|
|
}
|
|
|
|
|
if (r != null) {
|
|
|
|
|
JSONObject metadata = new JSONObject();
|
|
|
|
|
String u = r.url != null ? r.url : sb.index.fulltext().getURL(r.urlhash);
|
|
|
|
|
final JSONObject metadata = new JSONObject();
|
|
|
|
|
final String u = r.url != null ? r.url : sb.index.fulltext().getURL(r.urlhash);
|
|
|
|
|
metadata.put("url", u == null ? "unknown" : u);
|
|
|
|
|
metadata.put("dates", r.dates);
|
|
|
|
|
metadata.put("depth", r.depth);
|
|
|
|
@ -249,7 +251,7 @@ public class snapshot {
|
|
|
|
|
}
|
|
|
|
|
} catch (IOException |IllegalArgumentException e) {}
|
|
|
|
|
}
|
|
|
|
|
} catch (JSONException e) {
|
|
|
|
|
} catch (final JSONException e) {
|
|
|
|
|
ConcurrentLog.logException(e);
|
|
|
|
|
}
|
|
|
|
|
String json = result.toString();
|
|
|
|
@ -263,14 +265,14 @@ public class snapshot {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (xml) {
|
|
|
|
|
Collection<File> xmlSnapshots = Transactions.findPaths(durl, "xml", Transactions.State.ANY);
|
|
|
|
|
final Collection<File> xmlSnapshots = Transactions.findPaths(durl, "xml", Transactions.State.ANY);
|
|
|
|
|
File xmlFile = null;
|
|
|
|
|
if (xmlSnapshots.isEmpty()) {
|
|
|
|
|
throw new TemplateProcessingException("Could not find the xml snapshot file.", HttpStatus.SC_NOT_FOUND);
|
|
|
|
|
}
|
|
|
|
|
xmlFile = xmlSnapshots.iterator().next();
|
|
|
|
|
try {
|
|
|
|
|
byte[] xmlBinary = FileUtils.read(xmlFile);
|
|
|
|
|
final byte[] xmlBinary = FileUtils.read(xmlFile);
|
|
|
|
|
return new ByteArrayInputStream(xmlBinary);
|
|
|
|
|
} catch (final IOException e) {
|
|
|
|
|
ConcurrentLog.logException(e);
|
|
|
|
@ -288,12 +290,12 @@ public class snapshot {
|
|
|
|
|
"Could not find the pdf snapshot file. You must be authenticated to generate one on the fly.",
|
|
|
|
|
HttpStatus.SC_NOT_FOUND);
|
|
|
|
|
}
|
|
|
|
|
SolrDocument sd = sb.index.fulltext().getMetadata(durl.hash());
|
|
|
|
|
final SolrDocument sd = sb.index.fulltext().getMetadata(durl.hash());
|
|
|
|
|
boolean success = false;
|
|
|
|
|
if (sd == null) {
|
|
|
|
|
success = Transactions.store(durl, new Date(), 99, false, true, sb.getConfigBool(SwitchboardConstants.PROXY_TRANSPARENT_PROXY, false) ? "http://127.0.0.1:" + sb.getConfigInt(SwitchboardConstants.SERVER_PORT, 8090) : null, sb.getConfig("crawler.http.acceptLanguage", null));
|
|
|
|
|
} else {
|
|
|
|
|
SolrInputDocument sid = sb.index.fulltext().getDefaultConfiguration().toSolrInputDocument(sd);
|
|
|
|
|
final SolrInputDocument sid = sb.index.fulltext().getDefaultConfiguration().toSolrInputDocument(sd);
|
|
|
|
|
success = Transactions.store(sid, false, true, true, sb.getConfigBool(SwitchboardConstants.PROXY_TRANSPARENT_PROXY, false) ? "http://127.0.0.1:" + sb.getConfigInt(SwitchboardConstants.SERVER_PORT, 8090) : null, sb.getConfig("crawler.http.acceptLanguage", null));
|
|
|
|
|
}
|
|
|
|
|
if (success) {
|
|
|
|
@ -312,7 +314,7 @@ public class snapshot {
|
|
|
|
|
}
|
|
|
|
|
if (pdf) {
|
|
|
|
|
try {
|
|
|
|
|
byte[] pdfBinary = FileUtils.read(pdfFile);
|
|
|
|
|
final byte[] pdfBinary = FileUtils.read(pdfFile);
|
|
|
|
|
return new ByteArrayInputStream(pdfBinary);
|
|
|
|
|
} catch (final IOException e) {
|
|
|
|
|
ConcurrentLog.logException(e);
|
|
|
|
@ -321,10 +323,10 @@ public class snapshot {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (pngjpg) {
|
|
|
|
|
int width = Math.min(post.getInt("width", DEFAULT_WIDTH), DEFAULT_WIDTH);
|
|
|
|
|
int height = Math.min(post.getInt("height", DEFAULT_HEIGHT), DEFAULT_HEIGHT);
|
|
|
|
|
final int width = Math.min(post.getInt("width", DEFAULT_WIDTH), DEFAULT_WIDTH);
|
|
|
|
|
final int height = Math.min(post.getInt("height", DEFAULT_HEIGHT), DEFAULT_HEIGHT);
|
|
|
|
|
String imageFileStub = pdfFile.getAbsolutePath(); imageFileStub = imageFileStub.substring(0, imageFileStub.length() - 3); // cut off extension
|
|
|
|
|
File imageFile = new File(imageFileStub + DEFAULT_WIDTH + "." + DEFAULT_HEIGHT + "." + ext);
|
|
|
|
|
final File imageFile = new File(imageFileStub + DEFAULT_WIDTH + "." + DEFAULT_HEIGHT + "." + ext);
|
|
|
|
|
if (!imageFile.exists() && authenticated) {
|
|
|
|
|
if(!Html2Image.pdf2image(pdfFile, imageFile, DEFAULT_WIDTH, DEFAULT_HEIGHT, DEFAULT_DENSITY, DEFAULT_QUALITY)) {
|
|
|
|
|
throw new TemplateProcessingException(
|
|
|
|
@ -339,7 +341,7 @@ public class snapshot {
|
|
|
|
|
}
|
|
|
|
|
if (width == DEFAULT_WIDTH && height == DEFAULT_HEIGHT) {
|
|
|
|
|
try {
|
|
|
|
|
byte[] imageBinary = FileUtils.read(imageFile);
|
|
|
|
|
final byte[] imageBinary = FileUtils.read(imageFile);
|
|
|
|
|
return new ByteArrayInputStream(imageBinary);
|
|
|
|
|
} catch (final IOException e) {
|
|
|
|
|
ConcurrentLog.logException(e);
|
|
|
|
@ -362,7 +364,7 @@ public class snapshot {
|
|
|
|
|
* Ensure there is no alpha component on the output image, as it is pointless
|
|
|
|
|
* here and it is not well supported by the JPEGImageWriter from OpenJDK
|
|
|
|
|
*/
|
|
|
|
|
BufferedImage scaledBufferedImg = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
|
|
|
|
|
final BufferedImage scaledBufferedImg = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
|
|
|
|
|
scaledBufferedImg.createGraphics().drawImage(scaled, 0, 0, width, height, null);
|
|
|
|
|
return new EncodedImage(scaledBufferedImg, ext, true);
|
|
|
|
|
} catch (final IOException e) {
|