diff --git a/htroot/api/webstructure.java b/htroot/api/webstructure.java index 776306c41..78d2f4cfd 100644 --- a/htroot/api/webstructure.java +++ b/htroot/api/webstructure.java @@ -22,15 +22,23 @@ // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +import java.io.IOException; import java.net.MalformedURLException; +import java.util.Date; import java.util.Iterator; import java.util.Map; +import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.document.ASCII; import net.yacy.cora.protocol.RequestHeader; +import net.yacy.kelondro.data.citation.CitationReference; import net.yacy.kelondro.data.meta.DigestURI; +import net.yacy.kelondro.order.Base64Order; +import net.yacy.kelondro.rwi.IndexCell; +import net.yacy.kelondro.rwi.ReferenceContainer; import net.yacy.peers.graphics.WebStructureGraph; import net.yacy.search.Switchboard; +import net.yacy.search.index.Segments; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -40,40 +48,85 @@ public class webstructure { public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { final serverObjects prop = new serverObjects(); final Switchboard sb = (Switchboard) env; - String about = post == null ? null : post.get("about", null); + String about = post == null ? null : post.get("about", null); // may be a URL, a URL hash or a domain hash prop.put("out", 0); prop.put("in", 0); + prop.put("citations", 0); + boolean authenticated = sb.adminAuthenticated(header) >= 2; if (about != null) { DigestURI url = null; - if (about.length() > 6) { + byte[] urlhash = null; + String hosthash = null; + if (about.length() == 6 && Base64Order.enhancedCoder.wellformed(ASCII.getBytes(about))) { + hosthash = about; + } else if (about.length() == 12 && Base64Order.enhancedCoder.wellformed(ASCII.getBytes(about))) { + urlhash = ASCII.getBytes(about); + hosthash = about.substring(6); + url = authenticated ? sb.getURL(Segments.Process.PUBLIC, urlhash) : null; + } else if (authenticated && about.length() > 0) { + // consider "about" as url or hostname try { - url = new DigestURI(about); - about = ASCII.String(url.hash(), 6, 6); + url = new DigestURI(about.indexOf("://") >= 0 ? about : "http://" + about); // accept also domains + urlhash = url.hash(); + hosthash = ASCII.String(urlhash, 6, 6); } catch (MalformedURLException e) { - about = null; } } - if (url != null && about != null) { - WebStructureGraph.StructureEntry sentry = sb.webStructure.outgoingReferences(about); + if (hosthash != null) { + prop.put("out", 1); + prop.put("in", 1); + WebStructureGraph.StructureEntry sentry = sb.webStructure.outgoingReferences(hosthash); if (sentry != null) { reference(prop, "out", 0, sentry, sb.webStructure); prop.put("out_domains", 1); - prop.put("out", 1); } else { prop.put("out_domains", 0); - prop.put("out", 1); } - sentry = sb.webStructure.incomingReferences(about); + sentry = sb.webStructure.incomingReferences(hosthash); if (sentry != null) { reference(prop, "in", 0, sentry, sb.webStructure); prop.put("in_domains", 1); - prop.put("in", 1); } else { prop.put("in_domains", 0); - prop.put("in", 1); } } - } else if (sb.adminAuthenticated(header) >= 2) { + if (urlhash != null) { + prop.put("citations", 1); + IndexCell citationReferences = sb.indexSegments.segment(Segments.Process.PUBLIC).urlCitation(); + ReferenceContainer citations = null; + // citationReferences.count(urlhash) would give to the number of references good for ranking + try { + citations = citationReferences.get(urlhash, null); + } catch (IOException e) { + } + if (citations != null) { + prop.put("citations_anchors", 1); + prop.put("citations_anchors_0_hash", urlhash); + prop.put("citations_anchors_0_citationscount", citations.size()); + prop.put("citations_anchors_0_date", GenericFormatter.SHORT_DAY_FORMATTER.format(new Date(citations.lastWrote()))); + prop.put("citations_anchors_0_urle", url == null ? 0 : 1); + if (url != null) prop.put("citations_anchors_0_urle_url", url.toNormalform(true, false)); + Iterator i = citations.entries(); + int d = 0; + CitationReference cr; + byte[] refhash; + DigestURI refurl; + while (i.hasNext()) { + cr = i.next(); + refhash = cr.urlhash(); + refurl = authenticated ? sb.getURL(Segments.Process.PUBLIC, refhash) : null; + prop.put("citations_anchors_0_citations_" + d + "_refurle", refurl == null ? 0 : 1); + if (refurl != null) prop.put("citations_anchors_0_citations_" + d + "_refurle_refurl", refurl.toNormalform(true, false)); + prop.put("citations_anchors_0_citations_" + d + "_refurle_refhash", refhash); + prop.put("citations_anchors_0_citations_" + d + "_refurle_refdate", GenericFormatter.SHORT_DAY_FORMATTER.format(new Date(cr.lastModified()))); + d++; + } + prop.put("citations_anchors_0_citations", d); + } else { + prop.put("citations_anchors", 0); + } + } + } else if (authenticated) { // show a complete list of link structure informations in case that the user is authenticated final boolean latest = ((post == null) ? false : post.containsKey("latest")); final Iterator i = sb.webStructure.structureEntryIterator(latest); diff --git a/htroot/api/webstructure.xml b/htroot/api/webstructure.xml index 578126e1e..a168273ab 100644 --- a/htroot/api/webstructure.xml +++ b/htroot/api/webstructure.xml @@ -22,4 +22,16 @@ #{/domains}# #(/in)# +#(citations)#:: + +#{anchors}# + + #(urle)#::#[url]##(/urle)# +#{citations}# + #(refurle)#::#[refurl]##(/refurle)# +#{/citations}# + +#{/anchors}# + +#(/citations)# \ No newline at end of file