|
|
@ -22,15 +22,23 @@
|
|
|
|
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import java.io.IOException;
|
|
|
|
import java.net.MalformedURLException;
|
|
|
|
import java.net.MalformedURLException;
|
|
|
|
|
|
|
|
import java.util.Date;
|
|
|
|
import java.util.Iterator;
|
|
|
|
import java.util.Iterator;
|
|
|
|
import java.util.Map;
|
|
|
|
import java.util.Map;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import net.yacy.cora.date.GenericFormatter;
|
|
|
|
import net.yacy.cora.document.ASCII;
|
|
|
|
import net.yacy.cora.document.ASCII;
|
|
|
|
import net.yacy.cora.protocol.RequestHeader;
|
|
|
|
import net.yacy.cora.protocol.RequestHeader;
|
|
|
|
|
|
|
|
import net.yacy.kelondro.data.citation.CitationReference;
|
|
|
|
import net.yacy.kelondro.data.meta.DigestURI;
|
|
|
|
import net.yacy.kelondro.data.meta.DigestURI;
|
|
|
|
|
|
|
|
import net.yacy.kelondro.order.Base64Order;
|
|
|
|
|
|
|
|
import net.yacy.kelondro.rwi.IndexCell;
|
|
|
|
|
|
|
|
import net.yacy.kelondro.rwi.ReferenceContainer;
|
|
|
|
import net.yacy.peers.graphics.WebStructureGraph;
|
|
|
|
import net.yacy.peers.graphics.WebStructureGraph;
|
|
|
|
import net.yacy.search.Switchboard;
|
|
|
|
import net.yacy.search.Switchboard;
|
|
|
|
|
|
|
|
import net.yacy.search.index.Segments;
|
|
|
|
|
|
|
|
|
|
|
|
import de.anomic.server.serverObjects;
|
|
|
|
import de.anomic.server.serverObjects;
|
|
|
|
import de.anomic.server.serverSwitch;
|
|
|
|
import de.anomic.server.serverSwitch;
|
|
|
@ -40,40 +48,85 @@ public class webstructure {
|
|
|
|
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
|
|
|
|
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
|
|
|
|
final serverObjects prop = new serverObjects();
|
|
|
|
final serverObjects prop = new serverObjects();
|
|
|
|
final Switchboard sb = (Switchboard) env;
|
|
|
|
final Switchboard sb = (Switchboard) env;
|
|
|
|
String about = post == null ? null : post.get("about", null);
|
|
|
|
String about = post == null ? null : post.get("about", null); // may be a URL, a URL hash or a domain hash
|
|
|
|
prop.put("out", 0);
|
|
|
|
prop.put("out", 0);
|
|
|
|
prop.put("in", 0);
|
|
|
|
prop.put("in", 0);
|
|
|
|
|
|
|
|
prop.put("citations", 0);
|
|
|
|
|
|
|
|
boolean authenticated = sb.adminAuthenticated(header) >= 2;
|
|
|
|
if (about != null) {
|
|
|
|
if (about != null) {
|
|
|
|
DigestURI url = null;
|
|
|
|
DigestURI url = null;
|
|
|
|
if (about.length() > 6) {
|
|
|
|
byte[] urlhash = null;
|
|
|
|
|
|
|
|
String hosthash = null;
|
|
|
|
|
|
|
|
if (about.length() == 6 && Base64Order.enhancedCoder.wellformed(ASCII.getBytes(about))) {
|
|
|
|
|
|
|
|
hosthash = about;
|
|
|
|
|
|
|
|
} else if (about.length() == 12 && Base64Order.enhancedCoder.wellformed(ASCII.getBytes(about))) {
|
|
|
|
|
|
|
|
urlhash = ASCII.getBytes(about);
|
|
|
|
|
|
|
|
hosthash = about.substring(6);
|
|
|
|
|
|
|
|
url = authenticated ? sb.getURL(Segments.Process.PUBLIC, urlhash) : null;
|
|
|
|
|
|
|
|
} else if (authenticated && about.length() > 0) {
|
|
|
|
|
|
|
|
// consider "about" as url or hostname
|
|
|
|
try {
|
|
|
|
try {
|
|
|
|
url = new DigestURI(about);
|
|
|
|
url = new DigestURI(about.indexOf("://") >= 0 ? about : "http://" + about); // accept also domains
|
|
|
|
about = ASCII.String(url.hash(), 6, 6);
|
|
|
|
urlhash = url.hash();
|
|
|
|
|
|
|
|
hosthash = ASCII.String(urlhash, 6, 6);
|
|
|
|
} catch (MalformedURLException e) {
|
|
|
|
} catch (MalformedURLException e) {
|
|
|
|
about = null;
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (url != null && about != null) {
|
|
|
|
if (hosthash != null) {
|
|
|
|
WebStructureGraph.StructureEntry sentry = sb.webStructure.outgoingReferences(about);
|
|
|
|
prop.put("out", 1);
|
|
|
|
|
|
|
|
prop.put("in", 1);
|
|
|
|
|
|
|
|
WebStructureGraph.StructureEntry sentry = sb.webStructure.outgoingReferences(hosthash);
|
|
|
|
if (sentry != null) {
|
|
|
|
if (sentry != null) {
|
|
|
|
reference(prop, "out", 0, sentry, sb.webStructure);
|
|
|
|
reference(prop, "out", 0, sentry, sb.webStructure);
|
|
|
|
prop.put("out_domains", 1);
|
|
|
|
prop.put("out_domains", 1);
|
|
|
|
prop.put("out", 1);
|
|
|
|
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
prop.put("out_domains", 0);
|
|
|
|
prop.put("out_domains", 0);
|
|
|
|
prop.put("out", 1);
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
sentry = sb.webStructure.incomingReferences(about);
|
|
|
|
sentry = sb.webStructure.incomingReferences(hosthash);
|
|
|
|
if (sentry != null) {
|
|
|
|
if (sentry != null) {
|
|
|
|
reference(prop, "in", 0, sentry, sb.webStructure);
|
|
|
|
reference(prop, "in", 0, sentry, sb.webStructure);
|
|
|
|
prop.put("in_domains", 1);
|
|
|
|
prop.put("in_domains", 1);
|
|
|
|
prop.put("in", 1);
|
|
|
|
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
prop.put("in_domains", 0);
|
|
|
|
prop.put("in_domains", 0);
|
|
|
|
prop.put("in", 1);
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if (sb.adminAuthenticated(header) >= 2) {
|
|
|
|
if (urlhash != null) {
|
|
|
|
|
|
|
|
prop.put("citations", 1);
|
|
|
|
|
|
|
|
IndexCell<CitationReference> citationReferences = sb.indexSegments.segment(Segments.Process.PUBLIC).urlCitation();
|
|
|
|
|
|
|
|
ReferenceContainer<CitationReference> citations = null;
|
|
|
|
|
|
|
|
// citationReferences.count(urlhash) would give to the number of references good for ranking
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
citations = citationReferences.get(urlhash, null);
|
|
|
|
|
|
|
|
} catch (IOException e) {
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if (citations != null) {
|
|
|
|
|
|
|
|
prop.put("citations_anchors", 1);
|
|
|
|
|
|
|
|
prop.put("citations_anchors_0_hash", urlhash);
|
|
|
|
|
|
|
|
prop.put("citations_anchors_0_citationscount", citations.size());
|
|
|
|
|
|
|
|
prop.put("citations_anchors_0_date", GenericFormatter.SHORT_DAY_FORMATTER.format(new Date(citations.lastWrote())));
|
|
|
|
|
|
|
|
prop.put("citations_anchors_0_urle", url == null ? 0 : 1);
|
|
|
|
|
|
|
|
if (url != null) prop.put("citations_anchors_0_urle_url", url.toNormalform(true, false));
|
|
|
|
|
|
|
|
Iterator<CitationReference> i = citations.entries();
|
|
|
|
|
|
|
|
int d = 0;
|
|
|
|
|
|
|
|
CitationReference cr;
|
|
|
|
|
|
|
|
byte[] refhash;
|
|
|
|
|
|
|
|
DigestURI refurl;
|
|
|
|
|
|
|
|
while (i.hasNext()) {
|
|
|
|
|
|
|
|
cr = i.next();
|
|
|
|
|
|
|
|
refhash = cr.urlhash();
|
|
|
|
|
|
|
|
refurl = authenticated ? sb.getURL(Segments.Process.PUBLIC, refhash) : null;
|
|
|
|
|
|
|
|
prop.put("citations_anchors_0_citations_" + d + "_refurle", refurl == null ? 0 : 1);
|
|
|
|
|
|
|
|
if (refurl != null) prop.put("citations_anchors_0_citations_" + d + "_refurle_refurl", refurl.toNormalform(true, false));
|
|
|
|
|
|
|
|
prop.put("citations_anchors_0_citations_" + d + "_refurle_refhash", refhash);
|
|
|
|
|
|
|
|
prop.put("citations_anchors_0_citations_" + d + "_refurle_refdate", GenericFormatter.SHORT_DAY_FORMATTER.format(new Date(cr.lastModified())));
|
|
|
|
|
|
|
|
d++;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
prop.put("citations_anchors_0_citations", d);
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
prop.put("citations_anchors", 0);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
} else if (authenticated) {
|
|
|
|
// show a complete list of link structure informations in case that the user is authenticated
|
|
|
|
// show a complete list of link structure informations in case that the user is authenticated
|
|
|
|
final boolean latest = ((post == null) ? false : post.containsKey("latest"));
|
|
|
|
final boolean latest = ((post == null) ? false : post.containsKey("latest"));
|
|
|
|
final Iterator<WebStructureGraph.StructureEntry> i = sb.webStructure.structureEntryIterator(latest);
|
|
|
|
final Iterator<WebStructureGraph.StructureEntry> i = sb.webStructure.structureEntryIterator(latest);
|
|
|
|