diff --git a/htroot/HostBrowser.html b/htroot/HostBrowser.html new file mode 100644 index 000000000..036867997 --- /dev/null +++ b/htroot/HostBrowser.html @@ -0,0 +1,118 @@ + + + + + YaCy '#[clientname]#': URL References Administration + #%env/templates/metas.template%# + + + + #%env/templates/header.template%# + #%env/templates/submenuIndexControl.template%# +

URL References Administration

+

The local index currently contains #[ucount]# URL references

+
+
URL Retrieval +
+ +
Retrieve by URL:
+
+
+
+
+ +
+
+
+ +
+ + #(statisticslines)#:: +

Statistics about the top-#[domains]# domains in the database:

+ + + + + + #{domains}# + + + + + #{/domains}# +
DomainURLs
#[domain]##[count]#
+ #(/statisticslines)# + + + #(genUrlProfile)# + ::No entry found for URL-hash #[urlhash]# + ::
+
+ + API + These document details can be retrieved as XHTML+RDFa + document containing RDF annotations in Dublin Core vocabulary. + The XHTML+RDFa data format is both an XML content format and an HTML display format and is considered an important Semantic Web content format. + The same content can also be retrieved as pure XML metadata with DC tag name vocabulary. + Click the API icon to see an example call to the search RSS API. + To see a list of all APIs, please visit the API wiki page. +
+ #(/genUrlProfile)# + + #[result]# + + #%env/templates/footer.template%# + + \ No newline at end of file diff --git a/htroot/HostBrowser.java b/htroot/HostBrowser.java new file mode 100644 index 000000000..5072cf25b --- /dev/null +++ b/htroot/HostBrowser.java @@ -0,0 +1,107 @@ + +import java.net.MalformedURLException; + +import net.yacy.cora.document.ASCII; +import net.yacy.cora.protocol.RequestHeader; +import net.yacy.kelondro.data.meta.DigestURI; +import net.yacy.kelondro.data.meta.URIMetadata; +import net.yacy.kelondro.data.word.Word; +import net.yacy.search.Switchboard; +import net.yacy.search.index.Segment; +import net.yacy.server.serverObjects; +import net.yacy.server.serverSwitch; + +public class HostBrowser { + + public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) { + // return variable that accumulates replacements + final Switchboard sb = (Switchboard) env; + + final serverObjects prop = new serverObjects(); + + Segment segment = sb.index; + + // set default values + prop.put("urlstring", ""); + prop.put("urlhash", ""); + prop.put("result", ""); + prop.putNum("ucount", segment.fulltext().size()); + prop.put("otherHosts", ""); + prop.put("genUrlProfile", 0); + prop.put("statistics", 1); + prop.put("statistics_lines", 100); + prop.put("statisticslines", 0); + + if (post == null || env == null) { + return prop; // nothing to do + } + + // post values that are set on numerous input fields with same name + String urlstring = post.get("urlstring", "").trim(); + + if (!urlstring.startsWith("http://") && + !urlstring.startsWith("https://") && + !urlstring.startsWith("ftp://") && + !urlstring.startsWith("smb://") && + !urlstring.startsWith("file://")) { urlstring = "http://" + urlstring; } + + prop.putHTML("urlstring", urlstring); + prop.put("result", " "); + + if (post.containsKey("urlstringsearch")) { + try { + final DigestURI url = new DigestURI(urlstring); + String 
urlhash = ASCII.String(url.hash()); + prop.put("urlhash", urlhash); + final URIMetadata entry = segment.fulltext().getMetadata(ASCII.getBytes(urlhash)); + if (entry == null) { + prop.putHTML("result", "No Entry for URL " + url.toNormalform(true, true)); + prop.putHTML("urlstring", urlstring); + prop.put("urlhash", ""); + } else { + prop.putAll(genUrlProfile(segment, entry, urlhash)); + prop.put("statistics", 0); + } + } catch (final MalformedURLException e) { + prop.putHTML("result", "bad url: " + urlstring); + prop.put("urlhash", ""); + } + prop.put("lurlexport", 0); + } + + // insert constants + prop.putNum("ucount", segment.fulltext().size()); + // return rewrite properties + return prop; + } + + private static serverObjects genUrlProfile(final Segment segment, final URIMetadata entry, final String urlhash) { + final serverObjects prop = new serverObjects(); + if (entry == null) { + prop.put("genUrlProfile", "1"); + prop.put("genUrlProfile_urlhash", urlhash); + return prop; + } + final URIMetadata le = (entry.referrerHash() == null || entry.referrerHash().length != Word.commonHashLength) ? null : segment.fulltext().getMetadata(entry.referrerHash()); + if (entry.url() == null) { + prop.put("genUrlProfile", "1"); + prop.put("genUrlProfile_urlhash", urlhash); + return prop; + } + prop.put("genUrlProfile", "2"); + prop.putHTML("genUrlProfile_urlNormalform", entry.url().toNormalform(false, true)); + prop.put("genUrlProfile_urlhash", urlhash); + prop.put("genUrlProfile_urlDescr", entry.dc_title()); + prop.put("genUrlProfile_moddate", entry.moddate().toString()); + prop.put("genUrlProfile_loaddate", entry.loaddate().toString()); + prop.put("genUrlProfile_referrer", (le == null) ? 0 : 1); + prop.putHTML("genUrlProfile_referrer_url", (le == null) ? "" : le.url().toNormalform(false, true)); + prop.put("genUrlProfile_referrer_hash", (le == null) ? 
"" : ASCII.String(le.hash())); + prop.put("genUrlProfile_doctype", String.valueOf(entry.doctype())); + prop.put("genUrlProfile_language", entry.language()); + prop.put("genUrlProfile_size", entry.size()); + prop.put("genUrlProfile_wordCount", entry.wordCount()); + return prop; + } + +}