From bd886054cb0ee3e4f562e88ec2127d432d185359 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Wed, 9 Apr 2014 12:45:04 +0200 Subject: [PATCH] new structure and enhancements for link graph computation: - added an order option to solr queries so that document lists can be retrieved in a specific order, here: by link length - added the HyperlinkGraph class, which manages the link structure - integrated the HyperlinkGraph class into the clickdepth computation - extended the linkstructure.json servlet to also show the clickdepth and other statistical information --- htroot/HostBrowser.java | 2 +- htroot/IndexDeletion_p.java | 2 +- htroot/api/citation.java | 2 +- htroot/api/linkstructure.java | 119 ++--------- htroot/api/linkstructure.json | 12 +- htroot/js/hypertree.js | 4 +- .../opensearch/OpenSearchConnector.java | 4 +- .../federate/solr/SchemaConfiguration.java | 2 +- .../solr/connector/AbstractSolrConnector.java | 42 +++- .../solr/connector/CachedSolrConnector.java | 6 +- .../ConcurrentUpdateSolrConnector.java | 12 +- .../solr/connector/EmbeddedSolrConnector.java | 19 +- .../solr/connector/MirrorSolrConnector.java | 20 +- .../solr/connector/SolrConnector.java | 12 +- source/net/yacy/search/index/ErrorCache.java | 2 +- source/net/yacy/search/index/Fulltext.java | 4 +- .../search/index/ReindexSolrBusyThread.java | 2 +- source/net/yacy/search/index/Segment.java | 39 ++-- .../schema/CollectionConfiguration.java | 8 +- .../net/yacy/search/schema/HyperlinkEdge.java | 12 ++ .../yacy/search/schema/HyperlinkGraph.java | 197 ++++++++++++++++++ 21 files changed, 346 insertions(+), 176 deletions(-) create mode 100644 source/net/yacy/search/schema/HyperlinkGraph.java diff --git a/htroot/HostBrowser.java b/htroot/HostBrowser.java index 114fa3824..dbcc56bba 100644 --- a/htroot/HostBrowser.java +++ b/htroot/HostBrowser.java @@ -288,7 +288,7 @@ public class HostBrowser { q.append(" AND ").append(CollectionSchema.url_paths_sxt.getSolrFieldName()).append(AbstractSolrConnector.CATCHALL_DTERM); } } - BlockingQueue docs = fulltext.getDefaultConnector().concurrentDocumentsByQuery(q.toString(), 0, 100000, TIMEOUT, 100, 1, + BlockingQueue docs = fulltext.getDefaultConnector().concurrentDocumentsByQuery(q.toString(), CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 100000, TIMEOUT, 100, 1, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName(), CollectionSchema.failreason_s.getSolrFieldName(), diff --git a/htroot/IndexDeletion_p.java b/htroot/IndexDeletion_p.java index e569e8116..224ae41fd 100644 --- a/htroot/IndexDeletion_p.java +++ b/htroot/IndexDeletion_p.java @@ -130,7 +130,7 @@ public class IndexDeletion_p { } try { DigestURL u = new DigestURL(urlStub); - BlockingQueue dq = defaultConnector.concurrentDocumentsByQuery(CollectionSchema.host_s.getSolrFieldName() + ":\"" + u.getHost() + "\"", 0, 100000000, Long.MAX_VALUE, 100, 1, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName()); + BlockingQueue dq = defaultConnector.concurrentDocumentsByQuery(CollectionSchema.host_s.getSolrFieldName() + ":\"" + u.getHost() + "\"", null, 0, 100000000, Long.MAX_VALUE, 100, 1, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName()); SolrDocument doc; try { while ((doc = dq.take()) != AbstractSolrConnector.POISON_DOCUMENT) { diff --git a/htroot/api/citation.java b/htroot/api/citation.java index 2d76f3fc9..079efab2d 100644 --- a/htroot/api/citation.java +++ b/htroot/api/citation.java @@ -127,7 +127,7 @@ public class citation { } try { sentence =
sentence.replace('"', '\''); - SolrDocumentList doclist = connector.getDocumentListByQuery("text_t:\"" + sentence + "\"", 0, 100, CollectionSchema.sku.getSolrFieldName()); + SolrDocumentList doclist = connector.getDocumentListByQuery("text_t:\"" + sentence + "\"", CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 100, CollectionSchema.sku.getSolrFieldName()); int count = (int) doclist.getNumFound(); if (count > 0) { Set list = new TreeSet(); diff --git a/htroot/api/linkstructure.java b/htroot/api/linkstructure.java index e988c18f3..4db2f96fd 100644 --- a/htroot/api/linkstructure.java +++ b/htroot/api/linkstructure.java @@ -17,29 +17,18 @@ // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - import java.net.MalformedURLException; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.Map; -import java.util.concurrent.BlockingQueue; - -import org.apache.solr.common.SolrDocument; import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.id.DigestURL; -import net.yacy.cora.federate.solr.FailType; -import net.yacy.cora.federate.solr.connector.AbstractSolrConnector; import net.yacy.cora.order.Base64Order; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.ResponseHeader; -import net.yacy.kelondro.data.meta.URIMetadataNode; import net.yacy.search.Switchboard; import net.yacy.search.index.Fulltext; -import net.yacy.search.schema.CollectionSchema; import net.yacy.search.schema.HyperlinkEdge; +import net.yacy.search.schema.HyperlinkGraph; import net.yacy.server.serverObjects; import net.yacy.server.serverSwitch; import net.yacy.server.servletProperties; @@ -59,8 +48,8 @@ public class linkstructure { String about = post.get("about", null); // may be a URL, a URL hash or a domain hash if (about == null) return prop; boolean authenticated = sb.adminAuthenticated(header) >= 2; - int maxtime = Math.min(post.getInt("maxtime", 1000), authenticated ? 60000 : 1000); - int maxnodes = Math.min(post.getInt("maxnodes", 100), authenticated ? 1000 : 100); + int maxtime = Math.min(post.getInt("maxtime", 1000), authenticated ? 300000 : 1000); + int maxnodes = Math.min(post.getInt("maxnodes", 100), authenticated ? 10000000 : 100); DigestURL url = null; String hostname = null; @@ -72,104 +61,32 @@ public class linkstructure { try { url = new DigestURL(about.indexOf("://") >= 0 ? 
about : "http://" + about); // accept also domains hostname = url.getHost(); - if (hostname.startsWith("www.")) hostname = hostname.substring(4); } catch (final MalformedURLException e) { } } if (hostname == null) return prop; // now collect _all_ documents inside the domain until a timeout appears - StringBuilder q = new StringBuilder(); - q.append(CollectionSchema.host_s.getSolrFieldName()).append(':').append(hostname).append(" OR ").append(CollectionSchema.host_s.getSolrFieldName()).append(':').append("www.").append(hostname); - BlockingQueue docs = fulltext.getDefaultConnector().concurrentDocumentsByQuery(q.toString(), 0, maxnodes, maxtime, 100, 1, - CollectionSchema.id.getSolrFieldName(), - CollectionSchema.sku.getSolrFieldName(), - CollectionSchema.failreason_s.getSolrFieldName(), - CollectionSchema.failtype_s.getSolrFieldName(), - CollectionSchema.inboundlinks_protocol_sxt.getSolrFieldName(), - CollectionSchema.inboundlinks_urlstub_sxt.getSolrFieldName(), - CollectionSchema.outboundlinks_protocol_sxt.getSolrFieldName(), - CollectionSchema.outboundlinks_urlstub_sxt.getSolrFieldName() - ); - SolrDocument doc; - Map errorDocs = new HashMap(); - Map inboundEdges = new HashMap(); - Map outboundEdges = new HashMap(); - Map errorEdges = new HashMap(); - try { - while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) { - String u = (String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName()); - String ids = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName()); - DigestURL from = new DigestURL(u, ASCII.getBytes(ids)); - String errortype = (String) doc.getFieldValue(CollectionSchema.failtype_s.getSolrFieldName()); - FailType error = errortype == null ? null : FailType.valueOf(errortype); - if (error != null) { - errorDocs.put(u, error); - } else { - Iterator links = URIMetadataNode.getLinks(doc, true); // inbound - String link; - while (links.hasNext()) { - link = links.next(); - try { - DigestURL linkurl = new DigestURL(link, null); - String edgehash = ids + ASCII.String(linkurl.hash()); - inboundEdges.put(edgehash, new HyperlinkEdge(from, linkurl, HyperlinkEdge.Type.Inbound)); - } catch (MalformedURLException e) {} - } - links = URIMetadataNode.getLinks(doc, false); // outbound - while (links.hasNext()) { - link = links.next(); - try { - DigestURL linkurl = new DigestURL(link, null); - String edgehash = ids + ASCII.String(linkurl.hash()); - outboundEdges.put(edgehash, new HyperlinkEdge(from, linkurl, HyperlinkEdge.Type.Outbound)); - } catch (MalformedURLException e) {} - } - } - if (inboundEdges.size() + outboundEdges.size() > maxnodes) break; - } - } catch (InterruptedException e) { - } catch (MalformedURLException e) { - } - // we use the errorDocs to mark all edges with endpoint to error documents - Iterator> i = inboundEdges.entrySet().iterator(); - Map.Entry edge; - while (i.hasNext()) { - edge = i.next(); - if (errorDocs.containsKey(edge.getValue().target.toNormalform(true))) { - i.remove(); - edge.getValue().type = HyperlinkEdge.Type.Dead; - errorEdges.put(edge.getKey(), edge.getValue()); - } - } - i = outboundEdges.entrySet().iterator(); - while (i.hasNext()) { - edge = i.next(); - if (errorDocs.containsKey(edge.getValue().target.toNormalform(true))) { - i.remove(); - edge.getValue().type = HyperlinkEdge.Type.Dead; - errorEdges.put(edge.getKey(), edge.getValue()); - } - } - // we put all edges together in a specific order which is used to create nodes in a svg display: - // notes that appear first are possible painted over by nodes coming later. 
- // less important nodes shall appear therefore first - Map edges = new LinkedHashMap(); - edges.putAll(outboundEdges); - edges.putAll(inboundEdges); - edges.putAll(errorEdges); + HyperlinkGraph hlg = new HyperlinkGraph(); + hlg.fill(fulltext.getDefaultConnector(), hostname, maxtime, maxnodes); + int maxdepth = hlg.findLinkDepth(); // finally just write out the edge array int c = 0; - for (Map.Entry e: edges.entrySet()) { - prop.putJSON("list_" + c + "_source", e.getValue().source.getPath()); - prop.putJSON("list_" + c + "_target", e.getValue().type.equals(HyperlinkEdge.Type.Outbound) ? e.getValue().target.toNormalform(true) : e.getValue().target.getPath()); - prop.putJSON("list_" + c + "_type", e.getValue().type.name()); - prop.put("list_" + c + "_eol", 1); + for (HyperlinkEdge e: hlg) { + prop.putJSON("edges_" + c + "_source", e.source.getPath()); + prop.putJSON("edges_" + c + "_target", e.type.equals(HyperlinkEdge.Type.Outbound) ? e.target.toNormalform(true) : e.target.getPath()); + prop.putJSON("edges_" + c + "_type", e.type.name()); + Integer depth_source = hlg.getDepth(e.source); + Integer depth_target = hlg.getDepth(e.target); + prop.put("edges_" + c + "_depthSource", depth_source == null ? -1 : depth_source.intValue()); + prop.put("edges_" + c + "_depthTarget", depth_target == null ? -1 : depth_target.intValue()); + prop.put("edges_" + c + "_eol", 1); c++; } - prop.put("list_" + (c-1) + "_eol", 0); - prop.put("list", c); + prop.put("edges_" + (c-1) + "_eol", 0); + prop.put("edges", c); + prop.put("maxdepth", maxdepth); // Adding CORS Access header for xml output if (xml) { diff --git a/htroot/api/linkstructure.json b/htroot/api/linkstructure.json index 84cec3aec..9421a56b0 100644 --- a/htroot/api/linkstructure.json +++ b/htroot/api/linkstructure.json @@ -1,5 +1,7 @@ -[ -#{list}# -{"source":"#[source]#", "target":"#[target]#", "type":"#[type]#"}#(eol)#::,#(/eol)# -#{/list}# -] \ No newline at end of file +{ +"edges" : "#[edges]#", +"maxdepth" : "#[maxdepth]#", +"graph" : [#{edges}# +{"source":"#[source]#", "target":"#[target]#", "type":"#[type]#", "depthSource":"#[depthSource]#", "depthTarget":"#[depthTarget]#"}#(eol)#::,#(/eol)# +#{/edges}#] +} \ No newline at end of file diff --git a/htroot/js/hypertree.js b/htroot/js/hypertree.js index d05b931b1..ef0014877 100644 --- a/htroot/js/hypertree.js +++ b/htroot/js/hypertree.js @@ -1,7 +1,9 @@ function linkstructure(hostname, element, width, height, maxtime, maxnodes) { var nodes = {}; var links = []; - $.getJSON("/api/linkstructure.json?about=" + hostname + "&maxtime=" + maxtime + "&maxnodes=" + maxnodes, function(links) { + var linkstructure = {}; + $.getJSON("/api/linkstructure.json?about=" + hostname + "&maxtime=" + maxtime + "&maxnodes=" + maxnodes, function(linkstructure) { + links = linkstructure.graph; links.forEach(function(link) { link.source = nodes[link.source] || (nodes[link.source] = {name: link.source, type:"Inbound"}); link.target = nodes[link.target] || (nodes[link.target] = {name: link.target, type:link.type}); diff --git a/source/net/yacy/cora/federate/opensearch/OpenSearchConnector.java b/source/net/yacy/cora/federate/opensearch/OpenSearchConnector.java index 14ced0fed..9590da53b 100644 --- a/source/net/yacy/cora/federate/opensearch/OpenSearchConnector.java +++ b/source/net/yacy/cora/federate/opensearch/OpenSearchConnector.java @@ -201,7 +201,7 @@ public class OpenSearchConnector { final long numfound; try { - SolrDocumentList docList = connector.getDocumentListByQuery(webgraphquerystr, 0, 1, webgraphqueryfields); + 
SolrDocumentList docList = connector.getDocumentListByQuery(webgraphquerystr, null, 0, 1, webgraphqueryfields); numfound = docList.getNumFound(); if (numfound == 0) { ConcurrentLog.info("OpenSearchConnector.Discover", "no results found, abort discover job"); @@ -226,7 +226,7 @@ public class OpenSearchConnector { Set dblmem = new HashSet(); // temp memory for already checked url while (doloop) { ConcurrentLog.info("OpenSearchConnector.Discover", "start Solr query loop at " + Integer.toString(loopnr * 20) + " of " + Long.toString(numfound)); - SolrDocumentList docList = connector.getDocumentListByQuery(webgraphquerystr, loopnr * 20, 20,webgraphqueryfields); // check chunk of 20 result documents + SolrDocumentList docList = connector.getDocumentListByQuery(webgraphquerystr, null, loopnr * 20, 20,webgraphqueryfields); // check chunk of 20 result documents loopnr++; if (stoptime < System.currentTimeMillis()) {// stop after max 1h doloop = false; diff --git a/source/net/yacy/cora/federate/solr/SchemaConfiguration.java b/source/net/yacy/cora/federate/solr/SchemaConfiguration.java index 3e6601a55..10805fbb5 100644 --- a/source/net/yacy/cora/federate/solr/SchemaConfiguration.java +++ b/source/net/yacy/cora/federate/solr/SchemaConfiguration.java @@ -158,7 +158,7 @@ public class SchemaConfiguration extends Configuration implements Serializable { continue uniquecheck; } try { - final SolrDocumentList docs = segment.fulltext().getDefaultConnector().getDocumentListByQuery(CollectionSchema.host_id_s + ":\"" + hostid + "\" AND " + signaturefield.getSolrFieldName() + ":\"" + checkhash.toString() + "\"", 0, 1); + final SolrDocumentList docs = segment.fulltext().getDefaultConnector().getDocumentListByQuery(CollectionSchema.host_id_s + ":\"" + hostid + "\" AND " + signaturefield.getSolrFieldName() + ":\"" + checkhash.toString() + "\"", null, 0, 1); if (docs != null && !docs.isEmpty()) { SolrDocument doc = docs.get(0); // switch unique attribute in new document diff --git a/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java index ad0dc0ef4..cb8313aef 100644 --- a/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java @@ -134,6 +134,7 @@ public abstract class AbstractSolrConnector implements SolrConnector { * The result queue is considered as terminated if AbstractSolrConnector.POISON_DOCUMENT is returned. 
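A minimal consumer of this changed API might look as follows; the connector variable, the host, and the limits are invented for illustration, and the sort string mirrors the url_chars_i ordering that the callers in this patch use:

    BlockingQueue<SolrDocument> docs = connector.concurrentDocumentsByQuery(
            CollectionSchema.host_s.getSolrFieldName() + ":example.org",  // query: all documents of one host
            CollectionSchema.url_chars_i.getSolrFieldName() + " asc",     // sort: shortest URLs first
            0, 1000, 10000, 100, 1,                                       // offset, maxcount, maxtime, buffersize, concurrency
            CollectionSchema.sku.getSolrFieldName());                     // field list
    try {
        SolrDocument doc;
        while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
            // process each document here
        }
    } catch (final InterruptedException e) {}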
* The method returns immediately and feeds the search results into the queue * @param querystring the solr query string + * @param sort the solr sort string, may be null to be not used * @param offset first result offset * @param maxcount the maximum number of results * @param maxtime the maximum time in milliseconds @@ -144,6 +145,7 @@ public abstract class AbstractSolrConnector implements SolrConnector { @Override public BlockingQueue concurrentDocumentsByQuery( final String querystring, + final String sort, final int offset, final int maxcount, final long maxtime, @@ -160,7 +162,7 @@ public abstract class AbstractSolrConnector implements SolrConnector { int count = 0; while (System.currentTimeMillis() < endtime && count < maxcount) { try { - SolrDocumentList sdl = getDocumentListByQuery(querystring, o, Math.min(maxcount, pagesize), fields); + SolrDocumentList sdl = getDocumentListByQuery(querystring, sort, o, Math.min(maxcount, pagesize), fields); for (SolrDocument d: sdl) { try {queue.put(d);} catch (final InterruptedException e) {break;} count++; @@ -185,6 +187,7 @@ public abstract class AbstractSolrConnector implements SolrConnector { @Override public BlockingQueue concurrentIDsByQuery( final String querystring, + final String sort, final int offset, final int maxcount, final long maxtime, @@ -199,7 +202,7 @@ public abstract class AbstractSolrConnector implements SolrConnector { int o = offset; while (System.currentTimeMillis() < endtime) { try { - SolrDocumentList sdl = getDocumentListByQuery(querystring, o, Math.min(maxcount, pagesize), CollectionSchema.id.getSolrFieldName()); + SolrDocumentList sdl = getDocumentListByQuery(querystring, sort, o, Math.min(maxcount, pagesize), CollectionSchema.id.getSolrFieldName()); for (SolrDocument d: sdl) { try {queue.put((String) d.getFieldValue(CollectionSchema.id.getSolrFieldName()));} catch (final InterruptedException e) {break;} } @@ -222,7 +225,7 @@ public abstract class AbstractSolrConnector implements SolrConnector { @Override public Iterator iterator() { - final BlockingQueue queue = concurrentIDsByQuery(CATCHALL_QUERY, 0, Integer.MAX_VALUE, 60000, 2 * pagesize, 1); + final BlockingQueue queue = concurrentIDsByQuery(CATCHALL_QUERY, null, 0, Integer.MAX_VALUE, 60000, 2 * pagesize, 1); return new LookAheadIterator() { @Override protected String next0() { @@ -245,22 +248,43 @@ public abstract class AbstractSolrConnector implements SolrConnector { * @throws IOException */ @Override - public SolrDocumentList getDocumentListByQuery(final String querystring, final int offset, final int count, final String ... fields) throws IOException { + public SolrDocumentList getDocumentListByQuery( + final String querystring, + final String sort, + final int offset, + final int count, + final String ... fields) throws IOException { + // construct query + final SolrQuery params = getSolrQuery(querystring, sort, offset, count, fields); + + // query the server + final SolrDocumentList docs = getDocumentListByParams(params); + return docs; + } + + public static SolrQuery getSolrQuery( + final String querystring, + final String sort, + final int offset, + final int count, + final String ... 
fields) { // construct query final SolrQuery params = new SolrQuery(); params.setQuery(querystring); + params.clearSorts(); + if (sort != null) { + params.set("sort", sort); + } params.setRows(count); params.setStart(offset); params.setFacet(false); - params.clearSorts(); if (fields.length > 0) params.setFields(fields); params.setIncludeScore(false); - // query the server - final SolrDocumentList docs = getDocumentListByParams(params); - return docs; + return params; } - + + @Override public long getDocumentCountByParams(ModifiableSolrParams params) throws IOException, SolrException { final SolrDocumentList sdl = getDocumentListByParams(params); diff --git a/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java index 9d09a9040..2147fca85 100644 --- a/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java @@ -211,7 +211,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo * @throws IOException */ @Override - public SolrDocumentList getDocumentListByQuery(final String querystring, final int offset, final int count, final String ... fields) throws IOException { + public SolrDocumentList getDocumentListByQuery(final String querystring, final String sort, final int offset, final int count, final String ... fields) throws IOException { if (offset == 0 && count == 1 && querystring.startsWith("id:") && ((querystring.length() == 17 && querystring.charAt(3) == '"' && querystring.charAt(16) == '"') || querystring.length() == 15)) { @@ -222,14 +222,14 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo return list; } if (this.solr != null) { - SolrDocumentList list = this.solr.getDocumentListByQuery(querystring, offset, count, fields); + SolrDocumentList list = this.solr.getDocumentListByQuery(querystring, sort, offset, count, fields); addToCache(list, fields.length == 0); return list; } // combine both lists SolrDocumentList list; - list = this.solr.getDocumentListByQuery(querystring, offset, count, fields); + list = this.solr.getDocumentListByQuery(querystring, sort, offset, count, fields); // add caching addToCache(list, fields.length == 0); diff --git a/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java index 7a2844aea..755819b28 100644 --- a/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java @@ -382,7 +382,7 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector { } @Override - public SolrDocumentList getDocumentListByQuery(String querystring, int offset, int count, String... fields) throws IOException, SolrException { + public SolrDocumentList getDocumentListByQuery(String querystring, String sort, int offset, int count, String... 
fields) throws IOException, SolrException { if (offset == 0 && count == 1 && querystring.startsWith("id:") && ((querystring.length() == 17 && querystring.charAt(3) == '"' && querystring.charAt(16) == '"') || querystring.length() == 15)) { @@ -392,7 +392,7 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector { return list; } - SolrDocumentList sdl = this.connector.getDocumentListByQuery(querystring, offset, count, AbstractSolrConnector.ensureEssentialFieldsIncluded(fields)); + SolrDocumentList sdl = this.connector.getDocumentListByQuery(querystring, sort, offset, count, AbstractSolrConnector.ensureEssentialFieldsIncluded(fields)); /* Iterator i = sdl.iterator(); while (i.hasNext()) { @@ -415,13 +415,13 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector { } @Override - public BlockingQueue concurrentDocumentsByQuery(String querystring, int offset, int maxcount, long maxtime, int buffersize, final int concurrency, String... fields) { - return this.connector.concurrentDocumentsByQuery(querystring, offset, maxcount, maxtime, buffersize, concurrency, fields); + public BlockingQueue concurrentDocumentsByQuery(String querystring, String sort, int offset, int maxcount, long maxtime, int buffersize, final int concurrency, String... fields) { + return this.connector.concurrentDocumentsByQuery(querystring, sort, offset, maxcount, maxtime, buffersize, concurrency, fields); } @Override - public BlockingQueue concurrentIDsByQuery(String querystring, int offset, int maxcount, long maxtime, int buffersize, final int concurrency) { - return this.connector.concurrentIDsByQuery(querystring, offset, maxcount, maxtime, buffersize, concurrency); + public BlockingQueue concurrentIDsByQuery(String querystring, String sort, int offset, int maxcount, long maxtime, int buffersize, final int concurrency) { + return this.connector.concurrentIDsByQuery(querystring, sort, offset, maxcount, maxtime, buffersize, concurrency); } } diff --git a/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java index 27f6b9d68..db3f95434 100644 --- a/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java @@ -360,16 +360,9 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo private SolrQueryRequest request; private DocList response; - public DocListSearcher(final String querystring, final int offset, final int count, final String ... fields) { + public DocListSearcher(final String querystring, String sort, final int offset, final int count, final String ... 
fields) { // construct query - final SolrQuery params = new SolrQuery(); - params.setQuery(querystring); - params.setRows(count); - params.setStart(offset); - params.setFacet(false); - params.clearSorts(); - if (fields.length > 0) params.setFields(fields); - params.setIncludeScore(false); + final SolrQuery params = AbstractSolrConnector.getSolrQuery(querystring, sort, offset, count, fields); // query the server this.request = EmbeddedSolrConnector.this.request(params); @@ -395,7 +388,7 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo int numFound = 0; DocListSearcher docListSearcher = null; try { - docListSearcher = new DocListSearcher(querystring, 0, 0, CollectionSchema.id.getSolrFieldName()); + docListSearcher = new DocListSearcher(querystring, null, 0, 0, CollectionSchema.id.getSolrFieldName()); numFound = docListSearcher.response.matches(); } finally { if (docListSearcher != null) docListSearcher.close(); @@ -414,7 +407,7 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo int responseCount = 0; DocListSearcher docListSearcher = null; try { - docListSearcher = new DocListSearcher("{!raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + id, 0, 1, CollectionSchema.id.getSolrFieldName(), CollectionSchema.load_date_dt.getSolrFieldName()); + docListSearcher = new DocListSearcher("{!raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + id, null, 0, 1, CollectionSchema.id.getSolrFieldName(), CollectionSchema.load_date_dt.getSolrFieldName()); responseCount = docListSearcher.response.size(); if (responseCount == 0) return null; SolrIndexSearcher searcher = docListSearcher.request.getSearcher(); @@ -431,7 +424,7 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo } @Override - public BlockingQueue concurrentIDsByQuery(final String querystring, final int offset, final int maxcount, final long maxtime, final int buffersize, final int concurrency) { + public BlockingQueue concurrentIDsByQuery(final String querystring, final String sort, final int offset, final int maxcount, final long maxtime, final int buffersize, final int concurrency) { final BlockingQueue queue = buffersize <= 0 ? new LinkedBlockingQueue() : new ArrayBlockingQueue(buffersize); final long endtime = maxtime == Long.MAX_VALUE ? Long.MAX_VALUE : System.currentTimeMillis() + maxtime; // we know infinity! 
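For reference, the SolrQuery that the shared getSolrQuery helper introduced above assembles is equivalent to the following direct construction; the query, sort, and field values are invented:

    // equivalent of AbstractSolrConnector.getSolrQuery("host_s:example.org", "url_chars_i asc", 0, 100, "sku")
    final SolrQuery params = new SolrQuery();
    params.setQuery("host_s:example.org");
    params.clearSorts();
    params.set("sort", "url_chars_i asc"); // skipped entirely when sort == null
    params.setRows(100);     // count
    params.setStart(0);      // offset
    params.setFacet(false);
    params.setFields("sku"); // only set when at least one field is given
    params.setIncludeScore(false);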
final Thread t = new Thread() { @@ -443,7 +436,7 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo while (System.currentTimeMillis() < endtime) { try { responseCount = 0; - docListSearcher = new DocListSearcher(querystring, o, pagesize, CollectionSchema.id.getSolrFieldName()); + docListSearcher = new DocListSearcher(querystring, sort, o, pagesize, CollectionSchema.id.getSolrFieldName()); responseCount = docListSearcher.response.size(); SolrIndexSearcher searcher = docListSearcher.request.getSearcher(); DocIterator iterator = docListSearcher.response.iterator(); diff --git a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java index 045f0bee5..1820e4248 100644 --- a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java @@ -218,7 +218,7 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo * @throws IOException */ @Override - public SolrDocumentList getDocumentListByQuery(final String querystring, final int offset, final int count, final String ... fields) throws IOException { + public SolrDocumentList getDocumentListByQuery(final String querystring, final String sort, final int offset, final int count, final String ... fields) throws IOException { if (this.solr0 == null && this.solr1 == null) return new SolrDocumentList(); if (offset == 0 && count == 1 && querystring.startsWith("id:") && ((querystring.length() == 17 && querystring.charAt(3) == '"' && querystring.charAt(16) == '"') || @@ -230,31 +230,31 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo return list; } if (this.solr0 != null && this.solr1 == null) { - SolrDocumentList list = this.solr0.getDocumentListByQuery(querystring, offset, count, fields); + SolrDocumentList list = this.solr0.getDocumentListByQuery(querystring, sort, offset, count, fields); return list; } if (this.solr1 != null && this.solr0 == null) { - SolrDocumentList list = this.solr1.getDocumentListByQuery(querystring, offset, count, fields); + SolrDocumentList list = this.solr1.getDocumentListByQuery(querystring, sort, offset, count, fields); return list; } // combine both lists SolrDocumentList l; - l = this.solr0.getDocumentListByQuery(querystring, offset, count, fields); + l = this.solr0.getDocumentListByQuery(querystring, sort, offset, count, fields); if (l.size() >= count) return l; // at this point we need to know how many results are in solr0 // compute this with a very bad hack; replace with better method later int size0 = 0; { //bad hack - TODO: replace - SolrDocumentList lHack = this.solr0.getDocumentListByQuery(querystring, 0, Integer.MAX_VALUE, fields); + SolrDocumentList lHack = this.solr0.getDocumentListByQuery(querystring, sort, 0, Integer.MAX_VALUE, fields); size0 = lHack.size(); } // now use the size of the first query to do a second query final SolrDocumentList list = new SolrDocumentList(); for (final SolrDocument d: l) list.add(d); - l = this.solr1.getDocumentListByQuery(querystring, offset + l.size() - size0, count - l.size(), fields); + l = this.solr1.getDocumentListByQuery(querystring, sort, offset + l.size() - size0, count - l.size(), fields); for (final SolrDocument d: l) list.add(d); return list; @@ -427,10 +427,10 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo } @Override - public BlockingQueue concurrentIDsByQuery(final String 
querystring, final int offset, final int maxcount, final long maxtime, final int buffersize, final int concurrency) { - if (this.solr0 != null && this.solr1 == null) return this.solr0.concurrentIDsByQuery(querystring, offset, maxcount, maxtime, buffersize, concurrency); - if (this.solr0 == null && this.solr1 != null) return this.solr1.concurrentIDsByQuery(querystring, offset, maxcount, maxtime, buffersize, concurrency); - return super.concurrentIDsByQuery(querystring, offset, maxcount, maxtime, buffersize, concurrency); + public BlockingQueue concurrentIDsByQuery(final String querystring, final String sort, final int offset, final int maxcount, final long maxtime, final int buffersize, final int concurrency) { + if (this.solr0 != null && this.solr1 == null) return this.solr0.concurrentIDsByQuery(querystring, sort, offset, maxcount, maxtime, buffersize, concurrency); + if (this.solr0 == null && this.solr1 != null) return this.solr1.concurrentIDsByQuery(querystring, sort, offset, maxcount, maxtime, buffersize, concurrency); + return super.concurrentIDsByQuery(querystring, sort, offset, maxcount, maxtime, buffersize, concurrency); } } diff --git a/source/net/yacy/cora/federate/solr/connector/SolrConnector.java b/source/net/yacy/cora/federate/solr/connector/SolrConnector.java index dcc9d1d06..ccd774e93 100644 --- a/source/net/yacy/cora/federate/solr/connector/SolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/SolrConnector.java @@ -180,12 +180,18 @@ public interface SolrConnector extends Iterable /* Iterable of document * get a query result from solr * to get all results set the query String to "*:*" * @param querystring the solr query string + * @param sort the solr sort string, may be null to be not used * @param offset the first result offset * @param count number of wanted results * @param fields list of fields * @throws IOException */ - public SolrDocumentList getDocumentListByQuery(final String querystring, final int offset, final int count, final String ... fields) throws IOException, SolrException; + public SolrDocumentList getDocumentListByQuery( + final String querystring, + final String sort, + final int offset, + final int count, + final String ... fields) throws IOException, SolrException; /** * get the number of results when this query is done. @@ -210,6 +216,7 @@ public interface SolrConnector extends Iterable /* Iterable of document * The result queue is considered as terminated if AbstractSolrConnector.POISON_DOCUMENT is returned. * The method returns immediately and feeds the search results into the queue * @param querystring the solr query string + * @param sort the solr sort string, may be null to be not used * @param offset first result offset * @param maxcount the maximum number of results * @param maxtime the maximum time in milliseconds @@ -220,6 +227,7 @@ public interface SolrConnector extends Iterable /* Iterable of document */ public BlockingQueue concurrentDocumentsByQuery( final String querystring, + final String sort, final int offset, final int maxcount, final long maxtime, @@ -232,6 +240,7 @@ public interface SolrConnector extends Iterable /* Iterable of document * The result queue is considered as terminated if AbstractSolrConnector.POISON_ID is returned. 
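The ID variant is consumed the same way. A sketch with invented limits; the catch-all query and a null sort retrieve all document ids in default index order:

    BlockingQueue<String> ids = connector.concurrentIDsByQuery("*:*", null, 0, Integer.MAX_VALUE, 60000, 200, 1);
    try {
        String id;
        while ((id = ids.take()) != AbstractSolrConnector.POISON_ID) {
            // consume one document id per loop
        }
    } catch (final InterruptedException e) {}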
* The method returns immediately and feeds the search results into the queue * @param querystring + * @param sort the solr sort string, may be null to be not used * @param offset * @param maxcount * @param buffersize the size of an ArrayBlockingQueue; if <= 0 then a LinkedBlockingQueue is used @@ -240,6 +249,7 @@ public interface SolrConnector extends Iterable /* Iterable of document */ public BlockingQueue concurrentIDsByQuery( final String querystring, + final String sort, final int offset, final int maxcount, final long maxtime, diff --git a/source/net/yacy/search/index/ErrorCache.java b/source/net/yacy/search/index/ErrorCache.java index 3e7780cab..78638f014 100644 --- a/source/net/yacy/search/index/ErrorCache.java +++ b/source/net/yacy/search/index/ErrorCache.java @@ -171,7 +171,7 @@ public class ErrorCache { } if (failDoc != null) return failDoc; try { - final SolrDocumentList docs = this.fulltext.getDefaultConnector().getDocumentListByQuery(CollectionSchema.id + ":\"" + urlhash + "\" AND " + CollectionSchema.failtype_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM, 0, 1); + final SolrDocumentList docs = this.fulltext.getDefaultConnector().getDocumentListByQuery(CollectionSchema.id + ":\"" + urlhash + "\" AND " + CollectionSchema.failtype_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM, null, 0, 1); if (docs == null || docs.isEmpty()) return null; SolrDocument doc = docs.get(0); if (doc == null) return null; diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java index e64178e1e..c78180a25 100644 --- a/source/net/yacy/search/index/Fulltext.java +++ b/source/net/yacy/search/index/Fulltext.java @@ -428,7 +428,7 @@ public final class Fulltext { final String collectionQuery = CollectionSchema.host_s.getSolrFieldName() + ":\"" + host + "\"" + ((freshdate != null && freshdate.before(new Date())) ? 
(" AND " + CollectionSchema.load_date_dt.getSolrFieldName() + ":[* TO " + ISO8601Formatter.FORMATTER.format(freshdate) + "]") : ""); final AtomicInteger count = new AtomicInteger(0); - final BlockingQueue docs = Fulltext.this.getDefaultConnector().concurrentDocumentsByQuery(collectionQuery, 0, 1000000, 600000, 100, 1, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName()); + final BlockingQueue docs = Fulltext.this.getDefaultConnector().concurrentDocumentsByQuery(collectionQuery, null, 0, 1000000, 600000, 100, 1, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName()); try { Set deleteIDs = new HashSet(); SolrDocument doc; @@ -664,7 +664,7 @@ public final class Fulltext { this.count++; } } else { - BlockingQueue docs = Fulltext.this.getDefaultConnector().concurrentDocumentsByQuery(this.query + " AND " + CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", 0, 100000000, 10 * 60 * 60 * 1000, 100, 1, + BlockingQueue docs = Fulltext.this.getDefaultConnector().concurrentDocumentsByQuery(this.query + " AND " + CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", null, 0, 100000000, 10 * 60 * 60 * 1000, 100, 1, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName(), CollectionSchema.title.getSolrFieldName(), CollectionSchema.author.getSolrFieldName(), CollectionSchema.description_txt.getSolrFieldName(), CollectionSchema.size_i.getSolrFieldName(), CollectionSchema.last_modified.getSolrFieldName()); SolrDocument doc; diff --git a/source/net/yacy/search/index/ReindexSolrBusyThread.java b/source/net/yacy/search/index/ReindexSolrBusyThread.java index bc1bd2f7b..88209f91e 100644 --- a/source/net/yacy/search/index/ReindexSolrBusyThread.java +++ b/source/net/yacy/search/index/ReindexSolrBusyThread.java @@ -113,7 +113,7 @@ import org.apache.solr.common.SolrInputDocument; if (sem.tryAcquire()) { try { String query = querylist.get(0); - SolrDocumentList xdocs = esc.getDocumentListByQuery(query, start, chunksize); + SolrDocumentList xdocs = esc.getDocumentListByQuery(query, null, start, chunksize); docstoreindex = (int) xdocs.getNumFound(); if (xdocs.size() == 0) { // no documents returned = all of current query reindexed (or eventual start to large) diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index 5693a436b..dddb59c34 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -30,6 +30,7 @@ import java.io.File; import java.io.IOException; import java.net.MalformedURLException; import java.util.Date; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -80,6 +81,7 @@ import net.yacy.repository.LoaderDispatcher; import net.yacy.search.query.SearchEvent; import net.yacy.search.schema.CollectionConfiguration; import net.yacy.search.schema.CollectionSchema; +import net.yacy.search.schema.HyperlinkGraph; import net.yacy.search.schema.WebgraphConfiguration; import net.yacy.search.schema.WebgraphSchema; @@ -259,21 +261,13 @@ public class Segment { return 999; } + private static RowHandleSet getPossibleRootHashes(final DigestURL url) { RowHandleSet rootCandidates = new RowHandleSet(Word.commonHashLength, Word.commonHashOrder, 10); String rootStub = url.getProtocol() + "://" + url.getHost() + (url.getProtocol().equals("http") && url.getPort() != 80 ? 
(":" + url.getPort()) : ""); try { rootCandidates.put(new DigestURL(rootStub).hash()); - rootCandidates.put(new DigestURL(rootStub + "/").hash()); - rootCandidates.put(new DigestURL(rootStub + "/index.htm").hash()); - rootCandidates.put(new DigestURL(rootStub + "/index.html").hash()); - rootCandidates.put(new DigestURL(rootStub + "/index.php").hash()); - rootCandidates.put(new DigestURL(rootStub + "/home.htm").hash()); - rootCandidates.put(new DigestURL(rootStub + "/home.html").hash()); - rootCandidates.put(new DigestURL(rootStub + "/home.php").hash()); - rootCandidates.put(new DigestURL(rootStub + "/default.htm").hash()); - rootCandidates.put(new DigestURL(rootStub + "/default.html").hash()); - rootCandidates.put(new DigestURL(rootStub + "/default.php").hash()); + for (String rootfn: HyperlinkGraph.ROOTFNS) rootCandidates.put(new DigestURL(rootStub + rootfn).hash()); rootCandidates.optimize(); } catch (final Throwable e) {} rootCandidates.optimize(); @@ -310,22 +304,41 @@ public class Segment { public class ClickdepthCache { private final ReferenceReportCache rrc; + private final Map hyperlinkGraphCache; // map from host name to a HyperlinkGraph for that host name private final Map cache; public final int maxdepth; // maximum clickdepth public final int maxtime; // maximum time to compute clickdepth public ClickdepthCache(final ReferenceReportCache rrc, final int maxtime, final int maxdepth) { this.rrc = rrc; + this.hyperlinkGraphCache = new HashMap(); this.cache = new ConcurrentHashMap(); this.maxdepth = maxdepth; this.maxtime = maxtime; } public int getClickdepth(final DigestURL url) throws IOException { + // first try: get the clickdepth from the cache Integer clickdepth = cache.get(ASCII.String(url.hash())); if (MemoryControl.shortStatus()) cache.clear(); if (clickdepth != null) { //ConcurrentLog.info("Segment", "get clickdepth of url " + url.toNormalform(true) + ": " + clickdepth + " CACHE HIT"); return clickdepth.intValue(); } + + // second try: get the clickdepth from a hyperlinGraphCache (forward clickdepth) + HyperlinkGraph hlg = hyperlinkGraphCache.get(url.getHost()); + if (hlg == null) { + hlg = new HyperlinkGraph(); + hlg.fill(fulltext.getDefaultConnector(), url.getHost(), 300000, 10000000); + hlg.findLinkDepth(); + hyperlinkGraphCache.put(url.getHost(), hlg); + } + clickdepth = hlg.getDepth(url); + if (clickdepth != null) { + return clickdepth.intValue(); + } + + + // third try: get the clickdepth from a reverse link graph clickdepth = Segment.this.getClickDepth(this.rrc, url, this.maxtime, this.maxdepth); //ConcurrentLog.info("Segment", "get clickdepth of url " + url.toNormalform(true) + ": " + clickdepth); this.cache.put(ASCII.String(url.hash()), clickdepth); @@ -375,7 +388,7 @@ public class Segment { if ((internalIDs.size() == 0 || !connectedCitation()) && Segment.this.fulltext.useWebgraph()) { // reqd the references from the webgraph SolrConnector webgraph = Segment.this.fulltext.getWebgraphConnector(); - BlockingQueue docs = webgraph.concurrentDocumentsByQuery("{!raw f=" + WebgraphSchema.target_id_s.getSolrFieldName() + "}" + ASCII.String(id), 0, 10000000, 1000, 100, 1, WebgraphSchema.source_id_s.getSolrFieldName()); + BlockingQueue docs = webgraph.concurrentDocumentsByQuery("{!raw f=" + WebgraphSchema.target_id_s.getSolrFieldName() + "}" + ASCII.String(id), WebgraphSchema.source_chars_i.getSolrFieldName() + " asc", 0, 10000000, 1000, 100, 1, WebgraphSchema.source_id_s.getSolrFieldName()); SolrDocument doc; try { while ((doc = docs.take()) != 
AbstractSolrConnector.POISON_DOCUMENT) { @@ -478,12 +491,12 @@ public class Segment { final BlockingQueue docQueue; final String urlstub; if (stub == null) { - docQueue = this.fulltext.getDefaultConnector().concurrentDocumentsByQuery(AbstractSolrConnector.CATCHALL_QUERY, 0, Integer.MAX_VALUE, maxtime, maxcount, 1, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName()); + docQueue = this.fulltext.getDefaultConnector().concurrentDocumentsByQuery(AbstractSolrConnector.CATCHALL_QUERY, CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, Integer.MAX_VALUE, maxtime, maxcount, 1, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName()); urlstub = null; } else { final String host = stub.getHost(); String hh = DigestURL.hosthash(host); - docQueue = this.fulltext.getDefaultConnector().concurrentDocumentsByQuery(CollectionSchema.host_id_s + ":\"" + hh + "\"", 0, Integer.MAX_VALUE, maxtime, maxcount, 1, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName()); + docQueue = this.fulltext.getDefaultConnector().concurrentDocumentsByQuery(CollectionSchema.host_id_s + ":\"" + hh + "\"", CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, Integer.MAX_VALUE, maxtime, maxcount, 1, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName()); urlstub = stub.toNormalform(true); } diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java index 461bcd69f..ca651b56b 100644 --- a/source/net/yacy/search/schema/CollectionConfiguration.java +++ b/source/net/yacy/search/schema/CollectionConfiguration.java @@ -962,7 +962,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri // To do so, we first must collect all canonical links, find all references to them, get the anchor list of the documents and patch the citation reference of these links String patchquery = CollectionSchema.host_s.getSolrFieldName() + ":" + host + " AND " + CollectionSchema.canonical_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM; long patchquerycount = collectionConnector.getCountByQuery(patchquery); - BlockingQueue documents_with_canonical_tag = collectionConnector.concurrentDocumentsByQuery(patchquery, 0, 10000000, 600000, 200, 1, + BlockingQueue documents_with_canonical_tag = collectionConnector.concurrentDocumentsByQuery(patchquery, CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 10000000, 600000, 200, 1, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName(), CollectionSchema.canonical_s.getSolrFieldName()); SolrDocument doc_B; int patchquerycountcheck = 0; @@ -1044,7 +1044,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri final long count = segment.fulltext().getWebgraphConnector().getCountByQuery(query); int concurrency = Math.min((int) count, Math.max(1, Runtime.getRuntime().availableProcessors() / 4)); ConcurrentLog.info("CollectionConfiguration", "collecting " + count + " documents from the webgraph, concurrency = " + concurrency); - final BlockingQueue docs = segment.fulltext().getWebgraphConnector().concurrentDocumentsByQuery(query, 0, 10000000, 1800000, 200, concurrency); + final BlockingQueue docs = segment.fulltext().getWebgraphConnector().concurrentDocumentsByQuery(query, WebgraphSchema.source_chars_i.getSolrFieldName() + " asc", 0, 10000000, 1800000, 200, concurrency); final AtomicInteger proccount = new AtomicInteger(0); Thread[] t 
= new Thread[concurrency]; for (final AtomicInteger i = new AtomicInteger(0); i.get() < t.length; i.incrementAndGet()) { @@ -1151,7 +1151,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri long count = collectionConnector.getCountByQuery(query); long start = System.currentTimeMillis(); ConcurrentLog.info("CollectionConfiguration", "collecting " + count + " documents from the collection for harvestkey " + harvestkey); - BlockingQueue docs = collectionConnector.concurrentDocumentsByQuery(query, 0, 10000000, 1800000, 200, 1); + BlockingQueue docs = collectionConnector.concurrentDocumentsByQuery(query, CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 10000000, 1800000, 200, 1); int countcheck = 0; Collection failids = new ArrayList(); SolrDocument doc; @@ -1274,7 +1274,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri this.crt = new ConcurrentHashMap(); try { // select all documents for each host - BlockingQueue ids = connector.concurrentIDsByQuery("{!raw f=" + CollectionSchema.host_s.getSolrFieldName() + "}" + host, 0, 10000000, 600000, 200, 1); + BlockingQueue ids = connector.concurrentIDsByQuery("{!raw f=" + CollectionSchema.host_s.getSolrFieldName() + "}" + host, CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 10000000, 600000, 200, 1); String id; while ((id = ids.take()) != AbstractSolrConnector.POISON_ID) { this.crt.put(id, new double[]{0.0d,0.0d}); //{old value, new value} diff --git a/source/net/yacy/search/schema/HyperlinkEdge.java b/source/net/yacy/search/schema/HyperlinkEdge.java index 587699ae0..6526f0871 100644 --- a/source/net/yacy/search/schema/HyperlinkEdge.java +++ b/source/net/yacy/search/schema/HyperlinkEdge.java @@ -37,4 +37,16 @@ public class HyperlinkEdge { this.type = type; } + @Override + public String toString() { + StringBuilder sb = new StringBuilder(120); + sb.append(this.source.toNormalform(true)); + sb.append(" -> "); + sb.append(this.target.toNormalform(true)); + sb.append(" ("); + sb.append(type.name()); + sb.append(")"); + return sb.toString(); + } + } diff --git a/source/net/yacy/search/schema/HyperlinkGraph.java b/source/net/yacy/search/schema/HyperlinkGraph.java new file mode 100644 index 000000000..312f70674 --- /dev/null +++ b/source/net/yacy/search/schema/HyperlinkGraph.java @@ -0,0 +1,197 @@ +/** + * HyperlinkGraph + * Copyright 2014 by Michael Peter Christen + * First released 08.04.2014 at http://yacy.net + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see <http://www.gnu.org/licenses/>.
+ */ + +package net.yacy.search.schema; + +import java.net.MalformedURLException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.BlockingQueue; + +import net.yacy.cora.document.encoding.ASCII; +import net.yacy.cora.document.id.DigestURL; +import net.yacy.cora.federate.solr.FailType; +import net.yacy.cora.federate.solr.connector.AbstractSolrConnector; +import net.yacy.cora.federate.solr.connector.SolrConnector; +import net.yacy.cora.util.ConcurrentLog; +import net.yacy.kelondro.data.meta.URIMetadataNode; + +import org.apache.solr.common.SolrDocument; + + +public class HyperlinkGraph implements Iterable<HyperlinkEdge> { + + public final static Set<String> ROOTFNS = new HashSet<String>(); + static { + for (String s: new String[]{"/", "/index.htm", "/index.html", "/index.php", "/home.htm", "/home.html", "/home.php", "/default.htm", "/default.html", "/default.php"}) { + ROOTFNS.add(s); + } + } + + Map<String, HyperlinkEdge> edges; + Map<DigestURL, Integer> depths; + String hostname; + + public HyperlinkGraph() { + this.edges = new LinkedHashMap<String, HyperlinkEdge>(); + this.depths = new HashMap<DigestURL, Integer>(); + this.hostname = null; + } + + public void fill(final SolrConnector solrConnector, String hostname, final int maxtime, final int maxnodes) { + this.hostname = hostname; + if (hostname.startsWith("www.")) hostname = hostname.substring(4); + StringBuilder q = new StringBuilder(); + q.append(CollectionSchema.host_s.getSolrFieldName()).append(':').append(hostname).append(" OR ").append(CollectionSchema.host_s.getSolrFieldName()).append(':').append("www.").append(hostname); + BlockingQueue<SolrDocument> docs = solrConnector.concurrentDocumentsByQuery(q.toString(), CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, maxnodes, maxtime, 100, 1, + CollectionSchema.id.getSolrFieldName(), + CollectionSchema.sku.getSolrFieldName(), + CollectionSchema.failreason_s.getSolrFieldName(), + CollectionSchema.failtype_s.getSolrFieldName(), + CollectionSchema.inboundlinks_protocol_sxt.getSolrFieldName(), + CollectionSchema.inboundlinks_urlstub_sxt.getSolrFieldName(), + CollectionSchema.outboundlinks_protocol_sxt.getSolrFieldName(), + CollectionSchema.outboundlinks_urlstub_sxt.getSolrFieldName() + ); + SolrDocument doc; + Map<String, FailType> errorDocs = new HashMap<String, FailType>(); + Map<String, HyperlinkEdge> inboundEdges = new HashMap<String, HyperlinkEdge>(); + Map<String, HyperlinkEdge> outboundEdges = new HashMap<String, HyperlinkEdge>(); + Map<String, HyperlinkEdge> errorEdges = new HashMap<String, HyperlinkEdge>(); + try { + while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) { + String u = (String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName()); + String ids = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName()); + DigestURL from = new DigestURL(u, ASCII.getBytes(ids)); + String errortype = (String) doc.getFieldValue(CollectionSchema.failtype_s.getSolrFieldName()); + FailType error = errortype == null ?
null : FailType.valueOf(errortype); + if (error != null) { + errorDocs.put(u, error); + } else { + Iterator<String> links = URIMetadataNode.getLinks(doc, true); // inbound + String link; + while (links.hasNext()) { + link = links.next(); + try { + DigestURL linkurl = new DigestURL(link, null); + String edgehash = ids + ASCII.String(linkurl.hash()); + inboundEdges.put(edgehash, new HyperlinkEdge(from, linkurl, HyperlinkEdge.Type.Inbound)); + } catch (MalformedURLException e) {} + } + links = URIMetadataNode.getLinks(doc, false); // outbound + while (links.hasNext()) { + link = links.next(); + try { + DigestURL linkurl = new DigestURL(link, null); + String edgehash = ids + ASCII.String(linkurl.hash()); + outboundEdges.put(edgehash, new HyperlinkEdge(from, linkurl, HyperlinkEdge.Type.Outbound)); + } catch (MalformedURLException e) {} + } + } + if (inboundEdges.size() + outboundEdges.size() > maxnodes) { + break; + } + } + } catch (InterruptedException e) { + } catch (MalformedURLException e) { + } + // we use the errorDocs to mark all edges that point to an error document + Iterator<Map.Entry<String, HyperlinkEdge>> i = inboundEdges.entrySet().iterator(); + Map.Entry<String, HyperlinkEdge> edge; + while (i.hasNext()) { + edge = i.next(); + if (errorDocs.containsKey(edge.getValue().target.toNormalform(true))) { + i.remove(); + edge.getValue().type = HyperlinkEdge.Type.Dead; + errorEdges.put(edge.getKey(), edge.getValue()); + } + } + i = outboundEdges.entrySet().iterator(); + while (i.hasNext()) { + edge = i.next(); + if (errorDocs.containsKey(edge.getValue().target.toNormalform(true))) { + i.remove(); + edge.getValue().type = HyperlinkEdge.Type.Dead; + errorEdges.put(edge.getKey(), edge.getValue()); + } + } + // we put all edges together in a specific order which is used to create nodes in an svg display: + // nodes that appear first are possibly painted over by nodes coming later.
+ // less important nodes shall therefore appear first + this.edges.putAll(outboundEdges); + this.edges.putAll(inboundEdges); + this.edges.putAll(errorEdges); + } + + public int findLinkDepth() { + + int remaining = this.edges.size(); + + // first find root nodes + Set<DigestURL> nodes = new HashSet<DigestURL>(); + Set<DigestURL> nextnodes = new HashSet<DigestURL>(); + for (HyperlinkEdge edge: this.edges.values()) { + String path = edge.source.getPath(); + if (ROOTFNS.contains(path)) { + if (!this.depths.containsKey(edge.source)) this.depths.put(edge.source, 0); + if (edge.type == HyperlinkEdge.Type.Inbound && !this.depths.containsKey(edge.target)) this.depths.put(edge.target, 1); + nodes.add(edge.source); + nextnodes.add(edge.target); + remaining--; + } + } + if (nodes.size() == 0) ConcurrentLog.warn("HyperlinkGraph", "could not find a root node for " + hostname + " in " + this.edges.size() + " edges"); + + // recursively step into depth and find next level + int depth = 1; + while (remaining > 0) { + boolean found = false; + nodes = nextnodes; + nextnodes = new HashSet<DigestURL>(); + for (HyperlinkEdge edge: this.edges.values()) { + if (nodes.contains(edge.source)) { + if (!this.depths.containsKey(edge.source)) this.depths.put(edge.source, depth); + if (edge.type == HyperlinkEdge.Type.Inbound && !this.depths.containsKey(edge.target)) this.depths.put(edge.target, depth + 1); + nextnodes.add(edge.target); + remaining--; + found = true; + } + } + depth++; + if (!found) break; // terminate in case that not all edges are linked together + } + if (remaining > 0) ConcurrentLog.warn("HyperlinkGraph", "could not find all edges for " + hostname + ", " + remaining + " remaining."); + return depth - 1; + } + + public Integer getDepth(DigestURL url) { + return this.depths.get(url); + } + + @Override + public Iterator<HyperlinkEdge> iterator() { + return this.edges.values().iterator(); + } + +}
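Taken together, the new class is used the way the linkstructure servlet and the ClickdepthCache above use it; a condensed sketch, with the connector source and the host name as placeholders:

    HyperlinkGraph hlg = new HyperlinkGraph();
    hlg.fill(segment.fulltext().getDefaultConnector(), "example.org", 10000, 10000); // maxtime 10s, maxnodes 10000
    int maxdepth = hlg.findLinkDepth();         // breadth-first from the root file names in ROOTFNS
    for (HyperlinkEdge e: hlg) {                // iteration order: outbound, inbound, then dead edges
        Integer depth = hlg.getDepth(e.source); // null if the source was never reached from a root
        System.out.println(e + " source depth=" + (depth == null ? -1 : depth.intValue()));
    }
    System.out.println("maximum clickdepth found: " + maxdepth);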