added a hack which makes the HostBrowser more performant when the given

host has a lot of URLs. If the number of URLs is > 1000 and the root path is selected, then the list of documents is restricted to those which have no subpath. However, this can cause a problem if no documents exist on the root path itself but only on paths below that root path.
13 years ago · c6a6f4c4e6
parent 619bf7e875
commit c6a6f4c4e6
2 changed files with 13 additions and 5 deletions
--- a/htroot/HostBrowser.html
+++ b/htroot/HostBrowser.html
@ -76,7 +76,7 @@ function updatepage(str) {
     <fieldset><legend>Host List</legend>
      #{list}#
      <div style="float:left; padding:1px 5px 1px 5px;">
-        <div style="width:160px; text-align:left; float: left; white-space:nowrap; overflow:hidden;"><div id="info"><a href="/HostBrowser.html?path=#[host]#">#[host]#</a><span>browse #[host]#</span></div></div>
+        <div style="width:160px; text-align:left; float: left; white-space:nowrap; overflow:hidden;"><div id="info"><a href="/HostBrowser.html?path=#[host]#&facetcount=#[count]#">#[host]#</a><span>browse #[host]#</span></div></div>
        <div style="width:80px; text-align:right; float: left; white-space:nowrap; overflow:hidden;">#[count]##(crawler)#::/#[pending]##(/crawler)# URLs</div>
      </div>
      #{/list}#
--- a/htroot/HostBrowser.java
+++ b/htroot/HostBrowser.java
@ -167,7 +167,7 @@ public class HostBrowser {
                // delete the complete path!! That includes everything that matches with this prefix.
                delete = true;
            }
-            
+            int facetcount=post.getInt("facetcount", 0);
            boolean complete = post.getBoolean("complete");
            if (complete) { // we want only root paths for complete lists
                p = path.indexOf('/', 10);
@ -192,9 +192,17 @@ public class HostBrowser {
                String[] pathparts = uri.getPaths();
                
                // get all files for a specific host from the index
-                String query = YaCySchema.host_s.name() + ":" + host;
-                for (String pe: pathparts) if (pe.length() > 0) query += " AND " + YaCySchema.url_paths_sxt.name() + ":" + pe;
-                BlockingQueue<SolrDocument> docs = fulltext.getSolr().concurrentQuery(query, 0, 100000, 3000, 100);
+                StringBuilder q = new StringBuilder();
+                q.append(YaCySchema.host_s.name()).append(':').append(host);
+                if (pathparts.length > 0 && pathparts[0].length() > 0) {
+                    for (String pe: pathparts) {
+                        if (pe.length() > 0) q.append(" AND ").append(YaCySchema.url_paths_sxt.name()).append(':').append(pe);
+                    }
+                } else {
+                    if (facetcount > 1000 && !post.containsKey("nepr")) q.append(" AND ").append(YaCySchema.url_paths_sxt.name()).append(":[* TO *]");
+                }
+                q.append(" AND -").append(YaCySchema.failreason_t.name()).append(":[* TO *]");
+                BlockingQueue<SolrDocument> docs = fulltext.getSolr().concurrentQuery(q.toString(), 0, 100000, 3000, 100);
                SolrDocument doc;
                Set<String> storedDocs = new HashSet<String>();
                Set<String> inboundLinks = new HashSet<String>();