From d481abd0876ee38d8ccbc38066cc3ff0091096d1 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Tue, 6 Nov 2012 00:29:37 +0100 Subject: [PATCH] added the visualization of error-urls to host browser - only visible for admins - a faceted search generates a huge list for all hosts in the host list - the faceted search algorithms had to be modified for that - within the browsing of the directory path, the error cause is written to the url which is presented as error-url - the errors are also accumulated for directory sums --- htroot/CrawlResults.java | 1 - htroot/HostBrowser.html | 9 +- htroot/HostBrowser.java | 129 +++++++++++------- htroot/env/base.css | 2 +- .../solr/connector/MirrorSolrConnector.java | 35 ++--- .../solr/connector/MultipleSolrConnector.java | 5 +- .../solr/connector/RetrySolrConnector.java | 10 +- .../solr/connector/ShardSolrConnector.java | 30 ++-- .../solr/connector/SolrConnector.java | 12 +- .../solr/connector/SolrServerConnector.java | 36 +++-- source/net/yacy/crawler/data/ZURL.java | 40 ------ source/net/yacy/search/index/Fulltext.java | 1 + 12 files changed, 154 insertions(+), 156 deletions(-) diff --git a/htroot/CrawlResults.java b/htroot/CrawlResults.java index 19081f27c..a40e1d65e 100644 --- a/htroot/CrawlResults.java +++ b/htroot/CrawlResults.java @@ -24,7 +24,6 @@ // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -import java.io.IOException; import java.text.SimpleDateFormat; import java.util.Arrays; import java.util.Date; diff --git a/htroot/HostBrowser.html b/htroot/HostBrowser.html index 80bbfbaaf..1537a4b11 100644 --- a/htroot/HostBrowser.html +++ b/htroot/HostBrowser.html @@ -76,16 +76,19 @@ function updatepage(str) {
Host List #{list}#
-
#[host]#browse #[host]#
-
#[count]##(crawler)#::/#[pending]##(/crawler)# URLs
+
#[host]#browse #[host]#
+
#[count]##(crawler)#::/#[pending]##(/crawler)##(errors)#::/#[count]##(/errors)# URLs
#{/list}# +
+
Count Colors:
Documents
Pending in Crawler
Load Errors
+
#(/hosts)# #(files)#::
Browser for #[path]# -

Documents on host: #[hostsize]#; Documents in subpath: #[subpathsize]#; #(complete)#get complete list::directory view#(/complete)# +

Documents on host: #[hostsize]#; Documents in subpath: #[subpathsize]#

diff --git a/htroot/HostBrowser.java b/htroot/HostBrowser.java index 39d0dea0d..523b37d09 100644 --- a/htroot/HostBrowser.java +++ b/htroot/HostBrowser.java @@ -54,6 +54,10 @@ import net.yacy.server.serverSwitch; public class HostBrowser { + public static enum StoreType { + LINK, INDEX, ERROR; + } + public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { // return variable that accumulates replacements final Switchboard sb = (Switchboard) env; @@ -131,27 +135,35 @@ public class HostBrowser { if (post.containsKey("hosts")) { // generate host list try { - int maxcount = 360; // == 6!/2 which makes nice matrixes for 3, 4, 5, 6 rows/colums + int maxcount = admin ? 2 * 3 * 2 * 5 * 7 * 2 * 3 : 360; // which makes nice matrixes for 2, 3, 4, 5, 6, 7, 8, 9 rows/colums - // collect from index - ReversibleScoreMap score = fulltext.getSolr().getFacet(YaCySchema.host_s.name(), maxcount); + // collect hosts from index + ReversibleScoreMap hostscore = fulltext.getSolr().getFacets("*:*", new String[]{YaCySchema.host_s.name()}, maxcount).get(YaCySchema.host_s.name()); + if (hostscore == null) hostscore = new ClusteredScoreMap(); - // collect from crawler + // collect hosts from crawler final Map crawler = (admin) ? sb.crawlQueues.noticeURL.getDomainStackHosts(StackType.LOCAL, sb.robots) : new HashMap(); for (Map.Entry host: crawler.entrySet()) { - score.inc(host.getKey(), host.getValue()[0]); + hostscore.inc(host.getKey(), host.getValue()[0]); } + // collect the errorurls + ReversibleScoreMap errorscore = admin ? fulltext.getSolr().getFacets(YaCySchema.failreason_t.name() + ":[* TO *]", new String[]{YaCySchema.host_s.name()}, maxcount).get(YaCySchema.host_s.name()) : null; + if (errorscore == null) errorscore = new ClusteredScoreMap(); + int c = 0; - Iterator i = score.keys(false); + Iterator i = hostscore.keys(false); String host; while (i.hasNext() && c < maxcount) { host = i.next(); prop.put("hosts_list_" + c + "_host", host); - prop.put("hosts_list_" + c + "_count", score.get(host)); + prop.put("hosts_list_" + c + "_count", hostscore.get(host)); boolean inCrawler = crawler.containsKey(host); prop.put("hosts_list_" + c + "_crawler", inCrawler ? 1 : 0); if (inCrawler) prop.put("hosts_list_" + c + "_crawler_pending", crawler.get(host)[0]); + int errors = errorscore.get(host); + prop.put("hosts_list_" + c + "_errors", errors > 0 ? 1 : 0); + if (errors > 0) prop.put("hosts_list_" + c + "_errors_count", errors); c++; } prop.put("hosts_list", c); @@ -201,10 +213,10 @@ public class HostBrowser { } else { if (facetcount > 1000 && !post.containsKey("nepr")) q.append(" AND ").append(YaCySchema.url_paths_sxt.name()).append(":[* TO *]"); } - q.append(" AND -").append(YaCySchema.failreason_t.name()).append(":[* TO *]"); BlockingQueue docs = fulltext.getSolr().concurrentQuery(q.toString(), 0, 100000, 3000, 100); SolrDocument doc; Set storedDocs = new HashSet(); + Map errorDocs = new HashMap(); Set inboundLinks = new HashSet(); Map> outboundHosts = new HashMap>(); int hostsize = 0; @@ -212,42 +224,48 @@ public class HostBrowser { long timeout = System.currentTimeMillis() + 3000; while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) { String u = (String) doc.getFieldValue(YaCySchema.sku.getSolrFieldName()); - hostsize++; + String error = (String) doc.getFieldValue(YaCySchema.failreason_t.name()); if (u.startsWith(path)) { if (delete) { deleteIDs.add(ASCII.getBytes((String) doc.getFieldValue(YaCySchema.id.name()))); } else { - storedDocs.add(u); + if (error == null) storedDocs.add(u); else if (admin) errorDocs.put(u, error); } } else if (complete) { - storedDocs.add(u); - } - // collect inboundlinks to browse the host - Iterator links = URIMetadataNode.getLinks(doc, true); - while (links.hasNext()) { - u = links.next(); - if ((complete || u.startsWith(path)) && !storedDocs.contains(u)) inboundLinks.add(u); + if (error == null) storedDocs.add(u); else if (admin) errorDocs.put(u, error); } - - // collect outboundlinks to browse to the outbound - links = URIMetadataNode.getLinks(doc, false); - while (links.hasNext()) { - u = links.next(); - try { - MultiProtocolURI mu = new MultiProtocolURI(u); - if (mu.getHost() != null) { - ReversibleScoreMap lks = outboundHosts.get(mu.getHost()); - if (lks == null) { - lks = new ClusteredScoreMap(UTF8.insensitiveUTF8Comparator); - outboundHosts.put(mu.getHost(), lks); + if (error == null) { + hostsize++; + // collect inboundlinks to browse the host + Iterator links = URIMetadataNode.getLinks(doc, true); + while (links.hasNext()) { + u = links.next(); + if ((complete || u.startsWith(path)) && !storedDocs.contains(u)) inboundLinks.add(u); + } + + // collect outboundlinks to browse to the outbound + links = URIMetadataNode.getLinks(doc, false); + while (links.hasNext()) { + u = links.next(); + try { + MultiProtocolURI mu = new MultiProtocolURI(u); + if (mu.getHost() != null) { + ReversibleScoreMap lks = outboundHosts.get(mu.getHost()); + if (lks == null) { + lks = new ClusteredScoreMap(UTF8.insensitiveUTF8Comparator); + outboundHosts.put(mu.getHost(), lks); + } + lks.set(u, u.length()); } - lks.set(u, u.length()); - } - } catch (MalformedURLException e) {} + } catch (MalformedURLException e) {} + } } if (System.currentTimeMillis() > timeout) break; } - if (deleteIDs.size() > 0) sb.index.fulltext().remove(deleteIDs, true); + if (deleteIDs.size() > 0) { + for (byte[] b: deleteIDs) sb.crawlQueues.urlRemove(b); + sb.index.fulltext().remove(deleteIDs, true); + } // collect from crawler List domainStackReferences = (admin) ? sb.crawlQueues.noticeURL.getDomainStackReferences(StackType.LOCAL, host, 1000, 3000) : new ArrayList(0); @@ -255,43 +273,46 @@ public class HostBrowser { for (Request crawlEntry: domainStackReferences) loadingLinks.add(crawlEntry.url().toNormalform(true)); // now combine all lists into one - Map files = new HashMap(); - for (String u: storedDocs) files.put(u, true); - for (String u: inboundLinks) if (!storedDocs.contains(u)) files.put(u, false); - for (String u: loadingLinks) if (u.startsWith(path) && !storedDocs.contains(u)) files.put(u, false); + Map files = new HashMap(); + for (String u: storedDocs) files.put(u, StoreType.INDEX); + for (String u: errorDocs.keySet()) files.put(u, StoreType.ERROR); + for (String u: inboundLinks) if (!storedDocs.contains(u)) files.put(u, StoreType.LINK); + for (String u: loadingLinks) if (u.startsWith(path) && !storedDocs.contains(u)) files.put(u, StoreType.LINK); Log.logInfo("HostBrowser", "collected " + files.size() + " urls for path " + path); // distinguish files and folders Map list = new TreeMap(); // a directory list; if object is boolean, its a file; if its a int[], then its a folder int pl = path.length(); String file; - boolean loaded; - for (Map.Entry entry: files.entrySet()) { + for (Map.Entry entry: files.entrySet()) { if (entry.getKey().length() < pl) continue; // this is not inside the path if (!entry.getKey().startsWith(path)) continue; file = entry.getKey().substring(pl); - loaded = entry.getValue().booleanValue(); + StoreType type = entry.getValue(); p = file.indexOf('/'); if (p < 0) { // this is a file - list.put(entry.getKey(), loaded); // Boolean value: this is a file; true -> file is in index; false -> not in index, maybe in crawler + list.put(entry.getKey(), type); // StoreType value: this is a file; true -> file is in index; false -> not in index, maybe in crawler } else { // this is a directory path or a file in a subdirectory String remainingPath = file.substring(0, p + 1); if (complete && remainingPath.indexOf('.') > 0) { - list.put(entry.getKey(), loaded); // Boolean value: this is a file + list.put(entry.getKey(), type); // StoreType value: this is a file } else { String dir = path + remainingPath; Object c = list.get(dir); - boolean incrawler = loadingLinks.contains(entry.getKey()); if (c == null) { - int[] linkedStored = new int[]{0,0,0}; - linkedStored[loaded ? 1 : 0]++; - if (incrawler) linkedStored[2]++; - list.put(dir, linkedStored); + int[] linkedStoredIncrawlerError = new int[]{0,0,0,0}; + if (type == StoreType.LINK) linkedStoredIncrawlerError[0]++; + if (type == StoreType.INDEX) linkedStoredIncrawlerError[1]++; + if (loadingLinks.contains(entry.getKey())) linkedStoredIncrawlerError[2]++; + if (errorDocs.containsKey(entry.getKey())) linkedStoredIncrawlerError[3]++; + list.put(dir, linkedStoredIncrawlerError); } else if (c instanceof int[]) { - ((int[]) c)[loaded ? 1 : 0]++; - if (incrawler) ((int[]) c)[2]++; + if (type == StoreType.LINK) ((int[]) c)[0]++; + if (type == StoreType.INDEX) ((int[]) c)[1]++; + if (loadingLinks.contains(entry.getKey())) ((int[]) c)[2]++; + if (errorDocs.containsKey(entry.getKey())) ((int[]) c)[3]++; } } } @@ -300,16 +321,17 @@ public class HostBrowser { int maxcount = 1000; int c = 0; for (Map.Entry entry: list.entrySet()) { - if (entry.getValue() instanceof Boolean) { + if (entry.getValue() instanceof StoreType) { // this is a file prop.put("files_list_" + c + "_type", 0); prop.put("files_list_" + c + "_type_url", entry.getKey()); - boolean indexed = ((Boolean) entry.getValue()).booleanValue(); + StoreType type = (StoreType) entry.getValue(); try {uri = new DigestURI(entry.getKey());} catch (MalformedURLException e) {uri = null;} boolean loading = load.equals(entry.getKey()) || (uri != null && sb.crawlQueues.urlExists(uri.hash()) != null); //String failr = fulltext.failReason(ASCII.String(uri.hash())); - prop.put("files_list_" + c + "_type_stored", indexed ? 1 : loading ? 2 : 0); + prop.put("files_list_" + c + "_type_stored", type == StoreType.INDEX ? 1 : loading ? 2 : type == StoreType.ERROR ? 3 : 0 /*linked*/); prop.put("files_list_" + c + "_type_stored_load", loadRight ? 1 : 0); + if (type == StoreType.ERROR) prop.put("files_list_" + c + "_type_stored_error", errorDocs.get(entry.getKey())); if (loadRight) { prop.put("files_list_" + c + "_type_stored_load_url", entry.getKey()); prop.put("files_list_" + c + "_type_stored_load_path", path); @@ -321,7 +343,8 @@ public class HostBrowser { int linked = ((int[]) entry.getValue())[0]; int stored = ((int[]) entry.getValue())[1]; int crawler = ((int[]) entry.getValue())[2]; - prop.put("files_list_" + c + "_type_count", stored + " stored / " + linked + " linked" + (crawler > 0 ? (" / " + crawler + " pending") : "")); + int error = ((int[]) entry.getValue())[3]; + prop.put("files_list_" + c + "_type_count", stored + " stored / " + linked + " linked" + (crawler > 0 ? (" / " + crawler + " pending") : "") + (error > 0 ? (" / " + error + " errors") : "")); } if (++c >= maxcount) break; } diff --git a/htroot/env/base.css b/htroot/env/base.css index f552714cb..de6a58c77 100644 --- a/htroot/env/base.css +++ b/htroot/env/base.css @@ -134,7 +134,7 @@ tt, *.tt { .info { font-weight:bold; - color:olive; + color:darkblue; } .commit { diff --git a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java index fe3a5c325..12f4ad19c 100644 --- a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java @@ -22,12 +22,12 @@ package net.yacy.cora.federate.solr.connector; import java.io.IOException; import java.util.Collection; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.concurrent.atomic.AtomicLong; -import net.yacy.cora.document.UTF8; import net.yacy.cora.federate.solr.YaCySchema; -import net.yacy.cora.sorting.ClusteredScoreMap; import net.yacy.cora.sorting.ReversibleScoreMap; import net.yacy.cora.storage.ARC; import net.yacy.cora.storage.ConcurrentARC; @@ -419,25 +419,26 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo return count.get(); } - /** - * get a facet of the index: a list of values that are most common in a specific field - * @param field the field which is selected for the facet - * @param maxresults the maximum size of the resulting map - * @return an ordered map of fields - * @throws IOException - */ - public ReversibleScoreMap getFacet(String field, int maxresults) throws IOException { - if (this.solr0 == null && this.solr1 == null) return new ClusteredScoreMap(UTF8.insensitiveUTF8Comparator); + @Override + public Map> getFacets(String query, String[] fields, int maxresults) throws IOException { + if (this.solr0 == null && this.solr1 == null) return new HashMap>(0); if (this.solr0 != null && this.solr1 == null) { - return this.solr0.getFacet(field, maxresults); + return this.solr0.getFacets(query, fields, maxresults); } if (this.solr1 != null && this.solr0 == null) { - return this.solr1.getFacet(field, maxresults); + return this.solr1.getFacets(query, fields, maxresults); + } + Map> facets0 = this.solr0.getFacets(query, fields, maxresults); + Map> facets1 = this.solr1.getFacets(query, fields, maxresults); + for (Map.Entry> facet0: facets0.entrySet()) { + ReversibleScoreMap facet1 = facets1.remove(facet0.getKey()); + if (facet1 == null) continue; + for (String key: facet1) facet0.getValue().inc(key, facet1.get(key)); + } + for (Map.Entry> facet1: facets1.entrySet()) { + facets0.put(facet1.getKey(), facet1.getValue()); } - ReversibleScoreMap facet0 = this.solr0.getFacet(field, maxresults); - ReversibleScoreMap facet1 = this.solr1.getFacet(field, maxresults); - for (String key: facet1) facet0.inc(key, facet1.get(key)); - return facet0; + return facets0; } private void addToCache(SolrDocumentList list) { diff --git a/source/net/yacy/cora/federate/solr/connector/MultipleSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/MultipleSolrConnector.java index 08854306e..9040aebde 100644 --- a/source/net/yacy/cora/federate/solr/connector/MultipleSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/MultipleSolrConnector.java @@ -23,6 +23,7 @@ package net.yacy.cora.federate.solr.connector; import java.io.IOException; import java.util.Collection; import java.util.List; +import java.util.Map; import java.util.concurrent.ArrayBlockingQueue; import net.yacy.cora.sorting.ReversibleScoreMap; @@ -183,8 +184,8 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr } @Override - public ReversibleScoreMap getFacet(final String field, final int maxresults) throws IOException { - return this.solr.getFacet(field, maxresults); + public Map> getFacets(String query, String[] fields, int maxresults) throws IOException { + return this.solr.getFacets(query, fields, maxresults); } @Override diff --git a/source/net/yacy/cora/federate/solr/connector/RetrySolrConnector.java b/source/net/yacy/cora/federate/solr/connector/RetrySolrConnector.java index d53fba5ab..814a67019 100644 --- a/source/net/yacy/cora/federate/solr/connector/RetrySolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/RetrySolrConnector.java @@ -22,10 +22,10 @@ package net.yacy.cora.federate.solr.connector; import java.io.IOException; import java.util.Collection; +import java.util.HashMap; import java.util.List; +import java.util.Map; -import net.yacy.cora.document.UTF8; -import net.yacy.cora.sorting.ClusteredScoreMap; import net.yacy.cora.sorting.ReversibleScoreMap; import org.apache.solr.client.solrj.response.QueryResponse; @@ -225,18 +225,18 @@ public class RetrySolrConnector extends AbstractSolrConnector implements SolrCon } @Override - public ReversibleScoreMap getFacet(final String field, final int maxresults) throws IOException { + public Map> getFacets(String query, String[] fields, int maxresults) throws IOException { final long t = System.currentTimeMillis() + this.retryMaxTime; Throwable ee = null; while (System.currentTimeMillis() < t) try { - return this.solrConnector.getFacet(field, maxresults); + return this.solrConnector.getFacets(query, fields, maxresults); } catch (final Throwable e) { ee = e; try {Thread.sleep(10);} catch (final InterruptedException e1) {} continue; } if (ee != null) throw (ee instanceof IOException) ? (IOException) ee : new IOException(ee.getMessage()); - return new ClusteredScoreMap(UTF8.insensitiveUTF8Comparator); + return new HashMap>(); } @Override diff --git a/source/net/yacy/cora/federate/solr/connector/ShardSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/ShardSolrConnector.java index f67988521..c09bcb3b6 100644 --- a/source/net/yacy/cora/federate/solr/connector/ShardSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/ShardSolrConnector.java @@ -24,11 +24,11 @@ import java.io.IOException; import java.net.InetAddress; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.concurrent.atomic.AtomicLong; -import net.yacy.cora.document.UTF8; -import net.yacy.cora.sorting.ClusteredScoreMap; import net.yacy.cora.sorting.ReversibleScoreMap; import net.yacy.cora.protocol.Domains; @@ -225,22 +225,24 @@ public class ShardSolrConnector extends AbstractSolrConnector implements SolrCon return count.get(); } - /** - * get a facet of the index: a list of values that are most common in a specific field - * @param field the field which is selected for the facet - * @param maxresults the maximum size of the resulting map - * @return an ordered map of fields - * @throws IOException - */ - public ReversibleScoreMap getFacet(final String field, final int maxresults) throws IOException { - ReversibleScoreMap acc = new ClusteredScoreMap(UTF8.insensitiveUTF8Comparator); + @Override + public Map> getFacets(String query, String[] fields, int maxresults) throws IOException { + Map> facets = new HashMap>(); for (final SolrConnector connector: this.connectors) { - ReversibleScoreMap peer = connector.getFacet(field, maxresults); - for (String key: peer) acc.inc(key, peer.get(key)); + Map> peer = connector.getFacets(query, fields, maxresults); + innerloop: for (Map.Entry> facet: facets.entrySet()) { + ReversibleScoreMap peerfacet = peer.remove(facet.getKey()); + if (peerfacet == null) continue innerloop; + for (String key: peerfacet) facet.getValue().inc(key, peerfacet.get(key)); + } + for (Map.Entry> peerfacet: peer.entrySet()) { + facets.put(peerfacet.getKey(), peerfacet.getValue()); + } } - return acc; + return facets; } + public long[] getSizeList() { final long[] size = new long[this.connectors.size()]; int i = 0; diff --git a/source/net/yacy/cora/federate/solr/connector/SolrConnector.java b/source/net/yacy/cora/federate/solr/connector/SolrConnector.java index 78b7ddfe5..f21763b9f 100644 --- a/source/net/yacy/cora/federate/solr/connector/SolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/SolrConnector.java @@ -23,6 +23,7 @@ package net.yacy.cora.federate.solr.connector; import java.io.IOException; import java.util.Collection; import java.util.List; +import java.util.Map; import java.util.concurrent.BlockingQueue; import net.yacy.cora.sorting.ReversibleScoreMap; @@ -134,13 +135,14 @@ public interface SolrConnector extends Iterable /* Iterable of document public long getQueryCount(final String querystring) throws IOException; /** - * get a facet of the index: a list of values that are most common in a specific field - * @param field the field which is selected for the facet - * @param maxresults the maximum size of the resulting map - * @return an ordered map of fields + * get facets of the index: a list of lists with values that are most common in a specific field + * @param query a query which is performed to get the facets + * @param fields the field names which are selected as facet + * @param maxresults the maximum size of the resulting maps + * @return a map with key = facet field name, value = an ordered map of field values for that field * @throws IOException */ - public ReversibleScoreMap getFacet(String field, int maxresults) throws IOException; + public Map> getFacets(String query, String[] fields, int maxresults) throws IOException; /** * Get a query result from solr as a stream of documents. diff --git a/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java b/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java index 301635f97..0f225e7ed 100644 --- a/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java @@ -24,7 +24,9 @@ import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.List; +import java.util.Map; import net.yacy.cora.document.UTF8; import net.yacy.cora.sorting.ClusteredScoreMap; @@ -261,34 +263,38 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen final SolrDocumentList docs = rsp.getResults(); return docs.getNumFound(); } - + /** - * get a facet of the index: a list of values that are most common in a specific field - * @param field the field which is selected for the facet - * @param maxresults the maximum size of the resulting map - * @return an ordered map of fields + * get facets of the index: a list of lists with values that are most common in a specific field + * @param query a query which is performed to get the facets + * @param fields the field names which are selected as facet + * @param maxresults the maximum size of the resulting maps + * @return a map with key = facet field name, value = an ordered map of field values for that field * @throws IOException */ - @Override - public ReversibleScoreMap getFacet(String field, int maxresults) throws IOException { + public Map> getFacets(String query, String[] fields, int maxresults) throws IOException { // construct query final SolrQuery params = new SolrQuery(); - params.setQuery("*:*"); + params.setQuery(query); params.setRows(0); params.setStart(0); params.setFacet(true); params.setFacetLimit(maxresults); params.setFacetSort(FacetParams.FACET_SORT_COUNT); - params.addFacetField(field); + for (String field: fields) params.addFacetField(field); // query the server QueryResponse rsp = query(params); - FacetField facet = rsp.getFacetField(field); - ReversibleScoreMap result = new ClusteredScoreMap(UTF8.insensitiveUTF8Comparator); - List values = facet.getValues(); - if (values == null) return result; - for (Count ff: values) result.set(ff.getName(), (int) ff.getCount()); - return result; + Map> facets = new HashMap>(fields.length); + for (String field: fields) { + FacetField facet = rsp.getFacetField(field); + ReversibleScoreMap result = new ClusteredScoreMap(UTF8.insensitiveUTF8Comparator); + List values = facet.getValues(); + if (values == null) continue; + for (Count ff: values) result.set(ff.getName(), (int) ff.getCount()); + facets.put(field, result); + } + return facets; } @Override diff --git a/source/net/yacy/crawler/data/ZURL.java b/source/net/yacy/crawler/data/ZURL.java index 7ec8740c8..771caab8c 100644 --- a/source/net/yacy/crawler/data/ZURL.java +++ b/source/net/yacy/crawler/data/ZURL.java @@ -37,7 +37,6 @@ import java.util.concurrent.LinkedBlockingQueue; import org.apache.solr.common.SolrInputDocument; -import net.yacy.cora.document.ASCII; import net.yacy.cora.document.UTF8; import net.yacy.cora.federate.solr.connector.SolrConnector; import net.yacy.cora.order.Base64Order; @@ -347,44 +346,5 @@ public class ZURL implements Iterable { } - private class kiter implements Iterator { - // enumerates entry elements - private final Iterator i; - private boolean error = false; - - private kiter(final boolean up, final String firstHash) throws IOException { - this.i = ZURL.this.urlIndex.rows(up, (firstHash == null) ? null : ASCII.getBytes(firstHash)); - this.error = false; - } - - @Override - public boolean hasNext() { - if (this.error) return false; - return this.i.hasNext(); - } - - @Override - public Entry next() throws RuntimeException { - final Row.Entry e = this.i.next(); - if (e == null) return null; - try { - return new Entry(e); - } catch (final IOException ex) { - throw new RuntimeException("error '" + ex.getMessage() + "' for hash " + e.getPrimaryKeyASCII()); - } - } - - @Override - public void remove() { - this.i.remove(); - } - - } - - public Iterator entries(final boolean up, final String firstHash) throws IOException { - // enumerates entry elements - return new kiter(up, firstHash); - } - } diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java index 912250f97..22bd3e256 100644 --- a/source/net/yacy/search/index/Fulltext.java +++ b/source/net/yacy/search/index/Fulltext.java @@ -350,6 +350,7 @@ public final class Fulltext implements Iterable { for (byte[] urlHash: deleteIDs) { Fulltext.this.solr.delete(ASCII.String(urlHash)); } + Fulltext.this.solr.commit(); } } catch (final Throwable e) { Log.logException(e);