From 29171e2f6c52698b13f06cc2ac4cea730e204a00 Mon Sep 17 00:00:00 2001 From: orbiter Date: Fri, 24 Aug 2012 14:13:42 +0200 Subject: [PATCH] fixed generation of ontologies from index enumerations --- .../federated/solr/AbstractSolrConnector.java | 7 ++- .../federated/solr/SolrServerConnector.java | 4 +- source/net/yacy/search/index/Segment.java | 62 +++++-------------- 3 files changed, 22 insertions(+), 51 deletions(-) diff --git a/source/net/yacy/cora/services/federated/solr/AbstractSolrConnector.java b/source/net/yacy/cora/services/federated/solr/AbstractSolrConnector.java index 01810b296..af0ac788b 100644 --- a/source/net/yacy/cora/services/federated/solr/AbstractSolrConnector.java +++ b/source/net/yacy/cora/services/federated/solr/AbstractSolrConnector.java @@ -38,7 +38,7 @@ import org.apache.solr.common.SolrException; public abstract class AbstractSolrConnector implements SolrConnector { public final SolrDocument POISON_DOCUMENT = new SolrDocument(); - public final String POISON_ID = "POISON_ID"; + public final static String POISON_ID = "POISON_ID"; public final static SolrQuery catchallQuery = new SolrQuery(); static { catchallQuery.setQuery("*:*"); @@ -102,13 +102,14 @@ public abstract class AbstractSolrConnector implements SolrConnector { try {queue.put((String) d.getFieldValue(YaCySchema.id.name()));} catch (InterruptedException e) {break;} } if (sdl.size() < pagesize) break; + o += pagesize; } catch (SolrException e) { break; } catch (IOException e) { break; } } - try {queue.put(AbstractSolrConnector.this.POISON_ID);} catch (InterruptedException e1) {} + try {queue.put(AbstractSolrConnector.POISON_ID);} catch (InterruptedException e1) {} } }; t.start(); @@ -123,7 +124,7 @@ public abstract class AbstractSolrConnector implements SolrConnector { protected String next0() { try { String s = queue.poll(60000, TimeUnit.MILLISECONDS); - if (s == AbstractSolrConnector.this.POISON_ID) return null; + if (s == AbstractSolrConnector.POISON_ID) return null; return s; } catch (InterruptedException e) { return null; diff --git a/source/net/yacy/cora/services/federated/solr/SolrServerConnector.java b/source/net/yacy/cora/services/federated/solr/SolrServerConnector.java index 81df27270..e72dc8e58 100644 --- a/source/net/yacy/cora/services/federated/solr/SolrServerConnector.java +++ b/source/net/yacy/cora/services/federated/solr/SolrServerConnector.java @@ -90,7 +90,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen @Override public long getSize() { try { - final QueryResponse rsp = this.server.query(AbstractSolrConnector.catchallQuery); + final QueryResponse rsp = query(AbstractSolrConnector.catchallQuery); if (rsp == null) return 0; final SolrDocumentList docs = rsp.getResults(); if (docs == null) return 0; @@ -232,7 +232,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen // query the server try { - final QueryResponse rsp = this.server.query(query); + final QueryResponse rsp = query(query); final SolrDocumentList docs = rsp.getResults(); if (docs.isEmpty()) return null; return docs.get(0); diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index 1c72550e3..052be2496 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -33,12 +33,14 @@ import java.util.Iterator; import java.util.Map; import java.util.Properties; import java.util.Set; +import java.util.concurrent.BlockingQueue; import net.yacy.cora.document.ASCII; import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.UTF8; import net.yacy.cora.order.ByteOrder; import net.yacy.cora.protocol.ResponseHeader; +import net.yacy.cora.services.federated.solr.AbstractSolrConnector; import net.yacy.cora.services.federated.yacy.CacheStrategy; import net.yacy.cora.storage.HandleSet; import net.yacy.cora.util.SpaceExceededException; @@ -54,7 +56,6 @@ import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.data.word.WordReference; import net.yacy.kelondro.data.word.WordReferenceFactory; import net.yacy.kelondro.data.word.WordReferenceRow; -import net.yacy.kelondro.index.RowHandleSet; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.Bitfield; @@ -194,28 +195,6 @@ public class Segment { return this.fulltext.exists(urlhash); } - /** - * discover all urls that belong to a specific host - * and return an iterator for the url hashes of those urls - * @param host - * @return an iterator for all url hashes that belong to a specific host - */ - private Iterator hostSelector(String host) { - String hh = DigestURI.hosthash(host); - final HandleSet ref = new RowHandleSet(12, Base64Order.enhancedCoder, 100); - for (byte[] b: this.fulltext) { - if (hh.equals(ASCII.String(b, 6, 6))) { - try { - ref.putUnique(b); - } catch (SpaceExceededException e) { - Log.logException(e); - break; - } - } - } - return ref.iterator(); - } - /** * discover all urls that start with a given url stub * @param stub @@ -223,36 +202,27 @@ public class Segment { */ public Iterator urlSelector(MultiProtocolURI stub) { final String host = stub.getHost(); - final Iterator bi = hostSelector(host); + String hh = DigestURI.hosthash(host); + final BlockingQueue hostQueue = this.fulltext.getSolr().concurrentIDs(YaCySchema.host_id_s + ":" + hh, 0, Integer.MAX_VALUE, 10000); + final String urlstub = stub.toNormalform(false, false); - // get all urls from the specific domain - final Iterator urls = new Iterator() { - @Override - public boolean hasNext() { - return bi.hasNext(); - } - @Override - public DigestURI next() { - URIMetadata umr = Segment.this.fulltext.getMetadata(bi.next()); - return umr.url(); - } - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - }; - // now filter the stub from the iterated urls return new LookAheadIterator() { @Override protected DigestURI next0() { - DigestURI u; - while (urls.hasNext()) { - u = urls.next(); - if (u.toNormalform(false, false).startsWith(urlstub)) return u; + while (true) { + String id; + try { + id = hostQueue.take(); + } catch (InterruptedException e) { + Log.logException(e); + return null; + } + if (id == null || id == AbstractSolrConnector.POISON_ID) return null; + DigestURI u = Segment.this.fulltext.getMetadata(ASCII.getBytes(id)).url(); + if (u.toNormalform(true, false).startsWith(urlstub)) return u; } - return null; } }; }