fixed generation of ontologies from index enumerations

pull/1/head
orbiter 13 years ago
parent 7cd302de3e
commit 29171e2f6c

@ -38,7 +38,7 @@ import org.apache.solr.common.SolrException;
public abstract class AbstractSolrConnector implements SolrConnector {
public final SolrDocument POISON_DOCUMENT = new SolrDocument();
public final String POISON_ID = "POISON_ID";
public final static String POISON_ID = "POISON_ID";
public final static SolrQuery catchallQuery = new SolrQuery();
static {
catchallQuery.setQuery("*:*");
@ -102,13 +102,14 @@ public abstract class AbstractSolrConnector implements SolrConnector {
try {queue.put((String) d.getFieldValue(YaCySchema.id.name()));} catch (InterruptedException e) {break;}
}
if (sdl.size() < pagesize) break;
o += pagesize;
} catch (SolrException e) {
break;
} catch (IOException e) {
break;
}
}
try {queue.put(AbstractSolrConnector.this.POISON_ID);} catch (InterruptedException e1) {}
try {queue.put(AbstractSolrConnector.POISON_ID);} catch (InterruptedException e1) {}
}
};
t.start();
@ -123,7 +124,7 @@ public abstract class AbstractSolrConnector implements SolrConnector {
protected String next0() {
try {
String s = queue.poll(60000, TimeUnit.MILLISECONDS);
if (s == AbstractSolrConnector.this.POISON_ID) return null;
if (s == AbstractSolrConnector.POISON_ID) return null;
return s;
} catch (InterruptedException e) {
return null;

@ -90,7 +90,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
@Override
public long getSize() {
try {
final QueryResponse rsp = this.server.query(AbstractSolrConnector.catchallQuery);
final QueryResponse rsp = query(AbstractSolrConnector.catchallQuery);
if (rsp == null) return 0;
final SolrDocumentList docs = rsp.getResults();
if (docs == null) return 0;
@ -232,7 +232,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
// query the server
try {
final QueryResponse rsp = this.server.query(query);
final QueryResponse rsp = query(query);
final SolrDocumentList docs = rsp.getResults();
if (docs.isEmpty()) return null;
return docs.get(0);

@ -33,12 +33,14 @@ import java.util.Iterator;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.order.ByteOrder;
import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.services.federated.solr.AbstractSolrConnector;
import net.yacy.cora.services.federated.yacy.CacheStrategy;
import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.SpaceExceededException;
@ -54,7 +56,6 @@ import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceFactory;
import net.yacy.kelondro.data.word.WordReferenceRow;
import net.yacy.kelondro.index.RowHandleSet;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.Bitfield;
@ -194,28 +195,6 @@ public class Segment {
return this.fulltext.exists(urlhash);
}
/**
 * Collects the url hashes of all entries in the fulltext index whose
 * embedded host hash equals the hash computed for the given host, and
 * returns an iterator over the collected hashes.
 * <p>
 * The whole fulltext index is enumerated. If the collecting set reports a
 * {@link SpaceExceededException}, the exception is logged, the enumeration
 * stops, and the hashes gathered so far are returned.
 *
 * @param host the host name whose url hashes shall be selected
 * @return an iterator over the url hashes that matched the host hash
 */
private Iterator<byte[]> hostSelector(String host) {
    final String hostHash = DigestURI.hosthash(host);
    final HandleSet matches = new RowHandleSet(12, Base64Order.enhancedCoder, 100);
    for (final byte[] urlHash : this.fulltext) {
        // NOTE(review): the host hash is presumably the 6-character slice at offset 6 of the url hash -- confirm against DigestURI
        final String candidate = ASCII.String(urlHash, 6, 6);
        if (!hostHash.equals(candidate)) {
            continue;
        }
        try {
            matches.putUnique(urlHash);
        } catch (SpaceExceededException e) {
            // no room left in the collecting set: log and return the partial result
            Log.logException(e);
            break;
        }
    }
    return matches.iterator();
}
/**
* discover all urls that start with a given url stub
* @param stub
@ -223,36 +202,27 @@ public class Segment {
*/
public Iterator<DigestURI> urlSelector(MultiProtocolURI stub) {
final String host = stub.getHost();
final Iterator<byte[]> bi = hostSelector(host);
String hh = DigestURI.hosthash(host);
final BlockingQueue<String> hostQueue = this.fulltext.getSolr().concurrentIDs(YaCySchema.host_id_s + ":" + hh, 0, Integer.MAX_VALUE, 10000);
final String urlstub = stub.toNormalform(false, false);
// get all urls from the specific domain
final Iterator<DigestURI> urls = new Iterator<DigestURI>() {
@Override
public boolean hasNext() {
return bi.hasNext();
}
@Override
public DigestURI next() {
URIMetadata umr = Segment.this.fulltext.getMetadata(bi.next());
return umr.url();
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
// now filter the stub from the iterated urls
return new LookAheadIterator<DigestURI>() {
@Override
protected DigestURI next0() {
DigestURI u;
while (urls.hasNext()) {
u = urls.next();
if (u.toNormalform(false, false).startsWith(urlstub)) return u;
while (true) {
String id;
try {
id = hostQueue.take();
} catch (InterruptedException e) {
Log.logException(e);
return null;
}
if (id == null || id == AbstractSolrConnector.POISON_ID) return null;
DigestURI u = Segment.this.fulltext.getMetadata(ASCII.getBytes(id)).url();
if (u.toNormalform(true, false).startsWith(urlstub)) return u;
}
return null;
}
};
}

Loading…
Cancel
Save