fixed generation of ontologies from index enumerations

pull/1/head
orbiter 13 years ago
parent 7cd302de3e
commit 29171e2f6c

@ -38,7 +38,7 @@ import org.apache.solr.common.SolrException;
public abstract class AbstractSolrConnector implements SolrConnector { public abstract class AbstractSolrConnector implements SolrConnector {
public final SolrDocument POISON_DOCUMENT = new SolrDocument(); public final SolrDocument POISON_DOCUMENT = new SolrDocument();
public final String POISON_ID = "POISON_ID"; public final static String POISON_ID = "POISON_ID";
public final static SolrQuery catchallQuery = new SolrQuery(); public final static SolrQuery catchallQuery = new SolrQuery();
static { static {
catchallQuery.setQuery("*:*"); catchallQuery.setQuery("*:*");
@ -102,13 +102,14 @@ public abstract class AbstractSolrConnector implements SolrConnector {
try {queue.put((String) d.getFieldValue(YaCySchema.id.name()));} catch (InterruptedException e) {break;} try {queue.put((String) d.getFieldValue(YaCySchema.id.name()));} catch (InterruptedException e) {break;}
} }
if (sdl.size() < pagesize) break; if (sdl.size() < pagesize) break;
o += pagesize;
} catch (SolrException e) { } catch (SolrException e) {
break; break;
} catch (IOException e) { } catch (IOException e) {
break; break;
} }
} }
try {queue.put(AbstractSolrConnector.this.POISON_ID);} catch (InterruptedException e1) {} try {queue.put(AbstractSolrConnector.POISON_ID);} catch (InterruptedException e1) {}
} }
}; };
t.start(); t.start();
@ -123,7 +124,7 @@ public abstract class AbstractSolrConnector implements SolrConnector {
protected String next0() { protected String next0() {
try { try {
String s = queue.poll(60000, TimeUnit.MILLISECONDS); String s = queue.poll(60000, TimeUnit.MILLISECONDS);
if (s == AbstractSolrConnector.this.POISON_ID) return null; if (s == AbstractSolrConnector.POISON_ID) return null;
return s; return s;
} catch (InterruptedException e) { } catch (InterruptedException e) {
return null; return null;

@ -90,7 +90,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
@Override @Override
public long getSize() { public long getSize() {
try { try {
final QueryResponse rsp = this.server.query(AbstractSolrConnector.catchallQuery); final QueryResponse rsp = query(AbstractSolrConnector.catchallQuery);
if (rsp == null) return 0; if (rsp == null) return 0;
final SolrDocumentList docs = rsp.getResults(); final SolrDocumentList docs = rsp.getResults();
if (docs == null) return 0; if (docs == null) return 0;
@ -232,7 +232,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
// query the server // query the server
try { try {
final QueryResponse rsp = this.server.query(query); final QueryResponse rsp = query(query);
final SolrDocumentList docs = rsp.getResults(); final SolrDocumentList docs = rsp.getResults();
if (docs.isEmpty()) return null; if (docs.isEmpty()) return null;
return docs.get(0); return docs.get(0);

@ -33,12 +33,14 @@ import java.util.Iterator;
import java.util.Map; import java.util.Map;
import java.util.Properties; import java.util.Properties;
import java.util.Set; import java.util.Set;
import java.util.concurrent.BlockingQueue;
import net.yacy.cora.document.ASCII; import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8; import net.yacy.cora.document.UTF8;
import net.yacy.cora.order.ByteOrder; import net.yacy.cora.order.ByteOrder;
import net.yacy.cora.protocol.ResponseHeader; import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.services.federated.solr.AbstractSolrConnector;
import net.yacy.cora.services.federated.yacy.CacheStrategy; import net.yacy.cora.services.federated.yacy.CacheStrategy;
import net.yacy.cora.storage.HandleSet; import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.SpaceExceededException; import net.yacy.cora.util.SpaceExceededException;
@ -54,7 +56,6 @@ import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReference; import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceFactory; import net.yacy.kelondro.data.word.WordReferenceFactory;
import net.yacy.kelondro.data.word.WordReferenceRow; import net.yacy.kelondro.data.word.WordReferenceRow;
import net.yacy.kelondro.index.RowHandleSet;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.Bitfield; import net.yacy.kelondro.order.Bitfield;
@ -194,28 +195,6 @@ public class Segment {
return this.fulltext.exists(urlhash); return this.fulltext.exists(urlhash);
} }
/**
* discover all urls that belong to a specific host
* and return an iterator for the url hashes of those urls
* @param host
* @return an iterator for all url hashes that belong to a specific host
*/
private Iterator<byte[]> hostSelector(String host) {
String hh = DigestURI.hosthash(host);
final HandleSet ref = new RowHandleSet(12, Base64Order.enhancedCoder, 100);
for (byte[] b: this.fulltext) {
if (hh.equals(ASCII.String(b, 6, 6))) {
try {
ref.putUnique(b);
} catch (SpaceExceededException e) {
Log.logException(e);
break;
}
}
}
return ref.iterator();
}
/** /**
* discover all urls that start with a given url stub * discover all urls that start with a given url stub
* @param stub * @param stub
@ -223,37 +202,28 @@ public class Segment {
*/ */
public Iterator<DigestURI> urlSelector(MultiProtocolURI stub) { public Iterator<DigestURI> urlSelector(MultiProtocolURI stub) {
final String host = stub.getHost(); final String host = stub.getHost();
final Iterator<byte[]> bi = hostSelector(host); String hh = DigestURI.hosthash(host);
final String urlstub = stub.toNormalform(false, false); final BlockingQueue<String> hostQueue = this.fulltext.getSolr().concurrentIDs(YaCySchema.host_id_s + ":" + hh, 0, Integer.MAX_VALUE, 10000);
// get all urls from the specific domain final String urlstub = stub.toNormalform(false, false);
final Iterator<DigestURI> urls = new Iterator<DigestURI>() {
@Override
public boolean hasNext() {
return bi.hasNext();
}
@Override
public DigestURI next() {
URIMetadata umr = Segment.this.fulltext.getMetadata(bi.next());
return umr.url();
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
// now filter the stub from the iterated urls // now filter the stub from the iterated urls
return new LookAheadIterator<DigestURI>() { return new LookAheadIterator<DigestURI>() {
@Override @Override
protected DigestURI next0() { protected DigestURI next0() {
DigestURI u; while (true) {
while (urls.hasNext()) { String id;
u = urls.next(); try {
if (u.toNormalform(false, false).startsWith(urlstub)) return u; id = hostQueue.take();
} } catch (InterruptedException e) {
Log.logException(e);
return null; return null;
} }
if (id == null || id == AbstractSolrConnector.POISON_ID) return null;
DigestURI u = Segment.this.fulltext.getMetadata(ASCII.getBytes(id)).url();
if (u.toNormalform(true, false).startsWith(urlstub)) return u;
}
}
}; };
} }

Loading…
Cancel
Save