changed strategy to test existence of documents in Solr: using the update time.

The reason for that is a better caching for the crawler double-check, which needs the update time for crawler steering.
pull/1/head
Michael Peter Christen 11 years ago
parent 790f103f32
commit 69391e5d9e
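The heart of the commit is an API swap: the boolean existsById/exists family is replaced by getLoadTime, which returns the document's load_date_dt as milliseconds since epoch, or -1 if no such document exists. A minimal sketch of the calling pattern before and after (maxAge is a hypothetical recrawl threshold, not part of this commit):

// before: existence only
if (!sb.index.exists(ASCII.String(url.hash()))) {
    // not indexed yet: stack the URL for crawling
}

// after: one lookup answers both existence and freshness
long loadTime = sb.index.getLoadTime(ASCII.String(url.hash()));
if (loadTime < 0) {
    // not indexed yet: stack the URL for crawling
} else if (System.currentTimeMillis() - loadTime > maxAge) {
    // indexed but stale: candidate for re-crawl
}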

@@ -70,7 +70,7 @@ public class HostBrowser {
LINK, INDEX, EXCLUDED, FAILED, RELOAD;
}
@SuppressWarnings({ "deprecation", "unchecked" })
@SuppressWarnings({ "unchecked" })
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
// return variable that accumulates replacements
final Switchboard sb = (Switchboard) env;
@@ -125,7 +125,7 @@ public class HostBrowser {
String load = post.get("load", "");
boolean wait = false;
if (loadRight && autoload && path.length() != 0 && pathURI != null && load.length() == 0 && !sb.index.exists(ASCII.String(pathURI.hash()))) {
if (loadRight && autoload && path.length() != 0 && pathURI != null && load.length() == 0 && sb.index.getLoadTime(ASCII.String(pathURI.hash())) < 0) {
// in case that the url does not exist and loading is wanted turn this request into a loading request
load = path;
wait = true;
@@ -144,7 +144,7 @@ public class HostBrowser {
));
prop.putHTML("result", reasonString == null ? ("added url to indexer: " + load) : ("not indexed url '" + load + "': " + reasonString));
if (wait) for (int i = 0; i < 30; i++) {
if (sb.index.exists(ASCII.String(url.hash()))) break;
if (sb.index.getLoadTime(ASCII.String(url.hash())) >= 0) break;
try {Thread.sleep(100);} catch (final InterruptedException e) {}
}
} catch (final MalformedURLException e) {

@@ -73,7 +73,6 @@ public class IndexControlRWIs_p {
private final static String errmsg = "not possible to compute word from hash";
@SuppressWarnings("deprecation")
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {
// return variable that accumulates replacements
final Switchboard sb = (Switchboard) env;
@@ -281,7 +280,7 @@ public class IndexControlRWIs_p {
Reference iEntry;
while (urlIter.hasNext()) {
iEntry = urlIter.next();
if (!segment.fulltext().exists(ASCII.String(iEntry.urlhash()))) {
if (segment.fulltext().getLoadTime(ASCII.String(iEntry.urlhash())) < 0) {
try {
unknownURLEntries.put(iEntry.urlhash());
} catch (final SpaceExceededException e) {

@@ -292,12 +292,12 @@ public class Load_RSS_p {
ConcurrentLog.logException(e);
}
}
Map<String, HarvestProcess> existingurls = sb.urlExists(messages.keySet());
loop: for (final Map.Entry<String, RSSMessage> entry: messages.entrySet()) {
try {
final RSSMessage message = entry.getValue();
final DigestURL messageurl = new DigestURL(message.getLink());
if (existingurls.get(ASCII.String(messageurl.hash())) != null) continue loop;
HarvestProcess harvestProcess = sb.urlExists(ASCII.String(messageurl.hash()));
if (harvestProcess != null) continue loop;
list.add(messageurl);
RSSLoader.indexTriggered.insertIfAbsent(messageurl.hash(), new Date());
} catch (final IOException e) {
@@ -344,7 +344,6 @@ public class Load_RSS_p {
continue;
}
}
Map<String, HarvestProcess> ids = sb.urlExists(urls.keySet());
int i = 0;
for (final Hit item: feed) {
@@ -353,7 +352,8 @@
author = item.getAuthor();
if (author == null) author = item.getCopyright();
pubDate = item.getPubDate();
prop.put("showitems_item_" + i + "_state", ids.get(ASCII.String(messageurl.hash())) != null ? 2 : RSSLoader.indexTriggered.containsKey(messageurl.hash()) ? 1 : 0);
HarvestProcess harvestProcess = sb.urlExists(ASCII.String(messageurl.hash()));
prop.put("showitems_item_" + i + "_state", harvestProcess != null ? 2 : RSSLoader.indexTriggered.containsKey(messageurl.hash()) ? 1 : 0);
prop.put("showitems_item_" + i + "_state_count", i);
prop.putHTML("showitems_item_" + i + "_state_guid", item.getGuid());
prop.putHTML("showitems_item_" + i + "_author", author == null ? "" : author);

@@ -232,10 +232,9 @@ public final class transferRWI {
testids.add(ASCII.String(urlHash));
received++;
}
Set<String> existing = sb.index.fulltext().exists(testids);
for (String id: testids) {
try {
if (existing.contains(id)) {
if (sb.index.fulltext().getLoadTime(id) >= 0) {
knownURL.put(ASCII.getBytes(id));
} else {
unknownURL.put(ASCII.getBytes(id));

@@ -30,7 +30,6 @@ import java.io.IOException;
import java.text.ParseException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.encoding.ASCII;
@@ -144,10 +143,9 @@ public final class transferURL {
lEm.put(ASCII.String(lEntry.hash()), lEntry);
}
Set<String> doubles = sb.index.exists(lEm.keySet());
doublecheck = doubles.size();
doublecheck = 0;
for (String id : lEm.keySet()) {
if (!doubles.contains(id)) {
if (sb.index.getLoadTime(id) < 0) {
lEntry = lEm.get(id);
// write entry to database
@@ -160,6 +158,8 @@ public final class transferURL {
} catch (final IOException e) {
ConcurrentLog.logException(e);
}
} else {
doublecheck++;
}
}

@@ -21,6 +21,7 @@
package net.yacy.cora.federate.solr.connector;
import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@@ -45,11 +46,20 @@ import org.apache.solr.client.solrj.response.FacetField.Count;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.ModifiableSolrParams;
public abstract class AbstractSolrConnector implements SolrConnector {
protected static Set<String> SOLR_ID_FIELDS = new HashSet<String>();
protected static Set<String> SOLR_ID_and_LOAD_DATE_FIELDS = new HashSet<String>();
static {
SOLR_ID_FIELDS.add(CollectionSchema.id.getSolrFieldName());
SOLR_ID_and_LOAD_DATE_FIELDS.add(CollectionSchema.id.getSolrFieldName());
SOLR_ID_and_LOAD_DATE_FIELDS.add(CollectionSchema.load_date_dt.getSolrFieldName());
}
public final static SolrDocument POISON_DOCUMENT = new SolrDocument();
public final static String POISON_ID = "POISON_ID";
public final static String CATCHALL_TERM = "*:*";
@@ -72,6 +82,42 @@ public abstract class AbstractSolrConnector implements SolrConnector {
}
protected final static int pagesize = 100;
protected static long getLoadDate(final Object doc) {
Object d = null;
if (doc != null) {
if (doc instanceof SolrInputDocument) d = ((SolrInputDocument) doc).getFieldValue(CollectionSchema.load_date_dt.getSolrFieldName());
if (doc instanceof SolrDocument) d = ((SolrDocument) doc).getFieldValue(CollectionSchema.load_date_dt.getSolrFieldName());
if (doc instanceof org.apache.lucene.document.Document) {
String ds = ((org.apache.lucene.document.Document) doc).get(CollectionSchema.load_date_dt.getSolrFieldName());
try {
d = Long.parseLong(ds);
} catch (NumberFormatException e) {
d = -1l;
}
}
}
if (d == null) return -1l;
if (d instanceof Long) return ((Long) d).longValue();
if (d instanceof Date) return ((Date) d).getTime();
return -1l;
}
/**
* check if fields contain id and load_date_dt date
* @param fields
* @return fields with added id and load_date_dt if necessary
*/
protected static String[] ensureEssentialFieldsIncluded(String[] fields) {
if (fields != null && fields.length > 0) {
Set<String> f = new HashSet<String>();
for (String s: fields) f.add(s);
f.add(CollectionSchema.id.getSolrFieldName());
f.add(CollectionSchema.load_date_dt.getSolrFieldName());
fields = f.toArray(new String[f.size()]);
}
return fields;
}
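A short usage sketch (the "title" field is illustrative): without this helper, a caller asking for a narrow field list would get documents lacking load_date_dt, so getLoadDate() above would report -1 for documents that do exist. A null or empty field list is passed through unchanged because Solr then returns all stored fields anyway.

String[] fields = ensureEssentialFieldsIncluded(new String[] {"title"});
// fields now contains "title", "id" and "load_date_dt" (order undefined, the array is rebuilt from a set)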
/**
* Get a query result from solr as a stream of documents.
* The result queue is considered as terminated if AbstractSolrConnector.POISON_DOCUMENT is returned.
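A hedged consumer sketch for this poison-pill protocol; the concurrentDocumentsByQuery call and its signature are assumed from the surrounding class, and process() is a hypothetical handler:

BlockingQueue<SolrDocument> docs = connector.concurrentDocumentsByQuery(CATCHALL_TERM, 0, Integer.MAX_VALUE, 60000, 100);
SolrDocument doc;
while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) { // take() may throw InterruptedException
    process(doc); // hypothetical handler
}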
@@ -191,62 +237,31 @@ public abstract class AbstractSolrConnector implements SolrConnector {
}
/**
* check if a given document, identified by url hash as ducument id exists
* check if a given document, identified by url hash as document id exists
* @param id the url hash and document id
* @return true if any entry in solr exists
* @return the load date if any entry in solr exists, -1 otherwise
* @throws IOException
*/
@Override
public boolean existsById(String id) throws IOException {
public long getLoadTime(String id) throws IOException {
// construct raw query
final SolrQuery params = new SolrQuery();
//params.setQuery(CollectionSchema.id.getSolrFieldName() + ":\"" + id + "\"");
params.setQuery("{!raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + id);
//params.set("defType", "raw");
params.setRows(0);
params.setStart(0);
params.setFacet(false);
params.clearSorts();
params.setFields(CollectionSchema.id.getSolrFieldName());
params.setIncludeScore(false);
// query the server
return getDocumentCountByParams(params) > 0;
}
/**
* check a set of ids for existence.
* @param ids a collection of document ids
* @return a collection of a subset of the ids which exist in the index
* @throws IOException
*/
public Set<String> existsByIds(Set<String> ids) throws IOException {
if (ids == null || ids.size() == 0) return new HashSet<String>();
// construct raw query
final SolrQuery params = new SolrQuery();
//params.setQuery(CollectionSchema.id.getSolrFieldName() + ":\"" + id + "\"");
StringBuilder sb = new StringBuilder(); // construct something like "({!raw f=id}Ij7B63g-gSHA) OR ({!raw f=id}PBcGI3g-gSHA)"
for (String id: ids) {
sb.append("({!raw f=").append(CollectionSchema.id.getSolrFieldName()).append('}').append(id).append(") OR ");
}
if (sb.length() > 0) sb.setLength(sb.length() - 4); // cut off the last 'or'
params.setQuery(sb.toString());
//params.set("defType", "raw");
params.setRows(ids.size()); // we want all lines
params.setRows(1);
params.setStart(0);
params.setFacet(false);
params.clearSorts();
params.setFields(CollectionSchema.id.getSolrFieldName());
params.setFields(CollectionSchema.id.getSolrFieldName(), CollectionSchema.load_date_dt.getSolrFieldName());
params.setIncludeScore(false);
// query the server
final SolrDocumentList docs = getDocumentListByParams(params);
// construct a new id list from that
HashSet<String> idsr = new HashSet<String>();
for (SolrDocument doc : docs) {
idsr.add((String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
}
return idsr;
final SolrDocumentList sdl = getDocumentListByParams(params);
if (sdl == null || sdl.getNumFound() <= 0) return -1;
SolrDocument doc = sdl.iterator().next();
long d = getLoadDate(doc);
return d;
}
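Two details worth noting: rows changed from 0 to 1 because a pure existence test only needed numFound, while reading load_date_dt requires fetching one stored document; and the raw query parser ({!raw f=id}) still guarantees an exact, unanalyzed match on the id field. A usage sketch, reusing the illustrative hash from the removed existsByIds comment:

long t = connector.getLoadTime("Ij7B63g-gSHA");
// t == -1 -> no document with that id
// t >= 0  -> document exists; new Date(t) is its load_date_dt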
/**

@@ -23,17 +23,15 @@ package net.yacy.cora.federate.solr.connector;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import net.yacy.cora.sorting.ReversibleScoreMap;
import net.yacy.cora.storage.ARH;
import net.yacy.cora.storage.ConcurrentARH;
import net.yacy.cora.storage.ARC;
import net.yacy.cora.storage.ConcurrentARC;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.search.schema.CollectionSchema;
@@ -68,7 +66,7 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
try {
removeIdFromUpdateQueue(id);
ConcurrentUpdateSolrConnector.this.connector.deleteById(id);
ConcurrentUpdateSolrConnector.this.idCache.delete(id);
ConcurrentUpdateSolrConnector.this.idCache.remove(id);
} catch (final IOException e) {
ConcurrentLog.logException(e);
}
@@ -90,7 +88,9 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
// accumulate a collection of documents because that is better to send at once to a remote server
Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>(getmore + 1);
docs.add(doc);
updateIdCache((String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
String id = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName());
long date = AbstractSolrConnector.getLoadDate(doc);
updateIdCache(id, date);
for (int i = 0; i < getmore; i++) {
SolrInputDocument d = ConcurrentUpdateSolrConnector.this.updateQueue.take();
if (d == POISON_DOCUMENT) {
@@ -98,7 +98,9 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
break;
}
docs.add(d);
updateIdCache((String) d.getFieldValue(CollectionSchema.id.getSolrFieldName()));
id = (String) d.getFieldValue(CollectionSchema.id.getSolrFieldName());
date = AbstractSolrConnector.getLoadDate(d);
updateIdCache(id, date);
}
//ConcurrentLog.info("ConcurrentUpdateSolrConnector", "sending " + docs.size() + " documents to solr");
try {
@@ -109,7 +111,9 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
} else {
// if there is only a single document, send this directly to solr
//ConcurrentLog.info("ConcurrentUpdateSolrConnector", "sending one document to solr");
updateIdCache((String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
String id = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName());
long date = AbstractSolrConnector.getLoadDate(doc);
updateIdCache(id, date);
try {
ConcurrentUpdateSolrConnector.this.connector.add(doc);
} catch (final OutOfMemoryError e) {
@@ -131,14 +135,14 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
}
}
private ARH<String> idCache;
private ARC<String, Long> idCache;
private BlockingQueue<SolrInputDocument> updateQueue;
private BlockingQueue<String> deleteQueue;
private Thread deletionHandler, updateHandler;
public ConcurrentUpdateSolrConnector(SolrConnector connector, int updateCapacity, int idCacheCapacity, int concurrency) {
this.connector = connector;
this.idCache = new ConcurrentARH<String>(idCacheCapacity, concurrency);
this.idCache = new ConcurrentARC<String, Long>(idCacheCapacity, concurrency); // url hash to load time
this.updateQueue = new ArrayBlockingQueue<SolrInputDocument>(updateCapacity);
this.deleteQueue = new LinkedBlockingQueue<String>();
this.deletionHandler = null;
@@ -162,7 +166,7 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
* used for debugging
*/
private static void cacheSuccessSign() {
//Log.logInfo("ConcurrentUpdate", "**** cache hit");
//ConcurrentLog.info("ConcurrentUpdate", "**** cache hit");
}
private boolean existIdFromDeleteQueue(String id) {
@@ -188,16 +192,16 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
return null;
}
private boolean existIdFromUpdateQueue(String id) {
if (this.updateQueue.size() == 0) return false;
private long existIdFromUpdateQueue(String id) {
if (this.updateQueue.size() == 0) return -1;
Iterator<SolrInputDocument> i = this.updateQueue.iterator();
while (i.hasNext()) {
SolrInputDocument doc = i.next();
if (doc == null) break;
String docID = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName());
if (docID != null && docID.equals(id)) return true;
if (docID != null && docID.equals(id)) return AbstractSolrConnector.getLoadDate(doc);
}
return false;
return -1;
}
private void removeIdFromUpdateQueue(String id) {
@@ -227,10 +231,10 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
}
}
private void updateIdCache(String id) {
private void updateIdCache(String id, long time) {
if (id == null) return;
if (MemoryControl.shortStatus()) this.idCache.clear();
this.idCache.add(id);
this.idCache.put(id, time);
}
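The switch from ARH (a set of ids) to ARC (an id-to-load-time map) is what makes the new contract cacheable: a cache hit now answers both "does it exist" and "when was it loaded". The lookup order implemented by getLoadTime below, as a sketch:

// 1. idCache.get(id)            -> cached load time, answered from memory
// 2. existIdFromDeleteQueue(id) -> pending delete means gone: return -1
// 3. existIdFromUpdateQueue(id) -> pending add: return the queued document's load_date_dt
// 4. connector.getLoadTime(id)  -> ask Solr and cache a non-negative answer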
public void ensureAliveDeletionHandler() {
@@ -312,7 +316,7 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
@Override
public void deleteById(String id) throws IOException {
removeIdFromUpdateQueue(id);
this.idCache.delete(id);
this.idCache.remove(id);
if (this.deletionHandler.isAlive()) {
try {this.deleteQueue.put(id);} catch (final InterruptedException e) {}
} else {
@@ -324,7 +328,7 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
public void deleteByIds(Collection<String> ids) throws IOException {
for (String id: ids) {
removeIdFromUpdateQueue(id);
this.idCache.delete(id);
this.idCache.remove(id);
}
if (this.deletionHandler.isAlive()) {
for (String id: ids) try {this.deleteQueue.put(id);} catch (final InterruptedException e) {}
@@ -335,8 +339,8 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
@Override
public void deleteByQuery(final String querystring) throws IOException {
new Thread() {
public void run() {
//new Thread() {
// public void run() {
ConcurrentUpdateSolrConnector.this.idCache.clear();
try {
ConcurrentUpdateSolrConnector.this.connector.deleteByQuery(querystring);
@@ -345,47 +349,30 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
ConcurrentLog.severe("ConcurrentUpdateSolrConnector", e.getMessage(), e);
}
ConcurrentUpdateSolrConnector.this.connector.commit(true);
}
}.start();
// }
//}.start();
}
@Override
public boolean existsById(String id) throws IOException {
if (this.idCache.contains(id)) {cacheSuccessSign(); return true;}
if (existIdFromDeleteQueue(id)) {cacheSuccessSign(); return false;}
if (existIdFromUpdateQueue(id)) {cacheSuccessSign(); return true;}
if (this.connector.existsById(id)) {
updateIdCache(id);
return true;
public long getLoadTime(String id) throws IOException {
Long date = this.idCache.get(id);
if (date != null) {cacheSuccessSign(); return date.longValue();}
if (existIdFromDeleteQueue(id)) {cacheSuccessSign(); return -1;}
long d = existIdFromUpdateQueue(id);
if (d >= 0) {cacheSuccessSign(); return d;}
d = this.connector.getLoadTime(id);
if (d >= 0) {
updateIdCache(id, d);
return d;
}
return false;
}
@Override
public Set<String> existsByIds(Set<String> ids) throws IOException {
HashSet<String> e = new HashSet<String>();
if (ids == null || ids.size() == 0) return e;
if (ids.size() == 1) return existsById(ids.iterator().next()) ? ids : e;
Set<String> idsC = new HashSet<String>();
for (String id: ids) {
if (this.idCache.contains(id)) {cacheSuccessSign(); e.add(id); continue;}
if (existIdFromDeleteQueue(id)) {cacheSuccessSign(); continue;}
if (existIdFromUpdateQueue(id)) {cacheSuccessSign(); e.add(id); continue;}
idsC.add(id);
}
Set<String> e1 = this.connector.existsByIds(idsC);
for (String id1: e1) {
updateIdCache(id1);
}
e.addAll(e1);
return e;
return -1;
}
@Override
public void add(SolrInputDocument solrdoc) throws IOException, SolrException {
String id = (String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName());
removeIdFromDeleteQueue(id);
updateIdCache(id);
updateIdCache(id, AbstractSolrConnector.getLoadDate(solrdoc));
if (this.updateHandler.isAlive()) {
try {this.updateQueue.put(solrdoc);} catch (final InterruptedException e) {}
} else {
@@ -398,7 +385,7 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
for (SolrInputDocument doc: solrdocs) {
String id = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName());
removeIdFromDeleteQueue(id);
updateIdCache(id);
updateIdCache(id, AbstractSolrConnector.getLoadDate(doc));
}
if (this.updateHandler.isAlive()) {
for (SolrInputDocument doc: solrdocs) try {this.updateQueue.put(doc);} catch (final InterruptedException e) {}
@@ -406,14 +393,14 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
this.connector.add(solrdocs);
}
}
@Override
public SolrDocument getDocumentById(String id, String... fields) throws IOException {
public SolrDocument getDocumentById(final String id, String... fields) throws IOException {
if (existIdFromDeleteQueue(id)) return null;
SolrInputDocument idoc = getFromUpdateQueue(id);
if (idoc != null) {cacheSuccessSign(); return ClientUtils.toSolrDocument(idoc);}
SolrDocument doc = this.connector.getDocumentById(id, fields);
if (doc != null) updateIdCache(id);
SolrDocument doc = this.connector.getDocumentById(id, AbstractSolrConnector.ensureEssentialFieldsIncluded(fields));
if (doc != null) updateIdCache(id, AbstractSolrConnector.getLoadDate(doc));
return doc;
}
@@ -436,7 +423,16 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
@Override
public SolrDocumentList getDocumentListByQuery(String querystring, int offset, int count, String... fields) throws IOException, SolrException {
return this.connector.getDocumentListByQuery(querystring, offset, count, fields);
SolrDocumentList sdl = this.connector.getDocumentListByQuery(querystring, offset, count, AbstractSolrConnector.ensureEssentialFieldsIncluded(fields));
/*
Iterator<SolrDocument> i = sdl.iterator();
while (i.hasNext()) {
SolrDocument doc = i.next();
String id = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName());
if (doc != null) updateIdCache(id, AbstractSolrConnector.getLoadDate(doc));
}
*/
return sdl;
}
@Override

@@ -23,10 +23,8 @@ package net.yacy.cora.federate.solr.connector;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
@@ -73,11 +71,6 @@ import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
public class EmbeddedSolrConnector extends SolrServerConnector implements SolrConnector {
private static Set<String> SOLR_ID_FIELDS = new HashSet<String>();
static {
SOLR_ID_FIELDS.add(CollectionSchema.id.getSolrFieldName());
}
public static final String SELECT = "/select";
public static final String CONTEXT = "/solr";
@@ -385,38 +378,33 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
return numFound;
}
/**
* check if a given document, identified by url hash as document id exists
* @param id the url hash and document id
* @return the load date if any entry in solr exists, -1 otherwise
* @throws IOException
*/
@Override
public synchronized boolean existsById(String id) {
return getCountByQuery("{!raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + id) > 0;
}
@Override
public synchronized Set<String> existsByIds(Set<String> ids) {
if (ids == null || ids.size() == 0) return new HashSet<String>();
if (ids.size() == 1) return existsById(ids.iterator().next()) ? ids : new HashSet<String>();
Set<String> idsr = new TreeSet<String>();
final SolrQuery params = new SolrQuery();
params.setRows(0);
params.setStart(0);
params.setFacet(false);
params.clearSorts();
params.setFields(CollectionSchema.id.getSolrFieldName());
params.setIncludeScore(false);
SolrQueryRequest req = new SolrQueryRequestBase(this.core, params){};
req.getContext().put("path", SELECT);
req.getContext().put("webapp", CONTEXT);
public synchronized long getLoadTime(String id) {
int responseCount = 0;
SolrIndexSearcher searcher = null;
DocListSearcher docListSearcher = null;
try {
for (String id: ids) {
params.setQuery("{!raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + id);
SolrQueryResponse rsp = new SolrQueryResponse();
this.requestHandler.handleRequest(req, rsp);
DocList response = ((ResultContext) rsp.getValues().get("response")).docs;
if (response.matches() > 0) idsr.add(id);
}
} finally {
req.close();
docListSearcher = new DocListSearcher("{!raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + id, 0, 1, CollectionSchema.id.getSolrFieldName(), CollectionSchema.load_date_dt.getSolrFieldName());
responseCount = docListSearcher.response.size();
if (responseCount == 0) return -1;
searcher = docListSearcher.request.getSearcher();
DocIterator iterator = docListSearcher.response.iterator();
//for (int i = 0; i < responseCount; i++) {
Document doc = searcher.doc(iterator.nextDoc(), AbstractSolrConnector.SOLR_ID_and_LOAD_DATE_FIELDS);
if (doc == null) return -1;
return AbstractSolrConnector.getLoadDate(doc);
//}
} catch (Throwable e) {} finally {
if (searcher != null) try {searcher.close();} catch (IOException e) {}
if (docListSearcher != null) docListSearcher.close();
}
return idsr;
return -1;
}
@Override

@@ -23,9 +23,7 @@ package net.yacy.cora.federate.solr.connector;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.atomic.AtomicLong;
@@ -391,20 +389,11 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
}
@Override
public boolean existsById(String id) throws IOException {
return (this.solr0 != null && this.solr0.existsById(id)) || (this.solr1 != null && this.solr1.existsById(id));
}
@Override
public Set<String> existsByIds(Set<String> ids) throws IOException {
if (ids == null || ids.size() == 0) return new HashSet<String>();
if (ids.size() == 1) return existsById(ids.iterator().next()) ? ids : new HashSet<String>();
if (this.solr0 != null && this.solr1 == null) return this.solr0.existsByIds(ids);
if (this.solr0 == null && this.solr1 != null) return this.solr1.existsByIds(ids);
Set<String> s = new HashSet<String>();
s.addAll(this.solr0.existsByIds(ids));
s.addAll(this.solr1.existsByIds(ids));
return s;
public long getLoadTime(String id) throws IOException {
if (this.solr0 != null && this.solr1 == null) return this.solr0.getLoadTime(id);
if (this.solr0 == null && this.solr1 != null) return this.solr1.getLoadTime(id);
if (this.solr0 == null && this.solr1 == null) return -1;
return Math.max(this.solr0.getLoadTime(id), this.solr1.getLoadTime(id));
}
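When both mirrors are configured, the two lookups are merged with Math.max, so the freshest copy wins and a miss in one mirror cannot hide a hit in the other. The cases, as a sketch:

// solr0: -1, solr1: -1 -> -1          (in neither index)
// solr0: -1, solr1:  t ->  t          (in one mirror only)
// solr0: t0, solr1: t1 -> max(t0, t1) (in both; the newest load time wins)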
/*

@@ -23,7 +23,6 @@ package net.yacy.cora.federate.solr.connector;
import java.io.IOException;
import java.util.Collection;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import net.yacy.cora.sorting.ReversibleScoreMap;
@@ -105,18 +104,10 @@ public interface SolrConnector extends Iterable<String> /* Iterable of document
/**
* check if a given document, identified by url hash as document id exists
* @param id the url hash and document id
* @return true if any entry in solr exists
* @return the load time if any entry in solr exists, -1 otherwise
* @throws IOException
*/
public boolean existsById(final String id) throws IOException;
/**
* check a set of ids for existence.
* @param ids a collection of document ids
* @return a collection of a subset of the ids which exist in the index
* @throws IOException
*/
public Set<String> existsByIds(Set<String> ids) throws IOException;
public long getLoadTime(final String id) throws IOException;
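The contract, stated as a sketch for any connector c and document id h:

// c.getLoadTime(h) == -1     <=> no document with id h exists
// c.getLoadTime(h) == d >= 0 <=> the document exists and was loaded at new Date(d)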
/**
* add a solr input document

@@ -399,8 +399,8 @@ public final class CrawlStacker {
// check if the url is double registered
String urlhash = ASCII.String(url.hash());
final HarvestProcess dbocc = this.nextQueue.exists(url.hash()); // returns the name of the queue if entry exists
final Date oldDate = this.indexSegment.fulltext().getLoadDate(urlhash); // TODO: combine the exists-query with this one
if (oldDate == null) {
final long oldTime = this.indexSegment.fulltext().getLoadTime(urlhash);
if (oldTime < 0) {
if (dbocc != null) {
// do double-check
if (dbocc == HarvestProcess.ERRORS) {
@@ -410,12 +410,13 @@ public final class CrawlStacker {
return "double in: " + dbocc.toString();
}
} else {
final boolean recrawl = profile.recrawlIfOlder() > oldDate.getTime();
final boolean recrawl = profile.recrawlIfOlder() > oldTime;
if (recrawl) {
if (CrawlStacker.log.isInfo())
CrawlStacker.log.info("RE-CRAWL of URL '" + urlstring + "': this url was crawled " +
((System.currentTimeMillis() - oldDate.getTime()) / 60000 / 60 / 24) + " days ago.");
((System.currentTimeMillis() - oldTime) / 60000 / 60 / 24) + " days ago.");
} else {
Date oldDate = new Date(oldTime);
if (dbocc == null) {
return "double in: LURL-DB, oldDate = " + oldDate.toString();
}
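The recrawl rule keeps its old semantics under the new type: recrawlIfOlder() is a cutoff in milliseconds since epoch, and a document loaded before that cutoff is fetched again. A worked sketch (numbers illustrative):

// cutoff  = profile.recrawlIfOlder()  e.g. now - 7 days
// oldTime = indexSegment.fulltext().getLoadTime(urlhash)
// oldTime <  cutoff -> recrawl == true  -> stack the URL again
// oldTime >= cutoff -> recrawl == false -> rejected as "double in: LURL-DB"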

@@ -107,9 +107,9 @@ public class RSSLoader extends Thread {
ConcurrentLog.logException(e);
}
}
Map<String, HarvestProcess> existingids = sb.urlExists(urlmap.keySet());
for (final Map.Entry<String, DigestURL> e: urlmap.entrySet()) {
if (existingids.get(e.getKey()) != null) continue;
HarvestProcess harvestProcess = sb.urlExists(e.getKey());
if (harvestProcess != null) continue;
list.add(e.getValue());
indexTriggered.insertIfAbsent(ASCII.getBytes(e.getKey()), new Date());
loadCount++;

@@ -82,7 +82,6 @@ public class SitemapImporter extends Thread {
// check if the url is known and needs to be recrawled
Date lastMod = entry.lastmod(null);
if (lastMod != null) {
@SuppressWarnings("deprecation")
final HarvestProcess dbocc = this.sb.urlExists(ASCII.String(nexturlhash));
if (dbocc != null && dbocc == HarvestProcess.LOADED) {
// the url was already loaded. we need to check the date

@@ -174,11 +174,10 @@ public class Transmission {
}
testids.add(ASCII.String(e.urlhash()));
}
Set<String> existingids = Transmission.this.segment.fulltext().exists(testids);
i = c.entries();
while (i.hasNext()) {
final WordReference e = i.next();
if (existingids.contains(ASCII.String(e.urlhash()))) {
if (Transmission.this.segment.fulltext().getLoadTime(ASCII.String(e.urlhash())) >= 0) {
this.references.put(e.urlhash());
} else {
notFoundx.add(e.urlhash());

@@ -1596,26 +1596,11 @@ public final class Switchboard extends serverSwitch {
* @param hash
* @return if it exists, the name of the database is returned; if it does not exist, null is returned
*/
@Deprecated
public HarvestProcess urlExists(final String hash) {
if (this.index.exists(hash)) return HarvestProcess.LOADED;
if (this.index.getLoadTime(hash) >= 0) return HarvestProcess.LOADED;
return this.crawlQueues.exists(ASCII.getBytes(hash));
}
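The simplified decision table of this method, as a sketch:

// index.getLoadTime(hash) >= 0          -> HarvestProcess.LOADED
// else crawlQueues.exists(hash) != null -> the HarvestProcess of that queue
// else                                  -> null (unknown everywhere)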
/**
* tests if hashes occur in any database.
* @param ids a collection of url hashes
* @return a map from the hash id to: if it exists, the name of the database, otherwise null
*/
public Map<String, HarvestProcess> urlExists(final Set<String> ids) {
Set<String> e = this.index.exists(ids);
Map<String, HarvestProcess> m = new HashMap<String, HarvestProcess>();
for (String id: ids) {
m.put(id, e.contains(id) ? HarvestProcess.LOADED : this.crawlQueues.exists(ASCII.getBytes(id)));
}
return m;
}
public void urlRemove(final Segment segment, final byte[] hash) {
segment.fulltext().remove(hash);
ResultURLs.remove(ASCII.String(hash));
@@ -2990,8 +2975,7 @@ public final class Switchboard extends serverSwitch {
// stacking may fail because of double occurrences of that url. Therefore
// we must wait here until the url has actually disappeared
int t = 100;
Set<String> ids = new HashSet<String>(1); ids.add(ASCII.String(urlhash));
while (t-- > 0 && this.index.exists(ids).size() > 0) {
while (t-- > 0 && this.index.getLoadTime(ASCII.String(urlhash)) >= 0) {
try {Thread.sleep(100);} catch (final InterruptedException e) {}
ConcurrentLog.fine("Switchboard", "STACKURL: waiting for deletion, t=" + t);
//if (t == 20) this.index.fulltext().commit(true);
@@ -3106,11 +3090,10 @@ public final class Switchboard extends serverSwitch {
if (searchEvent != null) {
for (String id: urlmap.keySet()) searchEvent.addHeuristic(ASCII.getBytes(id), heuristicName, true);
}
final Set<String> existing = doublecheck ? this.index.exists(urlmap.keySet()) : null;
final List<Request> requests = new ArrayList<Request>();
for (Map.Entry<String, DigestURL> e: urlmap.entrySet()) {
final String urlName = e.getValue().toNormalform(true);
if (doublecheck && existing.contains(e.getKey())) {
if (doublecheck && this.index.getLoadTime(e.getKey()) >= 0) {
this.log.info("addToIndex: double " + urlName);
continue;
}
@@ -3183,9 +3166,8 @@ public final class Switchboard extends serverSwitch {
public void addToCrawler(final Collection<DigestURL> urls, final boolean asglobal) {
Map<String, DigestURL> urlmap = new HashMap<String, DigestURL>();
for (DigestURL url: urls) urlmap.put(ASCII.String(url.hash()), url);
Set<String> existingids = this.index.exists(urlmap.keySet());
for (Map.Entry<String, DigestURL> e: urlmap.entrySet()) {
if (existingids.contains(e.getKey())) continue; // double
if (this.index.getLoadTime(e.getKey()) >= 0) continue; // double
DigestURL url = e.getValue();
final Request request = this.loader.request(url, true, true);
final CrawlProfile profile = this.crawler.get(ASCII.getBytes(request.profileHandle()));

@@ -32,7 +32,6 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -273,7 +272,7 @@ public final class Fulltext {
getDefaultConnector().commit(softCommit);
if (this.writeWebgraph) getWebgraphConnector().commit(softCommit);
}
/*
public Date getLoadDate(final String urlHash) {
if (urlHash == null) return null;
try {
@@ -288,7 +287,7 @@ public final class Fulltext {
return null;
}
}
*/
public DigestURL getURL(final byte[] urlHash) {
if (urlHash == null || this.getDefaultConnector() == null) return null;
@@ -526,36 +525,19 @@ public final class Fulltext {
return false;
}
@Deprecated
public boolean exists(final String urlHash) {
if (urlHash == null) return false;
try {
if (this.getDefaultConnector().existsById(urlHash)) return true;
} catch (final Throwable e) {
ConcurrentLog.logException(e);
}
return false;
}
/**
* Multiple-test for existing url hashes in the search index.
* All given ids are tested and a subset of the given ids are returned.
* @param ids
* @return a set of ids which exist in the database
* get the load time of a resource.
* @param urlHash
* @return the time in milliseconds since epoch for the load time or -1 if the document does not exist
*/
public Set<String> exists(Set<String> ids) {
HashSet<String> e = new HashSet<String>();
if (ids == null || ids.size() == 0) return e;
if (ids.size() == 1) return exists(ids.iterator().next()) ? ids : e;
Set<String> idsC = new HashSet<String>();
idsC.addAll(ids);
public long getLoadTime(final String urlHash) {
if (urlHash == null) return -1l;
try {
Set<String> e1 = this.getDefaultConnector().existsByIds(idsC);
e.addAll(e1);
} catch (final Throwable ee) {
ConcurrentLog.logException(ee);
return this.getDefaultConnector().getLoadTime(urlHash);
} catch (final Throwable e) {
ConcurrentLog.logException(e);
}
return e;
return -1l;
}
public String failReason(final String urlHash) throws IOException {

@@ -467,19 +467,13 @@ public class Segment {
}
}
@Deprecated
public boolean exists(final String urlhash) {
return this.fulltext.exists(urlhash);
}
/**
* Multiple-test for existing url hashes in the search index.
* All given ids are tested and a subset of the given ids are returned.
* @param ids
* @return a set of ids which exist in the database
* get the load time of a resource.
* @param urlHash
* @return the time in milliseconds since epoch for the load time or -1 if the document does not exist
*/
public Set<String> exists(final Set<String> ids) {
return this.fulltext.exists(ids);
public long getLoadTime(final String urlhash) {
return this.fulltext.getLoadTime(urlhash);
}
/**
