From 1437c45383fdf97c12875b93d7d2c62102398e5f Mon Sep 17 00:00:00 2001
From: reger <reger18@arcor.de>
Date: Thu, 7 Nov 2013 21:30:17 +0100
Subject: [PATCH] merge rc1/master

---
 defaults/solr/solrconfig.xml                  |  39 +-
 defaults/yacy.init                            |   5 -
 htroot/ConfigHTCache_p.html                   |   2 +-
 htroot/ConfigHTCache_p.java                   |   4 +-
 htroot/ContentAnalysis_p.java                 |   2 +-
 htroot/HostBrowser.java                       |   5 +-
 htroot/IndexControlURLs_p.html                |   3 +
 htroot/IndexControlURLs_p.java                |   3 +-
 htroot/RankingSolr_p.java                     |   2 +-
 htroot/yacysearch.java                        |   2 +-
 .../cora/document/id/MultiProtocolURL.java    |   4 +-
 .../solr/connector/AbstractSolrConnector.java |   3 +-
 .../solr/connector/CachedSolrConnector.java   |  10 +-
 .../ConcurrentUpdateSolrConnector.java        |  11 +-
 .../solr/connector/EmbeddedSolrConnector.java |  26 +-
 .../solr/connector/MirrorSolrConnector.java   |  10 +-
 .../solr/connector/RemoteSolrConnector.java   |   6 +
 .../solr/connector/SolrConnector.java         |   9 +-
 .../solr/connector/SolrServerConnector.java   |   2 +-
 .../solr/instance/InstanceMirror.java         |   5 +-
 .../responsewriter/HTMLResponseWriter.java    | 388 +++++++++---------
 source/net/yacy/crawler/CrawlStacker.java     |  10 +-
 source/net/yacy/crawler/data/Cache.java       |   8 +
 source/net/yacy/data/BookmarksDB.java         | 150 +++----
 .../document/parser/html/CharacterCoding.java |   8 +-
 .../document/parser/html/ContentScraper.java  |   8 +-
 .../net/yacy/document/parser/pdfParser.java   |  73 ++--
 .../net/yacy/http/CrashProtectionHandler.java |  14 +-
 source/net/yacy/http/ProxyHandler.java        |   2 +-
 source/net/yacy/http/SSIHandler.java          |   1 -
 source/net/yacy/http/TemplateHandler.java     |   5 -
 source/net/yacy/http/YaCyHttpServer.java      |  18 +-
 source/net/yacy/peers/Transmission.java       |   3 +-
 source/net/yacy/search/ResourceObserver.java  |   2 +-
 source/net/yacy/search/Switchboard.java       |  16 +-
 .../net/yacy/search/SwitchboardConstants.java |   1 -
 source/net/yacy/search/index/Fulltext.java    |  61 +--
 source/net/yacy/search/index/Segment.java     |   7 +-
 source/net/yacy/search/query/QueryGoal.java   |   3 +-
 .../schema/CollectionConfiguration.java       |  65 ++-
 40 files changed, 524 insertions(+), 472 deletions(-)
diff --git a/defaults/solr/solrconfig.xml b/defaults/solr/solrconfig.xml
index 1234dd0d3..d8044f969 100644
--- a/defaults/solr/solrconfig.xml
+++ b/defaults/solr/solrconfig.xml
@@ -461,19 +461,21 @@
          and old cache.
          -->
         <filterCache class="solr.FastLRUCache"
-        size="512"
-        initialSize="512"
-        autowarmCount="0"/>
+        size="64"
+        initialSize="64"
+        autowarmCount="4"
+        cleanupThread="true"/>
         
         <!-- Query Result Cache
          
          Caches results of searches - ordered lists of document ids
          (DocList) based on a query, a sort, and the range of documents requested.
          -->
-        <queryResultCache class="solr.LRUCache"
-        size="512"
-        initialSize="512"
-        autowarmCount="0"/>
+        <queryResultCache class="solr.FastLRUCache"
+        size="64"
+        initialSize="64"
+        autowarmCount="4"
+        cleanupThread="true"/>
         
         <!-- Document Cache
          
@@ -481,10 +483,11 @@
          document).  Since Lucene internal document ids are transient,
          this cache will not be autowarmed.
          -->
-        <documentCache class="solr.LRUCache"
-        size="512"
-        initialSize="512"
-        autowarmCount="0"/>
+        <documentCache class="solr.FastLRUCache"
+        size="64"
+        initialSize="64"
+        autowarmCount="4"
+        cleanupThread="true"/>
         
         <!-- Field Value Cache
          
@@ -494,9 +497,10 @@
          -->
         <!--
          <fieldValueCache class="solr.FastLRUCache"
-         size="512"
+         size="64"
          autowarmCount="128"
-         showItems="32" />
+         showItems="32"
+         cleanupThread="true"/>
          -->
         
         <!-- Custom Cache
@@ -510,11 +514,12 @@
          -->
         <!--
          <cache name="myUserCache"
-         class="solr.LRUCache"
-         size="4096"
-         initialSize="1024"
-         autowarmCount="1024"
+         class="solr.FastLRUCache"
+         size="64"
+         initialSize="64"
+         autowarmCount="64"
          regenerator="com.mycompany.MyRegenerator"
+         cleanupThread="true"
          />
          -->
         
diff --git a/defaults/yacy.init b/defaults/yacy.init
index d23d09bae..b910a354e 100644
--- a/defaults/yacy.init
+++ b/defaults/yacy.init
@@ -797,11 +797,6 @@ search.excludehosth=
 # the cases of nocache, iffresh and ifexist causes an index deletion
 search.verify.delete = true
 
-# images may be treated either as documents that are shown in search results or as objects
-# that are only visible in special search environments, like image search
-search.excludeintext.image = true
-crawler.load.image = true
-
 # remote search details
 remotesearch.maxcount = 10
 remotesearch.maxtime = 3000
diff --git a/htroot/ConfigHTCache_p.html b/htroot/ConfigHTCache_p.html
index ae8c851ce..890ae9845 100644
--- a/htroot/ConfigHTCache_p.html
+++ b/htroot/ConfigHTCache_p.html
@@ -19,7 +19,7 @@
           <dt><label for="HTCachePath">The path where the cache is stored</label></dt>
           <dd><input name="HTCachePath" id="HTCachePath" type="text" size="20" maxlength="300" value="#[HTCachePath]#" /></dd>
           <dt><label for="actualCacheSize">The current size of the cache</label></dt>
-          <dd><span id="actualCacheSize">#[actualCacheSize]# MB</span></dd>
+          <dd><span id="actualCacheSize">#[actualCacheSize]# MB for #[actualCacheDocCount]# files, #[docSizeAverage]# KB / file in average </span></dd>
           <dt><label for="maxCacheSize">The maximum size of the cache</label></dt>
           <dd><input name="maxCacheSize" id="maxCacheSize" type="text" size="8" maxlength="24" value="#[maxCacheSize]#" /> MB</dd>
           <dt>&nbsp;</dt>
diff --git a/htroot/ConfigHTCache_p.java b/htroot/ConfigHTCache_p.java
index 73141e65a..48d4df623 100644
--- a/htroot/ConfigHTCache_p.java
+++ b/htroot/ConfigHTCache_p.java
@@ -77,7 +77,9 @@ public class ConfigHTCache_p {
         }
 
         prop.put("HTCachePath", env.getConfig(SwitchboardConstants.HTCACHE_PATH, SwitchboardConstants.HTCACHE_PATH_DEFAULT));
-        prop.put("actualCacheSize", (Cache.getActualCacheSize() / 1024 / 1024));
+        prop.put("actualCacheSize", Cache.getActualCacheSize() / 1024 / 1024);
+        prop.put("actualCacheDocCount", Cache.getActualCacheDocCount());
+        prop.put("docSizeAverage", Cache.getActualCacheDocCount() == 0 ? 0 : Cache.getActualCacheSize() / Cache.getActualCacheDocCount() / 1024); // an empty cache has no files to average over; avoid dividing by zero
         prop.put("maxCacheSize", env.getConfigLong(SwitchboardConstants.PROXY_CACHE_SIZE, 64));
         // return rewrite properties
         return prop;
diff --git a/htroot/ContentAnalysis_p.java b/htroot/ContentAnalysis_p.java
index 2ba573ab0..eed8455e7 100644
--- a/htroot/ContentAnalysis_p.java
+++ b/htroot/ContentAnalysis_p.java
@@ -34,7 +34,7 @@ public class ContentAnalysis_p {
 
         // clean up all search events
         SearchEventCache.cleanupEvents(true);
-        sb.index.clearCache(); // every time the ranking is changed we need to remove old orderings
+        sb.index.clearCaches(); // every time the ranking is changed we need to remove old orderings
 
         if (post != null && post.containsKey("EnterDoublecheck")) {
             Ranking.setMinTokenLen(post.getInt("minTokenLen", 3));
diff --git a/htroot/HostBrowser.java b/htroot/HostBrowser.java
index 833b77f00..8d53f191f 100644
--- a/htroot/HostBrowser.java
+++ b/htroot/HostBrowser.java
@@ -553,7 +553,6 @@ public class HostBrowser {
                     }
                 } catch (final IOException e) {
                 }
-                
             }
             this.references_external = (rc_external == null || rc_external.intValue() <= 0) ? 0 : rc_external.intValue();
             this.references_exthosts = (rc_exthosts == null || rc_exthosts.intValue() <= 0) ? 0 : rc_exthosts.intValue();
@@ -562,7 +561,7 @@ public class HostBrowser {
             StringBuilder sbi = new StringBuilder();
             int c = 0;
             for (String s: references_internal_urls) {
-                sbi.append("<a href='").append("/HostBrowser.html?path=" + s).append("' target='_blank'><img src='env/grafics/i16.gif' alt='info' title='" + s + "' width='12' height='12'/></a>");
+                sbi.append("<a href='").append(s).append("' target='_blank'><img src='env/grafics/i16.gif' alt='info' title='" + s + "' width='12' height='12'/></a>");
                 c++;
                 if (c % 80 == 0) sbi.append("<br/>");
             }
@@ -570,7 +569,7 @@ public class HostBrowser {
             StringBuilder sbe = new StringBuilder();
             c = 0;
             for (String s: references_external_urls) {
-                sbe.append("<a href='").append("/HostBrowser.html?path=" + s).append("' target='_blank'><img src='env/grafics/i16.gif' alt='info' title='" + s + "' width='12' height='12'/></a>");
+                sbe.append("<a href='").append(s).append("' target='_blank'><img src='env/grafics/i16.gif' alt='info' title='" + s + "' width='12' height='12'/></a>");
                 c++;
                 if (c % 80 == 0) sbe.append("<br/>");
             }
diff --git a/htroot/IndexControlURLs_p.html b/htroot/IndexControlURLs_p.html
index b83d63754..6b96b1b17 100644
--- a/htroot/IndexControlURLs_p.html
+++ b/htroot/IndexControlURLs_p.html
@@ -193,6 +193,9 @@ function updatepage(str) {
         <dt class="TableCellDark">URL Filter</dt>
         <dd><input type="text" name="exportfilter" value=".*.*" size="20" maxlength="250" />
         </dd>
+        <dt class="TableCellDark">query</dt>
+        <dd><input type="text" name="exportquery" value="*:*" size="20" maxlength="250" />
+        </dd>
         <dt class="TableCellDark">Export Format</dt>
         <dd>Only Domain:
             <input type="radio" name="format" value="dom-text" />Plain Text List (domains only)&nbsp;&nbsp;
diff --git a/htroot/IndexControlURLs_p.java b/htroot/IndexControlURLs_p.java
index 94d46ba0e..48da0982c 100644
--- a/htroot/IndexControlURLs_p.java
+++ b/htroot/IndexControlURLs_p.java
@@ -261,7 +261,8 @@ public class IndexControlURLs_p {
             final File f = new File(s);
             f.getParentFile().mkdirs();
             final String filter = post.get("exportfilter", ".*");
-            final Fulltext.Export running = segment.fulltext().export(f, filter, format, dom);
+            final String query = post.get("exportquery", "*:*");
+            final Fulltext.Export running = segment.fulltext().export(f, filter, query, format, dom);
 
             prop.put("lurlexport_exportfile", s);
             prop.put("lurlexport_urlcount", running.count());
diff --git a/htroot/RankingSolr_p.java b/htroot/RankingSolr_p.java
index 04784f938..91e543a11 100644
--- a/htroot/RankingSolr_p.java
+++ b/htroot/RankingSolr_p.java
@@ -38,7 +38,7 @@ public class RankingSolr_p {
 
         // clean up all search events
         SearchEventCache.cleanupEvents(true);
-        sb.index.clearCache(); // every time the ranking is changed we need to remove old orderings
+        sb.index.clearCaches(); // every time the ranking is changed we need to remove old orderings
 
         int profileNr = 0;
         if (post != null) profileNr = post.getInt("profileNr", profileNr);
diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java
index 284a0b15e..b79c8061b 100644
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@@ -360,7 +360,7 @@ public class yacysearch {
 
             // check available memory and clean up if necessary
             if ( !MemoryControl.request(8000000L, false) ) {
-                indexSegment.clearCache();
+                indexSegment.clearCaches();
                 SearchEventCache.cleanupEvents(false);
             }
 
diff --git a/source/net/yacy/cora/document/id/MultiProtocolURL.java b/source/net/yacy/cora/document/id/MultiProtocolURL.java
index de91810f7..c1b2000bf 100644
--- a/source/net/yacy/cora/document/id/MultiProtocolURL.java
+++ b/source/net/yacy/cora/document/id/MultiProtocolURL.java
@@ -57,6 +57,7 @@ import net.yacy.cora.protocol.TimeoutRequest;
 import net.yacy.cora.protocol.ftp.FTPClient;
 import net.yacy.cora.protocol.http.HTTPClient;
 import net.yacy.cora.util.CommonPattern;
+import net.yacy.document.parser.html.CharacterCoding;
 
 /**
  * MultiProtocolURI provides a URL object for multiple protocols like http, https, ftp, smb and file
@@ -66,7 +67,6 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
 
     public static final MultiProtocolURL POISON = new MultiProtocolURL(); // poison pill for concurrent link generators
 
-    private static final Pattern ampPattern = Pattern.compile(Pattern.quote("&amp;"));
     private static final long serialVersionUID = -1173233022912141884L;
     private static final long SMB_TIMEOUT = 5000;
 
@@ -636,7 +636,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
         } else {
             this.searchpart = this.path.substring(r + 1);
             // strip &amp;
-            Matcher matcher = ampPattern.matcher(this.searchpart);
+            Matcher matcher = CharacterCoding.ampPattern.matcher(this.searchpart);
             while (matcher.find()) {
                 this.searchpart = matcher.replaceAll("&");
                 matcher.reset(this.searchpart);
diff --git a/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java
index e2f6f31d3..65a1e2783 100644
--- a/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java
+++ b/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java
@@ -21,7 +21,6 @@
 package net.yacy.cora.federate.solr.connector;
 
 import java.io.IOException;
-import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -235,7 +234,7 @@ public abstract class AbstractSolrConnector implements SolrConnector {
      * @return a collection of a subset of the ids which exist in the index
      * @throws IOException
      */
-    public Set<String> existsByIds(Collection<String> ids) throws IOException {
+    public Set<String> existsByIds(Set<String> ids) throws IOException {
         if (ids == null || ids.size() == 0) return new HashSet<String>();
         // construct raw query
         final SolrQuery params = new SolrQuery();
diff --git a/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java
index c96fe2d33..eaf93603c 100644
--- a/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java
+++ b/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java
@@ -61,7 +61,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
         this.missCache = new ConcurrentARC<String, Object>(missCacheMax, partitions);
     }
 
-    public void clearCache() {
+    public void clearCaches() {
         this.hitCache.clear();
         this.missCache.clear();
         this.documentCache.clear();
@@ -70,9 +70,9 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
 
     @Override
     public synchronized void close() {
+        this.clearCaches();
         if (this.solr != null) this.solr.close();
         this.solr = null;
-        this.clearCache();
     }
 
     /**
@@ -81,7 +81,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
      */
     @Override
     public void clear() throws IOException {
-        this.clearCache();
+        this.clearCaches();
         if (this.solr != null) this.solr.clear();
     }
 
@@ -119,7 +119,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
 
     @Override
     public void deleteByQuery(final String querystring) throws IOException {
-        this.clearCache();
+        this.clearCaches();
         this.solr.deleteByQuery(querystring);
     }
 
@@ -261,7 +261,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
     }
     
     private void addToCache(SolrDocumentList list, boolean doccache) {
-        if (MemoryControl.shortStatus()) clearCache();
+        if (MemoryControl.shortStatus()) clearCaches();
         for (final SolrDocument solrdoc: list) {
             addToCache(solrdoc, doccache);
         }
diff --git a/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java
index 8eff5f315..ddbf550ec 100644
--- a/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java
+++ b/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java
@@ -118,6 +118,12 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
         ensureAliveUpdateHandler();
     }
 
+    @Override
+    public void clearCaches() {
+        this.connector.clearCaches();
+        this.idCache.clear();
+    }
+
     /**
      * used for debugging
      */
@@ -326,10 +332,11 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
     }
 
     @Override
-    public Set<String> existsByIds(Collection<String> ids) throws IOException {
+    public Set<String> existsByIds(Set<String> ids) throws IOException {
         HashSet<String> e = new HashSet<String>();
         if (ids == null || ids.size() == 0) return e;
-        Collection<String> idsC = new HashSet<String>();
+        if (ids.size() == 1) return existsById(ids.iterator().next()) ? ids : e;
+        Set<String> idsC = new HashSet<String>();
         for (String id: ids) {
             if (this.idCache.has(ASCII.getBytes(id))) {cacheSuccessSign(); e.add(id); continue;}
             if (existIdFromDeleteQueue(id)) {cacheSuccessSign(); continue;}
diff --git a/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java
index 6348c79a3..10d36a9c9 100644
--- a/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java
+++ b/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java
@@ -22,7 +22,6 @@
 package net.yacy.cora.federate.solr.connector;
 
 import java.io.IOException;
-import java.util.Collection;
 import java.util.HashSet;
 import java.util.Set;
 import java.util.concurrent.BlockingQueue;
@@ -35,6 +34,7 @@ import net.yacy.search.schema.CollectionSchema;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.search.Query;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.response.QueryResponse;
@@ -48,10 +48,14 @@ import org.apache.solr.core.SolrCore;
 import org.apache.solr.handler.component.SearchHandler;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.request.SolrQueryRequestBase;
+import org.apache.solr.request.UnInvertedField;
 import org.apache.solr.response.ResultContext;
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.search.DocIterator;
 import org.apache.solr.search.DocList;
+import org.apache.solr.search.DocSet;
+import org.apache.solr.search.QueryResultKey;
+import org.apache.solr.search.SolrCache;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.util.RefCounted;
 
@@ -89,6 +93,22 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
         super.init(this.instance.getServer(coreName));
     }
 
+    public void clearCaches() { // for each cache type configured in the core's SolrConfig, obtains a cache object and clears it
+        SolrConfig solrConfig = this.core.getSolrConfig();
+        @SuppressWarnings("unchecked")
+        SolrCache<String, UnInvertedField> fieldValueCache = solrConfig.fieldValueCacheConfig == null ? null : solrConfig.fieldValueCacheConfig.newInstance(); // NOTE(review): newInstance() presumably builds a new cache object rather than returning the searcher's live cache - confirm that clearing it (and the three below) has any effect
+        if (fieldValueCache != null) fieldValueCache.clear();
+        @SuppressWarnings("unchecked")
+        SolrCache<Query, DocSet> filterCache= solrConfig.filterCacheConfig == null ? null : solrConfig.filterCacheConfig.newInstance();
+        if (filterCache != null) filterCache.clear();
+        @SuppressWarnings("unchecked")
+        SolrCache<QueryResultKey, DocList> queryResultCache = solrConfig.queryResultCacheConfig == null ? null : solrConfig.queryResultCacheConfig.newInstance();
+        if (queryResultCache != null) queryResultCache.clear();
+        @SuppressWarnings("unchecked")
+        SolrCache<Integer, Document> documentCache = solrConfig.documentCacheConfig == null ? null : solrConfig.documentCacheConfig.newInstance();
+        if (documentCache != null) documentCache.clear();
+    }
+    
     public SolrInstance getInstance() {
         return this.instance;
     }
@@ -224,9 +244,9 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
     }
     
     @Override
-    public Set<String> existsByIds(Collection<String> ids) {
+    public Set<String> existsByIds(Set<String> ids) {
         if (ids == null || ids.size() == 0) return new HashSet<String>();
-        if (ids.size() == 1 && ids instanceof Set) return existsById(ids.iterator().next()) ? (Set<String>) ids : new HashSet<String>();
+        if (ids.size() == 1) return existsById(ids.iterator().next()) ? ids : new HashSet<String>();
         StringBuilder sb = new StringBuilder(); // construct something like "({!raw f=id}Ij7B63g-gSHA) OR ({!raw f=id}PBcGI3g-gSHA)"
         for (String id: ids) {
             sb.append("({!raw f=").append(CollectionSchema.id.getSolrFieldName()).append('}').append(id).append(") OR ");
diff --git a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java
index 3f7a1453c..19fa604c5 100644
--- a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java
+++ b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java
@@ -53,6 +53,12 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
         this.solr0 = solr0;
         this.solr1 = solr1;
     }
+
+    @Override
+    public void clearCaches() {
+        if (this.solr0 != null) this.solr0.clearCaches();
+        if (this.solr1 != null) this.solr1.clearCaches();
+    }
     
     public boolean isConnected0() {
         return this.solr0 != null;
@@ -347,7 +353,9 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
     }
 
     @Override
-    public Set<String> existsByIds(Collection<String> ids) throws IOException {
+    public Set<String> existsByIds(Set<String> ids) throws IOException {
+        if (ids == null || ids.size() == 0) return new HashSet<String>();
+        if (ids.size() == 1) return existsById(ids.iterator().next()) ? ids : new HashSet<String>();
         if (this.solr0 != null && this.solr1 == null) return this.solr0.existsByIds(ids);
         if (this.solr0 == null && this.solr1 != null) return this.solr1.existsByIds(ids);
         Set<String> s = new HashSet<String>();
diff --git a/source/net/yacy/cora/federate/solr/connector/RemoteSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/RemoteSolrConnector.java
index 4e2a9369f..0ab5f8b31 100644
--- a/source/net/yacy/cora/federate/solr/connector/RemoteSolrConnector.java
+++ b/source/net/yacy/cora/federate/solr/connector/RemoteSolrConnector.java
@@ -71,6 +71,11 @@ public class RemoteSolrConnector extends SolrServerConnector implements SolrConn
         super.close();
     }
 
+    @Override
+    public void clearCaches() {
+        // we do not have a direct access to the caches here, thus we simply do nothing.
+    }
+
     @Override
     public QueryResponse getResponseByParams(ModifiableSolrParams params) throws IOException {
         // during the solr query we set the thread name to the query string to get more debugging info in thread dumps
@@ -134,4 +139,5 @@ public class RemoteSolrConnector extends SolrServerConnector implements SolrConn
         }
         System.exit(0);
     }
+
 }
diff --git a/source/net/yacy/cora/federate/solr/connector/SolrConnector.java b/source/net/yacy/cora/federate/solr/connector/SolrConnector.java
index e7a3dd957..f28d26f09 100644
--- a/source/net/yacy/cora/federate/solr/connector/SolrConnector.java
+++ b/source/net/yacy/cora/federate/solr/connector/SolrConnector.java
@@ -36,7 +36,12 @@ import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.ModifiableSolrParams;
 
 public interface SolrConnector extends Iterable<String> /* Iterable of document IDs */ {
-
+   
+    /**
+     * clear all caches: inside solr and outside solr within the implementations of this interface
+     */
+    public void clearCaches();
+    
     /**
      * get the size of the index
      * @return number of results if solr is queries with a catch-all pattern
@@ -106,7 +111,7 @@ public interface SolrConnector extends Iterable<String> /* Iterable of document
      * @return a collection of a subset of the ids which exist in the index
      * @throws IOException
      */
-    public Set<String> existsByIds(Collection<String> ids) throws IOException;
+    public Set<String> existsByIds(Set<String> ids) throws IOException;
     
     /**
      * check if a given document exists in solr
diff --git a/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java b/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java
index f12d43950..aec6352f0 100644
--- a/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java
+++ b/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java
@@ -64,7 +64,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
     public SolrServer getServer() {
         return this.server;
     }
-
+    
     @Override
     public void commit(final boolean softCommit) {
         synchronized (this.server) {
diff --git a/source/net/yacy/cora/federate/solr/instance/InstanceMirror.java b/source/net/yacy/cora/federate/solr/instance/InstanceMirror.java
index 6b9b7a939..1d49fd537 100644
--- a/source/net/yacy/cora/federate/solr/instance/InstanceMirror.java
+++ b/source/net/yacy/cora/federate/solr/instance/InstanceMirror.java
@@ -24,7 +24,6 @@ import java.util.Collection;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 
-import net.yacy.cora.federate.solr.connector.CachedSolrConnector;
 import net.yacy.cora.federate.solr.connector.ConcurrentUpdateSolrConnector;
 import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector;
 import net.yacy.cora.federate.solr.connector.MirrorSolrConnector;
@@ -161,9 +160,9 @@ public class InstanceMirror {
         return msc;
     }
     
-    public void clearCache() {
+    public void clearCaches() {
         for (SolrConnector csc: this.connectorCache.values()) {
-            if (csc instanceof CachedSolrConnector) ((CachedSolrConnector) csc).clearCache();
+            csc.clearCaches();
         }
         for (EmbeddedSolrConnector ssc: this.embeddedCache.values()) ssc.commit(true);
     }
diff --git a/source/net/yacy/cora/federate/solr/responsewriter/HTMLResponseWriter.java b/source/net/yacy/cora/federate/solr/responsewriter/HTMLResponseWriter.java
index 19125afda..a4d3c38be 100644
--- a/source/net/yacy/cora/federate/solr/responsewriter/HTMLResponseWriter.java
+++ b/source/net/yacy/cora/federate/solr/responsewriter/HTMLResponseWriter.java
@@ -1,195 +1,193 @@
-/**
- *  HTMLResponseWriter
- *  Copyright 2013 by Michael Peter Christen
- *  First released 09.06.2013 at http://yacy.net
- *
- *  This library is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU Lesser General Public
- *  License as published by the Free Software Foundation; either
- *  version 2.1 of the License, or (at your option) any later version.
- *
- *  This library is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  Lesser General Public License for more details.
- *
- *  You should have received a copy of the GNU Lesser General Public License
- *  along with this program in the file lgpl21.txt
- *  If not, see <http://www.gnu.org/licenses/>.
- */
-
-package net.yacy.cora.federate.solr.responsewriter;
-
-import java.io.IOException;
-import java.io.Writer;
-import java.util.Date;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.regex.Pattern;
-
-import net.yacy.cora.federate.solr.SolrType;
-import net.yacy.search.schema.CollectionSchema;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.index.IndexableField;
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.common.util.XML;
-import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.response.QueryResponseWriter;
-import org.apache.solr.response.ResultContext;
-import org.apache.solr.response.SolrQueryResponse;
-import org.apache.solr.schema.FieldType;
-import org.apache.solr.schema.IndexSchema;
-import org.apache.solr.schema.SchemaField;
-import org.apache.solr.schema.TextField;
-import org.apache.solr.search.DocIterator;
-import org.apache.solr.search.DocList;
-import org.apache.solr.search.SolrIndexSearcher;
-
-public class HTMLResponseWriter implements QueryResponseWriter {
-
-    private static final Set<String> DEFAULT_FIELD_LIST = null;
-    private static final Pattern dqp = Pattern.compile("\"");
-    
-    public HTMLResponseWriter() {
-        super();
-    }
-
-    @Override
-    public String getContentType(final SolrQueryRequest request, final SolrQueryResponse response) {
-        return "text/html";
-    }
-
-    @Override
-    public void init(@SuppressWarnings("rawtypes") NamedList n) {
-    }
-
-    @Override
-    public void write(final Writer writer, final SolrQueryRequest request, final SolrQueryResponse rsp) throws IOException {
-        NamedList<?> values = rsp.getValues();
-        assert values.get("responseHeader") != null;
-        assert values.get("response") != null;
-
-        writer.write("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n");
-        //writer.write("<!--\n");
-        //writer.write("this is a XHTML+RDFa file. It contains RDF annotations with dublin core properties\n");
-        //writer.write("you can validate it with http://validator.w3.org/\n");
-        //writer.write("-->\n");
-        writer.write("<html xmlns=\"http://www.w3.org/1999/xhtml\"\n");
-        writer.write("      xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"\n");
-        writer.write("      xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n");
-        writer.write("      xmlns:foaf=\"http://xmlns.com/foaf/0.1/\">\n");
-        writer.write("<head profile=\"http://www.w3.org/2003/g/data-view\">\n");
-        //writer.write("<link rel=\"transformation\" href=\"http://www-sop.inria.fr/acacia/soft/RDFa2RDFXML.xsl\"/>\n");
-        writer.write("<link rel=\"stylesheet\" type=\"text/css\" media=\"all\" href=\"/env/base.css\" />\n");
-        writer.write("<link rel=\"stylesheet\" type=\"text/css\" media=\"screen\" href=\"/env/style.css\" />\n");
-        NamedList<Object> paramsList = request.getOriginalParams().toNamedList();
-        paramsList.remove("wt");
-        String xmlquery = dqp.matcher("/solr/select?" + SolrParams.toSolrParams(paramsList).toString()).replaceAll("%22");
-        writer.write("<div id=\"api\"><a href=\"" + xmlquery + "\"><img src=\"../env/grafics/api.png\" width=\"60\" height=\"40\" alt=\"API\" /></a>\n");
-        writer.write("<span>This search result can also be retrieved as XML. Click the API icon to see this page as XML.</div>\n");
-      
-        DocList response = ((ResultContext) values.get("response")).docs;
-        final int sz = response.size();
-        if (sz > 0) {
-            SolrIndexSearcher searcher = request.getSearcher();
-            DocIterator iterator = response.iterator();
-            IndexSchema schema = request.getSchema();
-
-            int id = iterator.nextDoc();
-            Document doc = searcher.doc(id, DEFAULT_FIELD_LIST);
-            LinkedHashMap<String, String> tdoc = translateDoc(schema, doc);
-            
-            String title = tdoc.get(CollectionSchema.title.getSolrFieldName());
-            if (sz == 1) {
-                writer.write("<title>" + title + "</title>\n</head><body>\n");
-            } else {
-                writer.write("<title>Document List</title>\n</head><body>\n");
-            }
-            writer.write("<div id=\"api\"><a href=\"" + xmlquery + "\"><img src=\"../env/grafics/api.png\" width=\"60\" height=\"40\" alt=\"API\" /></a>\n");
-            writer.write("<span>This search result can also be retrieved as XML. Click the API icon to see this page as XML.</span></div>\n");
-
-            writeDoc(writer, tdoc, title);
-
-            while (iterator.hasNext()) {
-                id = iterator.nextDoc();
-                doc = searcher.doc(id, DEFAULT_FIELD_LIST);
-                tdoc = translateDoc(schema, doc);
-                title = tdoc.get(CollectionSchema.title.getSolrFieldName());
-                writeDoc(writer, tdoc, title);
-            }
-        } else {
-            writer.write("<title>No Document Found</title>\n</head><body>\n");
-        }
-       
-        writer.write("</body></html>\n");
-    }
-
-    private static final void writeDoc(Writer writer, LinkedHashMap<String, String> tdoc, String title) throws IOException {
-        writer.write("<form name=\"yacydoc" + title + "\" method=\"post\" action=\"#\" enctype=\"multipart/form-data\" accept-charset=\"UTF-8\">\n");
-        writer.write("<fieldset>\n");
-        writer.write("<h1 property=\"dc:Title\">" + title + "</h1>\n");
-        writer.write("<dl>\n");
-        for (Map.Entry<String, String> entry: tdoc.entrySet()) {
-            writer.write("<dt>");
-            writer.write(entry.getKey());
-            writer.write("</dt><dd>");
-            XML.escapeAttributeValue(entry.getValue(), writer);
-            writer.write("</dd>\n");
-        }
-        writer.write("</dl>\n");
-        writer.write("</fieldset>\n");
-        writer.write("</form>\n");
-    }
-    
-    static final LinkedHashMap<String, String> translateDoc(final IndexSchema schema, final Document doc) {
-        List<IndexableField> fields = doc.getFields();
-        int sz = fields.size();
-        int fidx1 = 0, fidx2 = 0;
-        LinkedHashMap<String, String> kv = new LinkedHashMap<String, String>();
-        while (fidx1 < sz) {
-            IndexableField value = fields.get(fidx1);
-            String fieldName = value.name();
-            fidx2 = fidx1 + 1;
-            while (fidx2 < sz && fieldName.equals(fields.get(fidx2).name())) {
-                fidx2++;
-            }
-            SchemaField sf = schema.getFieldOrNull(fieldName);
-            if (sf == null) sf = new SchemaField(fieldName, new TextField());
-            FieldType type = sf.getType();
-            
-            if (fidx1 + 1 == fidx2) {
-                if (sf.multiValued()) {
-                    String sv = value.stringValue();
-                    kv.put(fieldName, field2string(type, sv));
-                } else {
-                    kv.put(fieldName, field2string(type, value.stringValue()));
-                }
-            } else {
-                for (int i = fidx1; i < fidx2; i++) {
-                    String sv = fields.get(i).stringValue();
-                    kv.put(fieldName + "_" + i, field2string(type, sv));
-                }
-            }
-            
-            fidx1 = fidx2;
-        }
-        return kv;
-    }
-
-    @SuppressWarnings("deprecation")
-    private static String field2string(final FieldType type, final String value) {
-        String typeName = type.getTypeName();
-        if (typeName.equals(SolrType.bool.printName())) {
-            return "F".equals(value) ? "false" : "true";
-        } else if (typeName.equals(SolrType.date.printName())) {
-            return org.apache.solr.schema.DateField.formatExternal(new Date(Long.parseLong(value))); // this is declared deprecated in solr 4.2.1 but is still used as done here
-        }
-        return value;
-    }
-
-    // XML.escapeCharData(val, writer);
-}
+/**
+ *  HTMLResponseWriter
+ *  Copyright 2013 by Michael Peter Christen
+ *  First released 09.06.2013 at http://yacy.net
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program in the file lgpl21.txt
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.cora.federate.solr.responsewriter;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.Date;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import net.yacy.cora.federate.solr.SolrType;
+import net.yacy.search.schema.CollectionSchema;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexableField;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.XML;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.QueryResponseWriter;
+import org.apache.solr.response.ResultContext;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.schema.TextField;
+import org.apache.solr.search.DocIterator;
+import org.apache.solr.search.DocList;
+import org.apache.solr.search.SolrIndexSearcher;
+
+public class HTMLResponseWriter implements QueryResponseWriter {
+
+    private static final Set<String> DEFAULT_FIELD_LIST = null;
+    private static final Pattern dqp = Pattern.compile("\"");
+    
+    public HTMLResponseWriter() {
+        super();
+    }
+
+    @Override
+    public String getContentType(final SolrQueryRequest request, final SolrQueryResponse response) {
+        return "text/html";
+    }
+
+    @Override
+    public void init(@SuppressWarnings("rawtypes") NamedList n) {
+    }
+
+    /** Renders the documents of the query response as one XHTML page; the page title is escaped because document titles stem from indexed content. */
+    @Override
+    public void write(final Writer writer, final SolrQueryRequest request, final SolrQueryResponse rsp) throws IOException {
+        NamedList<?> values = rsp.getValues();
+        assert values.get("responseHeader") != null;
+        assert values.get("response") != null;
+
+        writer.write("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n");
+        writer.write("<html xmlns=\"http://www.w3.org/1999/xhtml\"\n");
+        writer.write("      xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"\n");
+        writer.write("      xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n");
+        writer.write("      xmlns:foaf=\"http://xmlns.com/foaf/0.1/\">\n");
+        writer.write("<head profile=\"http://www.w3.org/2003/g/data-view\">\n");
+        //writer.write("<link rel=\"transformation\" href=\"http://www-sop.inria.fr/acacia/soft/RDFa2RDFXML.xsl\"/>\n");
+        writer.write("<link rel=\"stylesheet\" type=\"text/css\" media=\"all\" href=\"/env/base.css\" />\n");
+        writer.write("<link rel=\"stylesheet\" type=\"text/css\" media=\"screen\" href=\"/env/style.css\" />\n");
+        NamedList<Object> paramsList = request.getOriginalParams().toNamedList();
+        paramsList.remove("wt");
+        String xmlquery = dqp.matcher("/solr/select?" + SolrParams.toSolrParams(paramsList).toString()).replaceAll("%22");
+
+        DocList response = ((ResultContext) values.get("response")).docs;
+        final int sz = response.size();
+        if (sz > 0) {
+            SolrIndexSearcher searcher = request.getSearcher();
+            DocIterator iterator = response.iterator();
+            IndexSchema schema = request.getSchema();
+
+            int id = iterator.nextDoc();
+            Document doc = searcher.doc(id, DEFAULT_FIELD_LIST);
+            LinkedHashMap<String, String> tdoc = translateDoc(schema, doc);
+
+            String title = tdoc.get(CollectionSchema.title.getSolrFieldName());
+            if (sz == 1) {
+                // the title is document content: escape it, and render a missing title as empty text instead of "null"
+                writer.write("<title>");
+                XML.escapeCharData(title == null ? "" : title, writer);
+                writer.write("</title>\n</head><body>\n");
+            } else {
+                writer.write("<title>Document List</title>\n</head><body>\n");
+            }
+            writer.write("<div id=\"api\"><a href=\"" + xmlquery + "\"><img src=\"../env/grafics/api.png\" width=\"60\" height=\"40\" alt=\"API\" /></a>\n");
+            writer.write("<span>This search result can also be retrieved as XML. Click the API icon to see this page as XML.</span></div>\n");
+
+            writeDoc(writer, tdoc, title);
+
+            while (iterator.hasNext()) {
+                id = iterator.nextDoc();
+                doc = searcher.doc(id, DEFAULT_FIELD_LIST);
+                tdoc = translateDoc(schema, doc);
+                title = tdoc.get(CollectionSchema.title.getSolrFieldName());
+                writeDoc(writer, tdoc, title);
+            }
+        } else {
+            writer.write("<title>No Document Found</title>\n</head><body>\n");
+        }
+
+        writer.write("</body></html>\n");
+    }
+
+    /** Writes one document as an HTML form with a definition list of its fields; title and field values are escaped as they are indexed content. */
+    private static final void writeDoc(Writer writer, LinkedHashMap<String, String> tdoc, String title) throws IOException {
+        final String safeTitle = title == null ? "" : title; // a document without a title entry yields null here
+        writer.write("<form name=\"yacydoc");
+        XML.escapeAttributeValue(safeTitle, writer);
+        writer.write("\" method=\"post\" action=\"#\" enctype=\"multipart/form-data\" accept-charset=\"UTF-8\">\n<fieldset>\n<h1 property=\"dc:Title\">");
+        XML.escapeCharData(safeTitle, writer);
+        writer.write("</h1>\n<dl>\n");
+        for (Map.Entry<String, String> entry: tdoc.entrySet()) {
+            writer.write("<dt>" + entry.getKey() + "</dt><dd>");
+            // keys are index field names produced by translateDoc; the values are document content and get escaped
+            XML.escapeAttributeValue(entry.getValue(), writer);
+            writer.write("</dd>\n");
+        }
+        writer.write("</dl>\n</fieldset>\n</form>\n");
+    }
+    
+    static final LinkedHashMap<String, String> translateDoc(final IndexSchema schema, final Document doc) { // flattens the fields of doc into an ordered name -> display string map
+        List<IndexableField> fields = doc.getFields();
+        int sz = fields.size();
+        int fidx1 = 0, fidx2 = 0;
+        LinkedHashMap<String, String> kv = new LinkedHashMap<String, String>();
+        while (fidx1 < sz) {
+            IndexableField value = fields.get(fidx1);
+            String fieldName = value.name();
+            fidx2 = fidx1 + 1; // fidx2 is advanced behind the run of consecutive fields that share fieldName
+            while (fidx2 < sz && fieldName.equals(fields.get(fidx2).name())) {
+                fidx2++;
+            }
+            SchemaField sf = schema.getFieldOrNull(fieldName);
+            if (sf == null) sf = new SchemaField(fieldName, new TextField()); // fields unknown to the schema are handled as text fields
+            FieldType type = sf.getType();
+            // a single value is stored under the plain field name; several values get the key fieldName + "_" + their index in the field list
+            if (fidx1 + 1 == fidx2) {
+                if (sf.multiValued()) {
+                    String sv = value.stringValue();
+                    kv.put(fieldName, field2string(type, sv));
+                } else {
+                    kv.put(fieldName, field2string(type, value.stringValue()));
+                }
+            } else {
+                for (int i = fidx1; i < fidx2; i++) {
+                    String sv = fields.get(i).stringValue();
+                    kv.put(fieldName + "_" + i, field2string(type, sv));
+                }
+            }
+            // continue with the first field that carries a different name
+            fidx1 = fidx2;
+        }
+        return kv;
+    }
+
+    @SuppressWarnings("deprecation")
+    private static String field2string(final FieldType type, final String value) { // converts a stored field value into its display string according to the schema type
+        String typeName = type.getTypeName();
+        if (typeName.equals(SolrType.bool.printName())) {
+            return "F".equals(value) ? "false" : "true"; // every value other than "F" is shown as "true"
+        } else if (typeName.equals(SolrType.date.printName())) { // the stored value is parsed as a long and handed to new Date(long)
+            return org.apache.solr.schema.DateField.formatExternal(new Date(Long.parseLong(value))); // this is declared deprecated in solr 4.2.1 but is still used as done here
+        }
+        return value; // all other types are shown unchanged
+    }
+
+    // XML.escapeCharData(val, writer);
+}
diff --git a/source/net/yacy/crawler/CrawlStacker.java b/source/net/yacy/crawler/CrawlStacker.java
index a3b5da25f..211fa2e50 100644
--- a/source/net/yacy/crawler/CrawlStacker.java
+++ b/source/net/yacy/crawler/CrawlStacker.java
@@ -55,6 +55,7 @@ import net.yacy.crawler.retrieval.HTTPLoader;
 import net.yacy.crawler.retrieval.Request;
 import net.yacy.crawler.retrieval.SMBLoader;
 import net.yacy.crawler.robots.RobotsTxt;
+import net.yacy.document.TextParser;
 import net.yacy.kelondro.data.citation.CitationReference;
 import net.yacy.kelondro.rwi.IndexCell;
 import net.yacy.kelondro.workflow.WorkflowProcessor;
@@ -347,17 +348,10 @@ public final class CrawlStacker {
 
         // check availability of parser and maxfilesize
         String warning = null;
-        boolean loadImages = Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.CRAWLER_LOAD_IMAGE, true);
-        if (!loadImages && Switchboard.getSwitchboard().getConfig(SwitchboardConstants.CRAWLER_LOAD_IMAGE, "").equals("true;")) {
-            // dammit semicolon
-            // TODO: remove this shit later
-            Switchboard.getSwitchboard().setConfig(SwitchboardConstants.CRAWLER_LOAD_IMAGE, true);
-            loadImages = true;
-        }
         ContentDomain contentDomain = entry.url().getContentDomainFromExt();
         if ((maxFileSize >= 0 && entry.size() > maxFileSize) ||
             contentDomain == ContentDomain.APP  ||
-            (!loadImages && contentDomain == ContentDomain.IMAGE) ||
+            (contentDomain == ContentDomain.IMAGE && TextParser.supportsExtension(entry.url()) != null) ||
             contentDomain == ContentDomain.AUDIO  ||
             contentDomain == ContentDomain.VIDEO ||
             contentDomain == ContentDomain.CTRL) {
diff --git a/source/net/yacy/crawler/data/Cache.java b/source/net/yacy/crawler/data/Cache.java
index f1d72354f..9973f08a0 100644
--- a/source/net/yacy/crawler/data/Cache.java
+++ b/source/net/yacy/crawler/data/Cache.java
@@ -182,6 +182,14 @@ public final class Cache {
     public static long getActualCacheSize() {
         return fileDBunbuffered.length();
     }
+    
+    /**
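+     * get the number of entries currently held in the cache file database
+     * @return the value of fileDBunbuffered.size() (presumably the count of cached documents)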
+     */
+    public static long getActualCacheDocCount() {
+        return fileDBunbuffered.size();
+    }
 
     /**
      * close the databases
diff --git a/source/net/yacy/data/BookmarksDB.java b/source/net/yacy/data/BookmarksDB.java
index 1c11b4b15..d9c0140a6 100644
--- a/source/net/yacy/data/BookmarksDB.java
+++ b/source/net/yacy/data/BookmarksDB.java
@@ -41,7 +41,10 @@ import net.yacy.cora.document.encoding.UTF8;
 import net.yacy.cora.document.id.DigestURL;
 import net.yacy.cora.order.NaturalOrder;
 import net.yacy.cora.util.ConcurrentLog;
+import net.yacy.cora.util.SpaceExceededException;
 import net.yacy.kelondro.blob.MapHeap;
+import net.yacy.kelondro.data.meta.URIMetadataRow;
+import net.yacy.kelondro.index.RowHandleSet;
 
 public class BookmarksDB {
 
@@ -147,11 +150,6 @@ public class BookmarksDB {
             ConcurrentLog.logException(e);
         }
     }
-    public String addBookmark(final Bookmark bookmark){
-        saveBookmark(bookmark);
-        return bookmark.getUrlHash();
-
-    }
 
     public Bookmark getBookmark(final String urlHash) throws IOException {
         try {
@@ -214,18 +212,13 @@ public class BookmarksDB {
         final TreeSet<String> set=new TreeSet<String>(new bookmarkComparator(true));
         final String tagHash=BookmarkHelper.tagHash(tagName);
         final Tag tag=getTag(tagHash);
-        Set<String> hashes=new HashSet<String>();
-        if (tag != null) {
-            hashes=getTag(tagHash).getUrlHashes();
-        }
+        RowHandleSet hashes = tag == null ? new RowHandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 10) : tag.getUrlHashes();
         if (priv) {
-            set.addAll(hashes);
+            for (byte[] hash: hashes) set.add(ASCII.String(hash));
         } else {
-        	final Iterator<String> it=hashes.iterator();
-            Bookmark bm;
-            while(it.hasNext()){
+            for (byte[] hash: hashes) {
                 try {
-                    bm = getBookmark(it.next());
+                    Bookmark bm = getBookmark(ASCII.String(hash));
                     if (bm != null && bm.getPublic()) {
                         set.add(bm.getUrlHash());
                     }
@@ -249,7 +242,7 @@ public class BookmarksDB {
      * retrieve an object of type Tag from the the tagCache, if object is not cached return loadTag(hash)
      * @param hash an object of type String, containing a tagHash
      */
-    public Tag getTag(final String hash){
+    private Tag getTag(final String hash){
         return this.tags.get(hash); //null if it does not exists
     }
 
@@ -257,7 +250,7 @@ public class BookmarksDB {
      * store a Tag in tagsTable or remove an empty tag
      * @param tag an object of type Tag to be stored/removed
      */
-    public void putTag(final Tag tag){
+    private void putTag(final Tag tag){
     	if (tag == null) return;
         if (tag.isEmpty()) {
             this.tags.remove(tag.getTagHash());
@@ -266,7 +259,7 @@ public class BookmarksDB {
         }
     }
 
-    public void removeTag(final String hash) {
+    private void removeTag(final String hash) {
         this.tags.remove(hash);
     }
 
@@ -301,7 +294,7 @@ public class BookmarksDB {
     	return set.iterator();
     }
 
-    public Iterator<Tag> getTagIterator(final String tagName, final boolean priv, final int comp) {
+    private Iterator<Tag> getTagIterator(final String tagName, final boolean priv, final int comp) {
     	final TreeSet<Tag> set=new TreeSet<Tag>((comp == SORT_SIZE) ? tagSizeComparator : tagComparator);
     	Iterator<String> it=null;
     	final Iterator<String> bit=getBookmarksIterator(tagName, priv);
@@ -347,14 +340,14 @@ public class BookmarksDB {
 
     	final Tag oldTag=getTag(BookmarkHelper.tagHash(oldName));
     	if (oldTag != null) {
-            final Set<String> urlHashes = oldTag.getUrlHashes();	// preserve urlHashes of oldTag
+            final RowHandleSet urlHashes = oldTag.getUrlHashes();	// preserve urlHashes of oldTag
             removeTag(BookmarkHelper.tagHash(oldName));							// remove oldHash from TagsDB
 
             Bookmark bookmark;
             Set<String> tagSet = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
-            for (final String urlHash : urlHashes) {									// looping through all bookmarks which were tagged with oldName
+            for (final byte[] urlHash : urlHashes) {									// looping through all bookmarks which were tagged with oldName
                 try {
-                    bookmark = getBookmark(urlHash);
+                    bookmark = getBookmark(ASCII.String(urlHash));
                     tagSet = bookmark.getTags();
                     tagSet.remove(oldName);
                     bookmark.setTags(tagSet, true);                     // might not be needed, but doesn't hurt
@@ -371,9 +364,9 @@ public class BookmarksDB {
     public void addTag(final String selectTag, final String newTag) {
 
     	Bookmark bookmark;
-    	for (final String urlHash : getTag(BookmarkHelper.tagHash(selectTag)).getUrlHashes()) {	// looping through all bookmarks which were tagged with selectTag
+    	for (final byte[] urlHash : getTag(BookmarkHelper.tagHash(selectTag)).getUrlHashes()) {	// looping through all bookmarks which were tagged with selectTag
             try {
-                bookmark = getBookmark(urlHash);
+                bookmark = getBookmark(ASCII.String(urlHash));
                 bookmark.addTag(newTag);
                 saveBookmark(bookmark);
             } catch (final IOException e) {
@@ -389,51 +382,24 @@ public class BookmarksDB {
      * Subclass of bookmarksDB, which provides the Tag object-type
      */
     public class Tag {
-        public static final String URL_HASHES = "urlHashes";
-        public static final String TAG_NAME =  "tagName";
         private final String tagHash;
-        private final Map<String, String> mem;
-        private Set<String> urlHashes;
-
-        public Tag(final String hash, final Map<String, String> map){
-            this.tagHash = hash;
-            this.mem = map;
-            if (this.mem.containsKey(URL_HASHES)) {
-                this.urlHashes = ListManager.string2set(this.mem.get(URL_HASHES));
-            } else {
-                this.urlHashes = new HashSet<String>();
-            }
-        }
+        private final String tagName;
+        private RowHandleSet urlHashes;
 
-        public Tag(final String name, final HashSet<String> entries){
+        private Tag(final String name) {
             this.tagHash = BookmarkHelper.tagHash(name);
-            this.mem = new HashMap<String, String>();
-            //mem.put(URL_HASHES, listManager.arraylist2string(entries));
-            this.urlHashes = entries;
-            this.mem.put(TAG_NAME, name);
-        }
-
-        public Tag(final String name){
-            this(name, new HashSet<String>());
-        }
-
-        public Map<String, String> getMap(){
-            this.mem.put(URL_HASHES, ListManager.collection2string(this.urlHashes));
-            return this.mem;
+            this.tagName = name;
+            this.urlHashes = new RowHandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 10);
         }
 
         /**
          * get the lowercase Tagname
          */
         public String getTagName(){
-            /*if(this.mem.containsKey(TAG_NAME)){
-                return (String) this.mem.get(TAG_NAME);
-            }
-            return "";*/
             return getFriendlyName().toLowerCase();
         }
 
-        public String getTagHash(){
+        private String getTagHash(){
             return this.tagHash;
         }
 
@@ -441,37 +407,33 @@ public class BookmarksDB {
          * @return the tag name, with all uppercase chars
          */
         public String getFriendlyName(){
-            /*if(this.mem.containsKey(TAG_FRIENDLY_NAME)){
-                return (String) this.mem.get(TAG_FRIENDLY_NAME);
-            }
-            return getTagName();*/
-            if(this.mem.containsKey(TAG_NAME)){
-                return this.mem.get(TAG_NAME);
-            }
-            return "notagname";
+            return this.tagName;
         }
 
-        public Set<String> getUrlHashes(){
+        private RowHandleSet getUrlHashes(){
             return this.urlHashes;
         }
 
-        public boolean hasPublicItems(){
+        private boolean hasPublicItems(){
             return getBookmarksIterator(getTagName(), false).hasNext();
         }
 
-        public void addUrl(final String urlHash){
-            this.urlHashes.add(urlHash);
+        private void addUrl(final String urlHash){ // registers the given url hash with this tag
+            try { this.urlHashes.put(ASCII.getBytes(urlHash)); }
+            catch (final SpaceExceededException e) {
+                ConcurrentLog.logException(e); // the hash could not be stored; report it instead of dropping the failure silently
+            }
         }
 
-        public void delete(final String urlHash){
-            this.urlHashes.remove(urlHash);
+        private void delete(final String urlHash){
+            this.urlHashes.remove(ASCII.getBytes(urlHash));
         }
 
         public int size(){
             return this.urlHashes.size();
         }
 
-        public boolean isEmpty() {
+        private boolean isEmpty() {
             return this.urlHashes.isEmpty();
         }
     }
@@ -481,27 +443,19 @@ public class BookmarksDB {
      */
     public class Bookmark {
 
-        public static final String BOOKMARK_URL = "bookmarkUrl";
+        private static final String BOOKMARK_URL = "bookmarkUrl";
         public static final String BOOKMARK_TITLE = "bookmarkTitle";
         public static final String BOOKMARK_DESCRIPTION = "bookmarkDesc";
-        public static final String BOOKMARK_TAGS = "bookmarkTags";
-        public static final String BOOKMARK_PUBLIC = "bookmarkPublic";
-        public static final String BOOKMARK_TIMESTAMP = "bookmarkTimestamp";
-        public static final String BOOKMARK_OWNER = "bookmarkOwner";
-        public static final String BOOKMARK_IS_FEED = "bookmarkIsFeed";
+        private static final String BOOKMARK_TAGS = "bookmarkTags";
+        private static final String BOOKMARK_PUBLIC = "bookmarkPublic";
+        private static final String BOOKMARK_TIMESTAMP = "bookmarkTimestamp";
+        private static final String BOOKMARK_OWNER = "bookmarkOwner";
+        private static final String BOOKMARK_IS_FEED = "bookmarkIsFeed";
         private final String urlHash;
         private Set<String> tagNames;
         private long timestamp;
         private final Map<String, String> entry;
 
-        public Bookmark(final String urlHash, final Map<String, String> map) {
-            this.entry = map;
-            this.urlHash = urlHash;
-            this.tagNames = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
-            if (map.containsKey(BOOKMARK_TAGS)) this.tagNames.addAll(ListManager.string2set(map.get(BOOKMARK_TAGS)));
-            loadTimestamp();
-        }
-
         public Bookmark(final DigestURL url) {
             this.entry = new HashMap<String, String>();
             this.urlHash = ASCII.String(url.hash());
@@ -529,11 +483,15 @@ public class BookmarksDB {
             this(new DigestURL((url.indexOf("://") < 0) ? "http://" + url : url));
         }
 
-        public Bookmark(final Map<String, String> map) throws MalformedURLException {
-            this(ASCII.String((new DigestURL(map.get(BOOKMARK_URL))).hash()), map);
+        private Bookmark(final Map<String, String> map) throws MalformedURLException {
+            this.entry = map;
+            this.urlHash = ASCII.String((new DigestURL(map.get(BOOKMARK_URL))).hash());
+            this.tagNames = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
+            if (map.containsKey(BOOKMARK_TAGS)) this.tagNames.addAll(ListManager.string2set(map.get(BOOKMARK_TAGS)));
+            loadTimestamp();
         }
 
-        Map<String, String> toMap() {
+        private Map<String, String> toMap() {
             this.entry.put(BOOKMARK_TAGS, ListManager.collection2string(this.tagNames));
             this.entry.put(BOOKMARK_TIMESTAMP, String.valueOf(this.timestamp));
             return this.entry;
@@ -688,11 +646,11 @@ public class BookmarksDB {
     /**
      * Subclass of bookmarksDB, which provides the bookmarkIterator object-type
      */
-    public class bookmarkIterator implements Iterator<Bookmark> {
+    private class bookmarkIterator implements Iterator<Bookmark> {
 
         Iterator<byte[]> bookmarkIter;
 
-        public bookmarkIterator(final boolean up) throws IOException {
+        private bookmarkIterator(final boolean up) throws IOException {
             //flushBookmarkCache(); //XXX: this will cost performance
             this.bookmarkIter = BookmarksDB.this.bookmarks.keys(up, false);
             //this.nextEntry = null;
@@ -722,14 +680,14 @@ public class BookmarksDB {
     /**
      * Comparator to sort objects of type Bookmark according to their timestamps
      */
-    public class bookmarkComparator implements Comparator<String> {
+    private class bookmarkComparator implements Comparator<String> {
 
         private final boolean newestFirst;
 
         /**
          * @param newestFirst newest first, or oldest first?
          */
-        public bookmarkComparator(final boolean newestFirst){
+        private bookmarkComparator(final boolean newestFirst){
             this.newestFirst = newestFirst;
         }
 
@@ -752,13 +710,13 @@ public class BookmarksDB {
         }
     }
 
-    public static final TagComparator tagComparator = new TagComparator();
-    public static final TagSizeComparator tagSizeComparator = new TagSizeComparator();
+    private static final TagComparator tagComparator = new TagComparator();
+    private static final TagSizeComparator tagSizeComparator = new TagSizeComparator();
 
     /**
      * Comparator to sort objects of type Tag according to their names
      */
-    public static class TagComparator implements Comparator<Tag>, Serializable {
+    private static class TagComparator implements Comparator<Tag>, Serializable {
 
     	/**
          * generated serial
@@ -772,7 +730,7 @@ public class BookmarksDB {
 
     }
 
-    public static class TagSizeComparator implements Comparator<Tag>, Serializable {
+    private static class TagSizeComparator implements Comparator<Tag>, Serializable {
 
     	/**
          * generated serial
diff --git a/source/net/yacy/document/parser/html/CharacterCoding.java b/source/net/yacy/document/parser/html/CharacterCoding.java
index 213c975b0..f93300cbd 100644
--- a/source/net/yacy/document/parser/html/CharacterCoding.java
+++ b/source/net/yacy/document/parser/html/CharacterCoding.java
@@ -26,12 +26,15 @@ package net.yacy.document.parser.html;
 
 import java.util.HashMap;
 import java.util.Map;
+import java.util.regex.Pattern;
 
 /**
  * Contains methods to convert between Unicode and XML/HTML encoding.
  */
 public final class CharacterCoding {
 
+    /** Ampersand pattern */
+    public final static Pattern ampPattern = Pattern.compile(Pattern.quote("&amp;"));
     /** Ampersand character in unicode encoding. */
     private static final char AMP_UNICODE = "\u0026".charAt(0);
     /** Ampersand character in HTML encoding. */
@@ -276,14 +279,15 @@ public final class CharacterCoding {
         }
         return sb.toString();
     }
-
+    
     /**
      * Replaces HTML-encoded characters with unicode representation.
      * @param text text with character to replace
      * @return text with replaced characters
      */
-    public static String html2unicode(final String text) {
+    public static String html2unicode(String text) {
         if (text == null) return null;
+        text = ampPattern.matcher(text).replaceAll("&"); // sometimes a double-replacement is necessary.
         int p = 0, p1, q;
         final StringBuilder sb = new StringBuilder(text.length());
         String s;
diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java
index 66931a720..285cf26a1 100644
--- a/source/net/yacy/document/parser/html/ContentScraper.java
+++ b/source/net/yacy/document/parser/html/ContentScraper.java
@@ -204,11 +204,12 @@ public class ContentScraper extends AbstractScraper implements Scraper {
     }
 
     @Override
-    public void scrapeText(final char[] newtext, final String insideTag) {
+    public void scrapeText(final char[] newtext0, final String insideTag) {
         // System.out.println("SCRAPE: " + UTF8.String(newtext));
         if (insideTag != null && ("script".equals(insideTag) || "style".equals(insideTag))) return;
         int p, pl, q, s = 0;
-
+        char[] newtext = CharacterCoding.html2unicode(new String(newtext0)).toCharArray();
+        
         // match evaluation pattern
         this.evaluationScores.match(Element.text, newtext);
 
@@ -466,7 +467,8 @@ public class ContentScraper extends AbstractScraper implements Scraper {
     public void scrapeTag1(final String tagname, final Properties tagopts, char[] text) {
         // System.out.println("ScrapeTag1: tagname=" + tagname + ", opts=" + tagopts.toString() + ", text=" + UTF8.String(text));
         if (tagname.equalsIgnoreCase("a") && text.length < 2048) {
-            final String href = tagopts.getProperty("href", EMPTY_STRING);
+            String href = tagopts.getProperty("href", EMPTY_STRING);
+            href = CharacterCoding.html2unicode(href);
             AnchorURL url;
             if ((href.length() > 0) && ((url = absolutePath(href)) != null)) {
                 final String ext = MultiProtocolURL.getFileExtension(url.getFileName());
diff --git a/source/net/yacy/document/parser/pdfParser.java b/source/net/yacy/document/parser/pdfParser.java
index 72181ca7a..d74114180 100644
--- a/source/net/yacy/document/parser/pdfParser.java
+++ b/source/net/yacy/document/parser/pdfParser.java
@@ -32,27 +32,15 @@ import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
+import java.lang.reflect.Method;
 import java.util.Date;
 
-import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.exceptions.CryptographyException;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.pdmodel.PDDocumentInformation;
 import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
 import org.apache.pdfbox.pdmodel.encryption.BadSecurityHandlerException;
 import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
-import org.apache.pdfbox.pdmodel.font.PDCIDFont;
-import org.apache.pdfbox.pdmodel.font.PDCIDFontType0Font;
-import org.apache.pdfbox.pdmodel.font.PDCIDFontType2Font;
-import org.apache.pdfbox.pdmodel.font.PDFont;
-import org.apache.pdfbox.pdmodel.font.PDMMType1Font;
-import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
-import org.apache.pdfbox.pdmodel.font.PDTrueTypeFont;
-import org.apache.pdfbox.pdmodel.font.PDType0Font;
-import org.apache.pdfbox.pdmodel.font.PDType1AfmPfbFont;
-import org.apache.pdfbox.pdmodel.font.PDType1CFont;
-import org.apache.pdfbox.pdmodel.font.PDType1Font;
-import org.apache.pdfbox.pdmodel.font.PDType3Font;
 import org.apache.pdfbox.util.PDFTextStripper;
 
 import net.yacy.cora.document.id.AnchorURL;
@@ -222,25 +210,54 @@ public class pdfParser extends AbstractParser implements Parser {
                 false,
                 docDate)};
     }
-
-    @SuppressWarnings("static-access")
+    
     public static void clean_up_idiotic_PDFParser_font_cache_which_eats_up_tons_of_megabytes() {
         // thank you very much, PDFParser hackers, this font cache will occupy >80MB RAM for a single pdf and then stays forever
         // AND I DO NOT EVEN NEED A FONT HERE TO PARSE THE TEXT!
         // Don't be so ignorant, just google once "PDFParser OutOfMemoryError" to feel the pain.
-        PDFont.clearResources();
-        COSName.clearResources();
-        PDType1Font.clearResources();
-        PDTrueTypeFont.clearResources();
-        PDType0Font.clearResources();
-        PDType1AfmPfbFont.clearResources();
-        PDType3Font.clearResources();
-        PDType1CFont.clearResources();
-        PDCIDFont.clearResources();
-        PDCIDFontType0Font.clearResources();
-        PDCIDFontType2Font.clearResources();
-        PDMMType1Font.clearResources();
-        PDSimpleFont.clearResources();
+        ResourceCleaner cl = new ResourceCleaner();
+        cl.clearClassResources("org.apache.pdfbox.cos.COSName");
+        cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDFont");
+        cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType1Font");
+        cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDTrueTypeFont");
+        cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType0Font");
+        cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType1AfmPfbFont");
+        cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType3Font");
+        cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType1CFont");
+        cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDCIDFont");
+        cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDCIDFontType0Font");
+        cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDCIDFontType2Font");
+        cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDMMType1Font");
+        cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDSimpleFont");
+    }
+
+    /** Clears static caches of pdfbox classes by reflection; only classes the system class loader has
+     *  already loaded are touched, a class that was never loaded is skipped instead of being loaded. */
+    private static class ResourceCleaner {
+        private Method findLoadedClass; // ClassLoader#findLoadedClass(String), null if it could not be made accessible
+        private ClassLoader sys;
+        public ResourceCleaner() {
+            try {
+                this.findLoadedClass = ClassLoader.class.getDeclaredMethod("findLoadedClass", String.class);
+                this.findLoadedClass.setAccessible(true);
+                this.sys = ClassLoader.getSystemClassLoader();
+            } catch (Throwable e) {
+                e.printStackTrace();
+                this.findLoadedClass = null;
+                this.sys = null;
+            }
+        }
+        /** Invokes the public static clearResources() of the named class, if that class is loaded. */
+        public void clearClassResources(String name) {
+            if (this.findLoadedClass == null) return;
+            try {
+                Class<?> pdfparserpainclass = (Class<?>) this.findLoadedClass.invoke(this.sys, name);
+                // getMethod also finds an inherited clearResources(); getDeclaredMethod threw NoSuchMethodException for classes that only inherit it
+                if (pdfparserpainclass != null) pdfparserpainclass.getMethod("clearResources").invoke(null);
+            } catch (Throwable e) {
+                e.printStackTrace();
+            }
+        }
     }
     
     /**
diff --git a/source/net/yacy/http/CrashProtectionHandler.java b/source/net/yacy/http/CrashProtectionHandler.java
index 257780f61..bbb4fb917 100644
--- a/source/net/yacy/http/CrashProtectionHandler.java
+++ b/source/net/yacy/http/CrashProtectionHandler.java
@@ -37,12 +37,12 @@ public class CrashProtectionHandler extends HandlerWrapper implements Handler, H
 	}
 	
 	private void writeResponse(HttpServletRequest request, HttpServletResponse response, Exception exc) throws IOException {
-		PrintWriter out = response.getWriter();
-		out.println("Ops!");
-		out.println();
-		out.println("Message: " + exc.getMessage());
-		exc.printStackTrace(out);
-		response.setContentType("text/plain");
-        response.setStatus(500);
+            response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); // headers first: they cannot be changed once the response is committed
+            response.setContentType("text/plain"); // must precede getWriter() and any body output
+            PrintWriter out = response.getWriter();
+            out.println("Ops!");
+            out.println();
+            out.println("Message: " + exc.getMessage());
+            exc.printStackTrace(out);
 	}
 }
diff --git a/source/net/yacy/http/ProxyHandler.java b/source/net/yacy/http/ProxyHandler.java
index acef4b3c0..7d7d208df 100644
--- a/source/net/yacy/http/ProxyHandler.java
+++ b/source/net/yacy/http/ProxyHandler.java
@@ -91,7 +91,7 @@ public class ProxyHandler extends AbstractRemoteHandler implements Handler {
 			HttpServletResponse response) throws IOException, ServletException {
 
 		RequestHeader proxyHeaders = convertHeaderFromJetty(request);
-                final String httpVer = (String) request.getHeader(HeaderFramework.CONNECTION_PROP_HTTP_VER);
+                final String httpVer = request.getHeader(HeaderFramework.CONNECTION_PROP_HTTP_VER);
                 setViaHeader (proxyHeaders, httpVer);
 		proxyHeaders.remove(RequestHeader.KEEP_ALIVE);
 		proxyHeaders.remove(RequestHeader.CONTENT_LENGTH);
diff --git a/source/net/yacy/http/SSIHandler.java b/source/net/yacy/http/SSIHandler.java
index 314b747d0..095861368 100644
--- a/source/net/yacy/http/SSIHandler.java
+++ b/source/net/yacy/http/SSIHandler.java
@@ -27,7 +27,6 @@ package net.yacy.http;
 import java.io.IOException;
 import java.io.OutputStream;
 
-import javax.servlet.RequestDispatcher;
 import javax.servlet.ServletException;
 import javax.servlet.http.HttpServletRequest;
 import javax.servlet.http.HttpServletResponse;
diff --git a/source/net/yacy/http/TemplateHandler.java b/source/net/yacy/http/TemplateHandler.java
index 156489561..d13e0e00d 100644
--- a/source/net/yacy/http/TemplateHandler.java
+++ b/source/net/yacy/http/TemplateHandler.java
@@ -97,11 +97,6 @@ public class TemplateHandler extends AbstractHandler implements Handler {
         htDocsPath = Switchboard.getSwitchboard().htDocsPath.getPath();
     }
 
-    @Override
-    protected void doStop() throws Exception {
-        super.doStop();
-    }
-
     /** Returns a path to the localized or default file according to the parameter localeSelection
      * @param path relative from htroot
      * @param localeSelection language of localized file; locale.language from switchboard is used if localeSelection.equals("") */
diff --git a/source/net/yacy/http/YaCyHttpServer.java b/source/net/yacy/http/YaCyHttpServer.java
index 22c881aa7..e869c0572 100644
--- a/source/net/yacy/http/YaCyHttpServer.java
+++ b/source/net/yacy/http/YaCyHttpServer.java
@@ -17,13 +17,13 @@ import java.net.SocketException;
  */
 public interface YaCyHttpServer {
     
-    abstract public void startupServer() throws Exception;
-    abstract public void stop() throws Exception;
-    abstract public void setMaxSessionCount(int cnt);
-    abstract public InetSocketAddress generateSocketAddress(String port) throws SocketException;
-    abstract public int getMaxSessionCount();
-    abstract public int getJobCount();
-    abstract public boolean withSSL();
-    abstract public void reconnect(int milsec);
-    abstract public String getVersion();
+    void startupServer() throws Exception;
+    void stop() throws Exception;
+    void setMaxSessionCount(int cnt);
+    InetSocketAddress generateSocketAddress(String port) throws SocketException;
+    int getMaxSessionCount();
+    int getJobCount();
+    boolean withSSL();
+    void reconnect(int milsec);
+    String getVersion();
 }
diff --git a/source/net/yacy/peers/Transmission.java b/source/net/yacy/peers/Transmission.java
index 47799fd7c..44fd4a807 100644
--- a/source/net/yacy/peers/Transmission.java
+++ b/source/net/yacy/peers/Transmission.java
@@ -25,7 +25,6 @@
 package net.yacy.peers;
 
 import java.util.ArrayList;
-import java.util.Collection;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
@@ -164,7 +163,7 @@ public class Transmission {
             final ReferenceContainer<WordReference> c = (remaining >= container.size()) ? container : trimContainer(container, remaining);
             // iterate through the entries in the container and check if the reference is in the repository
             final List<byte[]> notFoundx = new ArrayList<byte[]>();
-            Collection<String> testids = new HashSet<String>();
+            Set<String> testids = new HashSet<String>();
             Iterator<WordReference>  i = c.entries();
             while (i.hasNext()) {
                 final WordReference e = i.next();
diff --git a/source/net/yacy/search/ResourceObserver.java b/source/net/yacy/search/ResourceObserver.java
index 9cc6a58e7..32e8d2396 100644
--- a/source/net/yacy/search/ResourceObserver.java
+++ b/source/net/yacy/search/ResourceObserver.java
@@ -129,7 +129,7 @@ public class ResourceObserver {
     	if(MemoryControl.properState()) return Space.HIGH;
     	
         // clear some caches - @all: are there more of these, we could clear here?
-		this.sb.index.clearCache();
+		this.sb.index.clearCaches();
         SearchEventCache.cleanupEvents(true);
         this.sb.trail.clear();
         Switchboard.urlBlacklist.clearblacklistCache();
diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java
index e392cab37..0307b7e01 100644
--- a/source/net/yacy/search/Switchboard.java
+++ b/source/net/yacy/search/Switchboard.java
@@ -1585,7 +1585,7 @@ public final class Switchboard extends serverSwitch {
      * @param ids a collection of url hashes
      * @return a map from the hash id to: if it exists, the name of the database, otherwise null
      */
-    public Map<String, HarvestProcess> urlExists(final Collection<String> ids) {
+    public Map<String, HarvestProcess> urlExists(final Set<String> ids) {
         Set<String> e = this.index.exists(ids);
         Map<String, HarvestProcess> m = new HashMap<String, HarvestProcess>();
         for (String id: ids) {
@@ -2031,7 +2031,7 @@ public final class Switchboard extends serverSwitch {
 
             // clear caches if necessary
             if ( !MemoryControl.request(128000000L, false) ) {
-                this.index.clearCache();
+                this.index.clearCaches();
                 SearchEventCache.cleanupEvents(false);
                 this.trail.clear();
                 GuiHandler.clear();
@@ -2556,12 +2556,16 @@ public final class Switchboard extends serverSwitch {
            ) {
             // get the hyperlinks
             final Map<DigestURL, String> hl = Document.getHyperlinks(documents);
-            boolean loadImages = getConfigBool(SwitchboardConstants.CRAWLER_LOAD_IMAGE, true);
-            if (loadImages) hl.putAll(Document.getImagelinks(documents));
+            for (Map.Entry<DigestURL, String> entry: Document.getImagelinks(documents).entrySet()) {
+                if (TextParser.supportsExtension(entry.getKey()) == null) hl.put(entry.getKey(), entry.getValue());
+            }
+            
             
             // add all media links also to the crawl stack. They will be re-sorted to the NOLOAD queue and indexed afterwards as pure links
             if (response.profile().directDocByURL()) {
-                if (!loadImages) hl.putAll(Document.getImagelinks(documents));
+                for (Map.Entry<DigestURL, String> entry: Document.getImagelinks(documents).entrySet()) {
+                    if (TextParser.supportsExtension(entry.getKey()) != null) hl.put(entry.getKey(), entry.getValue());
+                }
                 hl.putAll(Document.getApplinks(documents));
                 hl.putAll(Document.getVideolinks(documents));
                 hl.putAll(Document.getAudiolinks(documents));
@@ -2905,7 +2909,7 @@ public final class Switchboard extends serverSwitch {
         // stacking may fail because of double occurrences of that url. Therefore
         // we must wait here until the url has actually disappeared
         int t = 100;
-        Collection<String> ids = new ArrayList<String>(1); ids.add(ASCII.String(urlhash));
+        Set<String> ids = new HashSet<String>(1); ids.add(ASCII.String(urlhash));
         while (t-- > 0 && this.index.exists(ids).size() > 0) {
             try {Thread.sleep(100);} catch (final InterruptedException e) {}
             ConcurrentLog.fine("Switchboard", "STACKURL: waiting for deletion, t=" + t);
diff --git a/source/net/yacy/search/SwitchboardConstants.java b/source/net/yacy/search/SwitchboardConstants.java
index a4ac06708..d6dfd24c8 100644
--- a/source/net/yacy/search/SwitchboardConstants.java
+++ b/source/net/yacy/search/SwitchboardConstants.java
@@ -323,7 +323,6 @@ public final class SwitchboardConstants {
      * <p><code>public static final String <strong>CRAWLER_THREADS_ACTIVE_MAX</strong> = "crawler.MaxActiveThreads"</code></p>
      * <p>Name of the setting how many active crawler-threads may maximal be running on the same time</p>
      */
-    public static final String CRAWLER_LOAD_IMAGE               = "crawler.load.image";
     public static final String CRAWLER_THREADS_ACTIVE_MAX       = "crawler.MaxActiveThreads";
     public static final String CRAWLER_FOLLOW_REDIRECTS         = "crawler.http.FollowRedirects"; // ignore the target url and follow to the redirect
     public static final String CRAWLER_RECORD_REDIRECTS         = "crawler.http.RecordRedirects"; // record the ignored redirected page to the index store
diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java
index 46389247c..cc127ecbe 100644
--- a/source/net/yacy/search/index/Fulltext.java
+++ b/source/net/yacy/search/index/Fulltext.java
@@ -225,10 +225,10 @@ public final class Fulltext {
         }
     }
 
-    public void clearCache() {
+    public void clearCaches() {
         if (this.urlIndexFile != null && this.urlIndexFile instanceof Cache) ((Cache) this.urlIndexFile).clearCache();
         if (this.statsDump != null) this.statsDump.clear();
-        this.solrInstances.clearCache();
+        this.solrInstances.clearCaches();
         this.statsDump = null;
     }
 
@@ -250,7 +250,7 @@ public final class Fulltext {
                 for (String name: instance.getCoreNames()) new EmbeddedSolrConnector(instance, name).clear();
             }
             this.commit(false);
-            this.solrInstances.clearCache();
+            this.solrInstances.clearCaches();
         }
     }
 
@@ -260,7 +260,7 @@ public final class Fulltext {
             if (instance != null) {
                 for (String name: instance.getCoreNames()) new RemoteSolrConnector(instance, name).clear();
             }
-            this.solrInstances.clearCache();
+            this.solrInstances.clearCaches();
         }
     }
 
@@ -400,7 +400,7 @@ public final class Fulltext {
             throw new IOException(e.getMessage(), e);
         }
         this.statsDump = null;
-        if (MemoryControl.shortStatus()) clearCache();
+        if (MemoryControl.shortStatus()) clearCaches();
     }
 
     public void putEdges(final Collection<SolrInputDocument> edges) throws IOException {
@@ -412,7 +412,7 @@ public final class Fulltext {
             throw new IOException(e.getMessage(), e);
         }
         this.statsDump = null;
-        if (MemoryControl.shortStatus()) clearCache();
+        if (MemoryControl.shortStatus()) clearCaches();
     }
 
     /**
@@ -432,7 +432,7 @@ public final class Fulltext {
             throw new IOException(e.getMessage(), e);
         }
         this.statsDump = null;
-        if (MemoryControl.shortStatus()) clearCache();
+        if (MemoryControl.shortStatus()) clearCaches();
     }
 
     /**
@@ -617,10 +617,11 @@ public final class Fulltext {
      * @param ids
      * @return a set of ids which exist in the database
      */
-    public Set<String> exists(Collection<String> ids) {
+    public Set<String> exists(Set<String> ids) {
         HashSet<String> e = new HashSet<String>();
         if (ids == null || ids.size() == 0) return e;
-        Collection<String> idsC = new HashSet<String>();
+        if (ids.size() == 1) return exists(ids.iterator().next()) ? ids : e;
+        Set<String> idsC = new HashSet<String>();
         idsC.addAll(ids);
         if (this.urlIndexFile != null) {
             Iterator<String> idsi = idsC.iterator();
@@ -751,12 +752,12 @@ public final class Fulltext {
     }
     
     // export methods
-    public Export export(final File f, final String filter, final int format, final boolean dom) {
+    public Export export(final File f, final String filter, final String query, final int format, final boolean dom) {
         if ((this.exportthread != null) && (this.exportthread.isAlive())) {
             ConcurrentLog.warn("LURL-EXPORT", "cannot start another export thread, already one running");
             return this.exportthread;
         }
-        this.exportthread = new Export(f, filter, format, dom);
+        this.exportthread = new Export(f, filter, query, format, dom);
         this.exportthread.start();
         return this.exportthread;
     }
@@ -769,14 +770,15 @@ public final class Fulltext {
         private final File f;
         private final Pattern pattern;
         private int count;
-        private String failure;
+        private String failure, query;
         private final int format;
         private final boolean dom;
 
-        private Export(final File f, final String filter, final int format, boolean dom) {
+        private Export(final File f, final String filter, final String query, final int format, boolean dom) {
             // format: 0=text, 1=html, 2=rss/xml
             this.f = f;
             this.pattern = filter == null ? null : Pattern.compile(filter);
+            this.query = query == null? "*:*" : query;
             this.count = 0;
             this.failure = null;
             this.format = format;
@@ -805,7 +807,7 @@ public final class Fulltext {
                 
                
                 if (this.dom) {
-                    Map<String, ReversibleScoreMap<String>> scores = Fulltext.this.getDefaultConnector().getFacets(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", 100000000, CollectionSchema.host_s.getSolrFieldName());
+                    Map<String, ReversibleScoreMap<String>> scores = Fulltext.this.getDefaultConnector().getFacets("(" + this.query + ") AND " + CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", 100000000, CollectionSchema.host_s.getSolrFieldName()); // query is grouped so that the status filter applies to all of it
                     ReversibleScoreMap<String> stats = scores.get(CollectionSchema.host_s.getSolrFieldName());
                     for (final String host: stats) {
                         if (this.pattern != null && !this.pattern.matcher(host).matches()) continue;
@@ -814,21 +816,19 @@ public final class Fulltext {
                         this.count++;
                     }
                 } else {
-                    BlockingQueue<SolrDocument> docs = Fulltext.this.getDefaultConnector().concurrentDocumentsByQuery(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", 0, 100000000, 10 * 60 * 60 * 1000, 100,
+                    BlockingQueue<SolrDocument> docs = Fulltext.this.getDefaultConnector().concurrentDocumentsByQuery("(" + this.query + ") AND " + CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", 0, 100000000, 10 * 60 * 60 * 1000, 100, // query is grouped so that the status filter applies to all of it
                             CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName(), CollectionSchema.title.getSolrFieldName(),
                             CollectionSchema.author.getSolrFieldName(), CollectionSchema.description_txt.getSolrFieldName(), CollectionSchema.size_i.getSolrFieldName(), CollectionSchema.last_modified.getSolrFieldName());
                     SolrDocument doc;
-                    ArrayList<?> title;
-                    String url, author, hash;
-                    String[] descriptions;
+                    String url, hash, title, author, description;
                     Integer size;
                     Date date;
                     while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
-                        hash = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName());
-                        url = (String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName());
-                        title = (ArrayList<?>) doc.getFieldValue(CollectionSchema.title.getSolrFieldName());
-                        author = (String) doc.getFieldValue(CollectionSchema.author.getSolrFieldName());
-                        descriptions = (String[]) doc.getFieldValue(CollectionSchema.description_txt.getSolrFieldName());
+                        hash = getStringFrom(doc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
+                        url = getStringFrom(doc.getFieldValue(CollectionSchema.sku.getSolrFieldName()));
+                        title = getStringFrom(doc.getFieldValue(CollectionSchema.title.getSolrFieldName()));
+                        author = getStringFrom(doc.getFieldValue(CollectionSchema.author.getSolrFieldName()));
+                        description = getStringFrom(doc.getFieldValue(CollectionSchema.description_txt.getSolrFieldName()));
                         size = (Integer) doc.getFieldValue(CollectionSchema.size_i.getSolrFieldName());
                         date = (Date) doc.getFieldValue(CollectionSchema.last_modified.getSolrFieldName());
                         if (this.pattern != null && !this.pattern.matcher(url).matches()) continue;
@@ -836,16 +836,14 @@ public final class Fulltext {
                             pw.println(url);
                         }
                         if (this.format == 1) {
-                            if (title != null) pw.println("<a href=\"" + MultiProtocolURL.escape(url) + "\">" + CharacterCoding.unicode2xml((String) title.iterator().next(), true) + "</a>");
+                            if (title != null) pw.println("<a href=\"" + MultiProtocolURL.escape(url) + "\">" + CharacterCoding.unicode2xml(title, true) + "</a>");
                         }
                         if (this.format == 2) {
                             pw.println("<item>");
-                            if (title != null) pw.println("<title>" + CharacterCoding.unicode2xml((String) title.iterator().next(), true) + "</title>");
+                            if (title != null) pw.println("<title>" + CharacterCoding.unicode2xml(title, true) + "</title>");
                             pw.println("<link>" + MultiProtocolURL.escape(url) + "</link>");
                             if (author != null && !author.isEmpty()) pw.println("<author>" + CharacterCoding.unicode2xml(author, true) + "</author>");
-                            if (descriptions != null && descriptions.length > 0) {
-                                for (String d: descriptions) pw.println("<description>" + CharacterCoding.unicode2xml(d, true) + "</description>");
-                            }
+                            if (description != null && !description.isEmpty()) pw.println("<description>" + CharacterCoding.unicode2xml(description, true) + "</description>");
                             if (date != null) pw.println("<pubDate>" + HeaderFramework.formatRFC1123(date) + "</pubDate>");
                             if (size != null) pw.println("<yacy:size>" + size.intValue() + "</yacy:size>");
                             pw.println("<guid isPermaLink=\"false\">" + hash + "</guid>");
@@ -883,6 +881,13 @@ public final class Fulltext {
         public int count() {
             return this.count;
         }
+        
+        /** Returns the field value as String, or the first entry if it is an ArrayList; "" for null or an empty list. */
+        private String getStringFrom(final Object o) {
+            if (o == null) return "";
+            if (o instanceof ArrayList) return ((ArrayList<?>) o).isEmpty() ? "" : (String) ((ArrayList<?>) o).get(0); // an empty list used to throw IndexOutOfBoundsException
+            return (String) o;
+        }
 
     }
     
diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java
index 918458837..617d5269c 100644
--- a/source/net/yacy/search/index/Segment.java
+++ b/source/net/yacy/search/index/Segment.java
@@ -29,7 +29,6 @@ package net.yacy.search.index;
 import java.io.File;
 import java.io.IOException;
 import java.net.MalformedURLException;
-import java.util.Collection;
 import java.util.Date;
 import java.util.Iterator;
 import java.util.List;
@@ -443,7 +442,7 @@ public class Segment {
      * @param ids
      * @return a set of ids which exist in the database
      */
-    public Set<String> exists(final Collection<String> ids) {
+    public Set<String> exists(final Set<String> ids) {
         return this.fulltext.exists(ids);
     }
 
@@ -504,10 +503,10 @@ public class Segment {
         }
     }
     
-    public void clearCache() {
+    public void clearCaches() { // clears the caches of the citation index and the term index (each only if present) and of the fulltext
         if (this.urlCitationIndex != null) this.urlCitationIndex.clearCache();
         if (this.termIndex != null) this.termIndex.clearCache();
-        this.fulltext.clearCache();
+        this.fulltext.clearCaches(); // fulltext is called without a null check, unlike the two indexes above
     }
 
     public File getLocation() {
diff --git a/source/net/yacy/search/query/QueryGoal.java b/source/net/yacy/search/query/QueryGoal.java
index 745bbb2ac..50861de59 100644
--- a/source/net/yacy/search/query/QueryGoal.java
+++ b/source/net/yacy/search/query/QueryGoal.java
@@ -242,7 +242,8 @@ public class QueryGoal {
         // add filter to prevent that results come from failed urls
         q.append(CollectionSchema.httpstatus_i.getSolrFieldName()).append(":200").append(" AND (");
         q.append(CollectionSchema.images_urlstub_sxt.getSolrFieldName()).append(":[* TO *] OR ");
-        q.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":(jpg OR png OR gif))");
+        q.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":(jpg OR png OR gif) OR "); // trailing space keeps the OR operator separate from the field name appended next
+        q.append(CollectionSchema.content_type.getSolrFieldName()).append(":(image/*))"); // third alternative of the filter: content type of the form image/*; closes the "AND (" group
         
         // parse special requests
         if (isCatchall()) return q;
diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java
index adda277b2..31b3c35e2 100644
--- a/source/net/yacy/search/schema/CollectionConfiguration.java
+++ b/source/net/yacy/search/schema/CollectionConfiguration.java
@@ -898,17 +898,20 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
             String query = (harvestkey == null || !segment.fulltext().getDefaultConfiguration().contains(CollectionSchema.harvestkey_s) ? "" : CollectionSchema.harvestkey_s.getSolrFieldName() + ":\"" + harvestkey + "\" AND ") +
                     CollectionSchema.process_sxt.getSolrFieldName() + ":" + ProcessType.CITATION.toString();
             hostscore = collectionConnector.getFacets(query, 10000000, CollectionSchema.host_s.getSolrFieldName()).get(CollectionSchema.host_s.getSolrFieldName());
             if (hostscore == null) hostscore = new ClusteredScoreMap<String>();
-
+            ConcurrentLog.info("CollectionConfiguration", "collecting " + hostscore.size() + " hosts"); // safe: hostscore was null-guarded on the line above
+            int countcheck = 0; // hosts that reach the end of the loop body below; hosts skipped via 'continue' are not counted
             for (String host: hostscore.keyList(true)) {
                 // Patch the citation index for links with canonical tags.
                 // This shall fulfill the following requirement:
-                // If a document A links to B and B contains a 'canonical C', then the citation rank coputation shall consider that A links to C and B does not link to C.
+                // If a document A links to B and B contains a 'canonical C', then the citation rank computation shall consider that A links to C and B does not link to C.
                 // To do so, we first must collect all canonical links, find all references to them, get the anchor list of the documents and patch the citation reference of these links
                 String patchquery = CollectionSchema.host_s.getSolrFieldName() + ":" + host + " AND " + CollectionSchema.canonical_s.getSolrFieldName() + ":[* TO *]";
+                long patchquerycount = collectionConnector.getCountByQuery(patchquery);
                 BlockingQueue<SolrDocument> documents_with_canonical_tag = collectionConnector.concurrentDocumentsByQuery(patchquery, 0, 10000000, 60000L, 50,
                         CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName(), CollectionSchema.canonical_s.getSolrFieldName());
                 SolrDocument doc_B;
+                int patchquerycountcheck = 0;
                 try {
                     while ((doc_B = documents_with_canonical_tag.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
                         // find all documents which link to the canonical doc
@@ -926,10 +928,12 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
                             CitationReference doc_A_citation = doc_A_ids_iterator.next();
                             segment.urlCitation().add(doc_C_url.hash(), doc_A_citation);
                         }
+                        patchquerycountcheck++;
                     }
                 } catch (InterruptedException e) {
                 } catch (SpaceExceededException e) {
                 }
+                if (patchquerycount != patchquerycountcheck) ConcurrentLog.warn("CollectionConfiguration", "ambiguous patchquery count for host " + host + ": expected=" + patchquerycount + ", counted=" + patchquerycountcheck);
                 
                 // do the citation rank computation
                 if (hostscore.get(host) <= 0) continue;
@@ -939,12 +943,14 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
                 while (convergence_attempts++ < 30) {
                     if (crh.convergenceStep()) break;
                 }
-                ConcurrentLog.info("CollectionConfiguration.CRHost", "convergence for host " + host + " after " + convergence_attempts + " steps");
+                ConcurrentLog.info("CollectionConfiguration", "convergence for host " + host + " after " + convergence_attempts + " steps");
                 // we have now the cr for all documents of a specific host; we store them for later use
                 Map<byte[], CRV> crn = crh.normalize();
                 //crh.log(crn);
                 ranking.putAll(crn); // accumulate this here for usage in document update later
+                countcheck++;
             }
+            if (hostscore.size() != countcheck) ConcurrentLog.warn("CollectionConfiguration", "ambiguous host count: expected=" + hostscore.size() + ", counted=" + countcheck);
         } catch (final IOException e2) {
             hostscore = new ClusteredScoreMap<String>();
         }
@@ -952,13 +958,15 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
         // process all documents at the webgraph for the outgoing links of this document
         SolrDocument doc;
         if (webgraphConnector != null) {
-            for (String host: hostscore.keyList(true)) {
-                if (hostscore.get(host) <= 0) continue;
-                // select all webgraph edges and modify their cr value
-                BlockingQueue<SolrDocument> docs = webgraphConnector.concurrentDocumentsByQuery(
-                        WebgraphSchema.source_host_s.getSolrFieldName() + ":\"" + host + "\"",
-                        0, 10000000, 60000, 50);
-                try {
+            try {
+                for (String host: hostscore.keyList(true)) {
+                    if (hostscore.get(host) <= 0) continue;
+                    // select all webgraph edges and modify their cr value
+                    String query = WebgraphSchema.source_host_s.getSolrFieldName() + ":\"" + host + "\"";
+                    long count = webgraphConnector.getCountByQuery(query);
+                    ConcurrentLog.info("CollectionConfiguration", "collecting " + count + " documents from the webgraph");
+                    BlockingQueue<SolrDocument> docs = webgraphConnector.concurrentDocumentsByQuery(query, 0, 10000000, 60000, 50);
+                    int countcheck = 0;
                     while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
                         boolean changed = false;
                         SolrInputDocument sid = segment.fulltext().getWebgraphConfiguration().toSolrInputDocument(doc, null);
@@ -978,21 +986,29 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
                             webgraphConnector.add(sid);
                         } catch (SolrException e) {
                         } catch (IOException e) {
-                       }
+                        }
+                        countcheck++;
                     }
-                } catch (final InterruptedException e) {}
+                    if (count != countcheck) ConcurrentLog.warn("CollectionConfiguration", "ambiguous webgraph document count for host " + host + ": expected=" + count + ", counted=" + countcheck);
+                }
+            } catch (final IOException e2) {
+                ConcurrentLog.warn("CollectionConfiguration", e2.getMessage(), e2);
+            } catch (final InterruptedException e3) {
+                ConcurrentLog.warn("CollectionConfiguration", e3.getMessage(), e3);
             }
         }
         
         // process all documents in collection
-        BlockingQueue<SolrDocument> docs = collectionConnector.concurrentDocumentsByQuery(
-                (harvestkey == null ? "" : CollectionSchema.harvestkey_s.getSolrFieldName() + ":\"" + harvestkey + "\" AND ") +
-                CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]",
-                0, 10000, 60000, 50);
+        String query = (harvestkey == null ? "" : CollectionSchema.harvestkey_s.getSolrFieldName() + ":\"" + harvestkey + "\" AND ") +
+                CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]";
         int proccount = 0, proccount_clickdepthchange = 0, proccount_referencechange = 0, proccount_citationchange = 0, proccount_uniquechange = 0;
         Map<String, Long> hostExtentCache = new HashMap<String, Long>(); // a mapping from the host id to the number of documents which contain this host-id
         Set<String> uniqueURLs = new HashSet<String>();
         try {
+            long count = collectionConnector.getCountByQuery(query); // expected number of documents; compared with countcheck after the loop
+            ConcurrentLog.info("CollectionConfiguration", "collecting " + count + " documents from the collection for harvestkey " + harvestkey);
+            BlockingQueue<SolrDocument> docs = collectionConnector.concurrentDocumentsByQuery(query, 0, 10000, 60000, 50); // NOTE(review): 10000 looks like a maximum document count; if so, count can legitimately exceed countcheck - confirm
+            int countcheck = 0; // incremented once per document taken from docs
             while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
                 // for each to-be-processed entry work on the process tag
                 Collection<Object> proctags = doc.getFieldValues(CollectionSchema.process_sxt.getSolrFieldName());
@@ -1031,8 +1047,8 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
                     if (!hostExtentCache.containsKey(hosthash)) {
                         StringBuilder q = new StringBuilder();
                         q.append(CollectionSchema.host_id_s.getSolrFieldName()).append(":\"").append(hosthash).append("\" AND ").append(CollectionSchema.httpstatus_i.getSolrFieldName()).append(":200");
-                        long count = segment.fulltext().getDefaultConnector().getCountByQuery(q.toString());
-                        hostExtentCache.put(hosthash, count);
+                        long hostExtentCount = segment.fulltext().getDefaultConnector().getCountByQuery(q.toString());
+                        hostExtentCache.put(hosthash, hostExtentCount);
                     }
                     if (postprocessing_references(rrCache, doc, sid, url, hostExtentCache)) proccount_referencechange++;
                     
@@ -1047,13 +1063,18 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
                     proccount++;
                 } catch (final Throwable e1) {
                 }
+                countcheck++;
             }
+            if (count != countcheck) ConcurrentLog.warn("CollectionConfiguration", "ambiguous collection document count for harvestkey " + harvestkey + ": expected=" + count + ", counted=" + countcheck);
             ConcurrentLog.info("CollectionConfiguration", "cleanup_processing: re-calculated " + proccount+ " new documents, " +
                         proccount_clickdepthchange + " clickdepth changes, " +
                         proccount_referencechange + " reference-count changes, " +
                         proccount_uniquechange + " unique field changes, " +
                         proccount_citationchange + " citation ranking changes.");
-        } catch (final InterruptedException e) {
+        } catch (final InterruptedException e2) {
+            ConcurrentLog.warn("CollectionConfiguration", e2.getMessage(), e2);
+        } catch (IOException e3) {
+            ConcurrentLog.warn("CollectionConfiguration", e3.getMessage(), e3);
         }
         return proccount;
     }
@@ -1148,8 +1169,8 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
                 if (entry == null || entry.getValue() == null) continue;
                 try {
                     String url = (String) connector.getDocumentById(ASCII.String(entry.getKey()), CollectionSchema.sku.getSolrFieldName()).getFieldValue(CollectionSchema.sku.getSolrFieldName());
-                    ConcurrentLog.info("CollectionConfiguration.CRHost", "CR for " + url);
-                    ConcurrentLog.info("CollectionConfiguration.CRHost", ">> " + entry.getValue().toString());
+                    ConcurrentLog.info("CollectionConfiguration", "CR for " + url);
+                    ConcurrentLog.info("CollectionConfiguration", ">> " + entry.getValue().toString());
                 } catch (final IOException e) {
                     ConcurrentLog.logException(e);
                 }