Merge branch 'master' of git://gitorious.org/yacy/rc1 into blacklist_structure

pull/1/head
Felix Ableitner 12 years ago
commit 376f9cd9d0

Binary file not shown.

[Image diff: 4.5 KiB before, 22 KiB after]

@ -226,7 +226,9 @@ Section "Uninstall"
RMDir /r "$INSTDIR\classes"
RMDir /r "$INSTDIR\defaults"
RMDir /r "$INSTDIR\htroot"
RMDir /r "$INSTDIR\langstats"
RMDir /r "$INSTDIR\lib"
RMDir /r "$INSTDIR\libbuild"
RMDir /r "$INSTDIR\libx"
RMDir /r "$INSTDIR\locales"
RMDir /r "$INSTDIR\ranking"

@ -3,7 +3,7 @@ javacSource=1.6
javacTarget=1.6
# Release Configuration
releaseVersion=1.51
releaseVersion=1.52
stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
sourceReleaseFile=yacy_src_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseFileParentDir=yacy

@ -90,9 +90,10 @@
and back compatibility is not guaranteed. Names with both leading and
trailing underscores (e.g. _version_) are reserved.
-->
<field name="id" type="string" indexed="true" stored="true" required="true" />
<field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
<field name="sku" type="string" indexed="true" stored="true" omitNorms="true"/>
<!--<field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>-->
<field name="name" type="text_general" indexed="true" stored="true"/>
<field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/>
<field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>

@ -44,6 +44,9 @@ java.util.logging.FileHandler.level = ALL
java.util.logging.FileHandler.formatter=net.yacy.kelondro.logging.SimpleLogFormatter
java.util.logging.FileHandler.encoding=UTF-8
# Properties for crawler
net.yacy.crawler.robots.RobotsTxt.level = SEVERE
# Properties for the GuiHandler
net.yacy.kelondro.logging.GuiHandler.level = ALL
net.yacy.kelondro.logging.GuiHandler.formatter=net.yacy.kelondro.logging.SimpleLogFormatter
@ -79,18 +82,42 @@ httpclient.wire.level = OFF
org.apache.pdfbox.level = INFO
# Properties for solr
org.apache.solr.core.Config.level = INFO
org.apache.solr.client.solrj.impl.HttpClientUtil.level = SEVERE
org.apache.solr.core.Config.level = SEVERE
org.apache.solr.core.CoreContainer.level = SEVERE
org.apache.solr.core.JmxMonitoredMap.level = SEVERE
org.apache.solr.core.RequestHandlers.level = SEVERE
org.apache.solr.core.SolrConfig.level = SEVERE
org.apache.solr.core.SolrCore.level = SEVERE
org.apache.solr.core.SolrResourceLoader.level = INFO
org.apache.solr.core.SolrResourceLoader.level = SEVERE
org.apache.solr.core.CachingDirectoryFactory.level = OFF
org.apache.solr.util.plugin.AbstractPluginLoader.level = INFO
org.apache.solr.schema.IndexSchema.level = INFO
org.apache.solr.schema.FieldTypePluginLoader.level = INFO
org.apache.solr.handler.component.HttpShardHandlerFactory.level = SEVERE
org.apache.solr.handler.component.QueryElevationComponent.level = SEVERE
org.apache.solr.handler.component.SearchHandler.level = SEVERE
org.apache.solr.handler.component.SpellCheckComponent.level = SEVERE
org.apache.solr.handler.loader.XMLLoader.level = SEVERE
org.apache.solr.handler.ReplicationHandler.level = SEVERE
org.apache.solr.handler.UpdateRequestHandler.level = INFO
org.apache.solr.handler.loader.XMLLoader.level = INFO
org.apache.solr.search.SolrIndexSearcher.level = INFO
org.apache.solr.response.XSLTResponseWriter.level = SEVERE
org.apache.solr.schema.FileExchangeRateProvider.level = SEVERE
org.apache.solr.schema.IndexSchema.level = SEVERE
org.apache.solr.search.SolrIndexSearcher.level = SEVERE
org.apache.solr.spelling.DirectSolrSpellChecker.level = SEVERE
org.apache.solr.update.processor.LogUpdateProcessor.level = OFF
org.apache.solr.update.DefaultSolrCoreState.level = SEVERE
org.apache.solr.update.SolrCoreState.level = SEVERE
org.apache.solr.update.SolrIndexWriter.level = INFO
org.apache.solr.update.UpdateHandler.level = SEVERE
# Properties for jena
com.hp.hpl.jena.util.FileManager.level = SEVERE
com.hp.hpl.jena.util.LocationMapper.level = SEVERE
com.hp.hpl.jena.util.LocatorClassLoader.level = SEVERE
com.hp.hpl.jena.util.LocatorFile.level = SEVERE
com.hp.hpl.jena.util.SystemUtils.level = SEVERE
# Properties for the YaCy solr interface
net.yacy.cora.federate.solr.connector.SolrServerConnector.level = INFO
@ -98,3 +125,6 @@ net.yacy.cora.federate.solr.connector.SolrServerConnector.level = INFO
# java properties
javax.management.misc.level = INFO
javax.management.mbeanserver.level = INFO
# Properties for Collection
CollectionConfiguration.CRHost.level = SEVERE

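The hunk above raises most Solr and Jena loggers to SEVERE (or OFF) to quiet the crawler's log. Each "name.level = LEVEL" line configures the java.util.logging logger of that name. A minimal standalone demo of the same mechanism, reusing one logger name from the diff (the demo class itself is illustrative, not YaCy code):

import java.util.logging.Level;
import java.util.logging.Logger;

public class LogLevelDemo {
    public static void main(String[] args) {
        // same effect as "org.apache.solr.core.SolrCore.level = SEVERE" above:
        // messages below SEVERE are dropped by the logger
        Logger solrCore = Logger.getLogger("org.apache.solr.core.SolrCore");
        solrCore.setLevel(Level.SEVERE);
        solrCore.info("suppressed");     // below the threshold, not logged
        solrCore.severe("still logged"); // passes the threshold
    }
}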
@ -118,6 +118,8 @@
<p>manual update:<br/>apt-get update &amp;&amp; apt-get install yacy</p>
<p>automatic update: add the following line to /etc/crontab<br/>0 6 * * * root apt-get update &amp;&amp; apt-get -y --force-yes install yacy</p>
::
<p>YaCy has been installed to the Program Files directory. Automatic update is not possible.<br/>Download and install the latest version from the web page <a href="http://www.yacy.net/">http://www.yacy.net/</a></p>
::
#(/candeploy)#
#%env/templates/footer.template%#

@ -55,6 +55,9 @@ public class ConfigUpdate_p {
if (yacyBuildProperties.isPkgManager()) {
prop.put("candeploy", "2");
return prop;
} else if (OS.isWindows && sb.appPath.toString().indexOf("Program Files") > -1) {
prop.put("candeploy", "3");
return prop;
} else if (OS.canExecUnix || OS.isWindows) {
// we can deploy a new system with (i.e.)
// cd DATA/RELEASE;tar xfz $1;cp -Rf yacy/* ../../;rm -Rf yacy

@ -28,7 +28,6 @@ import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.crawler.data.CrawlQueues;
import net.yacy.crawler.retrieval.Request;
import net.yacy.crawler.retrieval.Response;
import net.yacy.crawler.robots.RobotsTxtEntry;
@ -88,19 +87,19 @@ public class CrawlCheck_p {
robotsEntry = sb.robots.getEntry(u, sb.peers.myBotIDs());
if (robotsEntry == null) {
prop.put("table_list_" + row + "_robots", "no robots");
prop.put("table_list_" + row + "_crawldelay", CrawlQueues.queuedMinLoadDelay + " ms");
prop.put("table_list_" + row + "_crawldelay", ClientIdentification.minLoadDelay() + " ms");
prop.put("table_list_" + row + "_sitemap", "");
} else {
robotsAllowed = !robotsEntry.isDisallowed(u);
prop.put("table_list_" + row + "_robots", "robots exist: " + (robotsAllowed ? "crawl allowed" : "url disallowed"));
prop.put("table_list_" + row + "_crawldelay", Math.max(CrawlQueues.queuedMinLoadDelay, robotsEntry.getCrawlDelayMillis()) + " ms");
prop.put("table_list_" + row + "_crawldelay", Math.max(ClientIdentification.minLoadDelay(), robotsEntry.getCrawlDelayMillis()) + " ms");
prop.put("table_list_" + row + "_sitemap", robotsEntry.getSitemap() == null ? "-" : robotsEntry.getSitemap().toNormalform(true));
}
// try to load the url
if (robotsAllowed) try {
Request request = sb.loader.request(u, true, false);
final Response response = sb.loader.load(request, CacheStrategy.NOCACHE, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
final Response response = sb.loader.load(request, CacheStrategy.NOCACHE, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
if (response == null) {
prop.put("table_list_" + row + "_access", "no response");
} else {

@ -43,7 +43,6 @@ import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.crawler.CrawlSwitchboard;
import net.yacy.crawler.data.CrawlProfile;
import net.yacy.crawler.data.CrawlQueues;
import net.yacy.crawler.data.ZURL.FailCategory;
import net.yacy.crawler.retrieval.Request;
import net.yacy.crawler.retrieval.SitemapImporter;
@ -288,7 +287,7 @@ public class Crawler_p {
// download document
Document scraper;
try {
scraper = sb.loader.loadDocument(sitelistURL, CacheStrategy.IFFRESH, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
scraper = sb.loader.loadDocument(sitelistURL, CacheStrategy.IFFRESH, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
// get links and generate filter
for (DigestURI u: scraper.getAnchors().keySet()) {
newRootURLs.add(u);
@ -445,6 +444,7 @@ public class Crawler_p {
0,
0,
0),
null,
sb.peers.mySeed().hash.getBytes(),
new Date(),
1,

@ -27,7 +27,6 @@ import net.yacy.cora.geo.OpenGeoDBLocation;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.crawler.data.CrawlQueues;
import net.yacy.crawler.retrieval.Response;
import net.yacy.document.LibraryProvider;
import net.yacy.kelondro.data.meta.DigestURI;
@ -67,7 +66,7 @@ public class DictionaryLoader_p {
if (post.containsKey("geon0Load")) {
// load from the net
try {
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
final byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.GEON0.file());
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null, -1));
@ -109,7 +108,7 @@ public class DictionaryLoader_p {
if (post.containsKey("geon1Load")) {
// load from the net
try {
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
final byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.GEON1.file());
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON1.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON1.file(), null, -1));
@ -151,7 +150,7 @@ public class DictionaryLoader_p {
if (post.containsKey("geon2Load")) {
// load from the net
try {
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON2.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON2.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
final byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.GEON2.file());
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON2.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON2.file(), null, 100000));
@ -193,7 +192,7 @@ public class DictionaryLoader_p {
if (post.containsKey("geo1Load")) {
// load from the net
try {
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEODB1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEODB1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
final byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.GEODB1.file());
LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB1.nickname);
@ -236,7 +235,7 @@ public class DictionaryLoader_p {
if (post.containsKey("drw0Load")) {
// load from the net
try {
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.DRW0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.DRW0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
final byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.DRW0.file());
LibraryProvider.activateDeReWo();
@ -280,7 +279,7 @@ public class DictionaryLoader_p {
if (post.containsKey("pnd0Load")) {
// load from the net
try {
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.PND0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.PND0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
final byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.PND0.file());
LibraryProvider.activatePND();

@ -53,7 +53,7 @@ public class IndexReIndexMonitor_p {
prop.put("showstartbutton", 0);
}
} else {
if (post != null && post.containsKey("reindexnow")) {
if (post != null && post.containsKey("reindexnow") && sb.index.fulltext().connectedLocalSolr()) {
migration.reindexToschema(sb);
prop.put("showstartbutton", 0);
prop.put("querysize", "0");

@ -42,7 +42,6 @@ import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.crawler.HarvestProcess;
import net.yacy.crawler.data.CrawlQueues;
import net.yacy.crawler.retrieval.RSSLoader;
import net.yacy.crawler.retrieval.Response;
import net.yacy.data.WorkTables;
@ -267,7 +266,7 @@ public class Load_RSS_p {
RSSReader rss = null;
if (url != null) try {
prop.put("url", url.toNormalform(true));
final Response response = sb.loader.load(sb.loader.request(url, true, false), CacheStrategy.NOCACHE, Integer.MAX_VALUE, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
final Response response = sb.loader.load(sb.loader.request(url, true, false), CacheStrategy.NOCACHE, Integer.MAX_VALUE, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
final byte[] resource = response == null ? null : response.getContent();
rss = resource == null ? null : RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, resource);
} catch (final IOException e) {

@ -32,6 +32,7 @@ import java.util.Date;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.util.Memory;
import net.yacy.kelondro.io.ByteCount;
import net.yacy.kelondro.util.Formatter;
import net.yacy.kelondro.util.MemoryControl;
@ -317,6 +318,7 @@ public class Status
prop.put("totalMemory", Formatter.bytesToString(MemoryControl.total()));
prop.put("maxMemory", Formatter.bytesToString(MemoryControl.maxMemory()));
prop.put("processors", WorkflowProcessor.availableCPU);
prop.put("load", Memory.load());
// proxy traffic
//prop.put("trafficIn",bytesToString(httpdByteCountInputStream.getGlobalCount()));

@ -3,18 +3,17 @@
<legend>System Status</legend>
<dl>
<dt>Process</dt>
<dd>#[versionpp]#
<dt>System</dt>
<dd>YaCy version #[versionpp]#
#(peerStatistics)#
Unknown
::
<div>Uptime: #[uptime]#</div>
#(/peerStatistics)#
</dd>
<dt>System Resources</dt>
<dd>Processors: #[processors]#</dd>
#(/peerStatistics)#
<div>Processors: #[processors]#</div>
<div>Load: #[load]#</div>
</dd>
<dt>Protection</dt>
<dd>#(protection)#
<strong>Password is missing.</strong>

@ -45,7 +45,6 @@ import net.yacy.cora.lod.vocabulary.YaCyMetadata;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.crawler.data.Cache;
import net.yacy.crawler.data.CrawlQueues;
import net.yacy.crawler.retrieval.Response;
import net.yacy.document.Condenser;
import net.yacy.document.Document;
@ -169,7 +168,7 @@ public class ViewFile {
Response response = null;
try {
response = sb.loader.load(sb.loader.request(url, true, false), authorized ? CacheStrategy.IFEXIST : CacheStrategy.CACHEONLY, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
response = sb.loader.load(sb.loader.request(url, true, false), authorized ? CacheStrategy.IFEXIST : CacheStrategy.CACHEONLY, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
} catch (final IOException e) {
prop.put("error", "4");
prop.put("error_errorText", "error loading resource: " + e.getMessage());

@ -39,7 +39,6 @@ import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.storage.ConcurrentARC;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.crawler.data.CrawlQueues;
import net.yacy.data.URLLicense;
import net.yacy.document.ImageParser;
import net.yacy.kelondro.data.meta.DigestURI;
@ -105,7 +104,7 @@ public class ViewImage {
if (image == null) {
byte[] resourceb = null;
if (url != null) try {
resourceb = sb.loader.loadContent(sb.loader.request(url, false, true), CacheStrategy.IFEXIST, BlacklistType.SEARCH, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
resourceb = sb.loader.loadContent(sb.loader.request(url, false, true), CacheStrategy.IFEXIST, BlacklistType.SEARCH, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
} catch (final IOException e) {
ConcurrentLog.fine("ViewImage", "cannot load: " + e.getMessage());
}

@ -37,7 +37,6 @@ import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.crawler.data.CrawlQueues;
import net.yacy.crawler.robots.RobotsTxtEntry;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.repository.Blacklist.BlacklistType;
@ -97,7 +96,7 @@ public class getpageinfo {
}
net.yacy.document.Document scraper = null;
if (u != null) try {
scraper = sb.loader.loadDocument(u, CacheStrategy.IFEXIST, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
scraper = sb.loader.loadDocument(u, CacheStrategy.IFEXIST, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
} catch (final IOException e) {
ConcurrentLog.logException(e);
// bad things are possible, i.e. that the Server responds with "403 Bad Behavior"

@ -37,7 +37,6 @@ import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.crawler.data.CrawlQueues;
import net.yacy.crawler.robots.RobotsTxtEntry;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.repository.Blacklist.BlacklistType;
@ -97,7 +96,7 @@ public class getpageinfo_p {
}
net.yacy.document.Document scraper = null;
if (u != null) try {
scraper = sb.loader.loadDocument(u, CacheStrategy.IFEXIST, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
scraper = sb.loader.loadDocument(u, CacheStrategy.IFEXIST, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
} catch (final IOException e) {
ConcurrentLog.logException(e);
// bad things are possible, i.e. that the Server responds with "403 Bad Behavior"

@ -26,9 +26,9 @@ import java.util.Iterator;
import java.util.Map;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.crawler.data.Latency;
import net.yacy.crawler.data.NoticedURL;
import net.yacy.crawler.data.Latency.Host;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
@ -52,7 +52,7 @@ public class latency_p {
prop.put("domains_" + c + "_count", host.count());
prop.put("domains_" + c + "_average", host.average());
prop.put("domains_" + c + "_robots", host.robotsDelay());
prop.put("domains_" + c + "_flux", host.flux(NoticedURL.minimumGlobalDeltaInit));
prop.put("domains_" + c + "_flux", host.flux(ClientIdentification.minimumGlobalDeltaInit));
c++;
}
prop.put("domains", c);

@ -26,6 +26,9 @@
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.crawler.CrawlSwitchboard;
import net.yacy.crawler.data.CrawlProfile;
import net.yacy.kelondro.index.RowHandleSet;
import net.yacy.kelondro.io.ByteCount;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.kelondro.workflow.WorkflowProcessor;
@ -100,6 +103,29 @@ public class status_p {
prop.putNum("noloadCrawlSize", sb.crawlQueues.noloadCrawlJobSize());
prop.put("noloadCrawlState", STATE_RUNNING);
// generate crawl profile table
int count = 0;
final int domlistlength = (post == null) ? 160 : post.getInt("domlistlength", 160);
CrawlProfile profile;
// put active crawls into list
String hosts = "";
for (final byte[] h: sb.crawler.getActive()) {
profile = sb.crawler.getActive(h);
if (CrawlSwitchboard.DEFAULT_PROFILES.contains(profile.name())) continue;
profile.putProfileEntry("crawlProfiles_list_", prop, true, false, count, domlistlength);
RowHandleSet urlhashes = sb.crawler.getURLHashes(h);
prop.put("crawlProfiles_list_" + count + "_count", urlhashes == null ? "unknown" : Integer.toString(urlhashes.size()));
if (profile.urlMustMatchPattern() == CrawlProfile.MATCH_ALL_PATTERN) {
hosts = hosts + "," + profile.name();
}
count++;
}
prop.put("crawlProfiles_list", count);
prop.put("crawlProfiles_count", count);
prop.put("crawlProfiles", count == 0 ? 0 : 1);
prop.put("postprocessingRunning", Switchboard.postprocessingRunning ? 1 : 0);
// return rewrite properties
return prop;
}

@ -49,5 +49,24 @@
<size>#[noloadCrawlSize]#</size>
<state>#[noloadCrawlState]#</state>
</noloadcrawlerqueue>
<!-- crawl profile list -->
#(crawlProfiles)#::
<crawls count="#[count]#">
#{list}#
<crawl>
<name>#[name]#</name>
<count>#[count]#</count>
<handle>#[handle]#</handle>
<depth>#[depth]#</depth>
<status>#(terminateButton)#terminated::alive#(/terminateButton)#</status>
</crawl>
#{/list}#
</crawls>
#(/crawlProfiles)#
<postprocessing>
<status>#(postprocessingRunning)#idle::busy#(/postprocessingRunning)#</status>
</postprocessing>
</status>

@ -35,7 +35,6 @@ import net.yacy.cora.order.Base64Order;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.crawler.data.CrawlQueues;
import net.yacy.kelondro.data.citation.CitationReference;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.rwi.IndexCell;
@ -98,7 +97,7 @@ public class webstructure {
prop.put("references", 1);
net.yacy.document.Document scraper = null;
if (url != null) try {
scraper = sb.loader.loadDocument(url, CacheStrategy.IFEXIST, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
scraper = sb.loader.loadDocument(url, CacheStrategy.IFEXIST, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
} catch (final IOException e) {
ConcurrentLog.logException(e);
}

[Binary image files changed; sizes unchanged]

@ -163,6 +163,7 @@ public final class crawlReceipt {
sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work is transformed into an error case
sb.crawlQueues.errorURL.push(
entry.toBalancerEntry(iam),
null,
youare.getBytes(),
null,
0,

[More binary image files changed; sizes unchanged]

@ -82,6 +82,7 @@ public class urls {
// place url to notice-url db
sb.crawlQueues.delegatedURL.push(
entry,
null,
sb.peers.mySeed().hash.getBytes(),
new Date(),
0,

@ -100,8 +100,8 @@ public class yacysearch {
final Switchboard sb = (Switchboard) env;
sb.localSearchLastAccess = System.currentTimeMillis();
final boolean searchAllowed =
sb.getConfigBool("publicSearchpage", true) || sb.verifyAuthentication(header);
final boolean authorized = sb.verifyAuthentication(header);
final boolean searchAllowed = sb.getConfigBool("publicSearchpage", true) || authorized;
boolean authenticated = sb.adminAuthenticated(header) >= 2;
if ( !authenticated ) {
@ -161,7 +161,7 @@ public class yacysearch {
boolean p2pmode = sb.peers != null && sb.peers.sizeConnected() > 0 && indexReceiveGranted;
boolean global = post == null || (post.get("resource", "local").equals("global") && p2pmode);
boolean stealthmode = p2pmode && !global;
prop.put("topmenu_resource-select", stealthmode ? 2 : global ? 1 : 0);
prop.put("topmenu_resource-select", !authorized ? 0 : stealthmode ? 2 : global ? 1 : 0);
if ( post == null || indexSegment == null || env == null || !searchAllowed ) {
if (indexSegment == null) ConcurrentLog.info("yacysearch", "indexSegment == null");

@ -125,7 +125,9 @@ public class InstanceMirror {
public EmbeddedSolrConnector getDefaultEmbeddedConnector() {
if (this.defaultEmbeddedConnector != null) return this.defaultEmbeddedConnector;
this.defaultEmbeddedConnector = this.solr0 == null ? null : new EmbeddedSolrConnector(this.solr0);
this.embeddedCache.put(this.getDefaultCoreName(), this.defaultEmbeddedConnector);
String coreName = this.getDefaultCoreName();
if (coreName == null) return null;
this.embeddedCache.put(coreName, this.defaultEmbeddedConnector);
return this.defaultEmbeddedConnector;
}

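The added null check prevents caching the connector under a null core name. Assuming embeddedCache is a concurrent map (the diff does not show its type), a null key would throw a NullPointerException instead of simply returning null to the caller; a small standalone demo of that behavior:

import java.util.concurrent.ConcurrentHashMap;

public class NullKeyDemo {
    public static void main(String[] args) {
        ConcurrentHashMap<String, Object> cache = new ConcurrentHashMap<>();
        try {
            cache.put(null, new Object()); // concurrent maps reject null keys
        } catch (NullPointerException e) {
            System.out.println("null key rejected: " + e);
        }
    }
}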
@ -27,7 +27,10 @@ package net.yacy.cora.protocol;
public class ClientIdentification {
public static final long MIN_LOAD_DELAY = 500;
public static final int DEFAULT_TIMEOUT = 10000;
public static final int minimumLocalDeltaInit = 10; // the minimum time difference between access of the same local domain
public static final int minimumGlobalDeltaInit = 500; // the minimum time difference between access of the same global domain
/**
* provide system information (this is part of YaCy protocol)
@ -116,4 +119,8 @@ public class ClientIdentification {
return location;
}
public static long minLoadDelay() {
return MIN_LOAD_DELAY;
}
}

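Together with the NoticedURL and CrawlQueues hunks elsewhere in this commit, this change consolidates the crawler's timing constants in ClientIdentification. A compilable sketch of the resulting class, with the values copied from the diff and everything else omitted:

public class ClientIdentification {
    public static final long MIN_LOAD_DELAY = 500;   // ms between two loads
    public static final int DEFAULT_TIMEOUT = 10000; // ms
    // minimum time difference between accesses of the same local/global domain
    public static final int minimumLocalDeltaInit = 10;
    public static final int minimumGlobalDeltaInit = 500;

    public static long minLoadDelay() {
        return MIN_LOAD_DELAY;
    }
}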
@ -20,6 +20,9 @@
package net.yacy.cora.util;
import java.lang.management.ManagementFactory;
import java.lang.management.ThreadInfo;
public class Memory {
private static final Runtime runtime = Runtime.getRuntime();
@ -45,7 +48,7 @@ public class Memory {
* @return bytes
*/
public static final long maxMemory() {
return runtime.maxMemory();
return runtime.maxMemory(); // can be Long.MAX_VALUE if unlimited
}
/**
@ -63,5 +66,34 @@ public class Memory {
public static final long used() {
return total() - free();
}
/**
* get the system load within the last minute
* @return the system load or a negative number if the load is not available
*/
public static double load() {
return ManagementFactory.getOperatingSystemMXBean().getSystemLoadAverage();
}
/**
* find out the number of thread deadlocks. WARNING: this is a time-consuming task
* @return the number of deadlocked threads
*/
public static long deadlocks() {
long[] deadlockIDs = ManagementFactory.getThreadMXBean().findDeadlockedThreads();
if (deadlockIDs == null) return 0;
return deadlockIDs.length;
}
/**
* write deadlocked threads to the log as warning
*/
public static void logDeadlocks() {
long[] deadlockIDs = ManagementFactory.getThreadMXBean().findDeadlockedThreads();
if (deadlockIDs == null) return;
ThreadInfo[] infos = ManagementFactory.getThreadMXBean().getThreadInfo(deadlockIDs, true, true);
for (ThreadInfo ti : infos) {
ConcurrentLog.warn("DEADLOCKREPORT", ti.toString());
}
}
}

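The new load() and deadlock helpers in Memory wrap standard JMX management beans, so they can be tried outside YaCy. A self-contained demo of the same calls (plain JDK, no YaCy types):

import java.lang.management.ManagementFactory;

public class MemoryDemo {
    public static void main(String[] args) {
        // one-minute system load average; negative if the platform cannot provide it
        double load = ManagementFactory.getOperatingSystemMXBean().getSystemLoadAverage();
        System.out.println("load: " + (load < 0 ? "not available" : load));

        // findDeadlockedThreads() returns null when no threads are deadlocked
        long[] ids = ManagementFactory.getThreadMXBean().findDeadlockedThreads();
        System.out.println("deadlocked threads: " + (ids == null ? 0 : ids.length));
    }
}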
@ -267,7 +267,7 @@ public class Balancer {
* @throws IOException
* @throws SpaceExceededException
*/
public String push(final Request entry, final RobotsTxt robots) throws IOException, SpaceExceededException {
public String push(final Request entry, CrawlProfile profile, final RobotsTxt robots) throws IOException, SpaceExceededException {
assert entry != null;
final byte[] hash = entry.url().hash();
synchronized (this) {
@ -278,6 +278,11 @@ public class Balancer {
if (this.double_push_check.size() > MAX_DOUBLE_PUSH_CHECK || MemoryControl.shortStatus()) this.double_push_check.clear();
this.double_push_check.put(hash);
// increase dom counter
if (profile != null && profile.domMaxPages() != Integer.MAX_VALUE && profile.domMaxPages() > 0) {
profile.domInc(entry.url().getHost());
}
// add to index
final int s = this.urlFileIndex.size();
this.urlFileIndex.put(entry.toRow());

@ -149,7 +149,8 @@ public final class CrawlStacker {
// if the url was rejected we store it into the error URL db
if (rejectReason != null) {
this.nextQueue.errorURL.push(entry, ASCII.getBytes(this.peers.mySeed().hash), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, rejectReason, -1);
final CrawlProfile profile = this.crawler.getActive(UTF8.getBytes(entry.profileHandle()));
this.nextQueue.errorURL.push(entry, profile, ASCII.getBytes(this.peers.mySeed().hash), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, rejectReason, -1);
}
} catch (final Exception e) {
CrawlStacker.this.log.warn("Error while processing stackCrawl entry.\n" + "Entry: " + entry.toString() + "Error: " + e.toString(), e);
@ -341,30 +342,25 @@ public final class CrawlStacker {
entry.url().getContentDomain() == ContentDomain.AUDIO ||
entry.url().getContentDomain() == ContentDomain.VIDEO ||
entry.url().getContentDomain() == ContentDomain.CTRL) {
warning = this.nextQueue.noticeURL.push(NoticedURL.StackType.NOLOAD, entry, this.robots);
warning = this.nextQueue.noticeURL.push(NoticedURL.StackType.NOLOAD, entry, profile, this.robots);
//if (warning != null && this.log.isFine()) this.log.logFine("CrawlStacker.stackCrawl of URL " + entry.url().toNormalform(true, false) + " - not pushed: " + warning);
return null;
}
// add domain to profile domain list
if (profile.domMaxPages() != Integer.MAX_VALUE && profile.domMaxPages() > 0) {
profile.domInc(entry.url().getHost());
}
if (global) {
// it may be possible that global == true and local == true, so do not check an error case against it
if (proxy) this.log.warn("URL '" + entry.url().toString() + "' has conflicting initiator properties: global = true, proxy = true, initiator = proxy" + ", profile.handle = " + profile.handle());
if (remote) this.log.warn("URL '" + entry.url().toString() + "' has conflicting initiator properties: global = true, remote = true, initiator = " + ASCII.String(entry.initiator()) + ", profile.handle = " + profile.handle());
warning = this.nextQueue.noticeURL.push(NoticedURL.StackType.GLOBAL, entry, this.robots);
warning = this.nextQueue.noticeURL.push(NoticedURL.StackType.GLOBAL, entry, profile, this.robots);
} else if (local) {
if (proxy) this.log.warn("URL '" + entry.url().toString() + "' has conflicting initiator properties: local = true, proxy = true, initiator = proxy" + ", profile.handle = " + profile.handle());
if (remote) this.log.warn("URL '" + entry.url().toString() + "' has conflicting initiator properties: local = true, remote = true, initiator = " + ASCII.String(entry.initiator()) + ", profile.handle = " + profile.handle());
warning = this.nextQueue.noticeURL.push(NoticedURL.StackType.LOCAL, entry, this.robots);
warning = this.nextQueue.noticeURL.push(NoticedURL.StackType.LOCAL, entry, profile, this.robots);
} else if (proxy) {
if (remote) this.log.warn("URL '" + entry.url().toString() + "' has conflicting initiator properties: proxy = true, remote = true, initiator = " + ASCII.String(entry.initiator()) + ", profile.handle = " + profile.handle());
warning = this.nextQueue.noticeURL.push(NoticedURL.StackType.LOCAL, entry, this.robots);
warning = this.nextQueue.noticeURL.push(NoticedURL.StackType.LOCAL, entry, profile, this.robots);
} else if (remote) {
warning = this.nextQueue.noticeURL.push(NoticedURL.StackType.REMOTE, entry, this.robots);
warning = this.nextQueue.noticeURL.push(NoticedURL.StackType.REMOTE, entry, profile, this.robots);
}
if (warning != null && this.log.isFine()) this.log.fine("CrawlStacker.stackCrawl of URL " + entry.url().toNormalform(true) + " - not pushed: " + warning);

@ -63,7 +63,6 @@ import net.yacy.search.SwitchboardConstants;
public class CrawlQueues {
public static final long queuedMinLoadDelay = 500;
private static final String ERROR_DB_FILENAME = "urlError4.db";
private static final String DELEGATED_DB_FILENAME = "urlDelegated4.db";
@ -588,6 +587,7 @@ public class CrawlQueues {
+ this.noticeURL.stackSize(NoticedURL.StackType.REMOTE) + "]";
try {
final Request urlEntry = this.noticeURL.pop(NoticedURL.StackType.REMOTE, true, this.sb.crawler, this.sb.robots);
if (urlEntry == null) return false;
final String profileHandle = urlEntry.profileHandle();
// System.out.println("DEBUG plasmaSwitchboard.processCrawling:
// profileHandle = " + profileHandle + ", urlEntry.url = " +
@ -612,6 +612,7 @@ public class CrawlQueues {
private Request request;
private final Integer code;
private final long start;
private final CrawlProfile profile;
private Loader(final Request entry) {
this.start = System.currentTimeMillis();
@ -619,6 +620,7 @@ public class CrawlQueues {
this.request.setStatus("worker-initialized", WorkflowJob.STATUS_INITIATED);
this.code = Integer.valueOf(entry.hashCode());
this.setPriority(Thread.MIN_PRIORITY); // http requests from the crawler should not cause that other functions work worse
this.profile = CrawlQueues.this.sb.crawler.getActive(UTF8.getBytes(this.request.profileHandle()));
}
private long age() {
@ -637,6 +639,7 @@ public class CrawlQueues {
//if (log.isFine()) log.logFine("Crawling of URL '" + request.url().toString() + "' disallowed by robots.txt.");
CrawlQueues.this.errorURL.push(
this.request,
profile,
ASCII.getBytes(CrawlQueues.this.sb.peers.mySeed().hash),
new Date(),
1,
@ -652,8 +655,7 @@ public class CrawlQueues {
// returns null if everything went fine, a fail reason string if a problem occurred
try {
this.request.setStatus("loading", WorkflowJob.STATUS_RUNNING);
final CrawlProfile e = CrawlQueues.this.sb.crawler.getActive(UTF8.getBytes(this.request.profileHandle()));
final Response response = CrawlQueues.this.sb.loader.load(this.request, e == null ? CacheStrategy.IFEXIST : e.cacheStrategy(), BlacklistType.CRAWLER, queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
final Response response = CrawlQueues.this.sb.loader.load(this.request, profile == null ? CacheStrategy.IFEXIST : profile.cacheStrategy(), BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
if (response == null) {
this.request.setStatus("error", WorkflowJob.STATUS_FINISHED);
if (CrawlQueues.this.log.isFine()) {
@ -677,6 +679,7 @@ public class CrawlQueues {
if (result != null) {
CrawlQueues.this.errorURL.push(
this.request,
profile,
ASCII.getBytes(CrawlQueues.this.sb.peers.mySeed().hash),
new Date(),
1,
@ -690,6 +693,7 @@ public class CrawlQueues {
} catch (final Exception e) {
CrawlQueues.this.errorURL.push(
this.request,
profile,
ASCII.getBytes(CrawlQueues.this.sb.peers.mySeed().hash),
new Date(),
1,

@ -39,6 +39,7 @@ import net.yacy.cora.order.Base64Order;
import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.crawler.Balancer;
import net.yacy.crawler.CrawlSwitchboard;
import net.yacy.crawler.retrieval.Request;
@ -51,9 +52,6 @@ public class NoticedURL {
LOCAL, GLOBAL, REMOTE, NOLOAD;
}
private static final int minimumLocalDeltaInit = 10; // the minimum time difference between access of the same local domain
public static final int minimumGlobalDeltaInit = 500; // the minimum time difference between access of the same global domain
private Balancer coreStack; // links found by crawling to depth-1
private Balancer limitStack; // links found by crawling at target depth
private Balancer remoteStack; // links from remote crawl orders
@ -65,11 +63,11 @@ public class NoticedURL {
final boolean useTailCache,
final boolean exceed134217727) {
ConcurrentLog.info("NoticedURL", "CREATING STACKS at " + cachePath.toString());
this.coreStack = new Balancer(cachePath, "urlNoticeCoreStack", minimumLocalDeltaInit, minimumGlobalDeltaInit, myAgentIDs, useTailCache, exceed134217727);
this.limitStack = new Balancer(cachePath, "urlNoticeLimitStack", minimumLocalDeltaInit, minimumGlobalDeltaInit, myAgentIDs, useTailCache, exceed134217727);
this.coreStack = new Balancer(cachePath, "urlNoticeCoreStack", ClientIdentification.minimumLocalDeltaInit, ClientIdentification.minimumGlobalDeltaInit, myAgentIDs, useTailCache, exceed134217727);
this.limitStack = new Balancer(cachePath, "urlNoticeLimitStack", ClientIdentification.minimumLocalDeltaInit, ClientIdentification.minimumGlobalDeltaInit, myAgentIDs, useTailCache, exceed134217727);
//overhangStack = new plasmaCrawlBalancer(overhangStackFile);
this.remoteStack = new Balancer(cachePath, "urlNoticeRemoteStack", minimumLocalDeltaInit, minimumGlobalDeltaInit, myAgentIDs, useTailCache, exceed134217727);
this.noloadStack = new Balancer(cachePath, "urlNoticeNoLoadStack", minimumLocalDeltaInit, minimumGlobalDeltaInit, myAgentIDs, useTailCache, exceed134217727);
this.remoteStack = new Balancer(cachePath, "urlNoticeRemoteStack", ClientIdentification.minimumLocalDeltaInit, ClientIdentification.minimumGlobalDeltaInit, myAgentIDs, useTailCache, exceed134217727);
this.noloadStack = new Balancer(cachePath, "urlNoticeNoLoadStack", ClientIdentification.minimumLocalDeltaInit, ClientIdentification.minimumGlobalDeltaInit, myAgentIDs, useTailCache, exceed134217727);
}
public int getMinimumLocalDelta() {
@ -172,13 +170,13 @@ public class NoticedURL {
* @param entry
* @return null if this was successful or a String explaining what went wrong in case of an error
*/
public String push(final StackType stackType, final Request entry, final RobotsTxt robots) {
public String push(final StackType stackType, final Request entry, CrawlProfile profile, final RobotsTxt robots) {
try {
switch (stackType) {
case LOCAL: return this.coreStack.push(entry, robots);
case GLOBAL: return this.limitStack.push(entry, robots);
case REMOTE: return this.remoteStack.push(entry, robots);
case NOLOAD: return this.noloadStack.push(entry, robots);
case LOCAL: return this.coreStack.push(entry, profile, robots);
case GLOBAL: return this.limitStack.push(entry, profile, robots);
case REMOTE: return this.remoteStack.push(entry, profile, robots);
case NOLOAD: return this.noloadStack.push(entry, profile, robots);
default: return "stack type unknown";
}
} catch (final Exception er) {
@ -271,7 +269,7 @@ public class NoticedURL {
try {
final Request entry = pop(fromStack, false, cs, robots);
if (entry != null) {
final String warning = push(toStack, entry, robots);
final String warning = push(toStack, entry, null, robots);
if (warning != null) {
ConcurrentLog.warn("NoticedURL", "shift from " + fromStack + " to " + toStack + ": " + warning);
}

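The CrawlProfile added to push() here is handed straight to the per-stack Balancer, which uses it to increase the per-domain page counter (see the Balancer hunk above); the stack-shift path passes null because no profile is known at that point. A simplified, compilable sketch of the dispatch pattern, with stand-in types instead of YaCy's:

enum StackType { LOCAL, GLOBAL, REMOTE, NOLOAD }

class Balancer {
    // returns null on success, an explanation string otherwise
    String push(Object entry, Object profile) { return null; }
}

public class PushDispatchSketch {
    private final Balancer coreStack = new Balancer();
    private final Balancer limitStack = new Balancer();
    private final Balancer remoteStack = new Balancer();
    private final Balancer noloadStack = new Balancer();

    public String push(StackType type, Object entry, Object profile) {
        switch (type) {
            case LOCAL:  return this.coreStack.push(entry, profile);
            case GLOBAL: return this.limitStack.push(entry, profile);
            case REMOTE: return this.remoteStack.push(entry, profile);
            case NOLOAD: return this.noloadStack.push(entry, profile);
            default:     return "stack type unknown";
        }
    }
}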
@ -172,6 +172,7 @@ public class ZURL implements Iterable<ZURL.Entry> {
public void push(
final Request bentry,
final CrawlProfile profile,
final byte[] executor,
final Date workdate,
final int workcount,
@ -190,7 +191,7 @@ public class ZURL implements Iterable<ZURL.Entry> {
if (this.fulltext.getDefaultConnector() != null && failCategory.store) {
// send the error to solr
try {
SolrInputDocument errorDoc = this.fulltext.getDefaultConfiguration().err(bentry.url(), failCategory.name() + " " + reason, failCategory.failType, httpcode);
SolrInputDocument errorDoc = this.fulltext.getDefaultConfiguration().err(bentry.url(), profile == null ? null : profile.collections(), failCategory.name() + " " + reason, failCategory.failType, httpcode);
this.fulltext.getDefaultConnector().add(errorDoc);
} catch (final IOException e) {
ConcurrentLog.warn("SOLR", "failed to send error " + bentry.url().toNormalform(true) + " to solr: " + e.getMessage());

@ -101,6 +101,7 @@ public class FTPLoader {
// create new ftp client
final FTPClient ftpClient = new FTPClient();
final CrawlProfile profile = this.sb.crawler.getActive(ASCII.getBytes(request.profileHandle()));
// get a connection
if (openConnection(ftpClient, entryUrl)) {
// test if the specified file is a directory
@ -130,7 +131,6 @@ public class FTPLoader {
final ResponseHeader responseHeader = new ResponseHeader(200);
responseHeader.put(HeaderFramework.LAST_MODIFIED, HeaderFramework.formatRFC1123(new Date()));
responseHeader.put(HeaderFramework.CONTENT_TYPE, "text/html");
final CrawlProfile profile = this.sb.crawler.getActive(ASCII.getBytes(request.profileHandle()));
response = new Response(
request,
requestHeader,
@ -156,7 +156,7 @@ public class FTPLoader {
if (berr.size() > 0 || response == null) {
// some error logging
final String detail = (berr.size() > 0) ? "Errorlog: " + berr.toString() : "";
this.sb.crawlQueues.errorURL.push(request, ASCII.getBytes(this.sb.peers.mySeed().hash), new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, " ftp server download, " + detail, -1);
this.sb.crawlQueues.errorURL.push(request, profile, ASCII.getBytes(this.sb.peers.mySeed().hash), new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, " ftp server download, " + detail, -1);
throw new IOException("FTPLoader: Unable to download URL '" + request.url().toString() + "': " + detail);
}

@ -70,20 +70,20 @@ public final class HTTPLoader {
this.socketTimeout = (int) sb.getConfigLong("crawler.clientTimeout", 30000);
}
public Response load(final Request entry, final int maxFileSize, final BlacklistType blacklistType, int timeout) throws IOException {
public Response load(final Request entry, CrawlProfile profile, final int maxFileSize, final BlacklistType blacklistType, int timeout) throws IOException {
Latency.updateBeforeLoad(entry.url());
final long start = System.currentTimeMillis();
final Response doc = load(entry, DEFAULT_CRAWLING_RETRY_COUNT, maxFileSize, blacklistType, timeout);
final Response doc = load(entry, profile, DEFAULT_CRAWLING_RETRY_COUNT, maxFileSize, blacklistType, timeout);
Latency.updateAfterLoad(entry.url(), System.currentTimeMillis() - start);
return doc;
}
private Response load(final Request request, final int retryCount, final int maxFileSize, final BlacklistType blacklistType, int timeout) throws IOException {
private Response load(final Request request, CrawlProfile profile, final int retryCount, final int maxFileSize, final BlacklistType blacklistType, int timeout) throws IOException {
byte[] myHash = ASCII.getBytes(this.sb.peers.mySeed().hash);
if (retryCount < 0) {
this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "retry counter exceeded", -1);
this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "retry counter exceeded", -1);
throw new IOException("retry counter exceeded for URL " + request.url().toString() + ". Processing aborted.");
}
@ -99,7 +99,7 @@ public final class HTTPLoader {
// check if url is in blacklist
final String hostlow = host.toLowerCase();
if (blacklistType != null && Switchboard.urlBlacklist.isListed(blacklistType, hostlow, path)) {
this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
throw new IOException("CRAWLER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
}
@ -146,7 +146,7 @@ public final class HTTPLoader {
redirectionUrlString = redirectionUrlString == null ? "" : redirectionUrlString.trim();
if (redirectionUrlString.isEmpty()) {
this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "no redirection url provided, field '" + HeaderFramework.LOCATION + "' is empty", statusCode);
this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "no redirection url provided, field '" + HeaderFramework.LOCATION + "' is empty", statusCode);
throw new IOException("REJECTED EMTPY REDIRECTION '" + client.getHttpResponse().getStatusLine() + "' for URL " + requestURLString);
}
@ -160,32 +160,32 @@ public final class HTTPLoader {
this.sb.webStructure.generateCitationReference(url, redirectionUrl);
if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_RECORD_REDIRECTS, true)) {
this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.FINAL_REDIRECT_RULE, "redirect to " + redirectionUrlString, statusCode);
this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_REDIRECT_RULE, "redirect to " + redirectionUrlString, statusCode);
}
if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_FOLLOW_REDIRECTS, true)) {
// if we are already doing a shutdown we don't need to retry crawling
if (Thread.currentThread().isInterrupted()) {
this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "server shutdown", statusCode);
this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "server shutdown", statusCode);
throw new IOException("CRAWLER Retry of URL=" + requestURLString + " aborted because of server shutdown.");
}
// check if the url was already loaded
if (Cache.has(redirectionUrl.hash())) { // customer request
this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirection to double content", statusCode);
this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirection to double content", statusCode);
throw new IOException("CRAWLER Redirection of URL=" + requestURLString + " ignored. The url appears already in htcache");
}
// retry crawling with new url
request.redirectURL(redirectionUrl);
return load(request, retryCount - 1, maxFileSize, blacklistType, timeout);
return load(request, profile, retryCount - 1, maxFileSize, blacklistType, timeout);
}
// we don't want to follow redirects
this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode);
this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode);
throw new IOException("REJECTED UNWANTED REDIRECTION '" + client.getHttpResponse().getStatusLine() + "' for URL " + requestURLString);
} else if (responseBody == null) {
// no response, reject file
this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "no response body", statusCode);
this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "no response body", statusCode);
throw new IOException("REJECTED EMPTY RESPONSE BODY '" + client.getHttpResponse().getStatusLine() + "' for URL " + requestURLString);
} else if (statusCode == 200 || statusCode == 203) {
// the transfer is ok
@ -196,12 +196,11 @@ public final class HTTPLoader {
// check length again in case it was not possible to get the length before loading
if (maxFileSize >= 0 && contentLength > maxFileSize) {
this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.FINAL_PROCESS_CONTEXT, "file size limit exceeded", statusCode);
this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_PROCESS_CONTEXT, "file size limit exceeded", statusCode);
throw new IOException("REJECTED URL " + request.url() + " because file size '" + contentLength + "' exceeds max filesize limit of " + maxFileSize + " bytes. (GET)");
}
// create a new cache entry
final CrawlProfile profile = request.profileHandle() == null ? null : this.sb.crawler.getActive(ASCII.getBytes(request.profileHandle()));
response = new Response(
request,
requestHeader,
@ -214,7 +213,7 @@ public final class HTTPLoader {
return response;
} else {
// if the response has not the right response type then reject file
this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", statusCode);
this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", statusCode);
throw new IOException("REJECTED WRONG STATUS TYPE '" + client.getHttpResponse().getStatusLine() + "' for URL " + requestURLString);
}
}

Some files were not shown because too many files have changed in this diff.
