diff --git a/htroot/HostBrowser.java b/htroot/HostBrowser.java index 510f16b03..75075c106 100644 --- a/htroot/HostBrowser.java +++ b/htroot/HostBrowser.java @@ -59,7 +59,6 @@ import net.yacy.search.index.Fulltext; import net.yacy.search.index.Segment.ReferenceReport; import net.yacy.search.index.Segment.ReferenceReportCache; import net.yacy.search.query.QueryParams; -import net.yacy.search.schema.CollectionConfiguration.FailDoc; import net.yacy.search.schema.CollectionSchema; import net.yacy.server.serverObjects; import net.yacy.server.serverSwitch; @@ -466,7 +465,7 @@ public class HostBrowser { prop.putHTML("files_list_" + c + "_type_admin", admin ? "true" : "false"); StoreType type = (StoreType) entry.getValue(); try {uri = new DigestURL(entry.getKey());} catch (final MalformedURLException e) {uri = null;} - HarvestProcess process = uri == null ? null : sb.crawlQueues.exists(uri.hash(), true); + HarvestProcess process = uri == null ? null : sb.crawlQueues.exists(uri.hash()); // todo: cannot identify errors boolean loading = load.equals(entry.getKey()) || (process != null && process != HarvestProcess.ERRORS); boolean error = process == HarvestProcess.ERRORS || type == StoreType.EXCLUDED || type == StoreType.FAILED; boolean dc = type != StoreType.INDEX && !error && !loading && list.containsKey(entry.getKey() + "/"); @@ -482,8 +481,8 @@ public class HostBrowser { FailType failType = errorDocs.get(entry.getKey()); if (failType == null) { // maybe this is only in the errorURL - FailDoc faildoc = sb.crawlQueues.errorURL.get(ASCII.String(uri.hash())); - prop.putHTML("files_list_" + c + "_type_stored_error", process == HarvestProcess.ERRORS && faildoc != null ? faildoc.getFailReason() : "unknown error"); + //Metadata faildoc = sb.index.fulltext().getDefaultConnector().getMetadata(ASCII.String(uri.hash())); + prop.putHTML("files_list_" + c + "_type_stored_error", "unknown error"); } else { String ids = ASCII.String(uri.hash()); InfoCacheEntry ice = infoCache.get(ids); diff --git a/source/net/yacy/cora/federate/solr/instance/InstanceMirror.java b/source/net/yacy/cora/federate/solr/instance/InstanceMirror.java index 0dcca5efd..e5448fb43 100644 --- a/source/net/yacy/cora/federate/solr/instance/InstanceMirror.java +++ b/source/net/yacy/cora/federate/solr/instance/InstanceMirror.java @@ -32,6 +32,7 @@ import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector; import net.yacy.cora.federate.solr.connector.MirrorSolrConnector; import net.yacy.cora.federate.solr.connector.RemoteSolrConnector; import net.yacy.cora.federate.solr.connector.SolrConnector; +import net.yacy.kelondro.util.MemoryControl; public class InstanceMirror { @@ -165,7 +166,8 @@ public class InstanceMirror { if (msc != null) return msc; EmbeddedSolrConnector esc = getEmbeddedConnector(corename); RemoteSolrConnector rsc = getRemoteConnector(corename); - msc = new ConcurrentUpdateSolrConnector(new MirrorSolrConnector(esc, rsc), RemoteInstance.queueSizeByMemory(), 10000, Runtime.getRuntime().availableProcessors()); + int cacheSize = (int) (MemoryControl.available() / 30000); // will return about 10000 for standard ram size + msc = new ConcurrentUpdateSolrConnector(new MirrorSolrConnector(esc, rsc), RemoteInstance.queueSizeByMemory(), cacheSize, Runtime.getRuntime().availableProcessors()); //msc = new MirrorSolrConnector(esc, rsc); this.mirrorConnectorCache.put(corename, msc); return msc; diff --git a/source/net/yacy/crawler/CrawlStacker.java b/source/net/yacy/crawler/CrawlStacker.java index a1f33d4da..e0e6a30b8 100644 --- a/source/net/yacy/crawler/CrawlStacker.java +++ b/source/net/yacy/crawler/CrawlStacker.java @@ -43,6 +43,7 @@ import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.cora.federate.solr.FailCategory; +import net.yacy.cora.federate.solr.connector.SolrConnector.Metadata; import net.yacy.cora.order.Base64Order; import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.ftp.FTPClient; @@ -60,7 +61,6 @@ import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.repository.FilterEngine; import net.yacy.search.Switchboard; import net.yacy.search.index.Segment; -import net.yacy.search.schema.CollectionConfiguration; public final class CrawlStacker { @@ -379,22 +379,27 @@ public final class CrawlStacker { public String checkAcceptanceInitially(final DigestURL url, final CrawlProfile profile) { // check if the url is double registered - final HarvestProcess dbocc = this.nextQueue.exists(url.hash(), false); // returns the name of the queue if entry exists + final HarvestProcess dbocc = this.nextQueue.exists(url.hash()); // returns the name of the queue if entry exists if (dbocc != null) { return "double in: " + dbocc.name(); } String urlhash = ASCII.String(url.hash()); - final CollectionConfiguration.FailDoc errorEntry = this.nextQueue.errorURL.get(urlhash); - final Date oldDate = errorEntry == null ? null : errorEntry.getFailDate(); + Metadata oldEntry = null; + try { + oldEntry = this.indexSegment.fulltext().getDefaultConnector().getMetadata(urlhash); + } catch (IOException e) { + ConcurrentLog.logException(e); + } + final Long oldDate = oldEntry == null ? null : oldEntry.date; if (oldDate == null) { return null; // no evidence that we know that url } - final boolean recrawl = profile.recrawlIfOlder() > oldDate.getTime(); + final boolean recrawl = profile.recrawlIfOlder() > oldDate.longValue(); final String urlstring = url.toString(); if (recrawl) { if (CrawlStacker.log.isInfo()) CrawlStacker.log.info("RE-CRAWL of URL '" + urlstring + "': this url was crawled " + - ((System.currentTimeMillis() - oldDate.getTime()) / 60000 / 60 / 24) + " days ago."); + ((System.currentTimeMillis() - oldDate.longValue()) / 60000 / 60 / 24) + " days ago."); } else { return "double in: LURL-DB, oldDate = " + oldDate.toString(); } diff --git a/source/net/yacy/crawler/data/CrawlQueues.java b/source/net/yacy/crawler/data/CrawlQueues.java index d7f16c881..55c938502 100644 --- a/source/net/yacy/crawler/data/CrawlQueues.java +++ b/source/net/yacy/crawler/data/CrawlQueues.java @@ -143,7 +143,7 @@ public class CrawlQueues { * @param hash * @return if the hash exists, the name of the database is returned, otherwise null is returned */ - public HarvestProcess exists(final byte[] hash, final boolean checkErrorCache) { + public HarvestProcess exists(final byte[] hash) { if (this.delegatedURL.containsKey(ASCII.String(hash))) { return HarvestProcess.DELEGATED; } @@ -155,9 +155,6 @@ public class CrawlQueues { return HarvestProcess.WORKER; } } - if (checkErrorCache && this.errorURL.exists(hash)) { - return HarvestProcess.ERRORS; - } return null; } diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 0e2daa6f3..8a5c122f4 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -98,6 +98,7 @@ import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.cora.federate.solr.FailCategory; import net.yacy.cora.federate.solr.Ranking; import net.yacy.cora.federate.solr.SchemaConfiguration; +import net.yacy.cora.federate.solr.connector.SolrConnector.Metadata; import net.yacy.cora.federate.solr.instance.RemoteInstance; import net.yacy.cora.federate.yacy.CacheStrategy; import net.yacy.cora.order.Base64Order; @@ -1616,7 +1617,16 @@ public final class Switchboard extends serverSwitch { */ public HarvestProcess urlExists(final String hash) { if (this.index.getLoadTime(hash) >= 0) return HarvestProcess.LOADED; - return this.crawlQueues.exists(ASCII.getBytes(hash), true); + HarvestProcess hp = this.crawlQueues.exists(ASCII.getBytes(hash)); + if (hp != null) return hp; + try { + Metadata md = this.index.fulltext().getDefaultConnector().getMetadata(hash); + if (md == null) return null; + return HarvestProcess.LOADED; // todo: can also be in error + } catch (IOException e) { + ConcurrentLog.logException(e); + return null; + } } public void urlRemove(final Segment segment, final byte[] hash) { diff --git a/source/net/yacy/search/index/ErrorCache.java b/source/net/yacy/search/index/ErrorCache.java index 29a21cdae..85235123a 100644 --- a/source/net/yacy/search/index/ErrorCache.java +++ b/source/net/yacy/search/index/ErrorCache.java @@ -167,7 +167,8 @@ public class ErrorCache { } return l; } - + + /* public CollectionConfiguration.FailDoc get(final String urlhash) { CollectionConfiguration.FailDoc failDoc = null; synchronized (this.cache) { @@ -185,7 +186,6 @@ public class ErrorCache { return null; } } - public boolean exists(final byte[] urlHash) { String urlHashString = ASCII.String(urlHash); try { @@ -200,7 +200,7 @@ public class ErrorCache { return false; } } - +*/ public void clearStack() { synchronized (this.cache) { this.cache.clear();