From 069562a14d15b42a96c7815ed62eec78fd66cb21 Mon Sep 17 00:00:00 2001 From: orbiter Date: Fri, 15 Jun 2007 23:47:08 +0000 Subject: [PATCH] fixed problem with re-crawl; replaced error file-db with ram-db git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3900 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/IndexCreateIndexingQueue_p.java | 2 +- htroot/yacy/query.java | 4 ++-- source/de/anomic/plasma/plasmaCrawlStacker.java | 2 +- source/de/anomic/plasma/plasmaCrawlZURL.java | 7 +++++++ source/de/anomic/plasma/plasmaSwitchboard.java | 3 ++- source/de/anomic/server/serverAbstractSwitch.java | 5 ++++- source/de/anomic/yacy/yacyClient.java | 4 ++-- source/de/anomic/yacy/yacySeedDB.java | 2 +- 8 files changed, 20 insertions(+), 9 deletions(-) diff --git a/htroot/IndexCreateIndexingQueue_p.java b/htroot/IndexCreateIndexingQueue_p.java index 9d59f9d55..2fa92c213 100644 --- a/htroot/IndexCreateIndexingQueue_p.java +++ b/htroot/IndexCreateIndexingQueue_p.java @@ -65,7 +65,7 @@ public class IndexCreateIndexingQueue_p { plasmaSwitchboard switchboard = (plasmaSwitchboard) env; serverObjects prop = new serverObjects(); prop.put("rejected", 0); - int showRejectedCount = 10; + int showRejectedCount = 100; int showLimit = 100; if (post != null) { diff --git a/htroot/yacy/query.java b/htroot/yacy/query.java index 382100b7d..a57f07248 100644 --- a/htroot/yacy/query.java +++ b/htroot/yacy/query.java @@ -61,9 +61,9 @@ public final class query { // return variable that accumulates replacements final plasmaSwitchboard sb = (plasmaSwitchboard) ss; + if (sb == null) { return null; } final serverObjects prop = new serverObjects(); - if (prop == null || sb == null) { return null; } - + if ((sb.isRobinsonMode()) && (!sb.isPublicRobinson()) && (!sb.isInMyCluster((String)header.get(httpHeader.CONNECTION_PROP_CLIENTIP)))) { diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java index 8122f8fad..5dee78f64 100644 --- 
a/source/de/anomic/plasma/plasmaCrawlStacker.java +++ b/source/de/anomic/plasma/plasmaCrawlStacker.java @@ -383,7 +383,7 @@ public final class plasmaCrawlStacker { // apply recrawl rule if ((dbocc != null) && (!(recrawl))) { reason = plasmaCrawlEURL.DOUBLE_REGISTERED + dbocc + ")"; - //this.log.logFine("URL '" + nexturlString + "' is double registered in '" + dbocc + "'. " + "Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms"); + this.log.logFine("URL '" + nexturlString + "' is double registered in '" + dbocc + "'. " + "Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms"); return reason; } diff --git a/source/de/anomic/plasma/plasmaCrawlZURL.java b/source/de/anomic/plasma/plasmaCrawlZURL.java index 48e449fde..6abf8ddd0 100644 --- a/source/de/anomic/plasma/plasmaCrawlZURL.java +++ b/source/de/anomic/plasma/plasmaCrawlZURL.java @@ -36,6 +36,7 @@ import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroFlexTable; import de.anomic.kelondro.kelondroIndex; import de.anomic.kelondro.kelondroRow; +import de.anomic.kelondro.kelondroRowSet; import de.anomic.net.URL; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeedDB; @@ -57,10 +58,16 @@ public class plasmaCrawlZURL { private LinkedList rejectedStack = new LinkedList(); // strings: url public plasmaCrawlZURL(File cachePath, String tablename) { + // creates a new ZURL in a file cachePath.mkdirs(); urlIndexFile = new kelondroFlexTable(cachePath, tablename, -1, rowdef, true); } + public plasmaCrawlZURL() { + // creates a new ZURL in RAM + urlIndexFile = new kelondroRowSet(rowdef, 0); + } + public int size() { return urlIndexFile.size() ; } diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 38db5cdf8..12ab7bd42 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -1067,7 +1067,8 @@ public final class plasmaSwitchboard extends 
serverAbstractSwitch implements ser // start indexing management log.logConfig("Starting Indexing Management"); noticeURL = new plasmaCrawlNURL(plasmaPath); - errorURL = new plasmaCrawlZURL(plasmaPath, "urlError.db"); + errorURL = new plasmaCrawlZURL(); // fresh error DB each startup; can be held in RAM and reduces IO; + //errorURL = new plasmaCrawlZURL(plasmaPath, "urlError.db"); delegatedURL = new plasmaCrawlZURL(plasmaPath, "urlDelegated.db"); wordIndex = new plasmaWordIndex(indexPrimaryPath, indexSecondaryPath, ramRWI_time, log); diff --git a/source/de/anomic/server/serverAbstractSwitch.java b/source/de/anomic/server/serverAbstractSwitch.java index 51258a935..271d2613a 100644 --- a/source/de/anomic/server/serverAbstractSwitch.java +++ b/source/de/anomic/server/serverAbstractSwitch.java @@ -43,6 +43,7 @@ package de.anomic.server; import java.io.File; import java.io.IOException; import java.net.InetAddress; +import java.util.ConcurrentModificationException; import java.util.HashMap; import java.util.Iterator; import java.util.Map; @@ -156,7 +157,9 @@ public abstract class serverAbstractSwitch implements serverSwitch { access.put(new Long(System.currentTimeMillis()), accessPath); // write back to tracker - accessTracker.put(host, clearTooOldAccess(access)); + try { + accessTracker.put(host, clearTooOldAccess(access)); + } catch (ConcurrentModificationException e) {}; } public TreeMap accessTrack(String host) { diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index 1448072b0..7ae334806 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -131,7 +131,7 @@ public final class yacyClient { result = nxTools.table( httpc.wput(url, yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", - 105000, + 12000, null, null, (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null, @@ -275,7 +275,7 @@ public final class yacyClient { "&env=" + seedHash ), target.getHexHash() + ".yacyh", - 10000, + 8000, 
null, null, (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null, diff --git a/source/de/anomic/yacy/yacySeedDB.java b/source/de/anomic/yacy/yacySeedDB.java index 1545a01ba..b662df43d 100644 --- a/source/de/anomic/yacy/yacySeedDB.java +++ b/source/de/anomic/yacy/yacySeedDB.java @@ -255,7 +255,7 @@ public final class yacySeedDB { // address has therefore the form // address ::= ('.yacy'|'.yacyh'){'='{':'