fixed problem with re-crawl; replaced error file-db with ram-db

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3900 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 18 years ago
parent c7a614830a
commit 069562a14d

@ -65,7 +65,7 @@ public class IndexCreateIndexingQueue_p {
plasmaSwitchboard switchboard = (plasmaSwitchboard) env; plasmaSwitchboard switchboard = (plasmaSwitchboard) env;
serverObjects prop = new serverObjects(); serverObjects prop = new serverObjects();
prop.put("rejected", 0); prop.put("rejected", 0);
int showRejectedCount = 10; int showRejectedCount = 100;
int showLimit = 100; int showLimit = 100;
if (post != null) { if (post != null) {

@ -61,9 +61,9 @@ public final class query {
// return variable that accumulates replacements // return variable that accumulates replacements
final plasmaSwitchboard sb = (plasmaSwitchboard) ss; final plasmaSwitchboard sb = (plasmaSwitchboard) ss;
if (sb == null) { return null; }
final serverObjects prop = new serverObjects(); final serverObjects prop = new serverObjects();
if (prop == null || sb == null) { return null; }
if ((sb.isRobinsonMode()) && if ((sb.isRobinsonMode()) &&
(!sb.isPublicRobinson()) && (!sb.isPublicRobinson()) &&
(!sb.isInMyCluster((String)header.get(httpHeader.CONNECTION_PROP_CLIENTIP)))) { (!sb.isInMyCluster((String)header.get(httpHeader.CONNECTION_PROP_CLIENTIP)))) {

@ -383,7 +383,7 @@ public final class plasmaCrawlStacker {
// apply recrawl rule // apply recrawl rule
if ((dbocc != null) && (!(recrawl))) { if ((dbocc != null) && (!(recrawl))) {
reason = plasmaCrawlEURL.DOUBLE_REGISTERED + dbocc + ")"; reason = plasmaCrawlEURL.DOUBLE_REGISTERED + dbocc + ")";
//this.log.logFine("URL '" + nexturlString + "' is double registered in '" + dbocc + "'. " + "Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms"); this.log.logFine("URL '" + nexturlString + "' is double registered in '" + dbocc + "'. " + "Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms");
return reason; return reason;
} }

@ -36,6 +36,7 @@ import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroFlexTable; import de.anomic.kelondro.kelondroFlexTable;
import de.anomic.kelondro.kelondroIndex; import de.anomic.kelondro.kelondroIndex;
import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroRowSet;
import de.anomic.net.URL; import de.anomic.net.URL;
import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeedDB; import de.anomic.yacy.yacySeedDB;
@ -57,10 +58,16 @@ public class plasmaCrawlZURL {
private LinkedList rejectedStack = new LinkedList(); // strings: url private LinkedList rejectedStack = new LinkedList(); // strings: url
public plasmaCrawlZURL(File cachePath, String tablename) { public plasmaCrawlZURL(File cachePath, String tablename) {
// creates a new ZURL in a file
cachePath.mkdirs(); cachePath.mkdirs();
urlIndexFile = new kelondroFlexTable(cachePath, tablename, -1, rowdef, true); urlIndexFile = new kelondroFlexTable(cachePath, tablename, -1, rowdef, true);
} }
public plasmaCrawlZURL() {
// creates a new ZURL in RAM: the index is backed by a kelondroRowSet
// built from rowdef, so no cache path or table file is involved here
urlIndexFile = new kelondroRowSet(rowdef, 0);
}
public int size() { public int size() {
return urlIndexFile.size() ; return urlIndexFile.size() ;
} }

@ -1067,7 +1067,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// start indexing management // start indexing management
log.logConfig("Starting Indexing Management"); log.logConfig("Starting Indexing Management");
noticeURL = new plasmaCrawlNURL(plasmaPath); noticeURL = new plasmaCrawlNURL(plasmaPath);
errorURL = new plasmaCrawlZURL(plasmaPath, "urlError.db"); errorURL = new plasmaCrawlZURL(); // fresh error DB each startup; can be held in RAM and reduces IO;
//errorURL = new plasmaCrawlZURL(plasmaPath, "urlError.db");
delegatedURL = new plasmaCrawlZURL(plasmaPath, "urlDelegated.db"); delegatedURL = new plasmaCrawlZURL(plasmaPath, "urlDelegated.db");
wordIndex = new plasmaWordIndex(indexPrimaryPath, indexSecondaryPath, ramRWI_time, log); wordIndex = new plasmaWordIndex(indexPrimaryPath, indexSecondaryPath, ramRWI_time, log);

@ -43,6 +43,7 @@ package de.anomic.server;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.net.InetAddress; import java.net.InetAddress;
import java.util.ConcurrentModificationException;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.Map; import java.util.Map;
@ -156,7 +157,9 @@ public abstract class serverAbstractSwitch implements serverSwitch {
access.put(new Long(System.currentTimeMillis()), accessPath); access.put(new Long(System.currentTimeMillis()), accessPath);
// write back to tracker // write back to tracker
accessTracker.put(host, clearTooOldAccess(access)); try {
accessTracker.put(host, clearTooOldAccess(access));
} catch (ConcurrentModificationException e) {};
} }
public TreeMap accessTrack(String host) { public TreeMap accessTrack(String host) {

@ -131,7 +131,7 @@ public final class yacyClient {
result = nxTools.table( result = nxTools.table(
httpc.wput(url, httpc.wput(url,
yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", yacySeed.b64Hash2hexHash(otherHash) + ".yacyh",
105000, 12000,
null, null,
null, null,
(useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null, (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null,
@ -275,7 +275,7 @@ public final class yacyClient {
"&env=" + seedHash "&env=" + seedHash
), ),
target.getHexHash() + ".yacyh", target.getHexHash() + ".yacyh",
10000, 8000,
null, null,
null, null,
(useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null, (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null,

@ -255,7 +255,7 @@ public final class yacySeedDB {
// address has therefore the form // address has therefore the form
// address ::= (<peername>'.yacy'|<peerhexhash>'.yacyh'){'='<ip>{':'<port}} // address ::= (<peername>'.yacy'|<peerhexhash>'.yacyh'){'='<ip>{':'<port}}
// clusterdef ::= {address}{','address}* // clusterdef ::= {address}{','address}*
String[] addresses = clusterdefinition.split(","); String[] addresses = (clusterdefinition.length() == 0) ? new String[0] : clusterdefinition.split(",");
TreeMap clustermap = new TreeMap(kelondroBase64Order.enhancedCoder); TreeMap clustermap = new TreeMap(kelondroBase64Order.enhancedCoder);
yacySeed seed; yacySeed seed;
String hash, yacydom, ipport; String hash, yacydom, ipport;

Loading…
Cancel
Save