bugfixes and migration attempt toward new kelondroFlex db

- more synchronization
- bugfix for remove in collections
- bugfix in kelondroFlex (wrong exception condition!)
- options to use RAM, FLEX and TREE tables for Crawl URL stacker
- default for Crawl URL stacker is now FLEX (!)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2746 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent 48f81acc0e
commit 6396f5971e

@ -3,11 +3,11 @@ javacSource=1.4
javacTarget=1.4
# Release Configuration
releaseVersion=0.48
#releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
#releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}
releaseDir=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}
releaseVersion=0.481
releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
#releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}
#releaseDir=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}
releaseFileParentDir=yacy
releaseNr=$Revision$

@ -37,13 +37,13 @@ public class kelondroBytesIntMap {
this.ki = ki;
}
public int geti(byte[] key) throws IOException {
public synchronized int geti(byte[] key) throws IOException {
kelondroRow.Entry indexentry = ki.get(key);
if (indexentry == null) return -1;
return (int) indexentry.getColLong(1);
}
public int puti(byte[] key, int i) throws IOException {
public synchronized int puti(byte[] key, int i) throws IOException {
kelondroRow.Entry newentry = ki.row().newEntry();
newentry.setCol(0, key);
newentry.setCol(1, i);
@ -52,18 +52,18 @@ public class kelondroBytesIntMap {
return (int) oldentry.getColLong(1);
}
public int removei(byte[] key) throws IOException {
public synchronized int removei(byte[] key) throws IOException {
if (ki.size() == 0) return -1;
kelondroRow.Entry indexentry = ki.remove(key);
if (indexentry == null) return -1;
return (int) indexentry.getColLong(1);
}
public int size() throws IOException {
public synchronized int size() throws IOException {
return ki.size();
}
public Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException {
public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException {
// returns the row-iterator of the underlying kelondroIndex
// col[0] = key
// col[1] = integer as {b256}

@ -408,7 +408,9 @@ public class kelondroCollectionIndex {
synchronized (index) {
kelondroRow.Entry indexrow = index.get(key);
if (indexrow == null) return null;
return getdelete(indexrow, true, false);
kelondroRowSet removedCollection = getdelete(indexrow, true, false);
index.remove(key);
return removedCollection;
}
}

@ -128,7 +128,7 @@ public class kelondroFixedWidthArray extends kelondroRecords implements kelondro
}
public synchronized void remove(int index) throws IOException {
if (index >= size()) throw new IOException("remove: index " + index + " out of bounds " + size());
if (index >= super.USAGE.allCount()) throw new IOException("remove: index " + index + " out of bounds " + super.USAGE.allCount());
// get the node at position index
Handle h = new Handle(index);

@ -155,7 +155,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
return r;
}
public Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException {
public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException {
return new rowIterator(up, rotating, firstKey);
}

@ -47,7 +47,7 @@ public class kelondroRAMIndex implements kelondroIndex {
return this.order;
}
public int size() {
public synchronized int size() {
return this.index.size();
}
@ -55,19 +55,19 @@ public class kelondroRAMIndex implements kelondroIndex {
return this.rowdef;
}
public Entry get(byte[] key) {
public synchronized Entry get(byte[] key) {
return (kelondroRow.Entry) index.get(key);
}
public Entry put(Entry row) {
public synchronized Entry put(Entry row) {
return (kelondroRow.Entry) index.put(row.getColBytes(0), row);
}
public Entry remove(byte[] key) {
public synchronized Entry remove(byte[] key) {
return (kelondroRow.Entry) index.remove(key);
}
public Iterator rows(boolean up, boolean rotating, byte[] firstKey) {
public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) {
return index.values().iterator();
}

@ -64,6 +64,7 @@ import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroFlexTable;
import de.anomic.kelondro.kelondroIndex;
import de.anomic.kelondro.kelondroRAMIndex;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroTree;
import de.anomic.net.URL;
@ -75,6 +76,11 @@ import de.anomic.yacy.yacyCore;
public final class plasmaCrawlStacker {
// keys for different database types
public static final int QUEUE_DB_TYPE_RAM = 0;
public static final int QUEUE_DB_TYPE_TREE = 1;
public static final int QUEUE_DB_TYPE_FLEX = 2;
final WorkerPool theWorkerPool;
private GenericObjectPool.Config theWorkerPoolConfig = null;
final ThreadGroup theWorkerThreadGroup = new ThreadGroup("stackCrawlThreadGroup");
@ -83,10 +89,10 @@ public final class plasmaCrawlStacker {
//private boolean stopped = false;
private stackCrawlQueue queue;
public plasmaCrawlStacker(plasmaSwitchboard sb, File dbPath, int dbCacheSize, long preloadTime, boolean newdb) {
public plasmaCrawlStacker(plasmaSwitchboard sb, File dbPath, int dbCacheSize, long preloadTime, int dbtype) {
this.sb = sb;
this.queue = new stackCrawlQueue(dbPath, dbCacheSize, preloadTime, newdb);
this.queue = new stackCrawlQueue(dbPath, dbCacheSize, preloadTime, dbtype);
this.log.logInfo(this.queue.size() + " entries in the stackCrawl queue.");
this.log.logInfo("STACKCRAWL thread initialized.");
@ -248,7 +254,7 @@ public final class plasmaCrawlStacker {
public String stackCrawl(String nexturlString, String referrerString, String initiatorHash, String name, Date loadDate, int currentdepth, plasmaCrawlProfile.entry profile) throws InterruptedException {
// stacks a crawl item. The position can also be remote
// returns null if successful, a reason string if not successful
this.log.logFinest("stackCrawl: nexturlString='" + nexturlString + "'");
//this.log.logFinest("stackCrawl: nexturlString='" + nexturlString + "'");
long startTime = System.currentTimeMillis();
String reason = null; // failure reason
@ -384,8 +390,7 @@ public final class plasmaCrawlStacker {
boolean recrawl = (oldEntry != null) && (((System.currentTimeMillis() - oldEntry.loaddate().getTime()) / 60000) > profile.recrawlIfOlder());
if ((dbocc != null) && (!(recrawl))) {
reason = plasmaCrawlEURL.DOUBLE_REGISTERED + dbocc + ")";
this.log.logFine("URL '" + nexturlString + "' is double registered in '" + dbocc + "'. " +
"Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms");
//this.log.logFine("URL '" + nexturlString + "' is double registered in '" + dbocc + "'. " + "Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms");
return reason;
}
@ -597,9 +602,9 @@ public final class plasmaCrawlStacker {
private File cacheStacksPath;
private int bufferkb;
private long preloadTime;
private boolean newdb;
private int dbtype;
public stackCrawlQueue(File cacheStacksPath, int bufferkb, long preloadTime, boolean newdb) {
public stackCrawlQueue(File cacheStacksPath, int bufferkb, long preloadTime, int dbtype) {
// init the read semaphore
this.readSync = new serverSemaphore (0);
@ -613,7 +618,7 @@ public final class plasmaCrawlStacker {
this.cacheStacksPath = cacheStacksPath;
this.bufferkb = bufferkb;
this.preloadTime = preloadTime;
this.newdb = newdb;
this.dbtype = dbtype;
openDB();
try {
@ -650,9 +655,13 @@ public final class plasmaCrawlStacker {
}
private void deleteDB() {
if (this.newdb) {
if (this.dbtype == QUEUE_DB_TYPE_RAM) {
// do nothing..
}
if (this.dbtype == QUEUE_DB_TYPE_FLEX) {
kelondroFlexTable.delete(cacheStacksPath, "urlPreNotice1.table");
} else {
}
if (this.dbtype == QUEUE_DB_TYPE_TREE) {
File cacheFile = new File(cacheStacksPath, "urlPreNotice.db");
cacheFile.delete();
}
@ -661,7 +670,10 @@ public final class plasmaCrawlStacker {
private void openDB() {
if (!(cacheStacksPath.exists())) cacheStacksPath.mkdir(); // make the path
if (this.newdb) {
if (this.dbtype == QUEUE_DB_TYPE_RAM) {
this.urlEntryCache = new kelondroRAMIndex(kelondroBase64Order.enhancedCoder, plasmaCrawlNURL.rowdef);
}
if (this.dbtype == QUEUE_DB_TYPE_FLEX) {
String newCacheName = "urlPreNotice1.table";
cacheStacksPath.mkdirs();
try {
@ -670,7 +682,8 @@ public final class plasmaCrawlStacker {
e.printStackTrace();
System.exit(-1);
}
} else {
}
if (this.dbtype == QUEUE_DB_TYPE_TREE) {
File cacheFile = new File(cacheStacksPath, "urlPreNotice.db");
cacheFile.getParentFile().mkdirs();
this.urlEntryCache = kelondroTree.open(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, plasmaCrawlNURL.rowdef);

@ -606,7 +606,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
serverInstantThread.oneTimeJob(yc, "loadSeeds", yacyCore.log, 3000);
// initializing the stackCrawlThread
this.sbStackCrawlThread = new plasmaCrawlStacker(this, this.plasmaPath, ramPreNURL, ramPreNURL_time, getConfigBool("useFlexTableForPreNURL", false));
this.sbStackCrawlThread = new plasmaCrawlStacker(this, this.plasmaPath, ramPreNURL, ramPreNURL_time, (int) getConfigLong("tableTypeForPreNURL", 0));
//this.sbStackCrawlThread = new plasmaStackCrawlThread(this,this.plasmaPath,ramPreNURL);
//this.sbStackCrawlThread.start();

@ -497,7 +497,7 @@ xpstopw=true
80_indexing_busysleep=100
80_indexing_memprereq=2097152
82_crawlstack_idlesleep=5000
82_crawlstack_busysleep=10
82_crawlstack_busysleep=0
82_crawlstack_memprereq=1048576
90_cleanup_idlesleep=300000
90_cleanup_busysleep=300000
@ -815,10 +815,11 @@ currentSkin=
# temporary flags for the new database structure; set to true only for testing
# ALL DATA THAT IS CREATED WITH THIS FLAG ON WILL BE VOID IN A FINAL VERSION
# table types for tableTypeForPreNURL: RAM = 0, TREE = 1, FLEX = 2
useCollectionIndex=false
useFlexTableForNURL=false
useFlexTableForEURL=true
useFlexTableForPreNURL=false
tableTypeForPreNURL=2
# flag to show surftipps on index.html page
showSurftipps = true

Loading…
Cancel
Save