diff --git a/build.properties b/build.properties index 8b097ed2d..25d954c73 100644 --- a/build.properties +++ b/build.properties @@ -3,7 +3,7 @@ javacSource=1.4 javacTarget=1.4 # Release Configuration -releaseVersion=0.481 +releaseVersion=0.482 releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz #releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr} diff --git a/htroot/htdocsdefault/dir.java b/htroot/htdocsdefault/dir.java index eaa44ca34..b85223a75 100644 --- a/htroot/htdocsdefault/dir.java +++ b/htroot/htdocsdefault/dir.java @@ -369,7 +369,7 @@ public class dir { phrase.length(), /*size*/ condenser.RESULT_NUMB_WORDS ); - switchboard.urlPool.loadedURL.store(newEntry, false); + switchboard.urlPool.loadedURL.store(newEntry); switchboard.urlPool.loadedURL.stack( newEntry, "____________", /*initiator*/ diff --git a/htroot/yacy/crawlReceipt.java b/htroot/yacy/crawlReceipt.java index aed450308..eb29cf5f6 100644 --- a/htroot/yacy/crawlReceipt.java +++ b/htroot/yacy/crawlReceipt.java @@ -130,7 +130,7 @@ public final class crawlReceipt { "\n\tURL properties: "+ propStr); } else try { // put new entry into database - switchboard.urlPool.loadedURL.store(entry, false); + switchboard.urlPool.loadedURL.store(entry); switchboard.urlPool.loadedURL.stack(entry, youare, iam, 1); // generating url hash diff --git a/htroot/yacy/transferURL.java b/htroot/yacy/transferURL.java index 281fd48da..9ae72dfb7 100644 --- a/htroot/yacy/transferURL.java +++ b/htroot/yacy/transferURL.java @@ -106,7 +106,7 @@ public final class transferURL { lEntry = null; blocked++; } else try { - sb.urlPool.loadedURL.store(lEntry, true); + sb.urlPool.loadedURL.store(lEntry); sb.urlPool.loadedURL.stack(lEntry, iam, iam, 3); yacyCore.log.logFine("transferURL: received URL '" + lEntry.url() + "' from peer " + otherPeerName); received++; diff --git a/source/dbtest.java b/source/dbtest.java index bc7f5a5ba..c943be5af 100644 --- a/source/dbtest.java +++ b/source/dbtest.java @@ -139,14 +139,14 @@ public class dbtest { try { final kelondroRow.Entry entryBytes = getTable().get(entry.getKey()); if (entryBytes != null) { - System.out.println("ENTRY=" + entryBytes.getColString(1, null)); + //System.out.println("ENTRY=" + entryBytes.getColString(1, null)); final STEntry dbEntry = new STEntry(entryBytes.getColBytes(0), entryBytes.getColBytes(1)); if (!dbEntry.isValid()) { System.out.println("INVALID: " + dbEntry); - } else { + }/* else { System.out.println("_VALID_: " + dbEntry); getTable().remove(entry.getKey()); - } + }*/ } } catch (IOException e) { System.err.println(e); @@ -572,6 +572,10 @@ final class dbTable implements kelondroIndex { } } + public kelondroRow.Entry removeOne() { + return null; + } + public Iterator rows(boolean up, boolean rotating, byte[] startKey) throws IOException { // Objects are of type byte[][] return null; @@ -595,6 +599,15 @@ final class dbTable implements kelondroIndex { public kelondroOrder order() { return this.order; } + + public int primarykey() { + return 0; + } + + public kelondroProfile profile() { + return new kelondroProfile(); + } + } diff --git a/source/de/anomic/index/indexContainer.java b/source/de/anomic/index/indexContainer.java index df6f3d3f8..60a9f6158 100644 --- a/source/de/anomic/index/indexContainer.java +++ b/source/de/anomic/index/indexContainer.java @@ -301,7 +301,7 @@ public class indexContainer extends kelondroRowSet { System.out.println("DEBUG: JOIN METHOD BY ENUMERATION"); indexContainer conj = new indexContainer(null); // start with empty search result if (!((i1.order().signature().equals(i2.order().signature())) && - (i1.orderColumn() == i2.orderColumn()))) return conj; // ordering must be equal + (i1.primarykey() == i2.primarykey()))) return conj; // ordering must be equal Iterator e1 = i1.entries(); Iterator e2 = i2.entries(); int c; diff --git a/source/de/anomic/index/indexURL.java b/source/de/anomic/index/indexURL.java index 12240d564..c92380fc5 100644 --- a/source/de/anomic/index/indexURL.java +++ b/source/de/anomic/index/indexURL.java @@ -36,8 +36,6 @@ import java.util.TreeMap; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroIndex; -import de.anomic.kelondro.kelondroRAMIndex; -import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroTree; import de.anomic.net.URL; import de.anomic.server.serverByteBuffer; @@ -412,48 +410,24 @@ public class indexURL { // the class object - protected kelondroIndex urlIndexFile = null; - protected kelondroRAMIndex urlIndexCache = null; + protected kelondroIndex urlIndexFile = null; public indexURL() { urlIndexFile = null; - urlIndexCache = null; } public int size() { try { - return urlIndexFile.size() + ((urlIndexCache == null) ? 0 : urlIndexCache.size()); + return urlIndexFile.size() ; } catch (IOException e) { return 0; } } - - public void flushCacheSome() { - if (urlIndexCache == null) return; - if (urlIndexCache.size() == 0) return; - int flush = Math.max(1, urlIndexCache.size() / 10); - while (flush-- > 0) flushCacheOnce(); - } - - public void flushCacheOnce() { - if (urlIndexCache == null) return; - if (urlIndexCache.size() == 0) return; - synchronized (urlIndexCache) { - Iterator i = urlIndexCache.rows(true, false, null); - if (i.hasNext()) try { - urlIndexFile.put((kelondroRow.Entry) i.next()); - i.remove(); - } catch (IOException e) { - e.printStackTrace(); - } - } - } public boolean remove(String hash) { if (hash == null) return false; try { urlIndexFile.remove(hash.getBytes()); - if (urlIndexCache != null) synchronized (urlIndexCache) {urlIndexCache.remove(hash.getBytes());} return true; } catch (IOException e) { return false; @@ -461,19 +435,10 @@ public class indexURL { } public void close() throws IOException { - while ((urlIndexCache != null) && (urlIndexCache.size() > 0)) flushCacheOnce(); if (urlIndexFile != null) { urlIndexFile.close(); urlIndexFile = null; } - if (urlIndexCache != null) { - urlIndexCache.close(); - urlIndexCache = null; - } - } - - public int writeCacheSize() { - return (urlIndexCache == null) ? 0 : urlIndexCache.size(); } public int cacheNodeChunkSize() { diff --git a/source/de/anomic/kelondro/kelondroBufferedIndex.java b/source/de/anomic/kelondro/kelondroBufferedIndex.java new file mode 100644 index 000000000..bfda75832 --- /dev/null +++ b/source/de/anomic/kelondro/kelondroBufferedIndex.java @@ -0,0 +1,197 @@ +// kelondroBufferedIndex.java +// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany +// first published 16.10.2006 on http://www.anomic.de +// +// This is a part of YaCy, a peer-to-peer based web search engine +// +// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ +// $LastChangedRevision: 1986 $ +// $LastChangedBy: orbiter $ +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package de.anomic.kelondro; + +import java.io.IOException; +import java.util.Date; +import java.util.Iterator; +import java.util.Map; +import java.util.TreeMap; + +import de.anomic.server.serverMemory; + +public class kelondroBufferedIndex implements kelondroIndex { + + // this implements a write buffer on index objects + + private static final long memBlockLimit = 2000000; // do not fill cache further if the amount of available memory is less that this + private static final int bufferFlushLimit = 10000; + private static final int bufferFlushMinimum = 1000; + private TreeMap buffer; + private kelondroIndex index; + + public kelondroBufferedIndex(kelondroIndex theIndex) { + index = theIndex; + buffer = (theIndex.order() == null) ? new TreeMap() : new TreeMap(theIndex.order()); + } + + public synchronized void flush() throws IOException { + if (buffer.size() == 0) return; + Iterator i = buffer.entrySet().iterator(); + Map.Entry entry; + while (i.hasNext()) { + entry = (Map.Entry) i.next(); + index.put((kelondroRow.Entry) entry.getValue()); + } + buffer.clear(); + } + + public synchronized void flushOnce() throws IOException { + if (buffer.size() == 0) return; + Iterator i = buffer.entrySet().iterator(); + Map.Entry entry; + if (i.hasNext()) { + entry = (Map.Entry) i.next(); + index.put((kelondroRow.Entry) entry.getValue()); + } + } + + public void flushSome() throws IOException { + if (buffer.size() == 0) return; + int flush = Math.max(1, buffer.size() / 10); + while (flush-- > 0) flushOnce(); + } + + public synchronized int size() throws IOException { + return buffer.size() + index.size(); + } + + public int writeBufferSize() { + return buffer.size(); + } + + public synchronized String toString() { + try {flush();} catch (IOException e) {} + return index.toString(); + } + + public synchronized kelondroRow.Entry get(byte[] key) throws IOException { + long handle = index.profile().startRead(); + kelondroRow.Entry entry = null; + entry = (kelondroRow.Entry) buffer.get(key); + if (entry == null) entry = index.get(key); + index.profile().stopRead(handle); + return entry; + } + + public synchronized void add(kelondroRow.Entry newentry) throws IOException { + assert (index instanceof kelondroRowSet); + ((kelondroRowSet) index).add(newentry); + } + + public synchronized kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException { + return put(row); + } + + public synchronized kelondroRow.Entry put(kelondroRow.Entry newentry) throws IOException { + long handle = index.profile().startWrite(); + byte[] key = newentry.getColBytes(index.primarykey()); + kelondroRow.Entry oldentry = null; + oldentry = (kelondroRow.Entry) buffer.get(key); + if (oldentry == null) { + // try the collection + oldentry = index.get(key); + if (oldentry == null) { + // this was not anywhere + buffer.put(key, newentry); + if (((buffer.size() > bufferFlushMinimum) && (serverMemory.available() > memBlockLimit)) + || (buffer.size() > bufferFlushLimit)) + flush(); + } else { + // replace old entry + index.put(newentry); + } + } else { + // the entry is already in buffer + // simply replace old entry + buffer.put(key, newentry); + } + index.profile().stopWrite(handle); + return oldentry; + } + + public synchronized kelondroRow.Entry remove(byte[] key) throws IOException { + long handle = index.profile().startDelete(); + kelondroRow.Entry oldentry = null; + oldentry = (kelondroRow.Entry) buffer.remove(key); + if (oldentry == null) { + // try the collection + return index.remove(key); + } + index.profile().stopDelete(handle); + return oldentry; + } + + public synchronized kelondroRow.Entry removeOne() throws IOException { + long handle = index.profile().startDelete(); + if (buffer.size() > 0) { + byte[] key = (byte[]) buffer.keySet().iterator().next(); + kelondroRow.Entry entry = (kelondroRow.Entry) buffer.remove(key); + index.profile().stopDelete(handle); + return entry; + } else { + kelondroRow.Entry entry = index.removeOne(); + index.profile().stopDelete(handle); + return entry; + } + } + + public kelondroProfile profile() { + return index.profile(); + } + + public synchronized void close() throws IOException { + flush(); + buffer = null; + index.close(); + } + + public kelondroOrder order() { + return index.order(); + } + + public int primarykey() { + return index.primarykey(); + } + + public kelondroRow row() throws IOException { + return index.row(); + } + + public synchronized Iterator rows() throws IOException { + return rows(true, false, null); + } + + public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException { + flush(); + return index.rows(up, rotating, firstKey); + } + + public static kelondroBufferedIndex getRAMIndex(kelondroRow rowdef, int initSize) { + return new kelondroBufferedIndex(new kelondroRowSet(rowdef, kelondroNaturalOrder.naturalOrder, 0, initSize)); + } +} diff --git a/source/de/anomic/kelondro/kelondroBytesIntMap.java b/source/de/anomic/kelondro/kelondroBytesIntMap.java index f5894550e..3fa5837d4 100644 --- a/source/de/anomic/kelondro/kelondroBytesIntMap.java +++ b/source/de/anomic/kelondro/kelondroBytesIntMap.java @@ -61,6 +61,13 @@ public class kelondroBytesIntMap { return (int) indexentry.getColLong(1); } + public synchronized int removeonei() throws IOException { + if (ki.size() == 0) return -1; + kelondroRow.Entry indexentry = ki.removeOne(); + if (indexentry == null) return -1; + return (int) indexentry.getColLong(1); + } + public synchronized int size() throws IOException { return ki.size(); } @@ -76,4 +83,8 @@ public class kelondroBytesIntMap { return ki.order(); } + public kelondroProfile profile() { + return ki.profile(); + } + } diff --git a/source/de/anomic/kelondro/kelondroCollectionIndex.java b/source/de/anomic/kelondro/kelondroCollectionIndex.java index ca03e9135..2932f8d19 100644 --- a/source/de/anomic/kelondro/kelondroCollectionIndex.java +++ b/source/de/anomic/kelondro/kelondroCollectionIndex.java @@ -99,7 +99,8 @@ public class kelondroCollectionIndex { boolean ramIndexGeneration = false; boolean fileIndexGeneration = !(new File(path, filenameStub + ".index").exists()); - if (ramIndexGeneration) index = new kelondroRAMIndex(indexOrder, indexRow()); + //if (ramIndexGeneration) index = new kelondroRAMIndex(indexOrder, indexRow()); + if (ramIndexGeneration) index = new kelondroBufferedIndex(new kelondroRowSet(indexRow(), indexOrder, 0, 0)); if (fileIndexGeneration) index = new kelondroFlexTable(path, filenameStub + ".index", buffersize, preloadTime, indexRow(), indexOrder); // open array files diff --git a/source/de/anomic/kelondro/kelondroFlexSplitTable.java b/source/de/anomic/kelondro/kelondroFlexSplitTable.java index 8970f5b70..2a7442191 100644 --- a/source/de/anomic/kelondro/kelondroFlexSplitTable.java +++ b/source/de/anomic/kelondro/kelondroFlexSplitTable.java @@ -93,6 +93,10 @@ public class kelondroFlexSplitTable implements kelondroIndex { return this.objectOrder; } + public int primarykey() { + return 0; + } + public synchronized int size() throws IOException { Iterator i = tables.values().iterator(); int s = 0; @@ -102,6 +106,14 @@ public class kelondroFlexSplitTable implements kelondroIndex { return s; } + public synchronized kelondroProfile profile() { + kelondroProfile[] profiles = new kelondroProfile[tables.size()]; + Iterator i = tables.values().iterator(); + int c = 0; + while (i.hasNext()) profiles[c++] = ((kelondroFlexTable) i.next()).profile(); + return kelondroProfile.consolidate(profiles); + } + public kelondroRow row() throws IOException { return this.rowdef; } @@ -150,6 +162,24 @@ public class kelondroFlexSplitTable implements kelondroIndex { return null; } + public synchronized kelondroRow.Entry removeOne() throws IOException { + Iterator i = tables.values().iterator(); + kelondroFlexTable table, maxtable = null; + int maxcount = -1; + while (i.hasNext()) { + table = (kelondroFlexTable) i.next(); + if (table.size() > maxcount) { + maxtable = table; + maxcount = table.size(); + } + } + if (maxtable == null) { + return null; + } else { + return maxtable.removeOne(); + } + } + public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException { return new rowIter(); } diff --git a/source/de/anomic/kelondro/kelondroFlexTable.java b/source/de/anomic/kelondro/kelondroFlexTable.java index 18ca3b2aa..6712eddef 100644 --- a/source/de/anomic/kelondro/kelondroFlexTable.java +++ b/source/de/anomic/kelondro/kelondroFlexTable.java @@ -80,7 +80,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr } private kelondroIndex initializeRamIndex(kelondroOrder objectOrder) throws IOException { - kelondroRowBufferedSet ri = new kelondroRowBufferedSet(new kelondroRow(new kelondroColumn[]{super.row().column(0), new kelondroColumn("int c-4 {b256}")}), objectOrder, 0, 0); + kelondroBufferedIndex ri = new kelondroBufferedIndex(new kelondroRowSet(new kelondroRow(new kelondroColumn[]{super.row().column(0), new kelondroColumn("int c-4 {b256}")}), objectOrder, 0, 0)); //kelondroRowSet ri = new kelondroRowSet(new kelondroRow(new kelondroColumn[]{super.row().column(0), new kelondroColumn("int c-4 {b256}")}), 0); //ri.setOrdering(objectOrder, 0); Iterator content = super.col[0].contentNodes(-1); @@ -93,7 +93,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr indexentry = ri.row().newEntry(); indexentry.setCol(0, node.getValueRow()); indexentry.setCol(1, i); - ri.put(indexentry); + ri.add(indexentry); if ((i % 10000) == 0) { System.out.print('.'); System.out.flush(); @@ -101,7 +101,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr } System.out.print(" -ordering- "); System.out.flush(); - ri.trim(); + ri.flush(); return ri; } @@ -144,23 +144,32 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr } public synchronized kelondroRow.Entry put(kelondroRow.Entry row) throws IOException { - int i = index.geti(row.getColBytes(0)); - if (i < 0) { - index.puti(row.getColBytes(0), super.add(row)); - return null; - } - return super.set(i, row); + int i = index.geti(row.getColBytes(0)); + if (i < 0) { + index.puti(row.getColBytes(0), super.add(row)); + return null; + } + return super.set(i, row); } public synchronized kelondroRow.Entry remove(byte[] key) throws IOException { - int i = index.removei(key); - if (i < 0) return null; - kelondroRow.Entry r; - r = super.get(i); - super.remove(i); - return r; + int i = index.removei(key); + if (i < 0) return null; + kelondroRow.Entry r; + r = super.get(i); + super.remove(i); + return r; } + public synchronized kelondroRow.Entry removeOne() throws IOException { + int i = index.removeonei(); + if (i < 0) return null; + kelondroRow.Entry r; + r = super.get(i); + super.remove(i); + return r; + } + public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException { return new rowIterator(up, rotating, firstKey); } @@ -198,4 +207,12 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr return index.order(); } + public int primarykey() { + return 0; + } + + public kelondroProfile profile() { + return index.profile(); + } + } diff --git a/source/de/anomic/kelondro/kelondroIndex.java b/source/de/anomic/kelondro/kelondroIndex.java index ca426cfb7..ab4fc76c2 100644 --- a/source/de/anomic/kelondro/kelondroIndex.java +++ b/source/de/anomic/kelondro/kelondroIndex.java @@ -57,12 +57,15 @@ import java.util.Iterator; public interface kelondroIndex { public kelondroOrder order(); + public int primarykey(); public int size() throws IOException; + public kelondroProfile profile(); public kelondroRow row() throws IOException; public kelondroRow.Entry get(byte[] key) throws IOException; public kelondroRow.Entry put(kelondroRow.Entry row) throws IOException; public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException; public kelondroRow.Entry remove(byte[] key) throws IOException; + public kelondroRow.Entry removeOne() throws IOException; public Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException; public void close() throws IOException; } diff --git a/source/de/anomic/kelondro/kelondroIntBytesMap.java b/source/de/anomic/kelondro/kelondroIntBytesMap.java index e6250422b..99c5a9137 100644 --- a/source/de/anomic/kelondro/kelondroIntBytesMap.java +++ b/source/de/anomic/kelondro/kelondroIntBytesMap.java @@ -24,36 +24,95 @@ package de.anomic.kelondro; +import java.io.IOException; +import java.util.Iterator; + //import java.util.Random; -public class kelondroIntBytesMap extends kelondroRowBufferedSet { +public class kelondroIntBytesMap { + private kelondroBufferedIndex index; + public kelondroIntBytesMap(int payloadSize, int initSize) { - super(new kelondroRow("Cardinal key-4 {b256}, byte[] payload-" + payloadSize), kelondroNaturalOrder.naturalOrder, 0, initSize); + index = kelondroBufferedIndex.getRAMIndex(new kelondroRow("Cardinal key-4 {b256}, byte[] payload-" + payloadSize), initSize); + } + + public int size() { + try { + return index.size(); + } catch (IOException e) { + return 0; + } } public byte[] getb(int ii) { - kelondroRow.Entry indexentry = super.get(kelondroNaturalOrder.encodeLong((long) ii, 4)); + kelondroRow.Entry indexentry; + try {indexentry = index.get(kelondroNaturalOrder.encodeLong((long) ii, 4));} catch (IOException e) {return null;} if (indexentry == null) return null; return indexentry.getColBytes(1); } + public void addb(int ii, byte[] value) { + kelondroRow.Entry newentry; + try { + newentry = index.row().newEntry(); + newentry.setCol(0, (long) ii); + newentry.setCol(1, value); + index.add(newentry); + } catch (IOException e) {} + } + + public byte[] putb(int ii, byte[] value) { - kelondroRow.Entry newentry = super.row().newEntry(); - newentry.setCol(0, (long) ii); - newentry.setCol(1, value); - kelondroRow.Entry oldentry = super.put(newentry); - if (oldentry == null) return null; - return oldentry.getColBytes(1); + kelondroRow.Entry newentry; + try { + newentry = index.row().newEntry(); + newentry.setCol(0, (long) ii); + newentry.setCol(1, value); + kelondroRow.Entry oldentry = index.put(newentry); + if (oldentry == null) return null; + return oldentry.getColBytes(1); + } catch (IOException e) { + return null; + } } public byte[] removeb(int ii) { - if (size() == 0) { + try { + if (index.size() == 0) return null; + kelondroRow.Entry indexentry = index.remove(kelondroNaturalOrder.encodeLong((long) ii, 4)); + if (indexentry == null) return null; + return indexentry.getColBytes(1); + } catch (IOException e) { return null; } - kelondroRow.Entry indexentry = super.remove(kelondroNaturalOrder.encodeLong((long) ii, 4)); - if (indexentry == null) return null; - return indexentry.getColBytes(1); + } + + public byte[] removeoneb() { + try { + if (index.size() == 0) return null; + kelondroRow.Entry indexentry = index.removeOne(); + if (indexentry == null) return null; + return indexentry.getColBytes(1); + } catch (IOException e) { + return null; + } + } + + public Iterator rows() { + try { + return index.rows(); + } catch (IOException e) { + return null; + } + } + + public void flush() { + try {index.flush();} catch (IOException e) {} + } + + public kelondroProfile profile() { + return index.profile(); } public static void main(String[] args) { diff --git a/source/de/anomic/kelondro/kelondroRAMIndex.java b/source/de/anomic/kelondro/kelondroRAMIndex.java index b7792215f..9e85aa4f8 100644 --- a/source/de/anomic/kelondro/kelondroRAMIndex.java +++ b/source/de/anomic/kelondro/kelondroRAMIndex.java @@ -38,17 +38,23 @@ public class kelondroRAMIndex implements kelondroIndex { private TreeMap index; private kelondroOrder order; private kelondroRow rowdef; + private kelondroProfile profile; public kelondroRAMIndex(kelondroOrder defaultOrder, kelondroRow rowdef) { this.index = new TreeMap(defaultOrder); this.order = defaultOrder; this.rowdef = rowdef; + this.profile = new kelondroProfile(); } public kelondroOrder order() { return this.order; } + public int primarykey() { + return 0; + } + public synchronized int size() { return this.index.size(); } @@ -73,6 +79,11 @@ public class kelondroRAMIndex implements kelondroIndex { return (kelondroRow.Entry) index.remove(key); } + public synchronized Entry removeOne() { + if (this.index.size() == 0) return null; + return remove((byte[]) index.keySet().iterator().next()); + } + public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) { return index.values().iterator(); } @@ -81,4 +92,8 @@ public class kelondroRAMIndex implements kelondroIndex { index = null; } + public kelondroProfile profile() { + return profile; + } + } diff --git a/source/de/anomic/kelondro/kelondroRecords.java b/source/de/anomic/kelondro/kelondroRecords.java index 2e9f4e72f..963dc0314 100644 --- a/source/de/anomic/kelondro/kelondroRecords.java +++ b/source/de/anomic/kelondro/kelondroRecords.java @@ -433,10 +433,10 @@ public class kelondroRecords { Node n; while ((System.currentTimeMillis() < stop) && (cacheHeaders.size() < cacheSize) && (i.hasNext())) { n = (Node) i.next(); - cacheHeaders.putb(n.handle.index, n.headChunk); + cacheHeaders.addb(n.handle.index, n.headChunk); count++; } - cacheHeaders.trim(); + cacheHeaders.flush(); logFine("preloaded " + count + " records into cache"); } catch (kelondroException e) { // the contentNodes iterator had a time-out; we don't do a preload @@ -831,7 +831,7 @@ public class kelondroRecords { if ((cacheHeaders.size() < cacheSize) && (serverMemory.available() >= memBlock)) return true; // no need to flush cache space // just delete any of the entries - cacheHeaders.removeOne(); + cacheHeaders.removeoneb(); cacheFlush++; return true; } @@ -1332,4 +1332,9 @@ public class kelondroRecords { entryFile.profile() }; } + + public kelondroProfile profile() { + return kelondroProfile.consolidate(profiles()); + } + } diff --git a/source/de/anomic/kelondro/kelondroRowBufferedSet.java b/source/de/anomic/kelondro/kelondroRowBufferedSet.java deleted file mode 100644 index 13886ac64..000000000 --- a/source/de/anomic/kelondro/kelondroRowBufferedSet.java +++ /dev/null @@ -1,181 +0,0 @@ -// kelondroRowBufferedSet.java -// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany -// first published 21.06.2006 on http://www.anomic.de -// -// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ -// $LastChangedRevision: 1986 $ -// $LastChangedBy: orbiter $ -// -// LICENSE -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package de.anomic.kelondro; - -import java.util.Date; -import java.util.Iterator; -import java.util.Map; -import java.util.NoSuchElementException; -import java.util.TreeMap; - -import de.anomic.server.serverMemory; - -public class kelondroRowBufferedSet implements kelondroIndex { - - private static final long memBlockLimit = 2000000; // do not fill cache further if the amount of available memory is less that this - private static final int bufferFlushLimit = 10000; - private static final int bufferFlushMinimum = 1000; - private kelondroProfile profile; - private TreeMap buffer; - private kelondroRowSet store; - - public kelondroRowBufferedSet(kelondroRow rowdef, kelondroOrder objectOrder, int orderColumn, int objectCount) { - store = new kelondroRowSet(rowdef, objectCount); - assert (objectOrder != null); - store.setOrdering(objectOrder, orderColumn); - buffer = new TreeMap(objectOrder); - profile = new kelondroProfile(); - } - - private final void flush() { - // call only in synchronized environment - Iterator i = buffer.entrySet().iterator(); - Map.Entry entry; - while (i.hasNext()) { - entry = (Map.Entry) i.next(); - store.add((kelondroRow.Entry) entry.getValue()); - } - buffer.clear(); - } - - public synchronized final void trim() { - flush(); - store.trim(); - } - - public synchronized void removeOne() { - if (buffer.size() == 0) { - store.removeOne(); - } else try { - // buffer.remove(buffer.keySet().iterator().next()); - buffer.remove(buffer.lastKey()); - } catch (NoSuchElementException e) {} - } - - public synchronized void clear() { - store.clear(); - buffer.clear(); - } - - public synchronized int size() { - return buffer.size() + store.size(); - } - - public synchronized Iterator rows() { - flush(); - return store.rows(); - } - - public synchronized void uniq() { - flush(); - store.uniq(); - } - - public synchronized String toString() { - flush(); - return store.toString(); - } - - public synchronized kelondroRow.Entry get(byte[] key) { - long handle = profile.startRead(); - kelondroRow.Entry entry = null; - entry = (kelondroRow.Entry) buffer.get(key); - if (entry == null) entry = store.get(key); - profile.stopRead(handle); - return entry; - } - - public synchronized kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) { - return put(row); - } - - public synchronized kelondroRow.Entry put(kelondroRow.Entry newentry) { - long handle = profile.startWrite(); - byte[] key = newentry.getColBytes(store.sortColumn); - kelondroRow.Entry oldentry = null; - oldentry = (kelondroRow.Entry) buffer.get(key); - if (oldentry == null) { - // try the collection - oldentry = store.get(key); - if (oldentry == null) { - // this was not anywhere - buffer.put(key, newentry); - if (((buffer.size() > bufferFlushMinimum) && (serverMemory.available() > memBlockLimit)) - || (buffer.size() > bufferFlushLimit)) - flush(); - } else { - // replace old entry - store.put(newentry); - } - } else { - // the entry is already in buffer - // simply replace old entry - buffer.put(key, newentry); - } - profile.stopWrite(handle); - return oldentry; - } - - public synchronized kelondroRow.Entry remove(byte[] key) { - long handle = profile.startDelete(); - kelondroRow.Entry oldentry = null; - oldentry = (kelondroRow.Entry) buffer.remove(key); - if (oldentry == null) { - // try the collection - return store.remove(key); - } - profile.stopDelete(handle); - return oldentry; - } - - public synchronized void removeMarkedAll(kelondroRowCollection c) { - long handle = profile.startDelete(); - flush(); - store.removeMarkedAll(c); - profile.stopDelete(handle); - } - - public kelondroProfile profile() { - return store.profile(); - } - - public synchronized void close() { - flush(); - store.close(); - } - - public kelondroOrder order() { - return store.order(); - } - - public kelondroRow row() { - return store.row(); - } - - public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) { - flush(); - return store.rows(up, rotating, firstKey); - } -} diff --git a/source/de/anomic/kelondro/kelondroRowCollection.java b/source/de/anomic/kelondro/kelondroRowCollection.java index 2ab15cfa3..94b4df676 100644 --- a/source/de/anomic/kelondro/kelondroRowCollection.java +++ b/source/de/anomic/kelondro/kelondroRowCollection.java @@ -245,11 +245,14 @@ public class kelondroRowCollection { this.lastTimeWrote = System.currentTimeMillis(); } - public void removeOne() { - if (chunkcount == 0) return; - if (chunkcount == sortBound) sortBound--; - chunkcount--; - this.lastTimeWrote = System.currentTimeMillis(); + public kelondroRow.Entry removeOne() { + synchronized (chunkcache) { + if (chunkcount == 0) return null; + if (chunkcount == sortBound) sortBound--; + chunkcount--; + this.lastTimeWrote = System.currentTimeMillis(); + return get(chunkcount); + } } public void clear() { diff --git a/source/de/anomic/kelondro/kelondroRowSet.java b/source/de/anomic/kelondro/kelondroRowSet.java index 6d3892416..93f746daa 100644 --- a/source/de/anomic/kelondro/kelondroRowSet.java +++ b/source/de/anomic/kelondro/kelondroRowSet.java @@ -62,6 +62,12 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd this.profile = new kelondroProfile(); } + public kelondroRowSet(kelondroRow rowdef, kelondroOrder objectOrder, int orderColumn, int objectCount) { + this(rowdef, objectCount); + assert (objectOrder != null); + setOrdering(objectOrder, orderColumn); + } + public kelondroRow.Entry get(byte[] key) { return get(key, 0, key.length); } @@ -201,7 +207,7 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd return this.sortOrder; } - public int orderColumn() { + public int primarykey() { return this.sortColumn; } diff --git a/source/de/anomic/kelondro/kelondroSplittedTree.java b/source/de/anomic/kelondro/kelondroSplittedTree.java index 9765d9992..06b5e3673 100644 --- a/source/de/anomic/kelondro/kelondroSplittedTree.java +++ b/source/de/anomic/kelondro/kelondroSplittedTree.java @@ -121,6 +121,23 @@ public class kelondroSplittedTree implements kelondroIndex { public kelondroRow.Entry remove(byte[] key) throws IOException { return ktfs[partition(key)].remove(key); } + + public kelondroRow.Entry removeOne() throws IOException { + // removes one entry from the partition with the most entries + int maxc = -1, maxi = 0; + for (int i = 0; i < ktfs.length; i++) { + if (ktfs[i].size() > maxc) { + maxc = ktfs[i].size(); + maxi = i; + } + } + if (maxc > 0) { + return ktfs[maxi].removeOne(); + } else { + return null; + } + } + public Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException { return new ktfsIterator(up, rotating, firstKey); } @@ -203,5 +220,14 @@ public class kelondroSplittedTree implements kelondroIndex { return this.order; } + public int primarykey() { + return 0; + } + public kelondroProfile profile() { + kelondroProfile[] profiles = new kelondroProfile[ktfs.length]; + for (int i = 0; i < ktfs.length; i++) profiles[i] = ktfs[i].profile(); + return kelondroProfile.consolidate(profiles); + } + } diff --git a/source/de/anomic/kelondro/kelondroTree.java b/source/de/anomic/kelondro/kelondroTree.java index a674df9d7..0216136b9 100644 --- a/source/de/anomic/kelondro/kelondroTree.java +++ b/source/de/anomic/kelondro/kelondroTree.java @@ -219,11 +219,14 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { return oo; } - public kelondroOrder getOrder() { - // returns the order of this tree + public kelondroOrder order() { return this.objectOrder; } + public int primarykey() { + return 0; + } + public void clear() throws IOException { super.clear(); setHandle(root, null); // reset the root value @@ -695,6 +698,16 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { } } + public kelondroRow.Entry removeOne() throws IOException { + // removes just any entry and removes that entry + synchronized(writeSearchObj) { + Node theOne = lastNode(); + kelondroRow.Entry values = row().newEntry(theOne.getValueRow()); + remove(theOne, null); + return values; + } + } + public synchronized void removeAll() throws IOException { while (size() > 0) remove(lastNode(), null); } @@ -1371,10 +1384,6 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { } } - public kelondroOrder order() { - return this.objectOrder; - } - public static void main(String[] args) { //cmd(args); //iterationtest(); diff --git a/source/de/anomic/plasma/dbImport/plasmaDbImporter.java b/source/de/anomic/plasma/dbImport/plasmaDbImporter.java index 1f2fe5288..fa8fccde6 100644 --- a/source/de/anomic/plasma/dbImport/plasmaDbImporter.java +++ b/source/de/anomic/plasma/dbImport/plasmaDbImporter.java @@ -160,7 +160,7 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter { if (urlEntry != null) { /* write it into the home url db */ - this.homeUrlDB.store(urlEntry, false); + this.homeUrlDB.store(urlEntry); importedUrlBuffer.add(urlHash); this.urlCounter++; diff --git a/source/de/anomic/plasma/plasmaCrawlLURL.java b/source/de/anomic/plasma/plasmaCrawlLURL.java index e05edfcb3..8ba925c57 100644 --- a/source/de/anomic/plasma/plasmaCrawlLURL.java +++ b/source/de/anomic/plasma/plasmaCrawlLURL.java @@ -66,8 +66,7 @@ import de.anomic.http.httpc; import de.anomic.http.httpc.response; import de.anomic.index.indexEntry; import de.anomic.index.indexURL; -import de.anomic.kelondro.kelondroNaturalOrder; -import de.anomic.kelondro.kelondroRAMIndex; +import de.anomic.kelondro.kelondroBufferedIndex; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroTree; import de.anomic.net.URL; @@ -98,8 +97,7 @@ public final class plasmaCrawlLURL extends indexURL { cacheFile.getParentFile().mkdirs(); try { - urlIndexFile = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, plasmaCrawlLURLOldEntry.rowdef); - urlIndexCache = new kelondroRAMIndex(kelondroNaturalOrder.naturalOrder, plasmaCrawlLURLOldEntry.rowdef); + urlIndexFile = new kelondroBufferedIndex(new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, plasmaCrawlLURLOldEntry.rowdef)); } catch (IOException e) { e.printStackTrace(); System.exit(-1); @@ -139,6 +137,16 @@ public final class plasmaCrawlLURL extends indexURL { gcrawlResultStack.add(urlHash + initiatorHash + executorHash); } + public void flushCacheSome() { + try { + ((kelondroBufferedIndex) urlIndexFile).flushSome(); + } catch (IOException e) {} + } + + public int writeCacheSize() { + return ((kelondroBufferedIndex) urlIndexFile).writeBufferSize(); + } + public plasmaCrawlLURLEntry load(String urlHash, indexEntry searchedWord) { // generates an plasmaLURLEntry using the url hash // to speed up the access, the url-hashes are buffered @@ -147,9 +155,8 @@ public final class plasmaCrawlLURL extends indexURL { // - look into the hash cache // - look into the filed properties // if the url cannot be found, this returns null - kelondroRow.Entry entry = urlIndexCache.get(urlHash.getBytes()); try { - if (entry == null) entry = urlIndexFile.get(urlHash.getBytes()); + kelondroRow.Entry entry = urlIndexFile.get(urlHash.getBytes()); if (entry == null) return null; return new plasmaCrawlLURLOldEntry(entry, searchedWord); } catch (IOException e) { @@ -157,7 +164,7 @@ public final class plasmaCrawlLURL extends indexURL { } } - public void store(plasmaCrawlLURLEntry entry, boolean cached) throws IOException { + public void store(plasmaCrawlLURLEntry entry) throws IOException { // Check if there is a more recent Entry already in the DB plasmaCrawlLURLEntry oldEntry; try { @@ -177,13 +184,7 @@ public final class plasmaCrawlLURL extends indexURL { return; // this did not need to be stored, but is updated } - if ((cached) && (urlIndexCache != null)) { - synchronized (urlIndexCache) { - urlIndexCache.put(entry.toRowEntry()); - } - } else { - urlIndexFile.put(entry.toRowEntry(), entry.loaddate()); - } + urlIndexFile.put(entry.toRowEntry(), entry.loaddate()); } public synchronized plasmaCrawlLURLEntry newEntry(String propStr, boolean setGlobal) { diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 4f1e77eab..e72db9278 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -1574,7 +1574,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser /* ======================================================================== * STORE URL TO LOADED-URL-DB * ======================================================================== */ - urlPool.loadedURL.store(newEntry, false); + urlPool.loadedURL.store(newEntry); urlPool.loadedURL.stack( newEntry, // loaded url db entry initiatorPeerHash, // initiator peer hash @@ -1966,7 +1966,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser if ((lurl != null) && (lurl.length() != 0)) { String propStr = crypt.simpleDecode(lurl, (String) page.get("key")); plasmaCrawlLURLEntry entry = urlPool.loadedURL.newEntry(propStr, true); - urlPool.loadedURL.store(entry, false); + urlPool.loadedURL.store(entry); urlPool.loadedURL.stack(entry, yacyCore.seedDB.mySeed.hash, remoteSeed.hash, 1); // *** ueberfluessig/doppelt? urlPool.noticeURL.remove(entry.hash()); log.logInfo(STR_REMOTECRAWLTRIGGER + remoteSeed.getName() + " SUPERFLUOUS. CAUSE: " + page.get("reason") + " (URL=" + urlEntry.url().toString() + "). URL IS CONSIDERED AS 'LOADED!'"); diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index 61a5009cb..83d5a8190 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -503,7 +503,7 @@ public final class yacyClient { // get one single search result urlEntry = urlManager.newEntry((String) result.get("resource" + n), true); if ((urlEntry == null) || (blacklist.isListed(plasmaURLPattern.BLACKLIST_SEARCH, urlEntry.url()))) { continue; } // block with backlist - urlManager.store(urlEntry, true); + urlManager.store(urlEntry); urlManager.stack(urlEntry, yacyCore.seedDB.mySeed.hash, targetPeer.hash, 2); // save the url entry diff --git a/source/yacy.java b/source/yacy.java index f937f3002..3acdea737 100644 --- a/source/yacy.java +++ b/source/yacy.java @@ -736,7 +736,7 @@ public final class yacy { if ((currentUrlDB.exists(urlHash)) && (!minimizedUrlDB.exists(urlHash))) try { plasmaCrawlLURLEntry urlEntry = currentUrlDB.load(urlHash, null); urlCounter++; - minimizedUrlDB.store(urlEntry, false); + minimizedUrlDB.store(urlEntry); if (urlCounter % 500 == 0) { log.logInfo(urlCounter + " URLs found so far."); }