refactoring of indexing methods

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2787 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent 14490f0a83
commit 77a59a115d

@ -3,7 +3,7 @@ javacSource=1.4
javacTarget=1.4
# Release Configuration
releaseVersion=0.481
releaseVersion=0.482
releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
#releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}

@ -369,7 +369,7 @@ public class dir {
phrase.length(), /*size*/
condenser.RESULT_NUMB_WORDS
);
switchboard.urlPool.loadedURL.store(newEntry, false);
switchboard.urlPool.loadedURL.store(newEntry);
switchboard.urlPool.loadedURL.stack(
newEntry,
"____________", /*initiator*/

@ -130,7 +130,7 @@ public final class crawlReceipt {
"\n\tURL properties: "+ propStr);
} else try {
// put new entry into database
switchboard.urlPool.loadedURL.store(entry, false);
switchboard.urlPool.loadedURL.store(entry);
switchboard.urlPool.loadedURL.stack(entry, youare, iam, 1);
// generating url hash

@ -106,7 +106,7 @@ public final class transferURL {
lEntry = null;
blocked++;
} else try {
sb.urlPool.loadedURL.store(lEntry, true);
sb.urlPool.loadedURL.store(lEntry);
sb.urlPool.loadedURL.stack(lEntry, iam, iam, 3);
yacyCore.log.logFine("transferURL: received URL '" + lEntry.url() + "' from peer " + otherPeerName);
received++;

@ -139,14 +139,14 @@ public class dbtest {
try {
final kelondroRow.Entry entryBytes = getTable().get(entry.getKey());
if (entryBytes != null) {
System.out.println("ENTRY=" + entryBytes.getColString(1, null));
//System.out.println("ENTRY=" + entryBytes.getColString(1, null));
final STEntry dbEntry = new STEntry(entryBytes.getColBytes(0), entryBytes.getColBytes(1));
if (!dbEntry.isValid()) {
System.out.println("INVALID: " + dbEntry);
} else {
}/* else {
System.out.println("_VALID_: " + dbEntry);
getTable().remove(entry.getKey());
}
}*/
}
} catch (IOException e) {
System.err.println(e);
@ -572,6 +572,10 @@ final class dbTable implements kelondroIndex {
}
}
// Stub: this table does not implement removal of an arbitrary entry.
// Nothing is removed and null is always returned.
public kelondroRow.Entry removeOne() {
return null;
}
public Iterator rows(boolean up, boolean rotating, byte[] startKey) throws IOException {
// Objects are of type byte[][]
return null;
@ -595,6 +599,15 @@ final class dbTable implements kelondroIndex {
public kelondroOrder order() {
return this.order;
}
// Primary key position reported by this table; hard-wired to 0.
public int primarykey() {
return 0;
}
// Returns a newly created kelondroProfile on every call; no profile
// instance is kept by this table, so each caller gets a fresh object.
public kelondroProfile profile() {
return new kelondroProfile();
}
}

@ -301,7 +301,7 @@ public class indexContainer extends kelondroRowSet {
System.out.println("DEBUG: JOIN METHOD BY ENUMERATION");
indexContainer conj = new indexContainer(null); // start with empty search result
if (!((i1.order().signature().equals(i2.order().signature())) &&
(i1.orderColumn() == i2.orderColumn()))) return conj; // ordering must be equal
(i1.primarykey() == i2.primarykey()))) return conj; // ordering must be equal
Iterator e1 = i1.entries();
Iterator e2 = i2.entries();
int c;

@ -36,8 +36,6 @@ import java.util.TreeMap;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroIndex;
import de.anomic.kelondro.kelondroRAMIndex;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroTree;
import de.anomic.net.URL;
import de.anomic.server.serverByteBuffer;
@ -412,48 +410,24 @@ public class indexURL {
// the class object
protected kelondroIndex urlIndexFile = null;
protected kelondroRAMIndex urlIndexCache = null;
protected kelondroIndex urlIndexFile = null;
public indexURL() {
urlIndexFile = null;
urlIndexCache = null;
}
public int size() {
try {
return urlIndexFile.size() + ((urlIndexCache == null) ? 0 : urlIndexCache.size());
return urlIndexFile.size() ;
} catch (IOException e) {
return 0;
}
}
/**
 * Moves a share of the cached rows to the index file: one tenth of the
 * current cache size, but at least one row. Does nothing when there is
 * no cache or the cache is empty.
 */
public void flushCacheSome() {
    if ((urlIndexCache == null) || (urlIndexCache.size() == 0)) {
        return;
    }
    final int rounds = Math.max(1, urlIndexCache.size() / 10);
    for (int done = 0; done < rounds; done++) {
        flushCacheOnce();
    }
}
/**
 * Moves a single row from the cache to the index file.
 * Does nothing when there is no cache or the cache is empty; an
 * IOException while writing is printed and otherwise ignored.
 */
public void flushCacheOnce() {
    if ((urlIndexCache == null) || (urlIndexCache.size() == 0)) {
        return;
    }
    synchronized (urlIndexCache) {
        final Iterator cached = urlIndexCache.rows(true, false, null);
        if (!cached.hasNext()) {
            return;
        }
        try {
            // write the row to the file first, then drop it from the cache
            urlIndexFile.put((kelondroRow.Entry) cached.next());
            cached.remove();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
public boolean remove(String hash) {
if (hash == null) return false;
try {
urlIndexFile.remove(hash.getBytes());
if (urlIndexCache != null) synchronized (urlIndexCache) {urlIndexCache.remove(hash.getBytes());}
return true;
} catch (IOException e) {
return false;
@ -461,19 +435,10 @@ public class indexURL {
}
public void close() throws IOException {
while ((urlIndexCache != null) && (urlIndexCache.size() > 0)) flushCacheOnce();
if (urlIndexFile != null) {
urlIndexFile.close();
urlIndexFile = null;
}
if (urlIndexCache != null) {
urlIndexCache.close();
urlIndexCache = null;
}
}
public int writeCacheSize() {
return (urlIndexCache == null) ? 0 : urlIndexCache.size();
}
public int cacheNodeChunkSize() {

@ -0,0 +1,197 @@
// kelondroBufferedIndex.java
// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
// first published 16.10.2006 on http://www.anomic.de
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.kelondro;
import java.io.IOException;
import java.util.Date;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import de.anomic.server.serverMemory;
/**
 * A write buffer in front of another {@link kelondroIndex}.
 * <p>
 * {@link #put(kelondroRow.Entry)} collects entries whose key is known neither
 * to the buffer nor to the backing index in an in-memory {@link TreeMap};
 * entries whose key already exists in the backing index are written through
 * directly. Buffered entries reach the backing index through {@link #flush()},
 * {@link #flushOnce()} or {@link #flushSome()}.
 * <p>
 * Read/write/delete timing is recorded in the profile of the backing index.
 */
public class kelondroBufferedIndex implements kelondroIndex {

    // do not fill cache further if the amount of available memory is less than this
    private static final long memBlockLimit = 2000000;
    // the buffer is always flushed once it holds more entries than this
    private static final int bufferFlushLimit = 10000;
    // the buffer is never flushed automatically while it holds at most this many entries
    private static final int bufferFlushMinimum = 1000;

    private TreeMap buffer;       // key (byte[]) -> kelondroRow.Entry, not yet written to the index
    private kelondroIndex index;  // the backing index

    /**
     * @param theIndex the index to be buffered; its order (if any) is also
     *                 used to order the keys inside the buffer
     */
    public kelondroBufferedIndex(kelondroIndex theIndex) {
        index = theIndex;
        buffer = (theIndex.order() == null) ? new TreeMap() : new TreeMap(theIndex.order());
    }

    /**
     * Writes all buffered entries to the backing index and empties the buffer.
     * Each entry leaves the buffer as soon as it has been written, so a
     * failing write does not cause already written entries to be written again.
     */
    public synchronized void flush() throws IOException {
        if (buffer.size() == 0) return;
        Iterator i = buffer.values().iterator();
        while (i.hasNext()) {
            index.put((kelondroRow.Entry) i.next());
            i.remove();
        }
    }

    /**
     * Writes one buffered entry (the first in buffer order) to the backing
     * index and removes it from the buffer.
     */
    public synchronized void flushOnce() throws IOException {
        if (buffer.size() == 0) return;
        Iterator i = buffer.values().iterator();
        if (i.hasNext()) {
            index.put((kelondroRow.Entry) i.next());
            // the entry must leave the buffer, otherwise repeated calls would
            // write the same entry again and the buffer would never shrink
            i.remove();
        }
    }

    /**
     * Flushes one tenth of the buffered entries, but at least one.
     */
    public synchronized void flushSome() throws IOException {
        if (buffer.size() == 0) return;
        int flush = Math.max(1, buffer.size() / 10);
        while (flush-- > 0) flushOnce();
    }

    /**
     * @return number of buffered entries plus the size of the backing index
     */
    public synchronized int size() throws IOException {
        return buffer.size() + index.size();
    }

    /**
     * @return number of entries that are currently held in the buffer only
     */
    public synchronized int writeBufferSize() {
        return buffer.size();
    }

    /**
     * Flushes the buffer (best effort) and returns the string form of the
     * backing index.
     */
    public synchronized String toString() {
        // toString() cannot throw a checked exception; if the flush fails the
        // result simply describes the backing index in its current state
        try {flush();} catch (IOException e) {}
        return index.toString();
    }

    /**
     * Looks the key up in the buffer first and in the backing index second.
     *
     * @return the entry, or null if neither place knows the key
     */
    public synchronized kelondroRow.Entry get(byte[] key) throws IOException {
        long handle = index.profile().startRead();
        try {
            kelondroRow.Entry entry = (kelondroRow.Entry) buffer.get(key);
            if (entry == null) entry = index.get(key);
            return entry;
        } finally {
            // close the profile handle on every exit path
            index.profile().stopRead(handle);
        }
    }

    /**
     * Adds an entry directly to the backing index, bypassing the buffer.
     * Only usable when the backing index is a {@link kelondroRowSet}.
     */
    public synchronized void add(kelondroRow.Entry newentry) throws IOException {
        assert (index instanceof kelondroRowSet);
        ((kelondroRowSet) index).add(newentry);
    }

    /**
     * Same as {@link #put(kelondroRow.Entry)}; the date is not used.
     */
    public synchronized kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException {
        return put(row);
    }

    /**
     * Stores an entry under the key found in the primary key column.
     *
     * @return the entry that was stored under the same key before, or null
     */
    public synchronized kelondroRow.Entry put(kelondroRow.Entry newentry) throws IOException {
        long handle = index.profile().startWrite();
        try {
            byte[] key = newentry.getColBytes(index.primarykey());
            kelondroRow.Entry oldentry = (kelondroRow.Entry) buffer.get(key);
            if (oldentry == null) {
                // try the collection
                oldentry = index.get(key);
                if (oldentry == null) {
                    // this was not anywhere
                    buffer.put(key, newentry);
                    // NOTE(review): this flushes early when MORE than memBlockLimit
                    // bytes are available, which reads inverted relative to the
                    // comment on memBlockLimit; condition kept unchanged - confirm intent
                    if (((buffer.size() > bufferFlushMinimum) && (serverMemory.available() > memBlockLimit))
                        || (buffer.size() > bufferFlushLimit))
                        flush();
                } else {
                    // replace old entry
                    index.put(newentry);
                }
            } else {
                // the entry is already in buffer
                // simply replace old entry
                buffer.put(key, newentry);
            }
            return oldentry;
        } finally {
            index.profile().stopWrite(handle);
        }
    }

    /**
     * Removes the entry with the given key from the buffer or, if it is not
     * buffered, from the backing index.
     *
     * @return the removed entry, or null if the key was unknown
     */
    public synchronized kelondroRow.Entry remove(byte[] key) throws IOException {
        long handle = index.profile().startDelete();
        try {
            kelondroRow.Entry oldentry = (kelondroRow.Entry) buffer.remove(key);
            if (oldentry == null) {
                // try the collection
                oldentry = index.remove(key);
            }
            return oldentry;
        } finally {
            // also reached when the entry came from the backing index
            index.profile().stopDelete(handle);
        }
    }

    /**
     * Removes an arbitrary entry: the first buffered one if the buffer is not
     * empty, otherwise whatever the backing index chooses.
     *
     * @return the removed entry, or null if the backing index returned none
     */
    public synchronized kelondroRow.Entry removeOne() throws IOException {
        long handle = index.profile().startDelete();
        try {
            if (buffer.size() > 0) {
                byte[] key = (byte[]) buffer.keySet().iterator().next();
                return (kelondroRow.Entry) buffer.remove(key);
            }
            return index.removeOne();
        } finally {
            index.profile().stopDelete(handle);
        }
    }

    /**
     * @return the profile of the backing index
     */
    public kelondroProfile profile() {
        return index.profile();
    }

    /**
     * Flushes the buffer, releases it and closes the backing index.
     */
    public synchronized void close() throws IOException {
        flush();
        buffer = null;
        index.close();
    }

    public kelondroOrder order() {
        return index.order();
    }

    public int primarykey() {
        return index.primarykey();
    }

    public kelondroRow row() throws IOException {
        return index.row();
    }

    /**
     * Same as {@code rows(true, false, null)}.
     */
    public synchronized Iterator rows() throws IOException {
        return rows(true, false, null);
    }

    /**
     * Flushes the buffer and returns the row iterator of the backing index,
     * so that the iteration also covers formerly buffered entries.
     */
    public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException {
        flush();
        return index.rows(up, rotating, firstKey);
    }

    /**
     * Creates a buffered index on top of a new in-memory {@link kelondroRowSet}
     * that uses the natural order on column 0.
     */
    public static kelondroBufferedIndex getRAMIndex(kelondroRow rowdef, int initSize) {
        return new kelondroBufferedIndex(new kelondroRowSet(rowdef, kelondroNaturalOrder.naturalOrder, 0, initSize));
    }
}

@ -61,6 +61,13 @@ public class kelondroBytesIntMap {
return (int) indexentry.getColLong(1);
}
/**
 * Removes an arbitrary entry from the underlying index.
 *
 * @return the value of column 1 of the removed entry, cast to int, or -1
 *         when the index is empty or yields no entry
 */
public synchronized int removeonei() throws IOException {
    if (ki.size() != 0) {
        final kelondroRow.Entry removed = ki.removeOne();
        if (removed != null) {
            return (int) removed.getColLong(1);
        }
    }
    // nothing could be removed
    return -1;
}
public synchronized int size() throws IOException {
return ki.size();
}
@ -76,4 +83,8 @@ public class kelondroBytesIntMap {
return ki.order();
}
// Returns the profile of the wrapped index ki.
public kelondroProfile profile() {
return ki.profile();
}
}

@ -99,7 +99,8 @@ public class kelondroCollectionIndex {
boolean ramIndexGeneration = false;
boolean fileIndexGeneration = !(new File(path, filenameStub + ".index").exists());
if (ramIndexGeneration) index = new kelondroRAMIndex(indexOrder, indexRow());
//if (ramIndexGeneration) index = new kelondroRAMIndex(indexOrder, indexRow());
if (ramIndexGeneration) index = new kelondroBufferedIndex(new kelondroRowSet(indexRow(), indexOrder, 0, 0));
if (fileIndexGeneration) index = new kelondroFlexTable(path, filenameStub + ".index", buffersize, preloadTime, indexRow(), indexOrder);
// open array files

@ -93,6 +93,10 @@ public class kelondroFlexSplitTable implements kelondroIndex {
return this.objectOrder;
}
// Primary key position reported by this split table; hard-wired to 0.
public int primarykey() {
return 0;
}
public synchronized int size() throws IOException {
Iterator i = tables.values().iterator();
int s = 0;
@ -102,6 +106,14 @@ public class kelondroFlexSplitTable implements kelondroIndex {
return s;
}
/**
 * Collects the profile of every table in this split table and returns
 * their consolidation.
 */
public synchronized kelondroProfile profile() {
    final kelondroProfile[] collected = new kelondroProfile[tables.size()];
    int pos = 0;
    for (Iterator it = tables.values().iterator(); it.hasNext(); pos++) {
        collected[pos] = ((kelondroFlexTable) it.next()).profile();
    }
    return kelondroProfile.consolidate(collected);
}
public kelondroRow row() throws IOException {
return this.rowdef;
}
@ -150,6 +162,24 @@ public class kelondroFlexSplitTable implements kelondroIndex {
return null;
}
/**
 * Removes one entry from the table that currently holds the most entries
 * (the first such table in iteration order on a tie).
 *
 * @return what that table's removeOne() returns, or null when there is no table
 */
public synchronized kelondroRow.Entry removeOne() throws IOException {
    kelondroFlexTable fullest = null;
    int fullestSize = -1;
    for (Iterator it = tables.values().iterator(); it.hasNext();) {
        final kelondroFlexTable candidate = (kelondroFlexTable) it.next();
        if (candidate.size() > fullestSize) {
            fullest = candidate;
            fullestSize = candidate.size();
        }
    }
    return (fullest == null) ? null : fullest.removeOne();
}
// Returns a new rowIter over this split table.
// NOTE(review): the arguments up, rotating and firstKey are not passed on to
// the iterator here - confirm that ignoring them is intended.
public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException {
return new rowIter();
}

@ -80,7 +80,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
}
private kelondroIndex initializeRamIndex(kelondroOrder objectOrder) throws IOException {
kelondroRowBufferedSet ri = new kelondroRowBufferedSet(new kelondroRow(new kelondroColumn[]{super.row().column(0), new kelondroColumn("int c-4 {b256}")}), objectOrder, 0, 0);
kelondroBufferedIndex ri = new kelondroBufferedIndex(new kelondroRowSet(new kelondroRow(new kelondroColumn[]{super.row().column(0), new kelondroColumn("int c-4 {b256}")}), objectOrder, 0, 0));
//kelondroRowSet ri = new kelondroRowSet(new kelondroRow(new kelondroColumn[]{super.row().column(0), new kelondroColumn("int c-4 {b256}")}), 0);
//ri.setOrdering(objectOrder, 0);
Iterator content = super.col[0].contentNodes(-1);
@ -93,7 +93,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
indexentry = ri.row().newEntry();
indexentry.setCol(0, node.getValueRow());
indexentry.setCol(1, i);
ri.put(indexentry);
ri.add(indexentry);
if ((i % 10000) == 0) {
System.out.print('.');
System.out.flush();
@ -101,7 +101,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
}
System.out.print(" -ordering- ");
System.out.flush();
ri.trim();
ri.flush();
return ri;
}
@ -144,23 +144,32 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
}
public synchronized kelondroRow.Entry put(kelondroRow.Entry row) throws IOException {
int i = index.geti(row.getColBytes(0));
if (i < 0) {
index.puti(row.getColBytes(0), super.add(row));
return null;
}
return super.set(i, row);
int i = index.geti(row.getColBytes(0));
if (i < 0) {
index.puti(row.getColBytes(0), super.add(row));
return null;
}
return super.set(i, row);
}
public synchronized kelondroRow.Entry remove(byte[] key) throws IOException {
int i = index.removei(key);
if (i < 0) return null;
kelondroRow.Entry r;
r = super.get(i);
super.remove(i);
return r;
int i = index.removei(key);
if (i < 0) return null;
kelondroRow.Entry r;
r = super.get(i);
super.remove(i);
return r;
}
/**
 * Removes an arbitrary row: takes one position out of the index, reads the
 * row stored at that position and removes it from the array.
 *
 * @return the removed row, or null when the index delivered no position
 */
public synchronized kelondroRow.Entry removeOne() throws IOException {
    final int position = index.removeonei();
    if (position >= 0) {
        // read the row before it is deleted so that it can be returned
        final kelondroRow.Entry removed = super.get(position);
        super.remove(position);
        return removed;
    }
    return null;
}
public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException {
return new rowIterator(up, rotating, firstKey);
}
@ -198,4 +207,12 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
return index.order();
}
// Primary key position reported by this table; hard-wired to 0.
public int primarykey() {
return 0;
}
// Returns the profile of this table's index object.
public kelondroProfile profile() {
return index.profile();
}
}

@ -57,12 +57,15 @@ import java.util.Iterator;
/**
 * Common interface of the kelondro index structures: a keyed collection of
 * kelondroRow.Entry objects that can be read, written, removed and iterated.
 */
public interface kelondroIndex {
// the order object of this index
public kelondroOrder order();
// the number of the column that holds the key of an entry
public int primarykey();
// the number of entries in this index
public int size() throws IOException;
// the profile object associated with this index
public kelondroProfile profile();
// the row definition of the entries in this index
public kelondroRow row() throws IOException;
// the entry stored under the given key
// NOTE(review): implementations appear to return null for an unknown key - confirm
public kelondroRow.Entry get(byte[] key) throws IOException;
// stores an entry
// NOTE(review): implementations appear to return the entry that was stored
// under the same key before, or null - confirm for all implementations
public kelondroRow.Entry put(kelondroRow.Entry row) throws IOException;
// stores an entry together with a date; what the date is used for is up to the implementation
public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException;
// removes the entry with the given key and returns what was removed
public kelondroRow.Entry remove(byte[] key) throws IOException;
// removes an arbitrary entry chosen by the implementation and returns it;
// implementations may return null when nothing was removed
public kelondroRow.Entry removeOne() throws IOException;
// an iterator over the entries of this index
// NOTE(review): presumably up selects the direction, rotating a wrap-around
// and firstKey the start position; not every implementation uses them - confirm
public Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException;
// closes this index
public void close() throws IOException;
}

@ -24,36 +24,95 @@
package de.anomic.kelondro;
import java.io.IOException;
import java.util.Iterator;
//import java.util.Random;
public class kelondroIntBytesMap extends kelondroRowBufferedSet {
public class kelondroIntBytesMap {
private kelondroBufferedIndex index;
public kelondroIntBytesMap(int payloadSize, int initSize) {
super(new kelondroRow("Cardinal key-4 {b256}, byte[] payload-" + payloadSize), kelondroNaturalOrder.naturalOrder, 0, initSize);
index = kelondroBufferedIndex.getRAMIndex(new kelondroRow("Cardinal key-4 {b256}, byte[] payload-" + payloadSize), initSize);
}
/**
 * @return the size of the underlying index, or 0 when the index reports
 *         an IOException
 */
public int size() {
    int count = 0;
    try {
        count = index.size();
    } catch (IOException e) {
        // keep the default of 0
    }
    return count;
}
public byte[] getb(int ii) {
kelondroRow.Entry indexentry = super.get(kelondroNaturalOrder.encodeLong((long) ii, 4));
kelondroRow.Entry indexentry;
try {indexentry = index.get(kelondroNaturalOrder.encodeLong((long) ii, 4));} catch (IOException e) {return null;}
if (indexentry == null) return null;
return indexentry.getColBytes(1);
}
/**
 * Builds a new entry (column 0 = ii, column 1 = value) and hands it to the
 * add() method of the underlying index. An IOException is ignored, in which
 * case the entry may not have been stored.
 */
public void addb(int ii, byte[] value) {
    try {
        final kelondroRow.Entry fresh = index.row().newEntry();
        fresh.setCol(0, (long) ii);
        fresh.setCol(1, value);
        index.add(fresh);
    } catch (IOException e) {
        // ignored, see method comment
    }
}
public byte[] putb(int ii, byte[] value) {
kelondroRow.Entry newentry = super.row().newEntry();
newentry.setCol(0, (long) ii);
newentry.setCol(1, value);
kelondroRow.Entry oldentry = super.put(newentry);
if (oldentry == null) return null;
return oldentry.getColBytes(1);
kelondroRow.Entry newentry;
try {
newentry = index.row().newEntry();
newentry.setCol(0, (long) ii);
newentry.setCol(1, value);
kelondroRow.Entry oldentry = index.put(newentry);
if (oldentry == null) return null;
return oldentry.getColBytes(1);
} catch (IOException e) {
return null;
}
}
public byte[] removeb(int ii) {
if (size() == 0) {
try {
if (index.size() == 0) return null;
kelondroRow.Entry indexentry = index.remove(kelondroNaturalOrder.encodeLong((long) ii, 4));
if (indexentry == null) return null;
return indexentry.getColBytes(1);
} catch (IOException e) {
return null;
}
kelondroRow.Entry indexentry = super.remove(kelondroNaturalOrder.encodeLong((long) ii, 4));
if (indexentry == null) return null;
return indexentry.getColBytes(1);
}
/**
 * Removes an arbitrary entry from the underlying index.
 *
 * @return the bytes of column 1 of the removed entry, or null when the index
 *         is empty, yields no entry or reports an IOException
 */
public byte[] removeoneb() {
    try {
        if (index.size() != 0) {
            final kelondroRow.Entry victim = index.removeOne();
            if (victim != null) {
                return victim.getColBytes(1);
            }
        }
    } catch (IOException e) {
        // fall through to the null result
    }
    return null;
}
/**
 * @return the row iterator of the underlying index, or null when the index
 *         reports an IOException
 */
public Iterator rows() {
    Iterator result;
    try {
        result = index.rows();
    } catch (IOException e) {
        result = null;
    }
    return result;
}
// Flushes the underlying buffered index.
// An IOException raised by the flush is silently ignored.
public void flush() {
try {index.flush();} catch (IOException e) {}
}
// Returns the profile of the underlying buffered index.
public kelondroProfile profile() {
return index.profile();
}
public static void main(String[] args) {

@ -38,17 +38,23 @@ public class kelondroRAMIndex implements kelondroIndex {
private TreeMap index;
private kelondroOrder order;
private kelondroRow rowdef;
private kelondroProfile profile;
// Creates an empty in-memory index.
// defaultOrder is used as the comparator of the backing TreeMap and is
// remembered as the order of this index; rowdef is stored as given.
// Every instance gets its own, newly created kelondroProfile.
public kelondroRAMIndex(kelondroOrder defaultOrder, kelondroRow rowdef) {
this.index = new TreeMap(defaultOrder);
this.order = defaultOrder;
this.rowdef = rowdef;
this.profile = new kelondroProfile();
}
public kelondroOrder order() {
return this.order;
}
// Primary key position reported by this index; hard-wired to 0.
public int primarykey() {
return 0;
}
public synchronized int size() {
return this.index.size();
}
@ -73,6 +79,11 @@ public class kelondroRAMIndex implements kelondroIndex {
return (kelondroRow.Entry) index.remove(key);
}
/**
 * Removes the entry with the first key of the map.
 *
 * @return what remove() returns for that key, or null when the index is empty
 */
public synchronized Entry removeOne() {
    if (this.index.size() != 0) {
        final byte[] firstKey = (byte[]) index.keySet().iterator().next();
        return remove(firstKey);
    }
    return null;
}
// Returns an iterator over the values of the backing map.
// NOTE(review): up, rotating and firstKey are not evaluated here; the
// iteration always covers all values in the map's own order - confirm
// that callers do not rely on these arguments.
public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) {
return index.values().iterator();
}
@ -81,4 +92,8 @@ public class kelondroRAMIndex implements kelondroIndex {
index = null;
}
// Returns the profile object held in this index's profile field.
public kelondroProfile profile() {
return profile;
}
}

@ -433,10 +433,10 @@ public class kelondroRecords {
Node n;
while ((System.currentTimeMillis() < stop) && (cacheHeaders.size() < cacheSize) && (i.hasNext())) {
n = (Node) i.next();
cacheHeaders.putb(n.handle.index, n.headChunk);
cacheHeaders.addb(n.handle.index, n.headChunk);
count++;
}
cacheHeaders.trim();
cacheHeaders.flush();
logFine("preloaded " + count + " records into cache");
} catch (kelondroException e) {
// the contentNodes iterator had a time-out; we don't do a preload
@ -831,7 +831,7 @@ public class kelondroRecords {
if ((cacheHeaders.size() < cacheSize) && (serverMemory.available() >= memBlock)) return true; // no need to flush cache space
// just delete any of the entries
cacheHeaders.removeOne();
cacheHeaders.removeoneb();
cacheFlush++;
return true;
}
@ -1332,4 +1332,9 @@ public class kelondroRecords {
entryFile.profile()
};
}
// Returns a single profile: the consolidation of all profiles delivered by profiles().
public kelondroProfile profile() {
return kelondroProfile.consolidate(profiles());
}
}

@ -1,181 +0,0 @@
// kelondroRowBufferedSet.java
// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
// first published 21.06.2006 on http://www.anomic.de
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.kelondro;
import java.util.Date;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.TreeMap;
import de.anomic.server.serverMemory;
/**
 * A {@link kelondroRowSet} with a write buffer in front of it.
 * <p>
 * {@link #put(kelondroRow.Entry)} collects entries with a key that is known
 * neither to the buffer nor to the row set in a {@link TreeMap}; they are
 * added to the row set when the buffer is flushed.
 */
public class kelondroRowBufferedSet implements kelondroIndex {

    // do not fill cache further if the amount of available memory is less than this
    private static final long memBlockLimit = 2000000;
    // the buffer is always flushed once it holds more entries than this
    private static final int bufferFlushLimit = 10000;
    // the buffer is never flushed automatically while it holds at most this many entries
    private static final int bufferFlushMinimum = 1000;

    private kelondroProfile profile; // timing of get/put/remove on this object
    private TreeMap buffer;          // key (byte[]) -> kelondroRow.Entry, not yet in the store
    private kelondroRowSet store;    // the backing row set

    /**
     * @param rowdef      row definition of the entries
     * @param objectOrder order of the keys; must not be null
     * @param orderColumn column the store is ordered by
     * @param objectCount initial capacity hint that is passed to the store
     */
    public kelondroRowBufferedSet(kelondroRow rowdef, kelondroOrder objectOrder, int orderColumn, int objectCount) {
        store = new kelondroRowSet(rowdef, objectCount);
        assert (objectOrder != null);
        store.setOrdering(objectOrder, orderColumn);
        buffer = new TreeMap(objectOrder);
        profile = new kelondroProfile();
    }

    /**
     * Adds all buffered entries to the store and empties the buffer.
     */
    private final void flush() {
        // call only in synchronized environment
        Iterator i = buffer.entrySet().iterator();
        Map.Entry entry;
        while (i.hasNext()) {
            entry = (Map.Entry) i.next();
            store.add((kelondroRow.Entry) entry.getValue());
        }
        buffer.clear();
    }

    /**
     * Flushes the buffer and trims the store.
     */
    public synchronized final void trim() {
        flush();
        store.trim();
    }

    /**
     * Removes an arbitrary entry: the one with the last key of the buffer,
     * or one from the store when the buffer is empty.
     */
    public synchronized void removeOne() {
        if (buffer.size() == 0) {
            store.removeOne();
        } else try {
            // buffer.remove(buffer.keySet().iterator().next());
            buffer.remove(buffer.lastKey());
        } catch (NoSuchElementException e) {}
    }

    /**
     * Empties both the store and the buffer.
     */
    public synchronized void clear() {
        store.clear();
        buffer.clear();
    }

    /**
     * @return number of buffered entries plus the size of the store
     */
    public synchronized int size() {
        return buffer.size() + store.size();
    }

    /**
     * Flushes the buffer and returns the row iterator of the store.
     */
    public synchronized Iterator rows() {
        flush();
        return store.rows();
    }

    /**
     * Flushes the buffer and calls uniq() on the store.
     */
    public synchronized void uniq() {
        flush();
        store.uniq();
    }

    /**
     * Flushes the buffer and returns the string form of the store.
     */
    public synchronized String toString() {
        flush();
        return store.toString();
    }

    /**
     * Looks the key up in the buffer first and in the store second.
     *
     * @return the entry, or null if neither place knows the key
     */
    public synchronized kelondroRow.Entry get(byte[] key) {
        long handle = profile.startRead();
        kelondroRow.Entry entry = null;
        entry = (kelondroRow.Entry) buffer.get(key);
        if (entry == null) entry = store.get(key);
        profile.stopRead(handle);
        return entry;
    }

    /**
     * Same as {@link #put(kelondroRow.Entry)}; the date is not used.
     */
    public synchronized kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) {
        return put(row);
    }

    /**
     * Stores an entry under the key found in the sort column of the store.
     *
     * @return the entry that was stored under the same key before, or null
     */
    public synchronized kelondroRow.Entry put(kelondroRow.Entry newentry) {
        long handle = profile.startWrite();
        byte[] key = newentry.getColBytes(store.sortColumn);
        kelondroRow.Entry oldentry = null;
        oldentry = (kelondroRow.Entry) buffer.get(key);
        if (oldentry == null) {
            // try the collection
            oldentry = store.get(key);
            if (oldentry == null) {
                // this was not anywhere
                buffer.put(key, newentry);
                if (((buffer.size() > bufferFlushMinimum) && (serverMemory.available() > memBlockLimit))
                    || (buffer.size() > bufferFlushLimit))
                    flush();
            } else {
                // replace old entry
                store.put(newentry);
            }
        } else {
            // the entry is already in buffer
            // simply replace old entry
            buffer.put(key, newentry);
        }
        profile.stopWrite(handle);
        return oldentry;
    }

    /**
     * Removes the entry with the given key from the buffer or, if it is not
     * buffered, from the store.
     *
     * @return the removed entry, or null if the key was unknown
     */
    public synchronized kelondroRow.Entry remove(byte[] key) {
        long handle = profile.startDelete();
        try {
            kelondroRow.Entry oldentry = (kelondroRow.Entry) buffer.remove(key);
            if (oldentry == null) {
                // try the collection
                oldentry = store.remove(key);
            }
            return oldentry;
        } finally {
            // close the profile handle on both paths, also when the entry
            // was found in the store
            profile.stopDelete(handle);
        }
    }

    /**
     * Flushes the buffer and forwards to removeMarkedAll(c) of the store.
     */
    public synchronized void removeMarkedAll(kelondroRowCollection c) {
        long handle = profile.startDelete();
        flush();
        store.removeMarkedAll(c);
        profile.stopDelete(handle);
    }

    /**
     * @return the profile of the store
     */
    // NOTE(review): get/put/remove record their timing in this object's own
    // profile field, which is not the object returned here - confirm intent
    public kelondroProfile profile() {
        return store.profile();
    }

    /**
     * Flushes the buffer and closes the store.
     */
    public synchronized void close() {
        flush();
        store.close();
    }

    public kelondroOrder order() {
        return store.order();
    }

    public kelondroRow row() {
        return store.row();
    }

    /**
     * Flushes the buffer and returns the row iterator of the store for the
     * given direction, rotation flag and start key.
     */
    public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) {
        flush();
        return store.rows(up, rotating, firstKey);
    }
}

@ -245,11 +245,14 @@ public class kelondroRowCollection {
this.lastTimeWrote = System.currentTimeMillis();
}
public void removeOne() {
if (chunkcount == 0) return;
if (chunkcount == sortBound) sortBound--;
chunkcount--;
this.lastTimeWrote = System.currentTimeMillis();
// Removes the last row of this collection and returns it; returns null
// when the collection is empty. Runs while holding the lock on chunkcache.
public kelondroRow.Entry removeOne() {
synchronized (chunkcache) {
if (chunkcount == 0) return null;
// if everything up to the end counted as sorted, the sorted region shrinks as well
if (chunkcount == sortBound) sortBound--;
chunkcount--;
this.lastTimeWrote = System.currentTimeMillis();
// NOTE(review): the row is read at index chunkcount AFTER the decrement,
// i.e. at the position that was just dropped; confirm that get(int)
// accepts an index equal to the current chunkcount
return get(chunkcount);
}
}
public void clear() {

@ -62,6 +62,12 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
this.profile = new kelondroProfile();
}
// Convenience constructor: creates the row set through the
// (rowdef, objectCount) constructor and then sets its ordering to
// objectOrder on column orderColumn. objectOrder must not be null
// (checked by an assertion only).
public kelondroRowSet(kelondroRow rowdef, kelondroOrder objectOrder, int orderColumn, int objectCount) {
this(rowdef, objectCount);
assert (objectOrder != null);
setOrdering(objectOrder, orderColumn);
}
public kelondroRow.Entry get(byte[] key) {
return get(key, 0, key.length);
}
@ -201,7 +207,7 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
return this.sortOrder;
}
public int orderColumn() {
public int primarykey() {
return this.sortColumn;
}

@ -121,6 +121,23 @@ public class kelondroSplittedTree implements kelondroIndex {
public kelondroRow.Entry remove(byte[] key) throws IOException {
return ktfs[partition(key)].remove(key);
}
/**
 * Removes one entry from the partition that holds the most entries (the
 * first such partition on a tie).
 *
 * @return the removed entry, or null when no partition holds any entry
 */
public kelondroRow.Entry removeOne() throws IOException {
    int largestSize = -1;
    int largestPartition = 0;
    for (int p = 0; p < ktfs.length; p++) {
        if (ktfs[p].size() > largestSize) {
            largestSize = ktfs[p].size();
            largestPartition = p;
        }
    }
    if (largestSize <= 0) {
        // no partitions at all, or all of them are empty
        return null;
    }
    return ktfs[largestPartition].removeOne();
}
public Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException {
return new ktfsIterator(up, rotating, firstKey);
}
@ -203,5 +220,14 @@ public class kelondroSplittedTree implements kelondroIndex {
return this.order;
}
// Primary key position reported by this splitted tree; hard-wired to 0.
public int primarykey() {
return 0;
}
/**
 * Collects the profile of every partition and returns their consolidation.
 */
public kelondroProfile profile() {
    final kelondroProfile[] parts = new kelondroProfile[ktfs.length];
    int p = 0;
    while (p < ktfs.length) {
        parts[p] = ktfs[p].profile();
        p++;
    }
    return kelondroProfile.consolidate(parts);
}
}

@ -219,11 +219,14 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
return oo;
}
public kelondroOrder getOrder() {
// returns the order of this tree
public kelondroOrder order() {
return this.objectOrder;
}
// Primary key position reported by this tree; hard-wired to 0.
public int primarykey() {
return 0;
}
public void clear() throws IOException {
super.clear();
setHandle(root, null); // reset the root value
@ -695,6 +698,16 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
}
}
/**
 * Removes an arbitrary entry (the one in the last node) from the tree.
 *
 * @return the removed entry, or null when the tree holds no entry
 * @throws IOException if the underlying file access fails
 */
public kelondroRow.Entry removeOne() throws IOException {
    synchronized(writeSearchObj) {
        // an empty tree has nothing to remove; report that with null, as the
        // other removeOne() implementations do
        if (size() == 0) return null;
        Node theOne = lastNode();
        if (theOne == null) return null;
        // copy the values before the node is removed
        kelondroRow.Entry values = row().newEntry(theOne.getValueRow());
        remove(theOne, null);
        return values;
    }
}
public synchronized void removeAll() throws IOException {
while (size() > 0) remove(lastNode(), null);
}
@ -1371,10 +1384,6 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
}
}
public kelondroOrder order() {
return this.objectOrder;
}
public static void main(String[] args) {
//cmd(args);
//iterationtest();

@ -160,7 +160,7 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter {
if (urlEntry != null) {
/* write it into the home url db */
this.homeUrlDB.store(urlEntry, false);
this.homeUrlDB.store(urlEntry);
importedUrlBuffer.add(urlHash);
this.urlCounter++;

@ -66,8 +66,7 @@ import de.anomic.http.httpc;
import de.anomic.http.httpc.response;
import de.anomic.index.indexEntry;
import de.anomic.index.indexURL;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroRAMIndex;
import de.anomic.kelondro.kelondroBufferedIndex;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroTree;
import de.anomic.net.URL;
@ -98,8 +97,7 @@ public final class plasmaCrawlLURL extends indexURL {
cacheFile.getParentFile().mkdirs();
try {
urlIndexFile = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, plasmaCrawlLURLOldEntry.rowdef);
urlIndexCache = new kelondroRAMIndex(kelondroNaturalOrder.naturalOrder, plasmaCrawlLURLOldEntry.rowdef);
urlIndexFile = new kelondroBufferedIndex(new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, plasmaCrawlLURLOldEntry.rowdef));
} catch (IOException e) {
e.printStackTrace();
System.exit(-1);
@ -139,6 +137,16 @@ public final class plasmaCrawlLURL extends indexURL {
gcrawlResultStack.add(urlHash + initiatorHash + executorHash);
}
/**
 * Writes a part of the buffered url entries to the underlying index.
 * Relies on urlIndexFile being a kelondroBufferedIndex. A failing flush is
 * reported on the error stream; the entries concerned stay buffered or are
 * written by a later flush.
 */
public void flushCacheSome() {
    try {
        ((kelondroBufferedIndex) urlIndexFile).flushSome();
    } catch (IOException e) {
        // do not lose the failure silently
        e.printStackTrace();
    }
}
// Number of entries currently held in the write buffer of the url index.
// Relies on urlIndexFile being a kelondroBufferedIndex (unchecked cast).
public int writeCacheSize() {
return ((kelondroBufferedIndex) urlIndexFile).writeBufferSize();
}
public plasmaCrawlLURLEntry load(String urlHash, indexEntry searchedWord) {
// generates an plasmaLURLEntry using the url hash
// to speed up the access, the url-hashes are buffered
@ -147,9 +155,8 @@ public final class plasmaCrawlLURL extends indexURL {
// - look into the hash cache
// - look into the filed properties
// if the url cannot be found, this returns null
kelondroRow.Entry entry = urlIndexCache.get(urlHash.getBytes());
try {
if (entry == null) entry = urlIndexFile.get(urlHash.getBytes());
kelondroRow.Entry entry = urlIndexFile.get(urlHash.getBytes());
if (entry == null) return null;
return new plasmaCrawlLURLOldEntry(entry, searchedWord);
} catch (IOException e) {
@ -157,7 +164,7 @@ public final class plasmaCrawlLURL extends indexURL {
}
}
public void store(plasmaCrawlLURLEntry entry, boolean cached) throws IOException {
public void store(plasmaCrawlLURLEntry entry) throws IOException {
// Check if there is a more recent Entry already in the DB
plasmaCrawlLURLEntry oldEntry;
try {
@ -177,13 +184,7 @@ public final class plasmaCrawlLURL extends indexURL {
return; // this did not need to be stored, but is updated
}
if ((cached) && (urlIndexCache != null)) {
synchronized (urlIndexCache) {
urlIndexCache.put(entry.toRowEntry());
}
} else {
urlIndexFile.put(entry.toRowEntry(), entry.loaddate());
}
urlIndexFile.put(entry.toRowEntry(), entry.loaddate());
}
public synchronized plasmaCrawlLURLEntry newEntry(String propStr, boolean setGlobal) {

@ -1574,7 +1574,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
/* ========================================================================
* STORE URL TO LOADED-URL-DB
* ======================================================================== */
urlPool.loadedURL.store(newEntry, false);
urlPool.loadedURL.store(newEntry);
urlPool.loadedURL.stack(
newEntry, // loaded url db entry
initiatorPeerHash, // initiator peer hash
@ -1966,7 +1966,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if ((lurl != null) && (lurl.length() != 0)) {
String propStr = crypt.simpleDecode(lurl, (String) page.get("key"));
plasmaCrawlLURLEntry entry = urlPool.loadedURL.newEntry(propStr, true);
urlPool.loadedURL.store(entry, false);
urlPool.loadedURL.store(entry);
urlPool.loadedURL.stack(entry, yacyCore.seedDB.mySeed.hash, remoteSeed.hash, 1); // *** ueberfluessig/doppelt?
urlPool.noticeURL.remove(entry.hash());
log.logInfo(STR_REMOTECRAWLTRIGGER + remoteSeed.getName() + " SUPERFLUOUS. CAUSE: " + page.get("reason") + " (URL=" + urlEntry.url().toString() + "). URL IS CONSIDERED AS 'LOADED!'");

@ -503,7 +503,7 @@ public final class yacyClient {
// get one single search result
urlEntry = urlManager.newEntry((String) result.get("resource" + n), true);
if ((urlEntry == null) || (blacklist.isListed(plasmaURLPattern.BLACKLIST_SEARCH, urlEntry.url()))) { continue; } // block with backlist
urlManager.store(urlEntry, true);
urlManager.store(urlEntry);
urlManager.stack(urlEntry, yacyCore.seedDB.mySeed.hash, targetPeer.hash, 2);
// save the url entry

@ -736,7 +736,7 @@ public final class yacy {
if ((currentUrlDB.exists(urlHash)) && (!minimizedUrlDB.exists(urlHash))) try {
plasmaCrawlLURLEntry urlEntry = currentUrlDB.load(urlHash, null);
urlCounter++;
minimizedUrlDB.store(urlEntry, false);
minimizedUrlDB.store(urlEntry);
if (urlCounter % 500 == 0) {
log.logInfo(urlCounter + " URLs found so far.");
}

Loading…
Cancel
Save