performance hacks

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7147 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent 7ebef56add
commit 39f409a7bb

@ -104,7 +104,7 @@ public class WebStructurePicture_p {
// find start hash
String hash = null;
try {
hash = new String((new DigestURI("http://" + host, null)).hash()).substring(6);
hash = new String((new DigestURI("http://" + host, null)).hash(), 6, 6);
} catch (final MalformedURLException e) {Log.logException(e);}
//assert (sb.webStructure.outgoingReferences(hash) != null);

@ -48,7 +48,7 @@ public class webstructure {
if (about.length() > 6) {
try {
url = new DigestURI(about, null);
about = new String(url.hash()).substring(6);
about = new String(url.hash(), 6, 6);
} catch (MalformedURLException e) {
about = null;
}

@ -207,7 +207,7 @@ public final class RankingProcess extends Thread {
}
// check site constraints
if (query.sitehash != null && !new String(iEntry.metadataHash()).substring(6).equals(query.sitehash)) {
if (query.sitehash != null && !new String(iEntry.metadataHash(), 6, 6).equals(query.sitehash)) {
// filter out all domains that do not match with the site constraint
continue;
}
@ -231,14 +231,11 @@ public final class RankingProcess extends Thread {
// do the ranking
for (WordReferenceVars fEntry: filteredEntries) {
// double-check
if (urlhashes.has(fEntry.metadataHash())) continue;
// insert
stack.put(new ReverseElement<WordReferenceVars>(fEntry, this.order.cardinal(fEntry))); // inserts the element and removed the worst (which is smallest)
try {
urlhashes.put(fEntry.metadataHash());
// insert with double-check
try {
if (!urlhashes.put(fEntry.metadataHash())) {
stack.put(new ReverseElement<WordReferenceVars>(fEntry, this.order.cardinal(fEntry))); // inserts the element and removes the worst (which is smallest)
}
} catch (RowSpaceExceededException e) {
Log.logException(e);
}
@ -299,7 +296,7 @@ public final class RankingProcess extends Thread {
if (!skipDoubleDom) return rwi;
// check doubledom
final String domhash = new String(rwi.getElement().metadataHash()).substring(6);
final String domhash = new String(rwi.getElement().metadataHash(), 6, 6);
m = this.doubleDomCache.get(domhash);
if (m == null) {
// first appearance of dom
@ -398,7 +395,7 @@ public final class RankingProcess extends Thread {
// in case that we do not have a catch-all filter for urls
// we must also construct the domain navigator here
this.hostNavigator.inc(new String(urlhash).substring(6), new String(urlhash));
this.hostNavigator.inc(new String(urlhash, 6, 6), new String(urlhash));
}
// check for more errors

@ -48,11 +48,11 @@ public class ReferenceOrder {
private static int cores = Runtime.getRuntime().availableProcessors();
protected int maxdomcount;
protected WordReferenceVars min, max;
protected final ScoreCluster<String> doms; // collected for "authority" heuristic
private final RankingProfile ranking;
private final String language;
private int maxdomcount;
private WordReferenceVars min, max;
private final ScoreCluster<String> doms; // collected for "authority" heuristic
private final RankingProfile ranking;
private final String language;
public ReferenceOrder(final RankingProfile profile, String language) {
this.min = null;
@ -164,7 +164,7 @@ public class ReferenceOrder {
if (min == null) min = iEntry.clone(); else min.min(iEntry);
if (max == null) max = iEntry.clone(); else max.max(iEntry);
// update domcount
dom = new String(iEntry.metadataHash()).substring(6);
dom = new String(iEntry.metadataHash(), 6, 6);
count = doms0.get(dom);
if (count == null) {
doms0.put(dom, int1);
@ -191,8 +191,8 @@ public class ReferenceOrder {
if (max == null) max = iEntry.clone(); else max.max(iEntry);
// update domcount
String dom = new String(iEntry.metadataHash()).substring(6);
doms.addScore(dom, 1);
String dom = new String(iEntry.metadataHash(), 6, 6);
doms.incScore(dom);
if (!doms.isEmpty()) this.maxdomcount = doms.getMaxScore();
}

@ -161,8 +161,8 @@ public class RCIEvaluation {
dom = i.next();
if (dom.startsWith("www.")) dom = dom.substring(4);
try {
dommap.put(new String((new DigestURI("http://" + dom, null)).hash()).substring(6), dom);
dommap.put(new String((new DigestURI("http://www." + dom, null)).hash()).substring(6), "www." + dom);
dommap.put(new String((new DigestURI("http://" + dom, null)).hash(), 6, 6), dom);
dommap.put(new String((new DigestURI("http://www." + dom, null)).hash(), 6, 6), "www." + dom);
} catch (final MalformedURLException e) {}
}
return dommap;

@ -104,7 +104,7 @@ public class WebStructureGraph {
final StringBuilder cpg = new StringBuilder(12 * (hl.size() + 1) + 1);
assert cpg.length() % 12 == 0 : "cpg.length() = " + cpg.length() + ", cpg = " + cpg.toString();
final StringBuilder cpl = new StringBuilder(12 * (hl.size() + 1) + 1);
final String lhp = new String(url.hash()).substring(6); // local hash part
final String lhp = new String(url.hash(), 6, 6); // local hash part
int GCount = 0;
int LCount = 0;
while (it.hasNext()) {
@ -360,7 +360,7 @@ public class WebStructureGraph {
}
private void learn(final DigestURI url, final StringBuilder reference /*string of b64(12digits)-hashes*/) {
final String domhash = new String(url.hash()).substring(6);
final String domhash = new String(url.hash(), 6, 6);
// parse the new reference string and join it with the stored references
structureEntry structure = outgoingReferences(domhash);

@ -57,7 +57,7 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
Log.logException(e);
return null;
}
return (url == null) ? null : new String(url.hash()).substring(6);
return (url == null) ? null : new String(url.hash(), 6, 6);
}
public DigestURI(final File file) throws MalformedURLException {

@ -149,10 +149,10 @@ public class BufferedObjectIndex implements Index, Iterable<Row.Entry> {
}
}
public void put(Entry row) throws IOException, RowSpaceExceededException {
public boolean put(Entry row) throws IOException, RowSpaceExceededException {
synchronized (this.backend) {
checkBuffer();
this.buffer.put(row);
return this.buffer.put(row);
}
}

@ -289,7 +289,7 @@ public final class Cache implements Index, Iterable<Row.Entry> {
return entry;
}
public final synchronized void put(final Row.Entry row) throws IOException, RowSpaceExceededException {
public final synchronized boolean put(final Row.Entry row) throws IOException, RowSpaceExceededException {
assert (row != null);
assert (row.columns() == row().columns());
//assert (!(serverLog.allZero(row.getColBytes(index.primarykey()))));
@ -305,12 +305,13 @@ public final class Cache implements Index, Iterable<Row.Entry> {
}
// write to the back-end
boolean c;
try {
index.put(row);
c = index.put(row);
} catch (RowSpaceExceededException e1) {
// flush all caches to get more memory
clearCache();
index.put(row); // try again
c = index.put(row); // try again
}
if (checkHitSpace()) try {
final Row.Entry dummy = readHitCache.replace(row); // overwrite old entry
@ -318,6 +319,7 @@ public final class Cache implements Index, Iterable<Row.Entry> {
} catch (RowSpaceExceededException e) {
clearCache();
}
return c;
}
public final synchronized Row.Entry replace(final Row.Entry row) throws IOException, RowSpaceExceededException {

@ -143,11 +143,11 @@ public final class HandleSet implements Iterable<byte[]>, Cloneable {
for (byte[] b: aset) put(b);
}
public final synchronized void put(final byte[] key) throws RowSpaceExceededException {
public final synchronized boolean put(final byte[] key) throws RowSpaceExceededException {
assert (key != null);
final Row.Entry newentry = index.row().newEntry();
newentry.setCol(0, key);
index.put(newentry);
return index.put(newentry);
}
public final synchronized void putUnique(final byte[] key) throws RowSpaceExceededException {

@ -44,7 +44,7 @@ public interface Index extends Iterable<Row.Entry> {
public boolean has(byte[] key); // use this only if there is no get in case that has returns true
public Row.Entry get(byte[] key) throws IOException;
public Row.Entry replace(Row.Entry row) throws RowSpaceExceededException, IOException;
public void put(Row.Entry row) throws IOException, RowSpaceExceededException;
public boolean put(Row.Entry row) throws IOException, RowSpaceExceededException;
public void addUnique(Row.Entry row) throws RowSpaceExceededException, IOException; // no double-check
public ArrayList<RowCollection> removeDoubles() throws IOException, RowSpaceExceededException; // removes all elements that are double (to be used after all addUnique)
public boolean delete(byte[] key) throws IOException;

@ -158,9 +158,9 @@ public final class RAMIndex implements Index, Iterable<Row.Entry> {
return index1.replace(entry);
}
public final synchronized void put(final Row.Entry entry) throws RowSpaceExceededException {
public final synchronized boolean put(final Row.Entry entry) throws RowSpaceExceededException {
assert (entry != null);
if (entry == null) return;
if (entry == null) return false;
finishInitialization();
// if the new entry is within the initialization part, just overwrite it
assert index0.isSorted();
@ -168,9 +168,10 @@ public final class RAMIndex implements Index, Iterable<Row.Entry> {
if (index0.has(key)) {
// replace the entry
index0.put(entry);
return true;
}
// else place it in the index1
index1.put(entry);
return index1.put(entry);
}
public final synchronized void addUnique(final Row.Entry entry) throws RowSpaceExceededException {

@ -172,10 +172,10 @@ public final class RAMIndexCluster implements Index, Iterable<Row.Entry>, Clonea
}
}
public final void put(final Entry row) throws RowSpaceExceededException {
public final boolean put(final Entry row) throws RowSpaceExceededException {
final int i = indexFor(row);
if (i < 0) return;
accessArray(i).put(row);
if (i < 0) return false;
return accessArray(i).put(row);
}
public final boolean delete(final byte[] key) {

@ -113,7 +113,7 @@ public class RowSet extends RowCollection implements Index, Iterable<Row.Entry>
return get(index, true);
}
public final synchronized void put(final Row.Entry entry) throws RowSpaceExceededException {
public final synchronized boolean put(final Row.Entry entry) throws RowSpaceExceededException {
assert (entry != null);
assert (entry.getPrimaryKeyBytes() != null);
// when reaching a specific amount of un-sorted entries, re-sort all
@ -124,10 +124,12 @@ public class RowSet extends RowCollection implements Index, Iterable<Row.Entry>
final int index = find(entry.bytes(), 0);
if (index < 0) {
super.addUnique(entry);
return false;
} else {
final int sb = this.sortBound; // save the sortBound, because it is not altered (we replace at the same place)
set(index, entry); // this may alter the sortBound, which we will revert in the next step
this.sortBound = sb; // revert a sortBound altering
return true;
}
}

@ -524,8 +524,8 @@ public class ReferenceContainer<ReferenceType extends Reference> extends RowSet
while (i.hasNext()) {
iEntry = i.next();
if ((excludeContainer != null) && (excludeContainer.getReference(iEntry.metadataHash()) != null)) continue; // do not include urls that are in excludeContainer
dom = new String(iEntry.metadataHash()).substring(6);
mod = new String(iEntry.metadataHash()).substring(0, 6);
dom = new String(iEntry.metadataHash(), 6, 6);
mod = new String(iEntry.metadataHash(), 0, 6);
if ((paths = doms.get(dom)) == null) {
doms.put(dom, mod);
} else {

@ -216,7 +216,7 @@ public class SQLTable implements Index, Iterable<Row.Entry> {
}
}
public void put(final Row.Entry row) throws IOException {
public boolean put(final Row.Entry row) throws IOException {
try {
final String sqlQuery = "INSERT INTO test (" +
"hash, " +
@ -231,7 +231,7 @@ public class SQLTable implements Index, Iterable<Row.Entry> {
sqlStatement.close();
return;
return false;
} catch (final Exception e) {
throw new IOException(e.getMessage());
}

@ -354,15 +354,15 @@ public class SplitTable implements Index, Iterable<Row.Entry> {
return null;
}
public void put(final Row.Entry row) throws IOException, RowSpaceExceededException {
public boolean put(final Row.Entry row) throws IOException, RowSpaceExceededException {
assert row.objectsize() <= this.rowdef.objectsize;
Index keeper = keeperOf(row.getColBytes(0, true));
if (keeper != null) {keeper.put(row); return;}
if (keeper != null) return keeper.put(row);
synchronized (this.tables) {
assert this.current == null || this.tables.get(this.current) != null : "this.current = " + this.current;
keeper = (this.current == null) ? newTable() : checkTable(this.tables.get(this.current));
}
keeper.put(row);
return keeper.put(row);
}

@ -543,12 +543,12 @@ public class Table implements Index, Iterable<Row.Entry> {
return rowdef.newEntry(b);
}
public synchronized void put(final Entry row) throws IOException, RowSpaceExceededException {
public synchronized boolean put(final Entry row) throws IOException, RowSpaceExceededException {
assert file == null || file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size() + ", file = " + this.filename();
assert table == null || table.size() == index.size() : "table.size() = " + table.size() + ", index.size() = " + index.size() + ", file = " + this.filename();
assert row != null;
assert row.bytes() != null;
if (file == null || row == null || row.bytes() == null) return;
if (file == null || row == null || row.bytes() == null) return false;
final int i = (int) index.get(row.getPrimaryKeyBytes());
if (i == -1) {
try {
@ -558,7 +558,7 @@ public class Table implements Index, Iterable<Row.Entry> {
this.table = null;
addUnique(row);
}
return;
return false;
}
if (table == null) {
@ -576,6 +576,7 @@ public class Table implements Index, Iterable<Row.Entry> {
}
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert table == null || table.size() == index.size() : "table.size() = " + table.size() + ", index.size() = " + index.size();
return true;
}
public Entry put(final Entry row, final Date entryDate) throws IOException, RowSpaceExceededException {

@ -231,38 +231,37 @@ public final class ScoreCluster<E> {
gcount += newScore;
}
public synchronized void addScore(final E obj, final int incrementScore) {
public void addScore(final E obj, final int incrementScore) {
if (obj == null) return;
//System.out.println("setScore " + obj.getClass().getName());
Long usk = refkeyDB.remove(obj); // get unique score key, old entry is not needed any more
if (usk == null) {
// set new value
if (incrementScore < 0) throw new kelondroOutOfLimitsException(incrementScore);
usk = Long.valueOf(scoreKey(encnt++, incrementScore));
// put new value into cluster
refkeyDB.put(obj, usk);
keyrefDB.put(usk, obj);
} else {
// delete old entry
keyrefDB.remove(usk);
synchronized (this) {
Long usk = refkeyDB.remove(obj); // get unique score key, old entry is not needed any more
// get previous handle and score
final long c = usk.longValue();
final int oldScore = (int) ((c & 0xFFFFFFFF00000000L) >> 32);
final int oldHandle = (int) (c & 0xFFFFFFFFL);
// set new value
final int newValue = oldScore + incrementScore;
if (newValue < 0) throw new kelondroOutOfLimitsException(newValue);
usk = Long.valueOf(scoreKey(oldHandle, newValue)); // generates an unique key for a specific score
refkeyDB.put(obj, usk);
keyrefDB.put(usk, obj);
}
if (usk == null) {
// set new value
if (incrementScore < 0) throw new kelondroOutOfLimitsException(incrementScore);
usk = Long.valueOf(scoreKey(encnt++, incrementScore));
// put new value into cluster
refkeyDB.put(obj, usk);
keyrefDB.put(usk, obj);
} else {
// delete old entry
keyrefDB.remove(usk);
// get previous handle and score
final long c = usk.longValue();
final int oldScore = (int) ((c & 0xFFFFFFFF00000000L) >> 32);
final int oldHandle = (int) (c & 0xFFFFFFFFL);
// set new value
final int newValue = oldScore + incrementScore;
if (newValue < 0) throw new kelondroOutOfLimitsException(newValue);
usk = Long.valueOf(scoreKey(oldHandle, newValue)); // generates an unique key for a specific score
refkeyDB.put(obj, usk);
keyrefDB.put(usk, obj);
}
}
// increase overall counter
gcount += incrementScore;
}

Loading…
Cancel
Save