performance hacks

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7955 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent 23e81b28b2
commit 734059d33e

@ -1,4 +1,4 @@
// transferRWI.java // transferRWI.java
// ----------------------- // -----------------------
// part of the AnomicHTTPD caching proxy // part of the AnomicHTTPD caching proxy
// (C) by Michael Peter Christen; mc@yacy.net // (C) by Michael Peter Christen; mc@yacy.net
@ -35,12 +35,13 @@ import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.document.UTF8; import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceRow; import net.yacy.kelondro.data.word.WordReferenceRow;
import net.yacy.kelondro.index.HandleSet; import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.rwi.IndexCell;
import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist;
import de.anomic.search.Segments; import de.anomic.search.Segments;
import de.anomic.search.Switchboard; import de.anomic.search.Switchboard;
import de.anomic.search.SwitchboardConstants; import de.anomic.search.SwitchboardConstants;
@ -56,7 +57,7 @@ import de.anomic.yacy.dht.FlatWordPartitionScheme;
public final class transferRWI { public final class transferRWI {
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) throws InterruptedException { public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) throws InterruptedException {
// return variable that accumulates replacements // return variable that accumulates replacements
final Switchboard sb = (Switchboard) env; final Switchboard sb = (Switchboard) env;
final serverObjects prop = new serverObjects(); final serverObjects prop = new serverObjects();
@ -77,7 +78,7 @@ public final class transferRWI {
logWarning(contentType, "missing entryc"); logWarning(contentType, "missing entryc");
return prop; return prop;
} }
// request values // request values
final String iam = post.get("iam", ""); // seed hash of requester final String iam = post.get("iam", ""); // seed hash of requester
final String youare = post.get("youare", ""); // seed hash of the target peer, needed for network stability final String youare = post.get("youare", ""); // seed hash of the target peer, needed for network stability
@ -89,13 +90,13 @@ public final class transferRWI {
final boolean blockBlacklist = sb.getConfigBool("indexReceiveBlockBlacklist", false); final boolean blockBlacklist = sb.getConfigBool("indexReceiveBlockBlacklist", false);
final long cachelimit = sb.getConfigLong(SwitchboardConstants.WORDCACHE_MAX_COUNT, 100000); final long cachelimit = sb.getConfigLong(SwitchboardConstants.WORDCACHE_MAX_COUNT, 100000);
final yacySeed otherPeer = sb.peers.get(iam); final yacySeed otherPeer = sb.peers.get(iam);
final String otherPeerName = iam + ":" + ((otherPeer == null) ? "NULL" : (otherPeer.getName() + "/" + otherPeer.getVersion())); final String otherPeerName = iam + ":" + ((otherPeer == null) ? "NULL" : (otherPeer.getName() + "/" + otherPeer.getVersion()));
// response values // response values
int pause = 0; int pause = 0;
String result = "ok"; String result = "ok";
final StringBuilder unknownURLs = new StringBuilder(6000); final StringBuilder unknownURLs = new StringBuilder(6000);
if ((youare == null) || (!youare.equals(sb.peers.mySeed().hash))) { if ((youare == null) || (!youare.equals(sb.peers.mySeed().hash))) {
sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Wrong target. Wanted peer=" + youare + ", iam=" + sb.peers.mySeed().hash); sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Wrong target. Wanted peer=" + youare + ", iam=" + sb.peers.mySeed().hash);
result = "wrong_target"; result = "wrong_target";
@ -134,7 +135,7 @@ public final class transferRWI {
// decode request // decode request
System.out.println("STRINGS " + UTF8.String(indexes)); System.out.println("STRINGS " + UTF8.String(indexes));
Iterator<String> it = FileUtils.strings(indexes); final Iterator<String> it = FileUtils.strings(indexes);
// free memory // free memory
indexes = null; indexes = null;
@ -151,10 +152,11 @@ public final class transferRWI {
int received = 0; int received = 0;
int blocked = 0; int blocked = 0;
int receivedURL = 0; int receivedURL = 0;
final IndexCell<WordReference> cell = sb.indexSegments.termIndex(Segments.Process.DHTIN);
while (it.hasNext()) { while (it.hasNext()) {
serverCore.checkInterruption(); serverCore.checkInterruption();
estring = it.next(); estring = it.next();
// check if RWI entry is well-formed // check if RWI entry is well-formed
p = estring.indexOf('{'); p = estring.indexOf('{');
if ((p < 0) || (estring.indexOf("x=") < 0) || !(estring.indexOf("[B@") < 0)) { if ((p < 0) || (estring.indexOf("x=") < 0) || !(estring.indexOf("[B@") < 0)) {
@ -165,14 +167,14 @@ public final class transferRWI {
wordhashes.add(wordHash); wordhashes.add(wordHash);
iEntry = new WordReferenceRow(estring.substring(p)); iEntry = new WordReferenceRow(estring.substring(p));
urlHash = iEntry.urlhash(); urlHash = iEntry.urlhash();
// block blacklisted entries // block blacklisted entries
if ((blockBlacklist) && (Switchboard.urlBlacklist.hashInBlacklistedCache(Blacklist.BLACKLIST_DHT, urlHash))) { if ((blockBlacklist) && (Switchboard.urlBlacklist.hashInBlacklistedCache(Blacklist.BLACKLIST_DHT, urlHash))) {
if (yacyCore.log.isFine()) yacyCore.log.logFine("transferRWI: blocked blacklisted URLHash '" + ASCII.String(urlHash) + "' from peer " + otherPeerName); if (yacyCore.log.isFine()) yacyCore.log.logFine("transferRWI: blocked blacklisted URLHash '" + ASCII.String(urlHash) + "' from peer " + otherPeerName);
blocked++; blocked++;
continue; continue;
} }
// check if the entry is in our network domain // check if the entry is in our network domain
final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomainHash(urlHash); final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomainHash(urlHash);
if (urlRejectReason != null) { if (urlRejectReason != null) {
@ -181,11 +183,11 @@ public final class transferRWI {
blocked++; blocked++;
continue; continue;
} }
// learn entry // learn entry
try { try {
sb.indexSegments.termIndex(Segments.Process.DHTIN).add(wordHash.getBytes(), iEntry); cell.add(wordHash.getBytes(), iEntry);
} catch (Exception e) { } catch (final Exception e) {
Log.logException(e); Log.logException(e);
} }
serverCore.checkInterruption(); serverCore.checkInterruption();
@ -208,7 +210,7 @@ public final class transferRWI {
sb.peers.mySeed().incRI(received); sb.peers.mySeed().incRI(received);
// finally compose the unknownURL hash list // finally compose the unknownURL hash list
Iterator<byte[]> bit = unknownURL.iterator(); final Iterator<byte[]> bit = unknownURL.iterator();
unknownURLs.ensureCapacity(unknownURL.size() * 25); unknownURLs.ensureCapacity(unknownURL.size() * 25);
while (bit.hasNext()) { while (bit.hasNext()) {
unknownURLs.append(",").append(UTF8.String(bit.next())); unknownURLs.append(",").append(UTF8.String(bit.next()));
@ -217,14 +219,14 @@ public final class transferRWI {
if (wordhashes.isEmpty() || received == 0) { if (wordhashes.isEmpty() || received == 0) {
sb.getLog().logInfo("Received 0 RWIs from " + otherPeerName + ", processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, requesting " + unknownURL.size() + " URLs, blocked " + blocked + " RWIs"); sb.getLog().logInfo("Received 0 RWIs from " + otherPeerName + ", processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, requesting " + unknownURL.size() + " URLs, blocked " + blocked + " RWIs");
} else { } else {
String firstHash = wordhashes.get(0); final String firstHash = wordhashes.get(0);
String lastHash = wordhashes.get(wordhashes.size() - 1); final String lastHash = wordhashes.get(wordhashes.size() - 1);
final long avdist = (FlatWordPartitionScheme.std.dhtDistance(firstHash.getBytes(), null, sb.peers.mySeed()) + FlatWordPartitionScheme.std.dhtDistance(lastHash.getBytes(), null, sb.peers.mySeed())) / 2; final long avdist = (FlatWordPartitionScheme.std.dhtDistance(firstHash.getBytes(), null, sb.peers.mySeed()) + FlatWordPartitionScheme.std.dhtDistance(lastHash.getBytes(), null, sb.peers.mySeed())) / 2;
sb.getLog().logInfo("Received " + received + " RWIs, " + wordc + " Words [" + firstHash + " .. " + lastHash + "]/" + avdist + ", blocked " + blocked + ", requesting " + unknownURL.size() + "/" + receivedURL + " URLs from " + otherPeerName); sb.getLog().logInfo("Received " + received + " RWIs, " + wordc + " Words [" + firstHash + " .. " + lastHash + "], processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, " + avdist + ", blocked " + blocked + ", requesting " + unknownURL.size() + "/" + receivedURL + " URLs from " + otherPeerName);
yacyChannel.channels(yacyChannel.DHTRECEIVE).addMessage(new RSSMessage("Received " + received + " RWIs, " + wordc + " Words [" + firstHash + " .. " + lastHash + "]/" + avdist + ", blocked " + blocked + ", requesting " + unknownURL.size() + "/" + receivedURL + " URLs from " + otherPeerName, "", otherPeer.hash)); yacyChannel.channels(yacyChannel.DHTRECEIVE).addMessage(new RSSMessage("Received " + received + " RWIs, " + wordc + " Words [" + firstHash + " .. " + lastHash + "], processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, " + avdist + ", blocked " + blocked + ", requesting " + unknownURL.size() + "/" + receivedURL + " URLs from " + otherPeerName, "", otherPeer.hash));
} }
result = "ok"; result = "ok";
pause = (int) (sb.indexSegments.termIndex(Segments.Process.DHTIN).getBufferSize() * 20000 / sb.getConfigLong(SwitchboardConstants.WORDCACHE_MAX_COUNT, 100000)); // estimation of necessary pause time pause = (int) (sb.indexSegments.termIndex(Segments.Process.DHTIN).getBufferSize() * 20000 / sb.getConfigLong(SwitchboardConstants.WORDCACHE_MAX_COUNT, 100000)); // estimation of necessary pause time
} }

@ -62,10 +62,12 @@ public class serverAccessTracker {
/* /*
* remove all entries from the access tracker where the age of the last access is greater than the given timeout * remove all entries from the access tracker where the age of the last access is greater than the given timeout
*/ */
private synchronized void cleanupAccessTracker() { private void cleanupAccessTracker() {
if (System.currentTimeMillis() - this.lastCleanup < cleanupCycle) return; // avoid too many scans of the queues synchronized (this) {
this.lastCleanup = System.currentTimeMillis(); if (System.currentTimeMillis() - this.lastCleanup < cleanupCycle) return; // avoid too many scans of the queues
this.lastCleanup = System.currentTimeMillis();
}
// clear entries which had no entry for the maxTrackingTime time // clear entries which had no entry for the maxTrackingTime time
final Iterator<Map.Entry<String, ConcurrentLinkedQueue<Track>>> i = this.accessTracker.entrySet().iterator(); final Iterator<Map.Entry<String, ConcurrentLinkedQueue<Track>>> i = this.accessTracker.entrySet().iterator();

@ -175,7 +175,7 @@ public class yacySeed implements Cloneable, Comparable<yacySeed>, Comparator<yac
/** a set of identity founding values, eg. IP, name of the peer, YaCy-version, ...*/ /** a set of identity founding values, eg. IP, name of the peer, YaCy-version, ...*/
private final ConcurrentMap<String, String> dna; private final ConcurrentMap<String, String> dna;
private String alternativeIP = null; private String alternativeIP = null;
private final long birthdate; // keep this value in ram since it is often used and may cause lockings in concurrent situations. private long birthdate; // keep this value in ram since it is often used and may cause lockings in concurrent situations.
// use our own formatter to prevent concurrency locks with other processes // use our own formatter to prevent concurrency locks with other processes
private final static GenericFormatter my_SHORT_SECOND_FORMATTER = new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second); private final static GenericFormatter my_SHORT_SECOND_FORMATTER = new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second);
@ -188,13 +188,7 @@ public class yacySeed implements Cloneable, Comparable<yacySeed>, Comparator<yac
final String flags = this.dna.get(yacySeed.FLAGS); final String flags = this.dna.get(yacySeed.FLAGS);
if ((flags == null) || (flags.length() != 4)) { this.dna.put(yacySeed.FLAGS, yacySeed.FLAGSZERO); } if ((flags == null) || (flags.length() != 4)) { this.dna.put(yacySeed.FLAGS, yacySeed.FLAGSZERO); }
this.dna.put(yacySeed.NAME, checkPeerName(get(yacySeed.NAME, "&empty;"))); this.dna.put(yacySeed.NAME, checkPeerName(get(yacySeed.NAME, "&empty;")));
long b; this.birthdate = -1; // this means 'not yet parsed', parse that later when it is used
try {
b = my_SHORT_SECOND_FORMATTER.parse(get(yacySeed.BDATE, "20040101000000")).getTime();
} catch (final ParseException e) {
b = System.currentTimeMillis();
}
this.birthdate = b;
} }
private yacySeed(final String theHash) { private yacySeed(final String theHash) {
@ -564,9 +558,21 @@ public class yacySeed implements Cloneable, Comparable<yacySeed>, Comparator<yac
return d > milliseconds; return d > milliseconds;
} }
/**
 * Returns the birth date of this seed as milliseconds, computing it on first use.
 * <p>
 * The value is cached in {@code this.birthdate}; a cached value is only trusted when it
 * is greater than zero, so a non-positive value is treated as "not yet parsed" and the
 * date string is parsed again on the next call.
 * <p>
 * The date string is read with {@code get(yacySeed.BDATE, "20040101000000")}
 * (presumably the second argument is the fallback when no BDATE entry exists -- verify
 * against {@code get}). If that string cannot be parsed, the current system time is
 * used instead.
 *
 * @return the cached or freshly parsed birth date in milliseconds
 */
public final long getBirthdate() {
    if (this.birthdate <= 0) {
        // not parsed yet: derive the value from the stored date string
        long parsed;
        try {
            parsed = my_SHORT_SECOND_FORMATTER.parse(get(yacySeed.BDATE, "20040101000000")).getTime();
        } catch (final ParseException e) {
            // unparseable date string: fall back to "now"
            parsed = System.currentTimeMillis();
        }
        this.birthdate = parsed;
    }
    return this.birthdate;
}
/** @return the age of the seed in number of days */ /** @return the age of the seed in number of days */
public final int getAge() { public final int getAge() {
return (int) Math.abs((System.currentTimeMillis() - this.birthdate) / 1000 / 60 / 60 / 24); return (int) Math.abs((System.currentTimeMillis() - getBirthdate()) / 1000 / 60 / 60 / 24);
} }
public void setPeerTags(final Set<String> keys) { public void setPeerTags(final Set<String> keys) {

File diff suppressed because it is too large Load Diff

@ -28,6 +28,8 @@ package net.yacy.kelondro.data.word;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import net.yacy.cora.document.ASCII; import net.yacy.cora.document.ASCII;
import net.yacy.kelondro.index.Column; import net.yacy.kelondro.index.Column;
@ -204,6 +206,67 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
this.entry.setCol(col_posofphrase, word.numOfPhrase); this.entry.setCol(col_posofphrase, word.numOfPhrase);
} }
/**
 * Parses external (string) representations of word references on a pool of worker threads.
 * <p>
 * Strings handed to {@link #put(String)} are queued, turned into {@link WordReferenceRow}
 * objects by the workers and made available through {@link #take()}. With more than one
 * worker the rows may come out in a different order than the strings went in.
 * <p>
 * Usage: call {@link #put(String)} for every string, then {@link #terminate()} once; a
 * consumer calls {@link #take()} until it receives the enclosing class's {@code poison}
 * row. {@link #terminate()} publishes exactly one {@code poison} row.
 */
public static class ExternalParser {

    /**
     * End-of-input marker for the workers. It is recognised by identity (not equals), so
     * it is deliberately a fresh String instance: an interned "_" literal passed to
     * {@link #put(String)} can then never be mistaken for the marker.
     */
    private static final String PIN = new String("_");

    private final BlockingQueue<String> in;
    private final BlockingQueue<WordReferenceRow> out;
    final Thread[] worker;

    /**
     * Creates the parser and immediately starts its worker threads.
     *
     * @param concurrency number of worker threads to start; must be at least 1
     * @throws IllegalArgumentException if {@code concurrency} is smaller than 1
     */
    public ExternalParser(final int concurrency) {
        if (concurrency < 1) {
            throw new IllegalArgumentException("concurrency must be at least 1, got " + concurrency);
        }
        this.in = new LinkedBlockingQueue<String>();
        this.out = new LinkedBlockingQueue<WordReferenceRow>();
        // the array must exist before the workers are stored in it
        this.worker = new Thread[concurrency];
        for (int i = 0; i < concurrency; i++) {
            this.worker[i] = new Thread() {
                @Override
                public void run() {
                    String s;
                    try {
                        // identity comparison is intended: PIN is a unique marker object
                        while ((s = ExternalParser.this.in.take()) != PIN) {
                            ExternalParser.this.out.put(new WordReferenceRow(s));
                        }
                    } catch (final InterruptedException e) {
                        // stop working, but keep the interrupt status visible
                        Thread.currentThread().interrupt();
                    }
                }
            };
            this.worker[i].start();
        }
    }

    /**
     * Creates the parser with one worker thread per available processor.
     */
    public ExternalParser() {
        this(Runtime.getRuntime().availableProcessors());
    }

    /**
     * Queues one external string for parsing.
     * <p>
     * If the calling thread is interrupted the string is not queued and the interrupt
     * status of the thread is restored.
     *
     * @param s the external representation to parse
     */
    public void put(final String s) {
        try {
            this.in.put(s);
        } catch (final InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Signals end of input, waits until all workers have finished and then publishes
     * the {@code poison} row on the output queue.
     * <p>
     * Both queues are unbounded, so {@code offer} always succeeds and, unlike
     * {@code put}, cannot be aborted by an interrupt. This guarantees that every worker
     * receives its end marker and that the poison row is always published. An interrupt
     * that arrives while waiting is remembered and restored before returning.
     */
    public void terminate() {
        // one end marker per worker
        for (int i = 0; i < this.worker.length; i++) {
            this.in.offer(PIN);
        }
        boolean interrupted = false;
        for (final Thread w : this.worker) {
            // keep waiting after an interrupt so the poison row is published after the last parsed row
            while (w.isAlive()) {
                try {
                    w.join();
                } catch (final InterruptedException e) {
                    interrupted = true;
                }
            }
        }
        this.out.offer(poison);
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Retrieves the next parsed row, waiting if none is available yet.
     *
     * @return the next parsed row, or the {@code poison} row if it is next in the queue
     *         or if the calling thread was interrupted while waiting (the interrupt
     *         status is restored in that case)
     */
    public WordReferenceRow take() {
        try {
            return this.out.take();
        } catch (final InterruptedException e) {
            Thread.currentThread().interrupt();
            return poison;
        }
    }
}
public WordReferenceRow(final String external) { public WordReferenceRow(final String external) {
this.entry = urlEntryRow.newEntry(external, true); this.entry = urlEntryRow.newEntry(external, true);
} }

@ -63,8 +63,9 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
public char type; public char type;
public int hitcount, llocal, lother, phrasesintext, public int hitcount, llocal, lother, phrasesintext,
posinphrase, posofphrase, posinphrase, posofphrase,
urlcomps, urllength, virtualAge, urlcomps, urllength,
wordsintext, wordsintitle; wordsintext, wordsintitle;
private int virtualAge;
private final ConcurrentLinkedQueue<Integer> positions; private final ConcurrentLinkedQueue<Integer> positions;
public double termFrequency; public double termFrequency;
@ -89,7 +90,6 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
final double termfrequency final double termfrequency
) { ) {
if (language == null || language.length != 2) language = default_language; if (language == null || language.length != 2) language = default_language;
final int mddlm = MicroDate.microDateDays(lastmodified);
//final int mddct = MicroDate.microDateDays(updatetime); //final int mddct = MicroDate.microDateDays(updatetime);
this.flags = flags; this.flags = flags;
//this.freshUntil = Math.max(0, mddlm + (mddct - mddlm) * 2); //this.freshUntil = Math.max(0, mddlm + (mddct - mddlm) * 2);
@ -107,7 +107,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this.posofphrase = posofphrase; this.posofphrase = posofphrase;
this.urlcomps = urlComps; this.urlcomps = urlComps;
this.urllength = urlLength; this.urllength = urlLength;
this.virtualAge = mddlm; this.virtualAge = -1; // compute that later
this.wordsintext = wordcount; this.wordsintext = wordcount;
this.wordsintitle = titleLength; this.wordsintitle = titleLength;
this.termFrequency = termfrequency; this.termFrequency = termfrequency;
@ -288,6 +288,8 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
} }
public int virtualAge() { public int virtualAge() {
if (this.virtualAge > 0) return this.virtualAge;
this.virtualAge = MicroDate.microDateDays(this.lastModified);
return this.virtualAge; return this.virtualAge;
} }
@ -312,7 +314,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
if (this.hitcount > (v = other.hitcount)) this.hitcount = v; if (this.hitcount > (v = other.hitcount)) this.hitcount = v;
if (this.llocal > (v = other.llocal)) this.llocal = v; if (this.llocal > (v = other.llocal)) this.llocal = v;
if (this.lother > (v = other.lother)) this.lother = v; if (this.lother > (v = other.lother)) this.lother = v;
if (this.virtualAge > (v = other.virtualAge)) this.virtualAge = v; if (virtualAge() > (v = other.virtualAge())) this.virtualAge = v;
if (this.wordsintext > (v = other.wordsintext)) this.wordsintext = v; if (this.wordsintext > (v = other.wordsintext)) this.wordsintext = v;
if (this.phrasesintext > (v = other.phrasesintext)) this.phrasesintext = v; if (this.phrasesintext > (v = other.phrasesintext)) this.phrasesintext = v;
if (other.positions != null) a(this.positions, min(this.positions, other.positions)); if (other.positions != null) a(this.positions, min(this.positions, other.positions));
@ -334,7 +336,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
if (this.hitcount < (v = other.hitcount)) this.hitcount = v; if (this.hitcount < (v = other.hitcount)) this.hitcount = v;
if (this.llocal < (v = other.llocal)) this.llocal = v; if (this.llocal < (v = other.llocal)) this.llocal = v;
if (this.lother < (v = other.lother)) this.lother = v; if (this.lother < (v = other.lother)) this.lother = v;
if (this.virtualAge < (v = other.virtualAge)) this.virtualAge = v; if (virtualAge() < (v = other.virtualAge())) this.virtualAge = v;
if (this.wordsintext < (v = other.wordsintext)) this.wordsintext = v; if (this.wordsintext < (v = other.wordsintext)) this.wordsintext = v;
if (this.phrasesintext < (v = other.phrasesintext)) this.phrasesintext = v; if (this.phrasesintext < (v = other.phrasesintext)) this.phrasesintext = v;
if (other.positions != null) a(this.positions, max(this.positions, other.positions)); if (other.positions != null) a(this.positions, max(this.positions, other.positions));

Loading…
Cancel
Save