performance hacks

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7955 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent 23e81b28b2
commit 734059d33e

@ -1,4 +1,4 @@
// transferRWI.java
// transferRWI.java
// -----------------------
// part of the AnomicHTTPD caching proxy
// (C) by Michael Peter Christen; mc@yacy.net
@ -35,12 +35,13 @@ import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceRow;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.rwi.IndexCell;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist;
import de.anomic.search.Segments;
import de.anomic.search.Switchboard;
import de.anomic.search.SwitchboardConstants;
@ -56,7 +57,7 @@ import de.anomic.yacy.dht.FlatWordPartitionScheme;
public final class transferRWI {
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) throws InterruptedException {
// return variable that accumulates replacements
final Switchboard sb = (Switchboard) env;
final serverObjects prop = new serverObjects();
@ -77,7 +78,7 @@ public final class transferRWI {
logWarning(contentType, "missing entryc");
return prop;
}
// request values
final String iam = post.get("iam", ""); // seed hash of requester
final String youare = post.get("youare", ""); // seed hash of the target peer, needed for network stability
@ -89,13 +90,13 @@ public final class transferRWI {
final boolean blockBlacklist = sb.getConfigBool("indexReceiveBlockBlacklist", false);
final long cachelimit = sb.getConfigLong(SwitchboardConstants.WORDCACHE_MAX_COUNT, 100000);
final yacySeed otherPeer = sb.peers.get(iam);
final String otherPeerName = iam + ":" + ((otherPeer == null) ? "NULL" : (otherPeer.getName() + "/" + otherPeer.getVersion()));
final String otherPeerName = iam + ":" + ((otherPeer == null) ? "NULL" : (otherPeer.getName() + "/" + otherPeer.getVersion()));
// response values
int pause = 0;
String result = "ok";
final StringBuilder unknownURLs = new StringBuilder(6000);
if ((youare == null) || (!youare.equals(sb.peers.mySeed().hash))) {
sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Wrong target. Wanted peer=" + youare + ", iam=" + sb.peers.mySeed().hash);
result = "wrong_target";
@ -134,7 +135,7 @@ public final class transferRWI {
// decode request
System.out.println("STRINGS " + UTF8.String(indexes));
Iterator<String> it = FileUtils.strings(indexes);
final Iterator<String> it = FileUtils.strings(indexes);
// free memory
indexes = null;
@ -151,10 +152,11 @@ public final class transferRWI {
int received = 0;
int blocked = 0;
int receivedURL = 0;
final IndexCell<WordReference> cell = sb.indexSegments.termIndex(Segments.Process.DHTIN);
while (it.hasNext()) {
serverCore.checkInterruption();
estring = it.next();
// check if RWI entry is well-formed
p = estring.indexOf('{');
if ((p < 0) || (estring.indexOf("x=") < 0) || !(estring.indexOf("[B@") < 0)) {
@ -165,14 +167,14 @@ public final class transferRWI {
wordhashes.add(wordHash);
iEntry = new WordReferenceRow(estring.substring(p));
urlHash = iEntry.urlhash();
// block blacklisted entries
if ((blockBlacklist) && (Switchboard.urlBlacklist.hashInBlacklistedCache(Blacklist.BLACKLIST_DHT, urlHash))) {
if (yacyCore.log.isFine()) yacyCore.log.logFine("transferRWI: blocked blacklisted URLHash '" + ASCII.String(urlHash) + "' from peer " + otherPeerName);
blocked++;
continue;
}
// check if the entry is in our network domain
final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomainHash(urlHash);
if (urlRejectReason != null) {
@ -181,11 +183,11 @@ public final class transferRWI {
blocked++;
continue;
}
// learn entry
try {
sb.indexSegments.termIndex(Segments.Process.DHTIN).add(wordHash.getBytes(), iEntry);
} catch (Exception e) {
cell.add(wordHash.getBytes(), iEntry);
} catch (final Exception e) {
Log.logException(e);
}
serverCore.checkInterruption();
@ -208,7 +210,7 @@ public final class transferRWI {
sb.peers.mySeed().incRI(received);
// finally compose the unknownURL hash list
Iterator<byte[]> bit = unknownURL.iterator();
final Iterator<byte[]> bit = unknownURL.iterator();
unknownURLs.ensureCapacity(unknownURL.size() * 25);
while (bit.hasNext()) {
unknownURLs.append(",").append(UTF8.String(bit.next()));
@ -217,14 +219,14 @@ public final class transferRWI {
if (wordhashes.isEmpty() || received == 0) {
sb.getLog().logInfo("Received 0 RWIs from " + otherPeerName + ", processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, requesting " + unknownURL.size() + " URLs, blocked " + blocked + " RWIs");
} else {
String firstHash = wordhashes.get(0);
String lastHash = wordhashes.get(wordhashes.size() - 1);
final String firstHash = wordhashes.get(0);
final String lastHash = wordhashes.get(wordhashes.size() - 1);
final long avdist = (FlatWordPartitionScheme.std.dhtDistance(firstHash.getBytes(), null, sb.peers.mySeed()) + FlatWordPartitionScheme.std.dhtDistance(lastHash.getBytes(), null, sb.peers.mySeed())) / 2;
sb.getLog().logInfo("Received " + received + " RWIs, " + wordc + " Words [" + firstHash + " .. " + lastHash + "]/" + avdist + ", blocked " + blocked + ", requesting " + unknownURL.size() + "/" + receivedURL + " URLs from " + otherPeerName);
yacyChannel.channels(yacyChannel.DHTRECEIVE).addMessage(new RSSMessage("Received " + received + " RWIs, " + wordc + " Words [" + firstHash + " .. " + lastHash + "]/" + avdist + ", blocked " + blocked + ", requesting " + unknownURL.size() + "/" + receivedURL + " URLs from " + otherPeerName, "", otherPeer.hash));
sb.getLog().logInfo("Received " + received + " RWIs, " + wordc + " Words [" + firstHash + " .. " + lastHash + "], processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, " + avdist + ", blocked " + blocked + ", requesting " + unknownURL.size() + "/" + receivedURL + " URLs from " + otherPeerName);
yacyChannel.channels(yacyChannel.DHTRECEIVE).addMessage(new RSSMessage("Received " + received + " RWIs, " + wordc + " Words [" + firstHash + " .. " + lastHash + "], processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, " + avdist + ", blocked " + blocked + ", requesting " + unknownURL.size() + "/" + receivedURL + " URLs from " + otherPeerName, "", otherPeer.hash));
}
result = "ok";
pause = (int) (sb.indexSegments.termIndex(Segments.Process.DHTIN).getBufferSize() * 20000 / sb.getConfigLong(SwitchboardConstants.WORDCACHE_MAX_COUNT, 100000)); // estimation of necessary pause time
}

@ -62,10 +62,12 @@ public class serverAccessTracker {
/*
* remove all entries from the access tracker where the age of the last access is greater than the given timeout
*/
private synchronized void cleanupAccessTracker() {
private void cleanupAccessTracker() {
if (System.currentTimeMillis() - this.lastCleanup < cleanupCycle) return; // avoid too many scans of the queues
this.lastCleanup = System.currentTimeMillis();
synchronized (this) {
if (System.currentTimeMillis() - this.lastCleanup < cleanupCycle) return; // avoid too many scans of the queues
this.lastCleanup = System.currentTimeMillis();
}
// clear entries which had no entry for the maxTrackingTime time
final Iterator<Map.Entry<String, ConcurrentLinkedQueue<Track>>> i = this.accessTracker.entrySet().iterator();

@ -175,7 +175,7 @@ public class yacySeed implements Cloneable, Comparable<yacySeed>, Comparator<yac
/** a set of identity founding values, eg. IP, name of the peer, YaCy-version, ...*/
private final ConcurrentMap<String, String> dna;
private String alternativeIP = null;
private final long birthdate; // keep this value in ram since it is often used and may cause lockings in concurrent situations.
private long birthdate; // keep this value in ram since it is often used and may cause lockings in concurrent situations.
// use our own formatter to prevent concurrency locks with other processes
private final static GenericFormatter my_SHORT_SECOND_FORMATTER = new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second);
@ -188,13 +188,7 @@ public class yacySeed implements Cloneable, Comparable<yacySeed>, Comparator<yac
final String flags = this.dna.get(yacySeed.FLAGS);
if ((flags == null) || (flags.length() != 4)) { this.dna.put(yacySeed.FLAGS, yacySeed.FLAGSZERO); }
this.dna.put(yacySeed.NAME, checkPeerName(get(yacySeed.NAME, "&empty;")));
long b;
try {
b = my_SHORT_SECOND_FORMATTER.parse(get(yacySeed.BDATE, "20040101000000")).getTime();
} catch (final ParseException e) {
b = System.currentTimeMillis();
}
this.birthdate = b;
this.birthdate = -1; // this means 'not yet parsed', parse that later when it is used
}
private yacySeed(final String theHash) {
@ -564,9 +558,21 @@ public class yacySeed implements Cloneable, Comparable<yacySeed>, Comparator<yac
return d > milliseconds;
}
/**
 * Returns the birthdate of this seed in milliseconds, parsing it on demand.
 * A positive cached value is returned directly. Otherwise the BDATE
 * attribute (default "20040101000000") is parsed with the seed's own
 * formatter; if parsing fails, the current time is used. The result is
 * stored in the birthdate field before it is returned.
 *
 * @return the birthdate as a time in milliseconds
 */
public final long getBirthdate() {
if (this.birthdate <= 0) {
// not parsed yet: do the parsing now and remember the outcome
long parsed;
try {
parsed = my_SHORT_SECOND_FORMATTER.parse(get(yacySeed.BDATE, "20040101000000")).getTime();
} catch (final ParseException e) {
parsed = System.currentTimeMillis();
}
this.birthdate = parsed;
}
return this.birthdate;
}
/** @return the age of the seed in number of days */
public final int getAge() {
return (int) Math.abs((System.currentTimeMillis() - this.birthdate) / 1000 / 60 / 60 / 24);
return (int) Math.abs((System.currentTimeMillis() - getBirthdate()) / 1000 / 60 / 60 / 24);
}
public void setPeerTags(final Set<String> keys) {

File diff suppressed because it is too large Load Diff

@ -28,6 +28,8 @@ package net.yacy.kelondro.data.word;
import java.util.ArrayList;
import java.util.Collection;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import net.yacy.cora.document.ASCII;
import net.yacy.kelondro.index.Column;
@ -204,6 +206,67 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
this.entry.setCol(col_posofphrase, word.numOfPhrase);
}
/**
 * Parses external string representations into {@link WordReferenceRow}
 * objects using several worker threads.
 * <p>
 * Usage: feed strings with {@link #put(String)}, call {@link #terminate()}
 * once all strings are submitted, and read results with {@link #take()}
 * until the {@code poison} row is returned. Because several workers move
 * entries from the input to the output queue independently, results are
 * not guaranteed to arrive in submission order.
 */
public static class ExternalParser {
// End-of-input marker for the workers. It is compared by identity, so it
// must be a distinct object: a plain literal would be interned and could
// be identical to an interned "_" supplied as regular input.
private static final String PIN = new String("_");
private final BlockingQueue<String> in;
private final BlockingQueue<WordReferenceRow> out;
final Thread[] worker;
/**
 * Creates the parser and starts its worker threads.
 *
 * @param concurrency number of worker threads; values below 1 are treated as 1
 */
public ExternalParser(final int concurrency) {
this.in = new LinkedBlockingQueue<String>();
this.out = new LinkedBlockingQueue<WordReferenceRow>();
// the array must be allocated before the workers are stored in it
this.worker = new Thread[Math.max(1, concurrency)];
for (int i = 0; i < this.worker.length; i++) {
this.worker[i] = new Thread() {
@Override
public void run() {
String s;
try {
while ((s = ExternalParser.this.in.take()) != PIN) {
ExternalParser.this.out.put(new WordReferenceRow(s));
}
} catch (final InterruptedException e) {
// stop working, but keep the interrupt status visible
Thread.currentThread().interrupt();
}
}
};
this.worker[i].start();
}
}
/**
 * Creates the parser with one worker per available processor.
 */
public ExternalParser() {
this(Runtime.getRuntime().availableProcessors());
}
/**
 * Submits one external string for parsing.
 *
 * @param s the external representation of a word reference row
 */
public void put(final String s) {
// the input queue is unbounded, so offer() always succeeds and, unlike
// put(), cannot lose the entry because of an interrupt
this.in.offer(s);
}
/**
 * Signals the end of input: sends one end marker per worker, waits for
 * the workers to finish and finally appends the {@code poison} row to the
 * output so that consumers of {@link #take()} can detect the end.
 * If the calling thread is interrupted while waiting, its interrupt
 * status is restored before this method returns.
 */
public void terminate() {
for (int i = 0; i < this.worker.length; i++) {
// one marker per worker; offer() cannot be aborted by an interrupt,
// so no worker is left without its end marker
this.in.offer(PIN);
}
boolean interrupted = false;
for (final Thread w : this.worker) {
try {
if (w.isAlive()) w.join();
} catch (final InterruptedException e) {
interrupted = true;
}
}
this.out.offer(poison);
if (interrupted) Thread.currentThread().interrupt();
}
/**
 * Fetches the next parsed row, waiting until one is available.
 *
 * @return the next row, or {@code poison} when the end of the output was
 *         reached or the waiting thread was interrupted
 */
public WordReferenceRow take() {
WordReferenceRow row;
try {
row = this.out.take();
} catch (final InterruptedException e) {
Thread.currentThread().interrupt();
return poison;
}
return row;
}
}
/**
 * Creates a reference row from its external string form by handing the
 * string to {@code urlEntryRow.newEntry}.
 *
 * @param external the external (string) representation of the row
 */
public WordReferenceRow(final String external) {
// NOTE(review): the meaning of the 'true' flag is defined by newEntry, which is not visible here
this.entry = urlEntryRow.newEntry(external, true);
}

@ -63,8 +63,9 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
public char type;
public int hitcount, llocal, lother, phrasesintext,
posinphrase, posofphrase,
urlcomps, urllength, virtualAge,
urlcomps, urllength,
wordsintext, wordsintitle;
private int virtualAge;
private final ConcurrentLinkedQueue<Integer> positions;
public double termFrequency;
@ -89,7 +90,6 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
final double termfrequency
) {
if (language == null || language.length != 2) language = default_language;
final int mddlm = MicroDate.microDateDays(lastmodified);
//final int mddct = MicroDate.microDateDays(updatetime);
this.flags = flags;
//this.freshUntil = Math.max(0, mddlm + (mddct - mddlm) * 2);
@ -107,7 +107,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this.posofphrase = posofphrase;
this.urlcomps = urlComps;
this.urllength = urlLength;
this.virtualAge = mddlm;
this.virtualAge = -1; // compute that later
this.wordsintext = wordcount;
this.wordsintitle = titleLength;
this.termFrequency = termfrequency;
@ -288,6 +288,8 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
}
/**
 * Returns the virtual age, computing it on first use.
 * A positive cached value is returned as it is; otherwise the value is
 * derived from the last-modified date via MicroDate.microDateDays and
 * stored in the field.
 *
 * @return the virtual age of this reference
 */
public int virtualAge() {
if (this.virtualAge <= 0) {
// not computed yet (or not positive): derive it from lastModified
this.virtualAge = MicroDate.microDateDays(this.lastModified);
}
return this.virtualAge;
}
@ -312,7 +314,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
if (this.hitcount > (v = other.hitcount)) this.hitcount = v;
if (this.llocal > (v = other.llocal)) this.llocal = v;
if (this.lother > (v = other.lother)) this.lother = v;
if (this.virtualAge > (v = other.virtualAge)) this.virtualAge = v;
if (virtualAge() > (v = other.virtualAge())) this.virtualAge = v;
if (this.wordsintext > (v = other.wordsintext)) this.wordsintext = v;
if (this.phrasesintext > (v = other.phrasesintext)) this.phrasesintext = v;
if (other.positions != null) a(this.positions, min(this.positions, other.positions));
@ -334,7 +336,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
if (this.hitcount < (v = other.hitcount)) this.hitcount = v;
if (this.llocal < (v = other.llocal)) this.llocal = v;
if (this.lother < (v = other.lother)) this.lother = v;
if (this.virtualAge < (v = other.virtualAge)) this.virtualAge = v;
if (virtualAge() < (v = other.virtualAge())) this.virtualAge = v;
if (this.wordsintext < (v = other.wordsintext)) this.wordsintext = v;
if (this.phrasesintext < (v = other.phrasesintext)) this.phrasesintext = v;
if (other.positions != null) a(this.positions, max(this.positions, other.positions));

Loading…
Cancel
Save