performance hacks

pull/1/head
Michael Peter Christen 13 years ago
parent 2fe207f813
commit a1fe65b115

@ -364,7 +364,7 @@ public class yacysearch {
// check available memory and clean up if necessary
if ( !MemoryControl.request(8000000L, false) ) {
indexSegment.urlMetadata().clearCache();
SearchEventCache.cleanupEvents(true);
SearchEventCache.cleanupEvents(false);
}
final RankingProfile ranking = sb.getRanking();

@ -24,8 +24,6 @@
package de.anomic.tools;
import java.text.SimpleDateFormat;
import java.util.Locale;
import java.util.Random;
import net.yacy.kelondro.logging.Log;
@ -57,7 +55,6 @@ public class crypt {
public static final String vDATE = "20030925";
public static final String copyright = "[ 'crypt' v" + vDATE + " by Michael Christen / www.anomic.de ]";
public static final String magicString = "crypt|anomic.de|0"; // magic identifier inside every '.crypt' - file
public static final SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyyMMddHHmmssSSS", Locale.ENGLISH);
String cryptMethod; // one of ["TripleDES", "Blowfish", "DESede", "DES"]
//private static final String defaultMethod = "PBEWithMD5AndDES"; //"DES";

@ -94,6 +94,7 @@ public abstract class AbstractIndex <ReferenceType extends Reference> implements
return c;
}
@Override
public synchronized TreeSet<ReferenceContainer<ReferenceType>> referenceContainer(final byte[] startHash, final boolean rot, final boolean excludePrivate, int count) throws IOException {
// creates a set of indexContainers
// this does not use the cache
@ -212,7 +213,6 @@ public abstract class AbstractIndex <ReferenceType extends Reference> implements
final HandleSet urlselection,
final ReferenceFactory<ReferenceType> termFactory,
final int maxDistance) throws RowSpaceExceededException {
return new TermSearch<ReferenceType>(this, queryHashes, excludeHashes, urlselection, termFactory, maxDistance);
}

@ -105,12 +105,20 @@ public interface Index <ReferenceType extends Reference> extends Iterable<Refere
public ReferenceContainer<ReferenceType> get(byte[] termHash, HandleSet referenceselection) throws IOException;
/**
* delete all references for a word
* remove all references for a word
* @param termHash
* @return the deleted references
* @throws IOException
*/
public ReferenceContainer<ReferenceType> delete(byte[] termHash) throws IOException;
public ReferenceContainer<ReferenceType> remove(byte[] termHash) throws IOException;
/**
* Delete all references for a word.
* Unlike 'remove', this method does not return the deleted references.
* @param termHash the hash of the word whose references are deleted
* @throws IOException
*/
public void delete(byte[] termHash) throws IOException;
/**
* remove a specific reference entry

@ -343,7 +343,7 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
* @throws IOException
*/
@Override
public ReferenceContainer<ReferenceType> delete(final byte[] termHash) throws IOException {
public ReferenceContainer<ReferenceType> remove(final byte[] termHash) throws IOException {
removeDelayed();
ReferenceContainer<ReferenceType> c1 = null;
try {
@ -354,7 +354,7 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
if (c1 != null) {
this.array.delete(termHash);
}
final ReferenceContainer<ReferenceType> c0 = this.ram.delete(termHash);
final ReferenceContainer<ReferenceType> c0 = this.ram.remove(termHash);
if (c1 == null) return c0;
if (c0 == null) return c1;
try {
@ -370,6 +370,22 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
}
}
/**
 * Deletes all references stored for a term without handing the removed container back.
 * <p>
 * Pending delayed removals are processed first via {@code removeDelayed()}. The term is then
 * looked up in {@code this.array}; only when that lookup yields a container is the term deleted
 * from the array. The entry in {@code this.ram} is deleted in every case.
 * <p>
 * A {@code RowSpaceExceededException} raised by the array lookup is logged and treated like
 * "nothing found", so in that case the array entry is left untouched.
 *
 * @param termHash the hash of the term whose references are deleted
 * @throws IOException declared by the overridden method; may be raised by the storage calls made here
 */
@Override
public void delete(final byte[] termHash) throws IOException {
    removeDelayed();

    // probe the array part; a failed lookup leaves the probe result at null
    ReferenceContainer<ReferenceType> storedInArray = null;
    try {
        storedInArray = this.array.get(termHash);
    } catch (final RowSpaceExceededException lookupFailure) {
        Log.logException(lookupFailure);
    }

    // the array entry is only deleted when the probe actually found something
    if (storedInArray != null) this.array.delete(termHash);

    // the ram part is always cleared for this term
    this.ram.delete(termHash);
}
@Override
public void removeDelayed(final byte[] termHash, final HandleSet urlHashes) {
HandleSet r;

@ -462,17 +462,26 @@ public final class ReferenceContainerCache<ReferenceType extends Reference> exte
* @return the indexContainer if the cache contained the container, null otherwise
*/
@Override
public ReferenceContainer<ReferenceType> delete(final byte[] termHash) {
public ReferenceContainer<ReferenceType> remove(final byte[] termHash) {
// returns the index that had been deleted
assert this.cache != null;
if (this.cache == null) return null;
return this.cache.remove(new ByteArray(termHash));
}
/**
 * Deletes the entry stored for the given term hash from the cache.
 * Nothing is returned: the value produced by the cache's own remove call is discarded.
 * When the cache is {@code null} the call does nothing (with assertions enabled, the
 * assertion below fails first).
 *
 * @param termHash the term hash whose cache entry is dropped
 */
@Override
public void delete(final byte[] termHash) {
    assert this.cache != null;
    if (this.cache != null) {
        // the cache is keyed by ByteArray wrappers around the raw hash bytes
        this.cache.remove(new ByteArray(termHash));
    }
}
/**
 * Removes a single reference of a term.
 * In this class nothing is deferred: the call is forwarded straight to
 * {@code remove(termHash, urlHashBytes)}.
 *
 * @param termHash the term hash the reference belongs to
 * @param urlHashBytes the url hash identifying the reference to remove
 */
@Override
public void removeDelayed(final byte[] termHash, final byte[] urlHashBytes) {
// delegate immediately; the boolean result of remove() is intentionally not propagated (this method is void)
remove(termHash, urlHashBytes);
}
@Override
public boolean remove(final byte[] termHash, final byte[] urlHashBytes) {
assert this.cache != null;

@ -209,7 +209,7 @@ public class Dispatcher {
// but to avoid race conditions return the results from the deletes
rc = new ArrayList<ReferenceContainer<WordReference>>(containers.size());
for (final ReferenceContainer<WordReference> c: containers) {
container = this.segment.termIndex().delete(c.getTermHash()); // be aware this might be null!
container = this.segment.termIndex().remove(c.getTermHash()); // be aware this might be null!
if (container != null && !container.isEmpty()) {
if (this.log.isFine()) this.log.logFine("selected " + container.size() + " urls for word '" + ASCII.String(c.getTermHash()) + "'");
rc.add(container);

@ -1914,7 +1914,7 @@ public final class Switchboard extends serverSwitch
for ( final Segment indexSegment : this.indexSegments ) {
indexSegment.urlMetadata().clearCache();
}
SearchEventCache.cleanupEvents(true);
SearchEventCache.cleanupEvents(false);
this.trail.clear();
}

@ -481,7 +481,7 @@ public final class QueryParams {
return matcher;
}
private String idCacheAnon = null, idCache = null;
private volatile String idCacheAnon = null, idCache = null;
final static private char asterisk = '*';
public String id(final boolean anonymized) {
if (anonymized) {
@ -489,9 +489,15 @@ public final class QueryParams {
} else {
if (this.idCache != null) return this.idCache;
}
synchronized (this) {
// do a Double-Checked Locking
if (anonymized) {
if (this.idCacheAnon != null) return this.idCacheAnon;
} else {
if (this.idCache != null) return this.idCache;
}
// generate a string that identifies a search so results can be re-used in a cache
final StringBuilder context = new StringBuilder(120);
final StringBuilder context = new StringBuilder(180);
if (anonymized) {
context.append(anonymizedQueryHashes(this.queryHashes));
context.append('-');
@ -504,32 +510,19 @@ public final class QueryParams {
//context.append(asterisk);
//context.append(this.domType);
context.append(asterisk);
context.append(this.contentdom);
context.append(asterisk);
context.append(this.zonecode);
context.append(asterisk);
context.append(ASCII.String(Word.word2hash(this.ranking.toExternalString())));
context.append(asterisk);
context.append(Base64Order.enhancedCoder.encodeString(this.prefer.toString()));
context.append(asterisk);
context.append(Base64Order.enhancedCoder.encodeString(this.urlMask.toString()));
context.append(asterisk);
context.append(this.sitehash);
context.append(asterisk);
context.append(this.siteexcludes);
context.append(asterisk);
context.append(this.authorhash);
context.append(asterisk);
context.append(this.targetlang);
context.append(asterisk);
context.append(this.constraint);
context.append(asterisk);
context.append(this.maxDistance);
context.append(asterisk);
context.append(this.modifier.s);
context.append(asterisk);
context.append(this.lat).append(asterisk).append(this.lon).append(asterisk).append(this.radius);
context.append(asterisk);
context.append(this.contentdom).append(asterisk);
context.append(this.zonecode).append(asterisk);
context.append(ASCII.String(Word.word2hash(this.ranking.toExternalString()))).append(asterisk);
context.append(Base64Order.enhancedCoder.encodeString(this.prefer.toString())).append(asterisk);
context.append(Base64Order.enhancedCoder.encodeString(this.urlMask.toString())).append(asterisk);
context.append(this.sitehash).append(asterisk);
context.append(this.siteexcludes).append(asterisk);
context.append(this.authorhash).append(asterisk);
context.append(this.targetlang).append(asterisk);
context.append(this.constraint).append(asterisk);
context.append(this.maxDistance).append(asterisk);
context.append(this.modifier.s).append(asterisk);
context.append(this.lat).append(asterisk).append(this.lon).append(asterisk).append(this.radius).append(asterisk);
context.append(this.snippetCacheStrategy == null ? "null" : this.snippetCacheStrategy.name());
String result = context.toString();
if (anonymized) {
@ -539,6 +532,7 @@ public final class QueryParams {
}
return result;
}
}
/**
* make a query anchor tag

@ -643,10 +643,10 @@ public final class RWIProcess extends Thread
// check geo coordinates
double lat, lon;
if (this.query.radius > 0.0d && this.query.lat != 0.0d && this.query.lon != 0.0d && (lat = page.lat()) > 0.0d && (lon = page.lon()) > 0.0d) {
if (this.query.radius > 0.0d && this.query.lat != 0.0d && this.query.lon != 0.0d && (lat = page.lat()) != 0.0d && (lon = page.lon()) != 0.0d) {
double latDelta = this.query.lat - lat;
double lonDelta = this.query.lon - lon;
double distance = Math.sqrt(latDelta * latDelta + lonDelta * lonDelta) / 2; // pythagoras
double distance = Math.sqrt(latDelta * latDelta + lonDelta * lonDelta); // pythagoras
if (distance > this.query.radius) {
this.sortout++;
continue;

@ -82,7 +82,7 @@ public final class SearchEvent
RESULTLIST;
}
public static final int max_results_preparation = 3000;
public static final int max_results_preparation = 420000;
// class variables that may be implemented with an abstract class
private long eventTime;
@ -118,7 +118,7 @@ public final class SearchEvent
final int burstMultiwordPercent,
final boolean deleteIfSnippetFail) {
if ( MemoryControl.available() < 1024 * 1024 * 100 ) {
SearchEventCache.cleanupEvents(true);
SearchEventCache.cleanupEvents(false);
}
this.eventTime = System.currentTimeMillis(); // for lifetime check
this.peers = peers;
@ -300,7 +300,7 @@ public final class SearchEvent
// store this search to a cache so it can be re-used
if ( MemoryControl.available() < 1024 * 1024 * 100 ) {
SearchEventCache.cleanupEvents(true);
SearchEventCache.cleanupEvents(false);
}
SearchEventCache.put(this.query.id(false), this);
}

@ -30,28 +30,24 @@ import java.util.Iterator;
import java.util.Map;
import java.util.SortedMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import net.yacy.cora.document.Classification;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.peers.SeedDB;
import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segment;
import net.yacy.search.ranking.RankingProfile;
import de.anomic.data.WorkTables;
public class SearchEventCache {
private static ConcurrentMap<String, SearchEvent> lastEvents = new ConcurrentHashMap<String, SearchEvent>(); // a cache for objects from this class: re-use old search requests
private volatile static Map<String, SearchEvent> lastEvents = new ConcurrentHashMap<String, SearchEvent>(); // a cache for objects from this class: re-use old search requests
public static final long eventLifetimeBigMem = 600000; // the time (in milliseconds) an event will stay in the cache when available memory is high, 10 minutes
public static final long eventLifetimeMediumMem = 60000; // the time (in milliseconds) an event will stay in the cache when available memory is medium, 1 minute
public static final long eventLifetimeShortMem = 10000; // the time (in milliseconds) an event will stay in the cache when memory is low, 10 seconds
public static final long memlimitHigh = 600 * 1024 * 1024; // 600 MB
public static final long memlimitMedium = 200 * 1024 * 1024; // 200 MB
public static String lastEventID = "";
public volatile static String lastEventID = "";
public static long cacheInsert = 0, cacheHit = 0, cacheMiss = 0, cacheDelete = 0;
public static int size() {
@ -59,7 +55,7 @@ public class SearchEventCache {
}
public static void put(final String eventID, final SearchEvent event) {
if (MemoryControl.shortStatus()) cleanupEvents(true);
if (MemoryControl.shortStatus()) cleanupEvents(false);
lastEventID = eventID;
final SearchEvent oldEvent = lastEvents.put(eventID, event);
if (oldEvent == null) cacheInsert++;
@ -89,8 +85,6 @@ public class SearchEventCache {
if (event.workerAlive()) {
event.cleanup();
}
}
if (!event.workerAlive()) {
i.remove();
cacheDelete++;
}
@ -98,21 +92,29 @@ public class SearchEventCache {
}
public static SearchEvent getEvent(final String eventID) {
final SearchEvent event = lastEvents.get(eventID);
SearchEvent event = lastEvents.get(eventID);
if (event == null) {
synchronized (lastEvents) {
event = lastEvents.get(eventID);
if (event == null) cacheMiss++; else cacheHit++;
}
cacheMiss++;
} else {
cacheHit++;
}
return event;
}
public static int countAliveThreads() {
int alive = 0;
for (final SearchEvent e: SearchEventCache.lastEvents.values()) {
for (final SearchEvent e: lastEvents.values()) {
if (e.workerAlive()) alive++;
}
return alive;
}
private static SearchEvent dummyEvent = null;
/*
private volatile static SearchEvent dummyEvent = null;
private static SearchEvent getDummyEvent(final WorkTables workTables, final LoaderDispatcher loader, final Segment indexSegment) {
Log.logWarning("SearchEventCache", "returning dummy event");
if (dummyEvent != null) return dummyEvent;
@ -120,7 +122,7 @@ public class SearchEventCache {
dummyEvent = new SearchEvent(query, null, workTables, null, false, loader, 0, 0, 0, 0, false);
return dummyEvent;
}
*/
public static SearchEvent getEvent(
final QueryParams query,
final SeedDB peers,
@ -134,13 +136,12 @@ public class SearchEventCache {
final int burstMultiwordPercent) {
final String id = query.id(false);
SearchEvent event = SearchEventCache.lastEvents.get(id);
if (event == null) cacheMiss++; else cacheHit++;
SearchEvent event = getEvent(id);
if (Switchboard.getSwitchboard() != null && !Switchboard.getSwitchboard().crawlQueues.noticeURL.isEmpty() && event != null && System.currentTimeMillis() - event.getEventTime() > 60000) {
// if a local crawl is ongoing, don't use the result from the cache to use possibly more results that come from the current crawl
// to prevent that this happens during a person switches between the different result pages, a re-search happens no more than
// once a minute
SearchEventCache.lastEvents.remove(id);
lastEvents.remove(id);
cacheDelete++;
event = null;
} else {
@ -156,26 +157,28 @@ public class SearchEventCache {
// throttling in case of too many search requests
int waitcount = 0;
/*
throttling : while (true) {
final int allowedThreads = (int) Math.max(1, MemoryControl.available() / (query.snippetCacheStrategy == null ? 3 : 30) / 1024 / 1024);
final int allowedThreads = (int) Math.max(10, MemoryControl.available() / (query.snippetCacheStrategy == null ? 3 : 30) / 1024 / 1024);
// make room if there are too many search events (they need a lot of RAM)
if (SearchEventCache.lastEvents.size() >= allowedThreads) {
Log.logWarning("SearchEventCache", "throttling phase 1: " + SearchEventCache.lastEvents.size() + " in cache; " + countAliveThreads() + " alive; " + allowedThreads + " allowed");
if (lastEvents.size() >= allowedThreads) {
Log.logWarning("SearchEventCache", "throttling phase 1: " + lastEvents.size() + " in cache; " + countAliveThreads() + " alive; " + allowedThreads + " allowed");
cleanupEvents(false);
} else break throttling;
// if there are still some then delete just all
if (SearchEventCache.lastEvents.size() >= allowedThreads) {
Log.logWarning("SearchEventCache", "throttling phase 2: " + SearchEventCache.lastEvents.size() + " in cache; " + countAliveThreads() + " alive; " + allowedThreads + " allowed");
if (lastEvents.size() >= allowedThreads) {
Log.logWarning("SearchEventCache", "throttling phase 2: " + lastEvents.size() + " in cache; " + countAliveThreads() + " alive; " + allowedThreads + " allowed");
cleanupEvents(true);
} else break throttling;
// now there might be still events left that are alive
if (countAliveThreads() < allowedThreads) break throttling;
// finally we just wait some time until we get access
Log.logWarning("SearchEventCache", "throttling phase 3: " + SearchEventCache.lastEvents.size() + " in cache; " + countAliveThreads() + " alive; " + allowedThreads + " allowed");
Log.logWarning("SearchEventCache", "throttling phase 3: " + lastEvents.size() + " in cache; " + countAliveThreads() + " alive; " + allowedThreads + " allowed");
try { Thread.sleep(200); } catch (final InterruptedException e) { }
waitcount++;
if (waitcount >= 100) return getDummyEvent(workTables, loader, query.getSegment());
}
*/
if (waitcount > 0) {
// do not fetch snippets because that is most time-expensive
@ -183,7 +186,7 @@ public class SearchEventCache {
}
// check if there are too many other searches alive now
Log.logInfo("SearchEventCache", "getEvent: " + SearchEventCache.lastEvents.size() + " in cache; " + countAliveThreads() + " alive");
Log.logInfo("SearchEventCache", "getEvent: " + lastEvents.size() + " in cache; " + countAliveThreads() + " alive");
// start a new event
final boolean delete = Switchboard.getSwitchboard() == null || Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.SEARCH_VERIFY_DELETE, true);

Loading…
Cancel
Save