From a1fe65b1150e2a1d239c6f0b0066b5debab85048 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Tue, 5 Jun 2012 12:06:26 +0200 Subject: [PATCH] performance hacks --- htroot/IndexControlRWIs_p.java | 2 +- htroot/yacysearch.java | 2 +- source/de/anomic/tools/crypt.java | 7 +- .../net/yacy/kelondro/rwi/AbstractIndex.java | 2 +- source/net/yacy/kelondro/rwi/Index.java | 12 ++- source/net/yacy/kelondro/rwi/IndexCell.java | 20 +++- .../kelondro/rwi/ReferenceContainerCache.java | 11 ++- source/net/yacy/peers/dht/Dispatcher.java | 2 +- source/net/yacy/search/Switchboard.java | 2 +- source/net/yacy/search/query/QueryParams.java | 92 +++++++++---------- source/net/yacy/search/query/RWIProcess.java | 4 +- source/net/yacy/search/query/SearchEvent.java | 6 +- .../yacy/search/query/SearchEventCache.java | 53 ++++++----- 13 files changed, 121 insertions(+), 94 deletions(-) diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java index 559ea0175..7b1f59658 100644 --- a/htroot/IndexControlRWIs_p.java +++ b/htroot/IndexControlRWIs_p.java @@ -374,7 +374,7 @@ public class IndexControlRWIs_p index = null; } else { prop.put("result", "Peer " + host + " not found"); - } + } } catch ( final IOException e ) { Log.logException(e); } diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 17240a992..6c41fef75 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -364,7 +364,7 @@ public class yacysearch { // check available memory and clean up if necessary if ( !MemoryControl.request(8000000L, false) ) { indexSegment.urlMetadata().clearCache(); - SearchEventCache.cleanupEvents(true); + SearchEventCache.cleanupEvents(false); } final RankingProfile ranking = sb.getRanking(); diff --git a/source/de/anomic/tools/crypt.java b/source/de/anomic/tools/crypt.java index 7e884af5e..eb263a57a 100644 --- a/source/de/anomic/tools/crypt.java +++ b/source/de/anomic/tools/crypt.java @@ -1,4 +1,4 @@ -// crypt.java +// crypt.java // ------------------------------------- // (C) by Michael Peter Christen; mc@yacy.net // first published on http://www.anomic.de @@ -24,8 +24,6 @@ package de.anomic.tools; -import java.text.SimpleDateFormat; -import java.util.Locale; import java.util.Random; import net.yacy.kelondro.logging.Log; @@ -57,10 +55,9 @@ public class crypt { public static final String vDATE = "20030925"; public static final String copyright = "[ 'crypt' v" + vDATE + " by Michael Christen / www.anomic.de ]"; public static final String magicString = "crypt|anomic.de|0"; // magic identifier inside every '.crypt' - file - public static final SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyyMMddHHmmssSSS", Locale.ENGLISH); String cryptMethod; // one of ["TripleDES", "Blowfish", "DESede", "DES"] - //private static final String defaultMethod = "PBEWithMD5AndDES"; //"DES"; + //private static final String defaultMethod = "PBEWithMD5AndDES"; //"DES"; // -------------------------------------------------------- // Section: simple Codings diff --git a/source/net/yacy/kelondro/rwi/AbstractIndex.java b/source/net/yacy/kelondro/rwi/AbstractIndex.java index 8418eab51..f6a9108bb 100644 --- a/source/net/yacy/kelondro/rwi/AbstractIndex.java +++ b/source/net/yacy/kelondro/rwi/AbstractIndex.java @@ -94,6 +94,7 @@ public abstract class AbstractIndex implements return c; } + @Override public synchronized TreeSet> referenceContainer(final byte[] startHash, final boolean rot, final boolean excludePrivate, int count) throws IOException { // creates a set of indexContainers // this does not use the cache @@ -212,7 +213,6 @@ public abstract class AbstractIndex implements final HandleSet urlselection, final ReferenceFactory termFactory, final int maxDistance) throws RowSpaceExceededException { - return new TermSearch(this, queryHashes, excludeHashes, urlselection, termFactory, maxDistance); } diff --git a/source/net/yacy/kelondro/rwi/Index.java b/source/net/yacy/kelondro/rwi/Index.java index edc4e7d64..c307efad7 100644 --- a/source/net/yacy/kelondro/rwi/Index.java +++ b/source/net/yacy/kelondro/rwi/Index.java @@ -105,12 +105,20 @@ public interface Index extends Iterable get(byte[] termHash, HandleSet referenceselection) throws IOException; /** - * delete all references for a word + * remove all references for a word * @param termHash * @return the deleted references * @throws IOException */ - public ReferenceContainer delete(byte[] termHash) throws IOException; + public ReferenceContainer remove(byte[] termHash) throws IOException; + + /** + * delete all references for a word + * the difference to 'remove' is, that the removed element is not returned + * @param termHash + * @throws IOException + */ + public void delete(byte[] termHash) throws IOException; /** * remove a specific reference entry diff --git a/source/net/yacy/kelondro/rwi/IndexCell.java b/source/net/yacy/kelondro/rwi/IndexCell.java index acbbc9775..e29797ec6 100644 --- a/source/net/yacy/kelondro/rwi/IndexCell.java +++ b/source/net/yacy/kelondro/rwi/IndexCell.java @@ -343,7 +343,7 @@ public final class IndexCell extends AbstractBu * @throws IOException */ @Override - public ReferenceContainer delete(final byte[] termHash) throws IOException { + public ReferenceContainer remove(final byte[] termHash) throws IOException { removeDelayed(); ReferenceContainer c1 = null; try { @@ -354,7 +354,7 @@ public final class IndexCell extends AbstractBu if (c1 != null) { this.array.delete(termHash); } - final ReferenceContainer c0 = this.ram.delete(termHash); + final ReferenceContainer c0 = this.ram.remove(termHash); if (c1 == null) return c0; if (c0 == null) return c1; try { @@ -370,6 +370,22 @@ public final class IndexCell extends AbstractBu } } + @Override + public void delete(final byte[] termHash) throws IOException { + removeDelayed(); + ReferenceContainer c1 = null; + try { + c1 = this.array.get(termHash); + } catch (final RowSpaceExceededException e2) { + Log.logException(e2); + } + if (c1 != null) { + this.array.delete(termHash); + } + this.ram.delete(termHash); + return; + } + @Override public void removeDelayed(final byte[] termHash, final HandleSet urlHashes) { HandleSet r; diff --git a/source/net/yacy/kelondro/rwi/ReferenceContainerCache.java b/source/net/yacy/kelondro/rwi/ReferenceContainerCache.java index b3f2978c5..750be8109 100644 --- a/source/net/yacy/kelondro/rwi/ReferenceContainerCache.java +++ b/source/net/yacy/kelondro/rwi/ReferenceContainerCache.java @@ -462,17 +462,26 @@ public final class ReferenceContainerCache exte * @return the indexContainer if the cache contained the container, null otherwise */ @Override - public ReferenceContainer delete(final byte[] termHash) { + public ReferenceContainer remove(final byte[] termHash) { // returns the index that had been deleted assert this.cache != null; if (this.cache == null) return null; return this.cache.remove(new ByteArray(termHash)); } + @Override + public void delete(final byte[] termHash) { + // returns the index that had been deleted + assert this.cache != null; + if (this.cache == null) return; + this.cache.remove(new ByteArray(termHash)); + } + @Override public void removeDelayed(final byte[] termHash, final byte[] urlHashBytes) { remove(termHash, urlHashBytes); } + @Override public boolean remove(final byte[] termHash, final byte[] urlHashBytes) { assert this.cache != null; diff --git a/source/net/yacy/peers/dht/Dispatcher.java b/source/net/yacy/peers/dht/Dispatcher.java index 2d8f3258a..84078ec37 100644 --- a/source/net/yacy/peers/dht/Dispatcher.java +++ b/source/net/yacy/peers/dht/Dispatcher.java @@ -209,7 +209,7 @@ public class Dispatcher { // but to avoid race conditions return the results from the deletes rc = new ArrayList>(containers.size()); for (final ReferenceContainer c: containers) { - container = this.segment.termIndex().delete(c.getTermHash()); // be aware this might be null! + container = this.segment.termIndex().remove(c.getTermHash()); // be aware this might be null! if (container != null && !container.isEmpty()) { if (this.log.isFine()) this.log.logFine("selected " + container.size() + " urls for word '" + ASCII.String(c.getTermHash()) + "'"); rc.add(container); diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index c1c26af4e..4f1957e9a 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -1914,7 +1914,7 @@ public final class Switchboard extends serverSwitch for ( final Segment indexSegment : this.indexSegments ) { indexSegment.urlMetadata().clearCache(); } - SearchEventCache.cleanupEvents(true); + SearchEventCache.cleanupEvents(false); this.trail.clear(); } diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java index 35a90ba7c..21ed469d5 100644 --- a/source/net/yacy/search/query/QueryParams.java +++ b/source/net/yacy/search/query/QueryParams.java @@ -481,7 +481,7 @@ public final class QueryParams { return matcher; } - private String idCacheAnon = null, idCache = null; + private volatile String idCacheAnon = null, idCache = null; final static private char asterisk = '*'; public String id(final boolean anonymized) { if (anonymized) { @@ -489,55 +489,49 @@ public final class QueryParams { } else { if (this.idCache != null) return this.idCache; } - - // generate a string that identifies a search so results can be re-used in a cache - final StringBuilder context = new StringBuilder(120); - if (anonymized) { - context.append(anonymizedQueryHashes(this.queryHashes)); - context.append('-'); - context.append(anonymizedQueryHashes(this.excludeHashes)); - } else { - context.append(hashSet2hashString(this.queryHashes)); - context.append('-'); - context.append(hashSet2hashString(this.excludeHashes)); - } - //context.append(asterisk); - //context.append(this.domType); - context.append(asterisk); - context.append(this.contentdom); - context.append(asterisk); - context.append(this.zonecode); - context.append(asterisk); - context.append(ASCII.String(Word.word2hash(this.ranking.toExternalString()))); - context.append(asterisk); - context.append(Base64Order.enhancedCoder.encodeString(this.prefer.toString())); - context.append(asterisk); - context.append(Base64Order.enhancedCoder.encodeString(this.urlMask.toString())); - context.append(asterisk); - context.append(this.sitehash); - context.append(asterisk); - context.append(this.siteexcludes); - context.append(asterisk); - context.append(this.authorhash); - context.append(asterisk); - context.append(this.targetlang); - context.append(asterisk); - context.append(this.constraint); - context.append(asterisk); - context.append(this.maxDistance); - context.append(asterisk); - context.append(this.modifier.s); - context.append(asterisk); - context.append(this.lat).append(asterisk).append(this.lon).append(asterisk).append(this.radius); - context.append(asterisk); - context.append(this.snippetCacheStrategy == null ? "null" : this.snippetCacheStrategy.name()); - String result = context.toString(); - if (anonymized) { - this.idCacheAnon = result; - } else { - this.idCache = result; + synchronized (this) { + // do a Double-Checked Locking + if (anonymized) { + if (this.idCacheAnon != null) return this.idCacheAnon; + } else { + if (this.idCache != null) return this.idCache; + } + // generate a string that identifies a search so results can be re-used in a cache + final StringBuilder context = new StringBuilder(180); + if (anonymized) { + context.append(anonymizedQueryHashes(this.queryHashes)); + context.append('-'); + context.append(anonymizedQueryHashes(this.excludeHashes)); + } else { + context.append(hashSet2hashString(this.queryHashes)); + context.append('-'); + context.append(hashSet2hashString(this.excludeHashes)); + } + //context.append(asterisk); + //context.append(this.domType); + context.append(asterisk); + context.append(this.contentdom).append(asterisk); + context.append(this.zonecode).append(asterisk); + context.append(ASCII.String(Word.word2hash(this.ranking.toExternalString()))).append(asterisk); + context.append(Base64Order.enhancedCoder.encodeString(this.prefer.toString())).append(asterisk); + context.append(Base64Order.enhancedCoder.encodeString(this.urlMask.toString())).append(asterisk); + context.append(this.sitehash).append(asterisk); + context.append(this.siteexcludes).append(asterisk); + context.append(this.authorhash).append(asterisk); + context.append(this.targetlang).append(asterisk); + context.append(this.constraint).append(asterisk); + context.append(this.maxDistance).append(asterisk); + context.append(this.modifier.s).append(asterisk); + context.append(this.lat).append(asterisk).append(this.lon).append(asterisk).append(this.radius).append(asterisk); + context.append(this.snippetCacheStrategy == null ? "null" : this.snippetCacheStrategy.name()); + String result = context.toString(); + if (anonymized) { + this.idCacheAnon = result; + } else { + this.idCache = result; + } + return result; } - return result; } /** diff --git a/source/net/yacy/search/query/RWIProcess.java b/source/net/yacy/search/query/RWIProcess.java index d12b42a97..cd65efa28 100644 --- a/source/net/yacy/search/query/RWIProcess.java +++ b/source/net/yacy/search/query/RWIProcess.java @@ -643,10 +643,10 @@ public final class RWIProcess extends Thread // check geo coordinates double lat, lon; - if (this.query.radius > 0.0d && this.query.lat != 0.0d && this.query.lon != 0.0d && (lat = page.lat()) > 0.0d && (lon = page.lon()) > 0.0d) { + if (this.query.radius > 0.0d && this.query.lat != 0.0d && this.query.lon != 0.0d && (lat = page.lat()) != 0.0d && (lon = page.lon()) != 0.0d) { double latDelta = this.query.lat - lat; double lonDelta = this.query.lon - lon; - double distance = Math.sqrt(latDelta * latDelta + lonDelta * lonDelta) / 2; // pythagoras + double distance = Math.sqrt(latDelta * latDelta + lonDelta * lonDelta); // pythagoras if (distance > this.query.radius) { this.sortout++; continue; diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index 243d63132..02de72095 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -82,7 +82,7 @@ public final class SearchEvent RESULTLIST; } - public static final int max_results_preparation = 3000; + public static final int max_results_preparation = 420000; // class variables that may be implemented with an abstract class private long eventTime; @@ -118,7 +118,7 @@ public final class SearchEvent final int burstMultiwordPercent, final boolean deleteIfSnippetFail) { if ( MemoryControl.available() < 1024 * 1024 * 100 ) { - SearchEventCache.cleanupEvents(true); + SearchEventCache.cleanupEvents(false); } this.eventTime = System.currentTimeMillis(); // for lifetime check this.peers = peers; @@ -300,7 +300,7 @@ public final class SearchEvent // store this search to a cache so it can be re-used if ( MemoryControl.available() < 1024 * 1024 * 100 ) { - SearchEventCache.cleanupEvents(true); + SearchEventCache.cleanupEvents(false); } SearchEventCache.put(this.query.id(false), this); } diff --git a/source/net/yacy/search/query/SearchEventCache.java b/source/net/yacy/search/query/SearchEventCache.java index a5491067f..20fbb0afd 100644 --- a/source/net/yacy/search/query/SearchEventCache.java +++ b/source/net/yacy/search/query/SearchEventCache.java @@ -30,28 +30,24 @@ import java.util.Iterator; import java.util.Map; import java.util.SortedMap; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import net.yacy.cora.document.Classification; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.MemoryControl; import net.yacy.peers.SeedDB; import net.yacy.repository.LoaderDispatcher; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; -import net.yacy.search.index.Segment; -import net.yacy.search.ranking.RankingProfile; import de.anomic.data.WorkTables; public class SearchEventCache { - private static ConcurrentMap lastEvents = new ConcurrentHashMap(); // a cache for objects from this class: re-use old search requests + private volatile static Map lastEvents = new ConcurrentHashMap(); // a cache for objects from this class: re-use old search requests public static final long eventLifetimeBigMem = 600000; // the time an event will stay in the cache when available memory is high, 10 Minutes public static final long eventLifetimeMediumMem = 60000; // the time an event will stay in the cache when available memory is medium, 1 Minute public static final long eventLifetimeShortMem = 10000; // the time an event will stay in the cache when memory is low, 10 seconds public static final long memlimitHigh = 600 * 1024 * 1024; // 400 MB public static final long memlimitMedium = 200 * 1024 * 1024; // 100 MB - public static String lastEventID = ""; + public volatile static String lastEventID = ""; public static long cacheInsert = 0, cacheHit = 0, cacheMiss = 0, cacheDelete = 0; public static int size() { @@ -59,7 +55,7 @@ public class SearchEventCache { } public static void put(final String eventID, final SearchEvent event) { - if (MemoryControl.shortStatus()) cleanupEvents(true); + if (MemoryControl.shortStatus()) cleanupEvents(false); lastEventID = eventID; final SearchEvent oldEvent = lastEvents.put(eventID, event); if (oldEvent == null) cacheInsert++; @@ -89,8 +85,6 @@ public class SearchEventCache { if (event.workerAlive()) { event.cleanup(); } - } - if (!event.workerAlive()) { i.remove(); cacheDelete++; } @@ -98,21 +92,29 @@ public class SearchEventCache { } public static SearchEvent getEvent(final String eventID) { - final SearchEvent event = lastEvents.get(eventID); - if (event == null) cacheMiss++; else cacheHit++; + SearchEvent event = lastEvents.get(eventID); + if (event == null) { + synchronized (lastEvents) { + event = lastEvents.get(eventID); + if (event == null) cacheMiss++; else cacheHit++; + } + cacheMiss++; + } else { + cacheHit++; + } return event; } public static int countAliveThreads() { int alive = 0; - for (final SearchEvent e: SearchEventCache.lastEvents.values()) { + for (final SearchEvent e: lastEvents.values()) { if (e.workerAlive()) alive++; } return alive; } - private static SearchEvent dummyEvent = null; - +/* + private volatile static SearchEvent dummyEvent = null; private static SearchEvent getDummyEvent(final WorkTables workTables, final LoaderDispatcher loader, final Segment indexSegment) { Log.logWarning("SearchEventCache", "returning dummy event"); if (dummyEvent != null) return dummyEvent; @@ -120,7 +122,7 @@ public class SearchEventCache { dummyEvent = new SearchEvent(query, null, workTables, null, false, loader, 0, 0, 0, 0, false); return dummyEvent; } - +*/ public static SearchEvent getEvent( final QueryParams query, final SeedDB peers, @@ -134,13 +136,12 @@ public class SearchEventCache { final int burstMultiwordPercent) { final String id = query.id(false); - SearchEvent event = SearchEventCache.lastEvents.get(id); - if (event == null) cacheMiss++; else cacheHit++; + SearchEvent event = getEvent(id); if (Switchboard.getSwitchboard() != null && !Switchboard.getSwitchboard().crawlQueues.noticeURL.isEmpty() && event != null && System.currentTimeMillis() - event.getEventTime() > 60000) { // if a local crawl is ongoing, don't use the result from the cache to use possibly more results that come from the current crawl // to prevent that this happens during a person switches between the different result pages, a re-search happens no more than // once a minute - SearchEventCache.lastEvents.remove(id); + lastEvents.remove(id); cacheDelete++; event = null; } else { @@ -156,26 +157,28 @@ public class SearchEventCache { // throttling in case of too many search requests int waitcount = 0; + /* throttling : while (true) { - final int allowedThreads = (int) Math.max(1, MemoryControl.available() / (query.snippetCacheStrategy == null ? 3 : 30) / 1024 / 1024); + final int allowedThreads = (int) Math.max(10, MemoryControl.available() / (query.snippetCacheStrategy == null ? 3 : 30) / 1024 / 1024); // make room if there are too many search events (they need a lot of RAM) - if (SearchEventCache.lastEvents.size() >= allowedThreads) { - Log.logWarning("SearchEventCache", "throttling phase 1: " + SearchEventCache.lastEvents.size() + " in cache; " + countAliveThreads() + " alive; " + allowedThreads + " allowed"); + if (lastEvents.size() >= allowedThreads) { + Log.logWarning("SearchEventCache", "throttling phase 1: " + lastEvents.size() + " in cache; " + countAliveThreads() + " alive; " + allowedThreads + " allowed"); cleanupEvents(false); } else break throttling; // if there are still some then delete just all - if (SearchEventCache.lastEvents.size() >= allowedThreads) { - Log.logWarning("SearchEventCache", "throttling phase 2: " + SearchEventCache.lastEvents.size() + " in cache; " + countAliveThreads() + " alive; " + allowedThreads + " allowed"); + if (lastEvents.size() >= allowedThreads) { + Log.logWarning("SearchEventCache", "throttling phase 2: " + lastEvents.size() + " in cache; " + countAliveThreads() + " alive; " + allowedThreads + " allowed"); cleanupEvents(true); } else break throttling; // now there might be still events left that are alive if (countAliveThreads() < allowedThreads) break throttling; // finally we just wait some time until we get access - Log.logWarning("SearchEventCache", "throttling phase 3: " + SearchEventCache.lastEvents.size() + " in cache; " + countAliveThreads() + " alive; " + allowedThreads + " allowed"); + Log.logWarning("SearchEventCache", "throttling phase 3: " + lastEvents.size() + " in cache; " + countAliveThreads() + " alive; " + allowedThreads + " allowed"); try { Thread.sleep(200); } catch (final InterruptedException e) { } waitcount++; if (waitcount >= 100) return getDummyEvent(workTables, loader, query.getSegment()); } + */ if (waitcount > 0) { // do not fetch snippets because that is most time-expensive @@ -183,7 +186,7 @@ public class SearchEventCache { } // check if there are too many other searches alive now - Log.logInfo("SearchEventCache", "getEvent: " + SearchEventCache.lastEvents.size() + " in cache; " + countAliveThreads() + " alive"); + Log.logInfo("SearchEventCache", "getEvent: " + lastEvents.size() + " in cache; " + countAliveThreads() + " alive"); // start a new event final boolean delete = Switchboard.getSwitchboard() == null || Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.SEARCH_VERIFY_DELETE, true);