From e7736d9c8d1a05f6b29f31f629dadfc6e2b942a8 Mon Sep 17 00:00:00 2001
From: orbiter
Date: Wed, 26 Aug 2009 15:59:55 +0000
Subject: [PATCH] more refactoring: made all variables in SearchEvent private
 to prepare splitting of the class into two parts: local and remote search

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6265 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 htroot/SearchEventPicture.java                |   5 +-
 htroot/yacy/search.java                       |  24 ++--
 htroot/yacysearch.java                        |   4 +-
 htroot/yacysearchitem.java                    |   2 +-
 source/de/anomic/search/SearchEvent.java      | 106 ++++++++++++++----
 source/de/anomic/search/SearchEventCache.java |  53 ++++-----
 source/de/anomic/search/SnippetFetcher.java   |   6 +-
 7 files changed, 126 insertions(+), 74 deletions(-)

diff --git a/htroot/SearchEventPicture.java b/htroot/SearchEventPicture.java
index 1443bb2f9..49c085990 100644
--- a/htroot/SearchEventPicture.java
+++ b/htroot/SearchEventPicture.java
@@ -24,9 +24,8 @@
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 
-
 import de.anomic.http.metadata.RequestHeader;
-import de.anomic.search.SearchEvent;
+import de.anomic.search.SearchEventCache;
 import de.anomic.search.Switchboard;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
@@ -39,7 +38,7 @@ public class SearchEventPicture {
 
     public static ymageMatrix respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
         final Switchboard sb = (Switchboard) env;
-        final String eventID = header.get("event", SearchEvent.lastEventID);
+        final String eventID = header.get("event", SearchEventCache.lastEventID);
         if (eventID == null) return null;
         final ymageMatrix yp = NetworkGraph.getSearchEventPicture(sb.peers, eventID);
         if (yp == null) return new ymageMatrix(1, 1, ymageMatrix.MODE_SUB, "000000"); // empty image
diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java
index b7a026a49..a8d71b7eb 100644
--- a/htroot/yacy/search.java
+++ b/htroot/yacy/search.java
@@ -279,7 +279,7 @@
             // attach information about index abstracts
             final StringBuilder indexcount = new StringBuilder();
             Map.Entry<byte[], Integer> entry;
-            final Iterator<Map.Entry<byte[], Integer>> i = theSearch.IACount.entrySet().iterator();
+            final Iterator<Map.Entry<byte[], Integer>> i = theSearch.abstractsCount();
             while (i.hasNext()) {
                 entry = i.next();
                 indexcount.append("indexcount.").append(new String(entry.getKey())).append('=').append((entry.getValue()).toString()).append(serverCore.CRLF_STRING);
@@ -290,8 +290,8 @@
                 byte[] wordhash;
                 while (j.hasNext()) {
                     wordhash = j.next();
-                    indexabstractContainercount += (theSearch.IACount.get(wordhash)).intValue();
-                    indexabstract.append("indexabstract." + wordhash + "=").append(theSearch.IAResults.get(wordhash)).append(serverCore.CRLF_STRING);
+                    indexabstractContainercount += theSearch.abstractsCount(wordhash);
+                    indexabstract.append("indexabstract." + wordhash + "=").append(theSearch.abstractsString(wordhash)).append(serverCore.CRLF_STRING);
                 }
             }
             prop.put("indexcount", indexcount.toString());
@@ -302,22 +302,22 @@
         } else {
             joincount = theSearch.getRankingResult().getLocalResourceSize();
             prop.put("joincount", Integer.toString(joincount));
-            accu = theSearch.snippets.completeResults(3000);
+            accu = theSearch.result().completeResults(3000);
         }
 
         // generate compressed index for maxcounthash
         // this is not needed if the search is restricted to specific
         // urls, because it is a re-search
-        if ((theSearch.IAmaxcounthash == null) || (urls.length() != 0) || (queryhashes.size() <= 1) || (abstracts.length() == 0)) {
+        if ((theSearch.getAbstractsMaxCountHash() == null) || (urls.length() != 0) || (queryhashes.size() <= 1) || (abstracts.length() == 0)) {
             prop.put("indexabstract", "");
         } else if (abstracts.equals("auto")) {
             // automatically attach the index abstract for the index that has the most references. This should be our target dht position
-            indexabstractContainercount += (theSearch.IACount.get(theSearch.IAmaxcounthash)).intValue();
-            indexabstract.append("indexabstract." + theSearch.IAmaxcounthash + "=").append(theSearch.IAResults.get(theSearch.IAmaxcounthash)).append(serverCore.CRLF_STRING);
-            if ((theSearch.IAneardhthash != null) && (!(theSearch.IAneardhthash.equals(theSearch.IAmaxcounthash)))) {
+            indexabstractContainercount += theSearch.abstractsCount(theSearch.getAbstractsMaxCountHash());
+            indexabstract.append("indexabstract." + theSearch.getAbstractsMaxCountHash() + "=").append(theSearch.abstractsString(theSearch.getAbstractsMaxCountHash())).append(serverCore.CRLF_STRING);
+            if ((theSearch.getAbstractsNearDHTHash() != null) && (!(theSearch.getAbstractsNearDHTHash().equals(theSearch.getAbstractsMaxCountHash())))) {
                 // in case that the neardhthash is different from the maxcounthash attach also the neardhthash-container
-                indexabstractContainercount += (theSearch.IACount.get(theSearch.IAneardhthash)).intValue();
-                indexabstract.append("indexabstract." + theSearch.IAneardhthash + "=").append(theSearch.IAResults.get(theSearch.IAneardhthash)).append(serverCore.CRLF_STRING);
+                indexabstractContainercount += theSearch.abstractsCount(theSearch.getAbstractsNearDHTHash());
+                indexabstract.append("indexabstract." + theSearch.getAbstractsNearDHTHash() + "=").append(theSearch.abstractsString(theSearch.getAbstractsNearDHTHash())).append(serverCore.CRLF_STRING);
             }
             //System.out.println("DEBUG-ABSTRACTGENERATION: maxcounthash = " + maxcounthash);
             //System.out.println("DEBUG-ABSTRACTGENERATION: neardhthash = "+ neardhthash);
@@ -373,8 +373,8 @@ public final class search {
             theQuery.remotepeer = sb.peers.lookupByIP(natLib.getInetAddress(client), true, false, false);
             theQuery.resultcount = (theSearch == null) ? 0 : theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize();
             theQuery.searchtime = System.currentTimeMillis() - timestamp;
-            theQuery.urlretrievaltime = (theSearch == null) ? 0 : theSearch.snippets.getURLRetrievalTime();
-            theQuery.snippetcomputationtime = (theSearch == null) ? 0 : theSearch.snippets.getSnippetComputationTime();
+            theQuery.urlretrievaltime = (theSearch == null) ? 0 : theSearch.result().getURLRetrievalTime();
+            theQuery.snippetcomputationtime = (theSearch == null) ? 0 : theSearch.result().getSnippetComputationTime();
             sb.remoteSearches.add(theQuery);
 
             // update the search tracker
diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java
index ba7f6628c..4aecb8378 100644
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@@ -472,8 +472,8 @@
             // prepare search statistics
             theQuery.resultcount = theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize();
             theQuery.searchtime = System.currentTimeMillis() - timestamp;
-            theQuery.urlretrievaltime = theSearch.snippets.getURLRetrievalTime();
-            theQuery.snippetcomputationtime = theSearch.snippets.getSnippetComputationTime();
+            theQuery.urlretrievaltime = theSearch.result().getURLRetrievalTime();
+            theQuery.snippetcomputationtime = theSearch.result().getSnippetComputationTime();
             sb.localSearches.add(theQuery);
 
             // check suggestions
diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java
index e7a618466..ea6b494f2 100644
--- a/htroot/yacysearchitem.java
+++ b/htroot/yacysearchitem.java
@@ -158,7 +158,7 @@
             // image search; shows thumbnails
 
             prop.put("content", theQuery.contentdom + 1); // switch on specific content
-            final SnippetCache.MediaSnippet ms = theSearch.snippets.oneImage(item);
+            final SnippetCache.MediaSnippet ms = theSearch.result().oneImage(item);
             if (ms == null) {
                 prop.put("content_items", "0");
             } else {
diff --git a/source/de/anomic/search/SearchEvent.java b/source/de/anomic/search/SearchEvent.java
index 232be4175..2772c37c5 100644
--- a/source/de/anomic/search/SearchEvent.java
+++ b/source/de/anomic/search/SearchEvent.java
@@ -26,10 +26,12 @@
 
 package de.anomic.search;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.TreeMap;
+import java.util.TreeSet;
 
 import de.anomic.crawler.ResultURLs;
 import de.anomic.kelondro.order.Base64Order;
@@ -56,27 +58,29 @@ public final class SearchEvent {
     public static final String NORMALIZING = "normalizing";
     public static final String FINALIZATION = "finalization";
 
-    protected final static int workerThreadCount = 10;
-    public static String lastEventID = "";
     private static final int max_results_preparation = 1000;
 
-    protected long eventTime;
-    protected QueryParams query;
-    protected final Segment indexSegment;
+    // class variables that may be implemented with an abstract class
+    private long eventTime;
+    private QueryParams query;
+    private final Segment indexSegment;
     private final yacySeedDB peers;
-    protected RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container
+    private RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container
+    private SnippetFetcher snippets;
+
+    // class variables for search abstracts
     private final IndexAbstracts rcAbstracts; // cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation
+
+    // class variables for remote searches
     private yacySearch[] primarySearchThreads, secondarySearchThreads;
-    private Thread localSearchThread;
     private final TreeMap<byte[], String> preselectedPeerHashes;
-    //private Object[] references;
-    public TreeMap<byte[], String> IAResults;
-    public TreeMap<byte[], Integer> IACount;
-    public byte[] IAmaxcounthash, IAneardhthash;
-    public ResultURLs crawlResults;
-    public SnippetFetcher snippets;
+    private ResultURLs crawlResults;
+    private Thread localSearchThread;
+    private TreeMap<byte[], String> IAResults;
+    private TreeMap<byte[], Integer> IACount;
+    private byte[] IAmaxcounthash, IAneardhthash;
 
-    @SuppressWarnings("unchecked") SearchEvent(final QueryParams query,
+    @SuppressWarnings("unchecked") SearchEvent(final QueryParams query,
                              final Segment indexSegment,
                              final yacySeedDB peers,
                              final ResultURLs crawlResults,
@@ -180,11 +184,71 @@
 
         // store this search to a cache so it can be re-used
         if (MemoryControl.available() < 1024 * 1024 * 10) SearchEventCache.cleanupEvents(true);
-        lastEventID = query.id(false);
-        SearchEventCache.lastEvents.put(lastEventID, this);
+        SearchEventCache.put(query.id(false), this);
     }
 
-    boolean anyRemoteSearchAlive() {
+    public long getEventTime() {
+        return this.eventTime;
+    }
+
+    public void resetEventTime() {
+        this.eventTime = System.currentTimeMillis();
+    }
+
+    public QueryParams getQuery() {
+        return this.query;
+    }
+
+    public void setQuery(QueryParams query) {
+        this.query = query;
+    }
+
+    public void cleanup() {
+        // execute deletion of failed words
+        int rw = this.snippets.failedURLs.size();
+        if (rw > 0) {
+            final TreeSet<byte[]> removeWords = query.queryHashes;
+            removeWords.addAll(query.excludeHashes);
+            try {
+                final Iterator<byte[]> j = removeWords.iterator();
+                // remove the same url hashes for multiple words
+                while (j.hasNext()) {
+                    this.indexSegment.termIndex().remove(j.next(), this.snippets.failedURLs.keySet());
+                }
+            } catch (IOException e) {
+                e.printStackTrace();
+            }
+            Log.logInfo("SearchEvents", "cleaning up event " + query.id(true) + ", removed " + rw + " URL references on " + removeWords.size() + " words");
+        }
+    }
+
+    public Iterator<Map.Entry<byte[], String>> abstractsString() {
+        return this.IAResults.entrySet().iterator();
+    }
+
+    public String abstractsString(byte[] hash) {
+        return this.IAResults.get(hash);
+    }
+
+    public Iterator<Map.Entry<byte[], Integer>> abstractsCount() {
+        return this.IACount.entrySet().iterator();
+    }
+
+    public int abstractsCount(byte[] hash) {
+        Integer i = this.IACount.get(hash);
+        if (i == null) return -1;
+        return i.intValue();
+    }
+
+    public byte[] getAbstractsMaxCountHash() {
+        return this.IAmaxcounthash;
+    }
+
+    public byte[] getAbstractsNearDHTHash() {
+        return this.IAneardhthash;
+    }
+
+    boolean anyRemoteSearchAlive() {
         // check primary search threads
         if ((this.primarySearchThreads != null) && (this.primarySearchThreads.length != 0)) {
             for (int i = 0; i < this.primarySearchThreads.length; i++) {
@@ -211,10 +275,6 @@
         return count;
     }
 
-    public QueryParams getQuery() {
-        return query;
-    }
-
     public yacySearch[] getPrimarySearchThreads() {
         return primarySearchThreads;
     }
@@ -340,4 +400,8 @@
         //assert e != null;
     }
 
+    public SnippetFetcher result() {
+        return this.snippets;
+    }
+
 }
diff --git a/source/de/anomic/search/SearchEventCache.java b/source/de/anomic/search/SearchEventCache.java
index b623acfb5..b22715de9 100644
--- a/source/de/anomic/search/SearchEventCache.java
+++ b/source/de/anomic/search/SearchEventCache.java
@@ -26,45 +26,34 @@
 
 package de.anomic.search;
 
-import java.io.IOException;
 import java.util.Iterator;
 import java.util.TreeMap;
-import java.util.TreeSet;
 import java.util.concurrent.ConcurrentHashMap;
 
 import de.anomic.crawler.ResultURLs;
 import de.anomic.kelondro.text.Segment;
 import de.anomic.yacy.yacySeedDB;
-import de.anomic.yacy.logging.Log;
 
 public class SearchEventCache {
 
-    protected static ConcurrentHashMap<String, SearchEvent> lastEvents = new ConcurrentHashMap<String, SearchEvent>(); // a cache for objects from this class: re-use old search requests
+    private static ConcurrentHashMap<String, SearchEvent> lastEvents = new ConcurrentHashMap<String, SearchEvent>(); // a cache for objects from this class: re-use old search requests
     public static final long eventLifetime = 60000; // the time an event will stay in the cache, 1 Minute
+
+    public static String lastEventID = "";
+
+    public static void put(String eventID, SearchEvent event) {
+        lastEventID = eventID;
+        lastEvents.put(eventID, event);
+    }
 
     public static void cleanupEvents(final boolean all) {
         // remove old events in the event cache
         final Iterator<SearchEvent> i = lastEvents.values().iterator();
-        SearchEvent cleanEvent;
+        SearchEvent event;
         while (i.hasNext()) {
-            cleanEvent = i.next();
-            if ((all) || (cleanEvent.eventTime + eventLifetime < System.currentTimeMillis())) {
-                // execute deletion of failed words
-                int rw = cleanEvent.snippets.failedURLs.size();
-                if (rw > 0) {
-                    final TreeSet<byte[]> removeWords = cleanEvent.query.queryHashes;
-                    removeWords.addAll(cleanEvent.query.excludeHashes);
-                    try {
-                        final Iterator<byte[]> j = removeWords.iterator();
-                        // remove the same url hashes for multiple words
-                        while (j.hasNext()) {
-                            cleanEvent.indexSegment.termIndex().remove(j.next(), cleanEvent.snippets.failedURLs.keySet());
-                        }
-                    } catch (IOException e) {
-                        e.printStackTrace();
-                    }
-                    Log.logInfo("SearchEvents", "cleaning up event " + cleanEvent.query.id(true) + ", removed " + rw + " URL references on " + removeWords.size() + " words");
-                }
+            event = i.next();
+            if ((all) || (event.getEventTime() + eventLifetime < System.currentTimeMillis())) {
+                event.cleanup();
 
                 // remove the event
                 i.remove();
@@ -86,7 +75,7 @@
 
         String id = query.id(false);
         SearchEvent event = SearchEventCache.lastEvents.get(id);
-        if (Switchboard.getSwitchboard().crawlQueues.noticeURL.size() > 0 && event != null && System.currentTimeMillis() - event.eventTime > 60000) {
+        if (Switchboard.getSwitchboard().crawlQueues.noticeURL.size() > 0 && event != null && System.currentTimeMillis() - event.getEventTime() > 60000) {
            // if a local crawl is ongoing, don't use the result from the cache to use possibly more results that come from the current crawl
            // to prevent that this happens during a person switches between the different result pages, a re-search happens no more than
            // once a minute
@@ -100,24 +89,24 @@
        } else {
            if (event != null) {
                //re-new the event time for this event, so it is not deleted next time too early
-                event.eventTime = System.currentTimeMillis();
+                event.resetEventTime();
                // replace the query, because this contains the current result offset
-                event.query = query;
+                event.setQuery(query);
            }
        }
        if (event == null) {
            // start a new event
            event = new SearchEvent(query, indexSegment, peers, crawlResults, preselectedPeerHashes, generateAbstracts);
        } else {
            // if worker threads had been alive, but did not succeed, start them again to fetch missing links
-           if ((!event.snippets.anyWorkerAlive()) &&
-               (((query.contentdom == QueryParams.CONTENTDOM_IMAGE) && (event.snippets.images.size() + 30 < query.neededResults())) ||
-                (event.snippets.result.size() < query.neededResults() + 10)) &&
+           if ((!event.result().anyWorkerAlive()) &&
+               (((query.contentdom == QueryParams.CONTENTDOM_IMAGE) && (event.result().images.size() + 30 < query.neededResults())) ||
+                (event.result().result.size() < query.neededResults() + 10)) &&
                //(event.query.onlineSnippetFetch) &&
-               (event.getRankingResult().getLocalResourceSize() + event.getRankingResult().getRemoteResourceSize() > event.snippets.result.size())) {
+               (event.getRankingResult().getLocalResourceSize() + event.getRankingResult().getRemoteResourceSize() > event.result().result.size())) {
                // set new timeout
-               event.eventTime = System.currentTimeMillis();
+               event.resetEventTime();
                // start worker threads to fetch urls and snippets
-               event.snippets.restartWorker();
+               event.result().restartWorker();
            }
        }
diff --git a/source/de/anomic/search/SnippetFetcher.java b/source/de/anomic/search/SnippetFetcher.java
index e7ae3a4bb..6e321f270 100644
--- a/source/de/anomic/search/SnippetFetcher.java
+++ b/source/de/anomic/search/SnippetFetcher.java
@@ -50,8 +50,8 @@
 
     protected final static int workerThreadCount = 10;
 
     // input values
-    private final RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container
-    private final QueryParams query;
+    final RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container
+    final QueryParams query;
     private final Segment indexSegment;
     private final yacySeedDB peers;
@@ -103,7 +103,7 @@
 
     public void restartWorker() {
         if (anyWorkerAlive()) return;
-        this.workerThreads = new Worker[SearchEvent.workerThreadCount];
+        this.workerThreads = new Worker[workerThreadCount];
         Worker worker;
         for (int i = 0; i < workerThreads.length; i++) {
             worker = new Worker(i, 6000, (query.onlineSnippetFetch) ? 2 : 0);
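
Note on the accessor API introduced above: callers no longer reach into SearchEvent's formerly public fields (snippets, IACount, IAResults, IAmaxcounthash, IAneardhthash) but go through result(), abstractsCount(), abstractsString() and the getAbstracts*Hash() getters. One behavioral detail worth noting: the old pattern IACount.get(hash).intValue() threw a NullPointerException for an unknown word hash, while the new abstractsCount(byte[]) returns -1. A minimal caller-side sketch of the new accessors follows; the class and helper name are illustrative only and not part of the patch, and serverCore.CRLF_STRING is replaced by a plain "\r\n" literal to keep the sketch self-contained:

    import java.util.Iterator;
    import java.util.Map;

    public class AbstractsUsageSketch {
        // mirrors the loop in htroot/yacy/search.java after this patch:
        // reads the per-word abstract counts through the new accessors
        // instead of touching theSearch.IACount directly
        static String formatIndexCounts(final SearchEvent theSearch) {
            final StringBuilder out = new StringBuilder();
            final Iterator<Map.Entry<byte[], Integer>> i = theSearch.abstractsCount();
            while (i.hasNext()) {
                final Map.Entry<byte[], Integer> entry = i.next();
                out.append("indexcount.").append(new String(entry.getKey()))
                   .append('=').append(entry.getValue().toString()).append("\r\n");
            }
            return out.toString();
        }
    }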
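Note on the cache lifecycle: the per-event cleanup (removing failed-URL references from the term index) moves out of SearchEventCache.cleanupEvents() into the new SearchEvent.cleanup(), and cache writes now go through SearchEventCache.put(), which also maintains lastEventID; this is why SearchEventPicture now reads SearchEventCache.lastEventID instead of SearchEvent.lastEventID. A short sketch of the resulting lifecycle, assuming the post-patch API (the class and method names below are illustrative):

    public class CacheLifecycleSketch {
        static void illustrate(final SearchEvent event, final String eventID) {
            // what the SearchEvent constructor now does internally via
            // SearchEventCache.put(query.id(false), this): the put() call
            // also records the id as SearchEventCache.lastEventID
            SearchEventCache.put(eventID, event);

            // cleanupEvents(false) expires events older than eventLifetime
            // (60000 ms); cleanupEvents(true) flushes every cached event.
            // Each removed event first deletes its failed-URL word
            // references via SearchEvent.cleanup().
            SearchEventCache.cleanupEvents(false);
        }
    }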