more refactoring: made all variables in SearchEvent private

to prepare for splitting the class into two parts: local and remote search

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6265 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 16 years ago
parent 4b92d0b9b7
commit e7736d9c8d
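In short: all fields of SearchEvent become private, callers go through small accessors (getEventTime/resetEventTime, getQuery/setQuery, abstractsCount/abstractsString, getAbstractsMaxCountHash/getAbstractsNearDHTHash, result(), cleanup()), and the lastEventID bookkeeping moves behind SearchEventCache.put(). A minimal, self-contained sketch of that pattern follows; it uses simplified, illustrative types (Event, EventCache, String ids, a placeholder cleanup), not the actual YaCy classes:

import java.util.Iterator;
import java.util.concurrent.ConcurrentHashMap;

final class Event {
    private long eventTime = System.currentTimeMillis();   // was a public/protected field

    long getEventTime()   { return this.eventTime; }
    void resetEventTime() { this.eventTime = System.currentTimeMillis(); }

    void cleanup() {
        // in SearchEvent this removes the URL references of failed words from the index;
        // here it is only a placeholder
    }
}

final class EventCache {
    private static final ConcurrentHashMap<String, Event> lastEvents =
            new ConcurrentHashMap<String, Event>();
    static final long eventLifetime = 60000;                // one minute, as in SearchEventCache
    static String lastEventID = "";

    static void put(final String eventID, final Event event) {
        lastEventID = eventID;
        lastEvents.put(eventID, event);
    }

    static void cleanupEvents(final boolean all) {
        final Iterator<Event> i = lastEvents.values().iterator();
        while (i.hasNext()) {
            final Event event = i.next();
            if (all || event.getEventTime() + eventLifetime < System.currentTimeMillis()) {
                event.cleanup();                            // the event disposes of its own internals
                i.remove();
            }
        }
    }
}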

@@ -24,9 +24,8 @@
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import de.anomic.http.metadata.RequestHeader;
import de.anomic.search.SearchEvent;
import de.anomic.search.SearchEventCache;
import de.anomic.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@@ -39,7 +38,7 @@ public class SearchEventPicture {
public static ymageMatrix respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
final Switchboard sb = (Switchboard) env;
final String eventID = header.get("event", SearchEvent.lastEventID);
final String eventID = header.get("event", SearchEventCache.lastEventID);
if (eventID == null) return null;
final ymageMatrix yp = NetworkGraph.getSearchEventPicture(sb.peers, eventID);
if (yp == null) return new ymageMatrix(1, 1, ymageMatrix.MODE_SUB, "000000"); // empty image

@@ -279,7 +279,7 @@ public final class search {
// attach information about index abstracts
final StringBuilder indexcount = new StringBuilder();
Map.Entry<byte[], Integer> entry;
final Iterator<Map.Entry<byte[], Integer>> i = theSearch.IACount.entrySet().iterator();
final Iterator<Map.Entry<byte[], Integer>> i = theSearch.abstractsCount();
while (i.hasNext()) {
entry = i.next();
indexcount.append("indexcount.").append(new String(entry.getKey())).append('=').append((entry.getValue()).toString()).append(serverCore.CRLF_STRING);
@@ -290,8 +290,8 @@ public final class search {
byte[] wordhash;
while (j.hasNext()) {
wordhash = j.next();
indexabstractContainercount += (theSearch.IACount.get(wordhash)).intValue();
indexabstract.append("indexabstract." + wordhash + "=").append(theSearch.IAResults.get(wordhash)).append(serverCore.CRLF_STRING);
indexabstractContainercount += theSearch.abstractsCount(wordhash);
indexabstract.append("indexabstract." + wordhash + "=").append(theSearch.abstractsString(wordhash)).append(serverCore.CRLF_STRING);
}
}
prop.put("indexcount", indexcount.toString());
@@ -302,22 +302,22 @@ public final class search {
} else {
joincount = theSearch.getRankingResult().getLocalResourceSize();
prop.put("joincount", Integer.toString(joincount));
accu = theSearch.snippets.completeResults(3000);
accu = theSearch.result().completeResults(3000);
}
// generate compressed index for maxcounthash
// this is not needed if the search is restricted to specific
// urls, because it is a re-search
if ((theSearch.IAmaxcounthash == null) || (urls.length() != 0) || (queryhashes.size() <= 1) || (abstracts.length() == 0)) {
if ((theSearch.getAbstractsMaxCountHash() == null) || (urls.length() != 0) || (queryhashes.size() <= 1) || (abstracts.length() == 0)) {
prop.put("indexabstract", "");
} else if (abstracts.equals("auto")) {
// automatically attach the index abstract for the index that has the most references. This should be our target dht position
indexabstractContainercount += (theSearch.IACount.get(theSearch.IAmaxcounthash)).intValue();
indexabstract.append("indexabstract." + theSearch.IAmaxcounthash + "=").append(theSearch.IAResults.get(theSearch.IAmaxcounthash)).append(serverCore.CRLF_STRING);
if ((theSearch.IAneardhthash != null) && (!(theSearch.IAneardhthash.equals(theSearch.IAmaxcounthash)))) {
indexabstractContainercount += theSearch.abstractsCount(theSearch.getAbstractsMaxCountHash());
indexabstract.append("indexabstract." + theSearch.getAbstractsMaxCountHash() + "=").append(theSearch.abstractsString(theSearch.getAbstractsMaxCountHash())).append(serverCore.CRLF_STRING);
if ((theSearch.getAbstractsNearDHTHash() != null) && (!(theSearch.getAbstractsNearDHTHash().equals(theSearch.getAbstractsMaxCountHash())))) {
// in case that the neardhthash is different from the maxcounthash attach also the neardhthash-container
indexabstractContainercount += (theSearch.IACount.get(theSearch.IAneardhthash)).intValue();
indexabstract.append("indexabstract." + theSearch.IAneardhthash + "=").append(theSearch.IAResults.get(theSearch.IAneardhthash)).append(serverCore.CRLF_STRING);
indexabstractContainercount += theSearch.abstractsCount(theSearch.getAbstractsNearDHTHash());
indexabstract.append("indexabstract." + theSearch.getAbstractsNearDHTHash() + "=").append(theSearch.abstractsString(theSearch.getAbstractsNearDHTHash())).append(serverCore.CRLF_STRING);
}
//System.out.println("DEBUG-ABSTRACTGENERATION: maxcounthash = " + maxcounthash);
//System.out.println("DEBUG-ABSTRACTGENERATION: neardhthash = "+ neardhthash);
@@ -373,8 +373,8 @@ public final class search {
theQuery.remotepeer = sb.peers.lookupByIP(natLib.getInetAddress(client), true, false, false);
theQuery.resultcount = (theSearch == null) ? 0 : theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize();
theQuery.searchtime = System.currentTimeMillis() - timestamp;
theQuery.urlretrievaltime = (theSearch == null) ? 0 : theSearch.snippets.getURLRetrievalTime();
theQuery.snippetcomputationtime = (theSearch == null) ? 0 : theSearch.snippets.getSnippetComputationTime();
theQuery.urlretrievaltime = (theSearch == null) ? 0 : theSearch.result().getURLRetrievalTime();
theQuery.snippetcomputationtime = (theSearch == null) ? 0 : theSearch.result().getSnippetComputationTime();
sb.remoteSearches.add(theQuery);
// update the search tracker

@@ -472,8 +472,8 @@ public class yacysearch {
// prepare search statistics
theQuery.resultcount = theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize();
theQuery.searchtime = System.currentTimeMillis() - timestamp;
theQuery.urlretrievaltime = theSearch.snippets.getURLRetrievalTime();
theQuery.snippetcomputationtime = theSearch.snippets.getSnippetComputationTime();
theQuery.urlretrievaltime = theSearch.result().getURLRetrievalTime();
theQuery.snippetcomputationtime = theSearch.result().getSnippetComputationTime();
sb.localSearches.add(theQuery);
// check suggestions

@@ -158,7 +158,7 @@ public class yacysearchitem {
// image search; shows thumbnails
prop.put("content", theQuery.contentdom + 1); // switch on specific content
final SnippetCache.MediaSnippet ms = theSearch.snippets.oneImage(item);
final SnippetCache.MediaSnippet ms = theSearch.result().oneImage(item);
if (ms == null) {
prop.put("content_items", "0");
} else {

@@ -26,10 +26,12 @@
package de.anomic.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import de.anomic.crawler.ResultURLs;
import de.anomic.kelondro.order.Base64Order;
@@ -56,27 +58,29 @@ public final class SearchEvent {
public static final String NORMALIZING = "normalizing";
public static final String FINALIZATION = "finalization";
protected final static int workerThreadCount = 10;
public static String lastEventID = "";
private static final int max_results_preparation = 1000;
protected long eventTime;
protected QueryParams query;
protected final Segment indexSegment;
// class variables that may be implemented with an abstract class
private long eventTime;
private QueryParams query;
private final Segment indexSegment;
private final yacySeedDB peers;
protected RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container
private RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container
private SnippetFetcher snippets;
// class variables for search abstracts
private final IndexAbstracts rcAbstracts; // cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation
// class variables for remote searches
private yacySearch[] primarySearchThreads, secondarySearchThreads;
private Thread localSearchThread;
private final TreeMap<byte[], String> preselectedPeerHashes;
//private Object[] references;
public TreeMap<byte[], String> IAResults;
public TreeMap<byte[], Integer> IACount;
public byte[] IAmaxcounthash, IAneardhthash;
public ResultURLs crawlResults;
public SnippetFetcher snippets;
private ResultURLs crawlResults;
private Thread localSearchThread;
private TreeMap<byte[], String> IAResults;
private TreeMap<byte[], Integer> IACount;
private byte[] IAmaxcounthash, IAneardhthash;
@SuppressWarnings("unchecked") SearchEvent(final QueryParams query,
@SuppressWarnings("unchecked") SearchEvent(final QueryParams query,
final Segment indexSegment,
final yacySeedDB peers,
final ResultURLs crawlResults,
@@ -180,11 +184,71 @@ public final class SearchEvent {
// store this search to a cache so it can be re-used
if (MemoryControl.available() < 1024 * 1024 * 10) SearchEventCache.cleanupEvents(true);
lastEventID = query.id(false);
SearchEventCache.lastEvents.put(lastEventID, this);
SearchEventCache.put(query.id(false), this);
}
boolean anyRemoteSearchAlive() {
public long getEventTime() {
return this.eventTime;
}
public void resetEventTime() {
this.eventTime = System.currentTimeMillis();
}
public QueryParams getQuery() {
return this.query;
}
public void setQuery(QueryParams query) {
this.query = query;
}
public void cleanup() {
// execute deletion of failed words
int rw = this.snippets.failedURLs.size();
if (rw > 0) {
final TreeSet<byte[]> removeWords = query.queryHashes;
removeWords.addAll(query.excludeHashes);
try {
final Iterator<byte[]> j = removeWords.iterator();
// remove the same url hashes for multiple words
while (j.hasNext()) {
this.indexSegment.termIndex().remove(j.next(), this.snippets.failedURLs.keySet());
}
} catch (IOException e) {
e.printStackTrace();
}
Log.logInfo("SearchEvents", "cleaning up event " + query.id(true) + ", removed " + rw + " URL references on " + removeWords.size() + " words");
}
}
public Iterator<Map.Entry<byte[], String>> abstractsString() {
return this.IAResults.entrySet().iterator();
}
public String abstractsString(byte[] hash) {
return this.IAResults.get(hash);
}
public Iterator<Map.Entry<byte[], Integer>> abstractsCount() {
return this.IACount.entrySet().iterator();
}
public int abstractsCount(byte[] hash) {
Integer i = this.IACount.get(hash);
if (i == null) return -1;
return i.intValue();
}
public byte[] getAbstractsMaxCountHash() {
return this.IAmaxcounthash;
}
public byte[] getAbstractsNearDHTHash() {
return this.IAneardhthash;
}
boolean anyRemoteSearchAlive() {
// check primary search threads
if ((this.primarySearchThreads != null) && (this.primarySearchThreads.length != 0)) {
for (int i = 0; i < this.primarySearchThreads.length; i++) {
@@ -211,10 +275,6 @@ public final class SearchEvent {
return count;
}
public QueryParams getQuery() {
return query;
}
public yacySearch[] getPrimarySearchThreads() {
return primarySearchThreads;
}
@@ -340,4 +400,8 @@ public final class SearchEvent {
//assert e != null;
}
public SnippetFetcher result() {
return this.snippets;
}
}

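For callers such as the search.java servlet above, the migration looks roughly like the hypothetical helper below (AbstractsFormatter/appendAbstract are illustrative names, not part of the commit). One behavioral detail worth noting: abstractsCount(hash) returns -1 for a hash without an entry, where the old IACount.get(hash).intValue() chain would have thrown a NullPointerException.

import de.anomic.search.SearchEvent;

final class AbstractsFormatter {
    static void appendAbstract(final StringBuilder out, final SearchEvent theSearch, final byte[] wordhash) {
        final int count = theSearch.abstractsCount(wordhash);     // was: theSearch.IACount.get(wordhash).intValue()
        if (count < 0) return;                                     // hash has no abstract
        out.append("indexabstract.").append(new String(wordhash)).append('=')
           .append(theSearch.abstractsString(wordhash));           // was: theSearch.IAResults.get(wordhash)
    }
}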
@@ -26,45 +26,34 @@
package de.anomic.search;
import java.io.IOException;
import java.util.Iterator;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import de.anomic.crawler.ResultURLs;
import de.anomic.kelondro.text.Segment;
import de.anomic.yacy.yacySeedDB;
import de.anomic.yacy.logging.Log;
public class SearchEventCache {
protected static ConcurrentHashMap<String, SearchEvent> lastEvents = new ConcurrentHashMap<String, SearchEvent>(); // a cache for objects from this class: re-use old search requests
private static ConcurrentHashMap<String, SearchEvent> lastEvents = new ConcurrentHashMap<String, SearchEvent>(); // a cache for objects from this class: re-use old search requests
public static final long eventLifetime = 60000; // the time an event will stay in the cache, 1 Minute
public static String lastEventID = "";
public static void put(String eventID, SearchEvent event) {
lastEventID = eventID;
lastEvents.put(eventID, event);
}
public static void cleanupEvents(final boolean all) {
// remove old events in the event cache
final Iterator<SearchEvent> i = lastEvents.values().iterator();
SearchEvent cleanEvent;
SearchEvent event;
while (i.hasNext()) {
cleanEvent = i.next();
if ((all) || (cleanEvent.eventTime + eventLifetime < System.currentTimeMillis())) {
// execute deletion of failed words
int rw = cleanEvent.snippets.failedURLs.size();
if (rw > 0) {
final TreeSet<byte[]> removeWords = cleanEvent.query.queryHashes;
removeWords.addAll(cleanEvent.query.excludeHashes);
try {
final Iterator<byte[]> j = removeWords.iterator();
// remove the same url hashes for multiple words
while (j.hasNext()) {
cleanEvent.indexSegment.termIndex().remove(j.next(), cleanEvent.snippets.failedURLs.keySet());
}
} catch (IOException e) {
e.printStackTrace();
}
Log.logInfo("SearchEvents", "cleaning up event " + cleanEvent.query.id(true) + ", removed " + rw + " URL references on " + removeWords.size() + " words");
}
event = i.next();
if ((all) || (event.getEventTime() + eventLifetime < System.currentTimeMillis())) {
event.cleanup();
// remove the event
i.remove();
@@ -86,7 +75,7 @@ public class SearchEventCache {
String id = query.id(false);
SearchEvent event = SearchEventCache.lastEvents.get(id);
if (Switchboard.getSwitchboard().crawlQueues.noticeURL.size() > 0 && event != null && System.currentTimeMillis() - event.eventTime > 60000) {
if (Switchboard.getSwitchboard().crawlQueues.noticeURL.size() > 0 && event != null && System.currentTimeMillis() - event.getEventTime() > 60000) {
// if a local crawl is ongoing, don't use the result from the cache to use possibly more results that come from the current crawl
// to prevent that this happens during a person switches between the different result pages, a re-search happens no more than
// once a minute
@@ -95,9 +84,9 @@ public class SearchEventCache {
} else {
if (event != null) {
//re-new the event time for this event, so it is not deleted next time too early
event.eventTime = System.currentTimeMillis();
event.resetEventTime();
// replace the query, because this contains the current result offset
event.query = query;
event.setQuery(query);
}
}
if (event == null) {
@@ -105,15 +94,15 @@ public class SearchEventCache {
event = new SearchEvent(query, indexSegment, peers, crawlResults, preselectedPeerHashes, generateAbstracts);
} else {
// if worker threads had been alive, but did not succeed, start them again to fetch missing links
if ((!event.snippets.anyWorkerAlive()) &&
(((query.contentdom == QueryParams.CONTENTDOM_IMAGE) && (event.snippets.images.size() + 30 < query.neededResults())) ||
(event.snippets.result.size() < query.neededResults() + 10)) &&
if ((!event.result().anyWorkerAlive()) &&
(((query.contentdom == QueryParams.CONTENTDOM_IMAGE) && (event.result().images.size() + 30 < query.neededResults())) ||
(event.result().result.size() < query.neededResults() + 10)) &&
//(event.query.onlineSnippetFetch) &&
(event.getRankingResult().getLocalResourceSize() + event.getRankingResult().getRemoteResourceSize() > event.snippets.result.size())) {
(event.getRankingResult().getLocalResourceSize() + event.getRankingResult().getRemoteResourceSize() > event.result().result.size())) {
// set new timeout
event.eventTime = System.currentTimeMillis();
event.resetEventTime();
// start worker threads to fetch urls and snippets
event.snippets.restartWorker();
event.result().restartWorker();
}
}

@@ -50,8 +50,8 @@ public class SnippetFetcher {
protected final static int workerThreadCount = 10;
// input values
private final RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container
private final QueryParams query;
final RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container
final QueryParams query;
private final Segment indexSegment;
private final yacySeedDB peers;
@@ -103,7 +103,7 @@ public class SnippetFetcher {
public void restartWorker() {
if (anyWorkerAlive()) return;
this.workerThreads = new Worker[SearchEvent.workerThreadCount];
this.workerThreads = new Worker[workerThreadCount];
Worker worker;
for (int i = 0; i < workerThreads.length; i++) {
worker = new Worker(i, 6000, (query.onlineSnippetFetch) ? 2 : 0);
