refactoring of search processes

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4030 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 18 years ago
parent 5dee7e9c29
commit 5605887571

@ -48,7 +48,7 @@ import java.util.Map;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSearchEvent;
import de.anomic.plasma.plasmaSearchTimingProfile;
import de.anomic.plasma.plasmaSearchProcessing;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects;
@ -70,14 +70,14 @@ public class PerformanceSearch_p {
if (post.containsKey("submitlocalprofilecustom")) {
// first count percentages
int c = 0;
for (int i = 0; i < plasmaSearchTimingProfile.sequence.length; i++) {
c += post.getInt("searchProcessLocalTime_" + plasmaSearchTimingProfile.sequence[i], 0);
for (int i = 0; i < plasmaSearchProcessing.sequence.length; i++) {
c += post.getInt("searchProcessLocalTime_" + plasmaSearchProcessing.sequence[i], 0);
}
// if check is ok set new values
if (c == 100) {
for (int i = 0; i < plasmaSearchTimingProfile.sequence.length; i++) {
sb.setConfig("searchProcessLocalTime_" + plasmaSearchTimingProfile.sequence[i], post.get("searchProcessLocalTime_" + plasmaSearchTimingProfile.sequence[i], ""));
sb.setConfig("searchProcessLocalCount_" + plasmaSearchTimingProfile.sequence[i], post.get("searchProcessLocalCount_" + plasmaSearchTimingProfile.sequence[i], ""));
for (int i = 0; i < plasmaSearchProcessing.sequence.length; i++) {
sb.setConfig("searchProcessLocalTime_" + plasmaSearchProcessing.sequence[i], post.get("searchProcessLocalTime_" + plasmaSearchProcessing.sequence[i], ""));
sb.setConfig("searchProcessLocalCount_" + plasmaSearchProcessing.sequence[i], post.get("searchProcessLocalCount_" + plasmaSearchProcessing.sequence[i], ""));
}
prop.put("submitlocalrespond", 1);
} else {
@ -85,9 +85,9 @@ public class PerformanceSearch_p {
}
}
if (post.containsKey("submitlocalprofiledefault")) {
for (int i = 0; i < plasmaSearchTimingProfile.sequence.length; i++) {
sb.setConfig("searchProcessLocalTime_" + plasmaSearchTimingProfile.sequence[i], (String) defaultSettings.get("searchProcessLocalTime_" + plasmaSearchTimingProfile.sequence[i]));
sb.setConfig("searchProcessLocalCount_" + plasmaSearchTimingProfile.sequence[i], (String) defaultSettings.get("searchProcessLocalCount_" + plasmaSearchTimingProfile.sequence[i]));
for (int i = 0; i < plasmaSearchProcessing.sequence.length; i++) {
sb.setConfig("searchProcessLocalTime_" + plasmaSearchProcessing.sequence[i], (String) defaultSettings.get("searchProcessLocalTime_" + plasmaSearchProcessing.sequence[i]));
sb.setConfig("searchProcessLocalCount_" + plasmaSearchProcessing.sequence[i], (String) defaultSettings.get("searchProcessLocalCount_" + plasmaSearchProcessing.sequence[i]));
}
prop.put("submitlocalrespond", 2);
}
@ -100,12 +100,12 @@ public class PerformanceSearch_p {
long t;
int c;
char sequence;
if (se != null) for (int i = 0; i < plasmaSearchTimingProfile.sequence.length; i++) {
t = se.getLocalTiming().getYieldTime(plasmaSearchTimingProfile.sequence[i]);
if (se != null) for (int i = 0; i < plasmaSearchProcessing.sequence.length; i++) {
t = se.getLocalTiming().getYieldTime(plasmaSearchProcessing.sequence[i]);
if (t > 0) time += t;
}
for (int i = 0; i < plasmaSearchTimingProfile.sequence.length; i++) {
sequence = plasmaSearchTimingProfile.sequence[i];
for (int i = 0; i < plasmaSearchProcessing.sequence.length; i++) {
sequence = plasmaSearchProcessing.sequence[i];
prop.put("searchProcessLocalTime_" + sequence, sb.getConfig("searchProcessLocalTime_" + sequence, ""));
prop.put("searchProcessLocalCount_" + sequence, sb.getConfig("searchProcessLocalCount_" + sequence, ""));
if (se == null) {

@ -64,9 +64,10 @@ import de.anomic.plasma.plasmaSearchEvent;
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSearchPostOrder;
import de.anomic.plasma.plasmaSearchTimingProfile;
import de.anomic.plasma.plasmaSearchProcessing;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaWordIndex;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -158,11 +159,11 @@ public final class search {
// prepare a search profile
plasmaSearchRankingProfile rankingProfile = (profile.length() == 0) ? new plasmaSearchRankingProfile(contentdom) : new plasmaSearchRankingProfile("", profile);
plasmaSearchTimingProfile localTiming = new plasmaSearchTimingProfile(squery.maximumTime, squery.wantedResults);
plasmaSearchTimingProfile remoteTiming = null;
plasmaSearchProcessing localTiming = new plasmaSearchProcessing(squery.maximumTime, squery.wantedResults);
plasmaSearchProcessing remoteTiming = null;
theSearch = new plasmaSearchEvent(squery, rankingProfile, localTiming, remoteTiming, true, yacyCore.log, sb.wordIndex, sb.wordIndex.loadedURL, sb.snippetCache, null);
Map[] containers = theSearch.localSearchContainers(plasmaSearchQuery.hashes2Set(urls));
theSearch = new plasmaSearchEvent(squery, rankingProfile, localTiming, remoteTiming, true, yacyCore.log, sb.wordIndex, sb.snippetCache, null);
Map[] containers = localTiming.localSearchContainers(squery, sb.wordIndex, plasmaSearchQuery.hashes2Set(urls));
if (containers != null) {
Iterator ci = containers[0].entrySet().iterator();
Map.Entry entry;
@ -186,15 +187,14 @@ public final class search {
// prepare a search profile
plasmaSearchRankingProfile rankingProfile = (profile.length() == 0) ? new plasmaSearchRankingProfile(contentdom) : new plasmaSearchRankingProfile("", profile);
plasmaSearchTimingProfile localTiming = new plasmaSearchTimingProfile(squery.maximumTime, squery.wantedResults);
plasmaSearchTimingProfile remoteTiming = null;
plasmaSearchProcessing localTiming = new plasmaSearchProcessing(squery.maximumTime, squery.wantedResults);
plasmaSearchProcessing remoteTiming = null;
theSearch = new plasmaSearchEvent(squery,
rankingProfile, localTiming, remoteTiming, true,
yacyCore.log, sb.wordIndex, sb.wordIndex.loadedURL,
yacyCore.log, sb.wordIndex,
sb.snippetCache, null);
Map[] containers = theSearch.localSearchContainers(plasmaSearchQuery.hashes2Set(urls));
Map[] containers = localTiming.localSearchContainers(squery, sb.wordIndex, plasmaSearchQuery.hashes2Set(urls));
// set statistic details of search result and find best result index set
if (containers == null) {
prop.putASIS("indexcount", "");
@ -231,7 +231,16 @@ public final class search {
prop.putASIS("indexcount", new String(indexcount));
// join and order the result
indexContainer localResults = theSearch.localSearchJoinExclude(containers[0].values(), containers[1].values());
indexContainer localResults =
(containers == null) ?
plasmaWordIndex.emptyContainer(null) :
localTiming.localSearchJoinExclude(
containers[0].values(),
containers[1].values(),
(squery.queryHashes.size() == 0) ?
0 :
localTiming.getTargetTime(plasmaSearchProcessing.PROCESS_JOIN) * squery.queryHashes.size() / (squery.queryHashes.size() + squery.excludeHashes.size()),
squery.maxDistance);
if (localResults == null) {
joincount = 0;
prop.put("joincount", 0);
@ -239,7 +248,9 @@ public final class search {
} else {
joincount = localResults.size();
prop.putASIS("joincount", Integer.toString(joincount));
acc = theSearch.orderFinal(localResults);
acc = localTiming.orderFinal(squery, rankingProfile, sb.wordIndex, true, localResults);
}
// generate compressed index for maxcounthash
// this is not needed if the search is restricted to specific

@ -68,7 +68,7 @@ import de.anomic.plasma.plasmaSearchImages;
import de.anomic.plasma.plasmaSearchPreOrder;
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSearchTimingProfile;
import de.anomic.plasma.plasmaSearchProcessing;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURL;
@ -285,8 +285,8 @@ public class yacysearch {
20,
constraint);
plasmaSearchRankingProfile ranking = (sb.getConfig("rankingProfile", "").length() == 0) ? new plasmaSearchRankingProfile(contentdomString) : new plasmaSearchRankingProfile("", crypt.simpleDecode(sb.getConfig("rankingProfile", ""), null));
plasmaSearchTimingProfile localTiming = new plasmaSearchTimingProfile(4 * thisSearch.maximumTime / 10, thisSearch.wantedResults);
plasmaSearchTimingProfile remoteTiming = new plasmaSearchTimingProfile(6 * thisSearch.maximumTime / 10, thisSearch.wantedResults);
plasmaSearchProcessing localTiming = new plasmaSearchProcessing(4 * thisSearch.maximumTime / 10, thisSearch.wantedResults);
plasmaSearchProcessing remoteTiming = new plasmaSearchProcessing(6 * thisSearch.maximumTime / 10, thisSearch.wantedResults);
plasmaSearchResults results = new plasmaSearchResults();
String wrongregex = null;

@ -41,17 +41,14 @@
package de.anomic.plasma;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import de.anomic.index.indexContainer;
import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.server.logging.serverLog;
@ -73,7 +70,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
private indexContainer rcContainers; // cache for results
private int rcContainerFlushCount;
private Map rcAbstracts; // cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation
private plasmaSearchTimingProfile profileLocal, profileGlobal;
private plasmaSearchProcessing profileLocal, profileGlobal;
private boolean postsort;
private yacySearch[] primarySearchThreads, secondarySearchThreads;
private long searchtime;
@ -82,21 +79,20 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
public plasmaSearchEvent(plasmaSearchQuery query,
plasmaSearchRankingProfile ranking,
plasmaSearchTimingProfile localTiming,
plasmaSearchTimingProfile remoteTiming,
plasmaSearchProcessing localTiming,
plasmaSearchProcessing remoteTiming,
boolean postsort,
serverLog log,
plasmaWordIndex wordIndex,
plasmaCrawlLURL urlStore,
plasmaSnippetCache snippetCache,
TreeMap preselectedPeerHashes) {
this.log = log;
this.wordIndex = wordIndex;
this.query = query;
this.ranking = ranking;
this.urlStore = urlStore;
this.urlStore = wordIndex.loadedURL;
this.snippetCache = snippetCache;
this.rcContainers = wordIndex.emptyContainer(null);
this.rcContainers = plasmaWordIndex.emptyContainer(null);
this.rcContainerFlushCount = 0;
this.rcAbstracts = (query.queryHashes.size() > 1) ? new TreeMap() : null; // generate abstracts only for combined searches
this.profileLocal = localTiming;
@ -113,7 +109,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
return query;
}
public plasmaSearchTimingProfile getLocalTiming() {
public plasmaSearchProcessing getLocalTiming() {
return profileLocal;
}
@ -152,7 +148,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
// do a global search
// the result of the fetch is then in the rcGlobal
log.logFine("STARTING " + fetchpeers + " THREADS TO CATCH EACH " + profileGlobal.getTargetCount(plasmaSearchTimingProfile.PROCESS_POSTSORT) + " URLs WITHIN " + (profileGlobal.duetime() / 1000) + " SECONDS");
log.logFine("STARTING " + fetchpeers + " THREADS TO CATCH EACH " + profileGlobal.getTargetCount(plasmaSearchProcessing.PROCESS_POSTSORT) + " URLs WITHIN " + (profileGlobal.duetime() / 1000) + " SECONDS");
long secondaryTimeout = System.currentTimeMillis() + profileGlobal.duetime() / 3 * 2;
long primaryTimeout = System.currentTimeMillis() + profileGlobal.duetime();
primarySearchThreads = yacySearch.primaryRemoteSearches(plasmaSearchQuery.hashSet2hashString(query.queryHashes), plasmaSearchQuery.hashSet2hashString(query.excludeHashes), "",
@ -161,7 +157,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
(query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) ? null : preselectedPeerHashes);
// meanwhile do a local search
Map[] searchContainerMaps = localSearchContainers(null);
Map[] searchContainerMaps = profileLocal.localSearchContainers(query, wordIndex, null);
// use the search containers to fill up rcAbstracts locally
/*
@ -189,7 +185,16 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
*/
// try to pre-fetch some LURLs if there is enough time
indexContainer rcLocal = localSearchJoinExclude(searchContainerMaps[0].values(), searchContainerMaps[1].values());
indexContainer rcLocal =
(searchContainerMaps == null) ?
plasmaWordIndex.emptyContainer(null) :
profileLocal.localSearchJoinExclude(
searchContainerMaps[0].values(),
searchContainerMaps[1].values(),
(query.queryHashes.size() == 0) ?
0 :
profileLocal.getTargetTime(plasmaSearchProcessing.PROCESS_JOIN) * query.queryHashes.size() / (query.queryHashes.size() + query.excludeHashes.size()),
query.maxDistance);
prefetchLocal(rcLocal, secondaryTimeout);
// this is temporary debugging code to learn that the index abstracts are fetched correctly
@ -214,7 +219,13 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
log.logFine("SEARCH TIME AFTER GLOBAL-TRIGGER TO " + primarySearchThreads.length + " PEERS: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
// combine the result and order
result = orderFinal(rcLocal);
indexContainer searchResult = plasmaWordIndex.emptyContainer(null);
searchResult.addAllUnique(rcLocal);
searchResult.addAllUnique(rcContainers);
searchResult.sort();
searchResult.uniq(1000);
result = profileLocal.orderFinal(query, ranking, wordIndex, postsort, searchResult);
if (result != null) {
result.globalContributions = globalContributions;
@ -222,9 +233,19 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
this.start(); // start to flush results
}
} else {
Map[] searchContainerMaps = localSearchContainers(null);
indexContainer rcLocal = (searchContainerMaps == null) ? wordIndex.emptyContainer(null) : localSearchJoinExclude(searchContainerMaps[0].values(), searchContainerMaps[1].values());
result = orderFinal(rcLocal);
Map[] searchContainerMaps = profileLocal.localSearchContainers(query, wordIndex, null);
indexContainer rcLocal =
(searchContainerMaps == null) ?
plasmaWordIndex.emptyContainer(null) :
profileLocal.localSearchJoinExclude(
searchContainerMaps[0].values(),
searchContainerMaps[1].values(),
(query.queryHashes.size() == 0) ?
0 :
profileLocal.getTargetTime(plasmaSearchProcessing.PROCESS_JOIN) * query.queryHashes.size() / (query.queryHashes.size() + query.excludeHashes.size()),
query.maxDistance);
result = profileLocal.orderFinal(query, ranking, wordIndex, postsort, rcLocal);
result.globalContributions = 0;
}
@ -333,157 +354,6 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
return wordlist;
}
public Map[] localSearchContainers(Set urlselection) {
// search for the set of hashes and return a map of of wordhash:indexContainer containing the seach result
// retrieve entities that belong to the hashes
profileLocal.startTimer();
long start = System.currentTimeMillis();
Map inclusionContainers = (query.queryHashes.size() == 0) ? new HashMap() : wordIndex.getContainers(
query.queryHashes,
urlselection,
true,
true,
profileLocal.getTargetTime(plasmaSearchTimingProfile.PROCESS_COLLECTION) * query.queryHashes.size() / (query.queryHashes.size() + query.excludeHashes.size()));
if ((inclusionContainers.size() != 0) && (inclusionContainers.size() < query.queryHashes.size())) inclusionContainers = new HashMap(); // prevent that only a subset is returned
long remaintime = profileLocal.getTargetTime(plasmaSearchTimingProfile.PROCESS_COLLECTION) - System.currentTimeMillis() + start;
Map exclusionContainers = ((inclusionContainers == null) || (inclusionContainers.size() == 0) || (remaintime <= 0)) ? new HashMap() : wordIndex.getContainers(
query.excludeHashes,
urlselection,
true,
true,
remaintime);
profileLocal.setYieldTime(plasmaSearchTimingProfile.PROCESS_COLLECTION);
profileLocal.setYieldCount(plasmaSearchTimingProfile.PROCESS_COLLECTION, inclusionContainers.size());
return new Map[]{inclusionContainers, exclusionContainers};
}
public indexContainer localSearchJoinExclude(Collection includeContainers, Collection excludeContainers) {
// join a search result and return the joincount (number of pages after join)
// since this is a conjunction we return an empty entity if any word is not known
if (includeContainers == null) return wordIndex.emptyContainer(null);
// join the result
profileLocal.startTimer();
long start = System.currentTimeMillis();
indexContainer rcLocal = indexContainer.joinContainers(includeContainers,
(query.queryHashes.size() == 0) ? 0 :
profileLocal.getTargetTime(plasmaSearchTimingProfile.PROCESS_JOIN) * query.queryHashes.size() / (query.queryHashes.size() + query.excludeHashes.size()),
query.maxDistance);
long remaining = profileLocal.getTargetTime(plasmaSearchTimingProfile.PROCESS_JOIN) - System.currentTimeMillis() + start;
if ((rcLocal != null) && (remaining > 0)) {
indexContainer.excludeContainers(rcLocal, excludeContainers, remaining);
}
if (rcLocal == null) rcLocal = wordIndex.emptyContainer(null);
profileLocal.setYieldTime(plasmaSearchTimingProfile.PROCESS_JOIN);
profileLocal.setYieldCount(plasmaSearchTimingProfile.PROCESS_JOIN, rcLocal.size());
return rcLocal;
}
public plasmaSearchPostOrder orderFinal(indexContainer rcLocal) {
// we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime
assert (rcLocal != null);
indexContainer searchResult = wordIndex.emptyContainer(null);
long preorderTime = profileLocal.getTargetTime(plasmaSearchTimingProfile.PROCESS_PRESORT);
profileLocal.startTimer();
long pst = System.currentTimeMillis();
searchResult.addAllUnique(rcLocal);
searchResult.addAllUnique(rcContainers);
searchResult.sort();
searchResult.uniq(1000);
preorderTime = preorderTime - (System.currentTimeMillis() - pst);
if (preorderTime < 0) preorderTime = 200;
plasmaSearchPreOrder preorder = new plasmaSearchPreOrder(query, ranking, searchResult, preorderTime);
if (searchResult.size() > query.wantedResults) preorder.remove(true, true);
profileLocal.setYieldTime(plasmaSearchTimingProfile.PROCESS_PRESORT);
profileLocal.setYieldCount(plasmaSearchTimingProfile.PROCESS_PRESORT, rcLocal.size());
// start url-fetch
long postorderTime = profileLocal.getTargetTime(plasmaSearchTimingProfile.PROCESS_POSTSORT);
//System.out.println("DEBUG: postorder-final (urlfetch) maxtime = " + postorderTime);
long postorderLimitTime = (postorderTime < 0) ? Long.MAX_VALUE : (System.currentTimeMillis() + postorderTime);
profileLocal.startTimer();
plasmaSearchPostOrder acc = new plasmaSearchPostOrder(query, ranking);
indexRWIEntry entry;
indexURLEntry page;
Long preranking;
Object[] preorderEntry;
indexURLEntry.Components comp;
String pagetitle, pageurl, pageauthor;
int minEntries = profileLocal.getTargetCount(plasmaSearchTimingProfile.PROCESS_POSTSORT);
try {
ordering: while (preorder.hasNext()) {
if ((System.currentTimeMillis() >= postorderLimitTime) && (acc.sizeFetched() >= minEntries)) break;
preorderEntry = preorder.next();
entry = (indexRWIEntry) preorderEntry[0];
// load only urls if there was not yet a root url of that hash
preranking = (Long) preorderEntry[1];
// find the url entry
page = urlStore.load(entry.urlHash(), entry);
if (page != null) {
comp = page.comp();
pagetitle = comp.title().toLowerCase();
if (comp.url() == null) continue ordering; // rare case where the url is corrupted
pageurl = comp.url().toString().toLowerCase();
pageauthor = comp.author().toLowerCase();
// check exclusion
if (plasmaSearchQuery.matches(pagetitle, query.excludeHashes)) continue ordering;
if (plasmaSearchQuery.matches(pageurl, query.excludeHashes)) continue ordering;
if (plasmaSearchQuery.matches(pageauthor, query.excludeHashes)) continue ordering;
// check url mask
if (!(pageurl.matches(query.urlMask))) continue ordering;
// check constraints
if ((!(query.constraint.equals(plasmaSearchQuery.catchall_constraint))) &&
(query.constraint.get(plasmaCondenser.flag_cat_indexof)) &&
(!(comp.title().startsWith("Index of")))) {
log.logFine("filtered out " + comp.url().toString());
// filter out bad results
Iterator wi = query.queryHashes.iterator();
while (wi.hasNext()) wordIndex.removeEntry((String) wi.next(), page.hash());
} else if (query.contentdom != plasmaSearchQuery.CONTENTDOM_TEXT) {
if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) && (page.laudio() > 0)) acc.addPage(page, preranking);
else if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) && (page.lvideo() > 0)) acc.addPage(page, preranking);
else if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) && (page.limage() > 0)) acc.addPage(page, preranking);
else if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_APP) && (page.lapp() > 0)) acc.addPage(page, preranking);
} else {
acc.addPage(page, preranking);
}
}
}
} catch (kelondroException ee) {
serverLog.logSevere("PLASMA", "Database Failure during plasmaSearch.order: " + ee.getMessage(), ee);
}
profileLocal.setYieldTime(plasmaSearchTimingProfile.PROCESS_URLFETCH);
profileLocal.setYieldCount(plasmaSearchTimingProfile.PROCESS_URLFETCH, acc.sizeFetched());
// start postsorting
profileLocal.startTimer();
acc.sortPages(postsort);
profileLocal.setYieldTime(plasmaSearchTimingProfile.PROCESS_POSTSORT);
profileLocal.setYieldCount(plasmaSearchTimingProfile.PROCESS_POSTSORT, acc.sizeOrdered());
// apply filter
profileLocal.startTimer();
acc.removeRedundant();
profileLocal.setYieldTime(plasmaSearchTimingProfile.PROCESS_FILTER);
profileLocal.setYieldCount(plasmaSearchTimingProfile.PROCESS_FILTER, acc.sizeOrdered());
acc.localContributions = (rcLocal == null) ? 0 : rcLocal.size();
acc.filteredResults = preorder.filteredCount();
return acc;
}
private void prefetchLocal(indexContainer rcLocal, long timeout) {
// pre-fetch some urls to fill LURL ram cache

@ -0,0 +1,435 @@
// plasmaSearchProcessing.java
// (C) 2005 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 17.10.2005 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.plasma;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import de.anomic.index.indexContainer;
import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroException;
import de.anomic.server.logging.serverLog;
/**
*
* This class provides search processes and keeps a timing record of the processes
* It shall be used to initiate a search and also to evaluate
* the real obtained timings after a search is performed
*/
public class plasmaSearchProcessing implements Cloneable {
// collection:
// time = time to get a RWI out of RAM cache, assortments and WORDS files
// count = maximum number of RWI-entries that shall be collected
// join
// time = time to perform the join between all collected RWIs
// count = maximum number of entries that shall be joined
// presort:
// time = time to do a sort of the joined URL-records
// count = maximum number of entries that shall be pre-sorted
// urlfetch:
// time = time to fetch the real URLs from the LURL database
// count = maximum number of urls that shall be fetched
// postsort:
// time = time for final sort of URLs
// count = maximum number of URLs that shall be retrieved during sort
// snippetfetch:
// time = time to fetch snippets for selected URLs
// count = maximum number of snippets to be fetched
public static final char PROCESS_COLLECTION = 'c';
public static final char PROCESS_JOIN = 'j';
public static final char PROCESS_PRESORT = 'r';
public static final char PROCESS_URLFETCH = 'u';
public static final char PROCESS_POSTSORT = 'o';
public static final char PROCESS_FILTER = 'f';
public static final char PROCESS_SNIPPETFETCH = 's';
private static final long minimumTargetTime = 100;
public static char[] sequence = new char[]{
PROCESS_COLLECTION,
PROCESS_JOIN,
PROCESS_PRESORT,
PROCESS_URLFETCH,
PROCESS_POSTSORT,
PROCESS_FILTER,
PROCESS_SNIPPETFETCH
};
private HashMap targetTime;
private HashMap targetCount;
private HashMap yieldTime;
private HashMap yieldCount;
private long timer;
/**
 * Creates an empty processing profile: all four target/yield maps start
 * blank and the internal stopwatch is cleared.
 */
private plasmaSearchProcessing() {
    this.timer = 0;
    this.targetTime = new HashMap();
    this.yieldTime = new HashMap();
    this.targetCount = new HashMap();
    this.yieldCount = new HashMap();
}
/**
 * Builds a profile by splitting one overall time budget and one result
 * count across the seven search phases using fixed fractions (twelfths
 * of the total time; multiples of the wanted count).
 * @param time total time budget in milliseconds for the whole search
 * @param count number of wanted results
 */
public plasmaSearchProcessing(long time, int count) {
this(
3 * time / 12, 10 * count, // collection
1 * time / 12, 10 * count, // join
1 * time / 12, 10 * count, // presort
2 * time / 12, 5 * count, // urlfetch
3 * time / 12, count, // postsort
1 * time / 12, count, // filter
1 * time / 12, 1 // snippetfetch
);
}
public plasmaSearchProcessing(
long time_collection, int count_collection,
long time_join, int count_join,
long time_presort, int count_presort,
long time_urlfetch, int count_urlfetch,
long time_postsort, int count_postsort,
long time_filter, int count_filter,
long time_snippetfetch, int count_snippetfetch) {
this();
targetTime.put(new Character(PROCESS_COLLECTION), new Long(time_collection));
targetTime.put(new Character(PROCESS_JOIN), new Long(time_join));
targetTime.put(new Character(PROCESS_PRESORT), new Long(time_presort));
targetTime.put(new Character(PROCESS_URLFETCH), new Long(time_urlfetch));
targetTime.put(new Character(PROCESS_POSTSORT), new Long(time_postsort));
targetTime.put(new Character(PROCESS_FILTER), new Long(time_filter));
targetTime.put(new Character(PROCESS_SNIPPETFETCH), new Long(time_snippetfetch));
targetCount.put(new Character(PROCESS_COLLECTION), new Integer(count_collection));
targetCount.put(new Character(PROCESS_JOIN), new Integer(count_join));
targetCount.put(new Character(PROCESS_PRESORT), new Integer(count_presort));
targetCount.put(new Character(PROCESS_URLFETCH), new Integer(count_urlfetch));
targetCount.put(new Character(PROCESS_POSTSORT), new Integer(count_postsort));
targetCount.put(new Character(PROCESS_FILTER), new Integer(count_filter));
targetCount.put(new Character(PROCESS_SNIPPETFETCH), new Integer(count_snippetfetch));
}
/**
 * Returns a copy of this profile. The four maps are cloned (shallow map
 * copies; keys and values are immutable wrapper objects, so this is
 * effectively independent). The running stopwatch is NOT carried over:
 * the copy starts with timer = 0 via the private constructor.
 */
public Object clone() {
plasmaSearchProcessing p = new plasmaSearchProcessing();
p.targetTime = (HashMap) this.targetTime.clone();
p.targetCount = (HashMap) this.targetCount.clone();
p.yieldTime = (HashMap) this.yieldTime.clone();
p.yieldCount = (HashMap) this.yieldCount.clone();
return p;
}
/**
 * Reconstructs a profile from its serialized string form (see
 * toString(HashMap, HashMap) for the wire format). Only the target maps
 * are filled; the yield maps stay empty.
 * @param s serialized target times/counts, e.g. "tc3000|cc100|..."
 */
public plasmaSearchProcessing(String s) {
    this(); // initialize all four maps and reset the timer, like the other constructors
    intoMap(s, targetTime, targetCount);
}
/**
 * Total due time of a search: the sum of all configured target times
 * over the whole process sequence, in milliseconds.
 */
public long duetime() {
    long total = 0;
    for (int i = 0; i < sequence.length; i++) {
        Long budget = (Long) targetTime.get(new Character(sequence[i]));
        total += budget.longValue();
    }
    return total;
}
/**
 * Parses a serialized yield report (same wire format as the static
 * toString method) into this profile's yield maps.
 */
public void putYield(String s) {
intoMap(s, yieldTime, yieldCount);
}
/** Serializes the measured (yield) times and counts. */
public String yieldToString() {
return toString(yieldTime, yieldCount);
}
/** Serializes the configured (target) times and counts. */
public String targetToString() {
return toString(targetTime, targetCount);
}
/**
 * Computes the remaining time budget for the given phase: the sum of all
 * target times up to and including that phase, minus the time already
 * spent (yielded) by the phases before it. Unspent time from earlier
 * phases therefore rolls over to later ones.
 * @param type one of the PROCESS_* phase identifiers
 * @return remaining budget in milliseconds; falls back to
 *         minimumTargetTime if the computed budget is negative or the
 *         phase is not in the sequence
 */
public long getTargetTime(char type) {
// sum up all time that was demanded and subtract all that had been wasted
long sum = 0;
Long t;
Character element;
for (int i = 0; i < sequence.length; i++) {
element = new Character(sequence[i]);
t = (Long) targetTime.get(element);
if (t != null) sum += t.longValue();
// stop at the requested phase: its own yield time is not subtracted
if (type == sequence[i]) return (sum < 0) ? minimumTargetTime : sum;
t = (Long) yieldTime.get(element);
if (t != null) sum -= t.longValue();
}
// requested phase unknown; return the minimum budget
return minimumTargetTime;
}
/** @return the configured count limit for the phase, or -1 if unset. */
public int getTargetCount(char type) {
    Integer limit = (Integer) targetCount.get(new Character(type));
    return (limit == null) ? -1 : limit.intValue();
}
/** @return the measured time for the phase in milliseconds, or -1 if not recorded. */
public long getYieldTime(char type) {
    Long spent = (Long) yieldTime.get(new Character(type));
    return (spent == null) ? -1 : spent.longValue();
}
/** @return the measured item count for the phase, or -1 if not recorded. */
public int getYieldCount(char type) {
    Integer produced = (Integer) yieldCount.get(new Character(type));
    return (produced == null) ? -1 : produced.intValue();
}
/** Starts the internal stopwatch used by setYieldTime. */
public void startTimer() {
    timer = System.currentTimeMillis();
}
/** Records the elapsed time since startTimer() as the phase's yield time. */
public void setYieldTime(char type) {
    yieldTime.put(new Character(type), new Long(System.currentTimeMillis() - timer));
}
/** Records the number of items the phase produced. */
public void setYieldCount(char type, int count) {
    yieldCount.put(new Character(type), new Integer(count));
}
/** Debug-oriented report combining the target and yield serializations. */
public String reportToString() {
return "target=" + toString(targetTime, targetCount) + "; yield=" + toString(yieldTime, yieldCount);
}
/**
 * Serializes a (time, count) map pair into a compact string suitable for
 * http headers or post arguments (contains no '=' and no spaces):
 * "t&lt;phase&gt;&lt;millis&gt;|c&lt;phase&gt;&lt;count&gt;|" repeated for every phase in
 * the process sequence; missing entries are written as "0".
 */
public static String toString(HashMap time, HashMap count) {
    StringBuffer out = new StringBuffer(sequence.length * 10);
    for (int i = 0; i < sequence.length; i++) {
        Character phase = new Character(sequence[i]);
        Long t = (Long) time.get(phase);
        Integer c = (Integer) count.get(phase);
        out.append('t').append(phase).append((t == null) ? "0" : t.toString()).append('|');
        out.append('c').append(phase).append((c == null) ? "0" : c.toString()).append('|');
    }
    return out.toString();
}
/**
 * Reverse of toString(HashMap, HashMap): parses a serialized
 * "t&lt;phase&gt;&lt;millis&gt;|c&lt;phase&gt;&lt;count&gt;|..." string back into the given maps.
 * Fixes over the previous version:
 * - the parse position is now advanced after every field; before, p was
 *   never moved past the first '|', so any non-empty input looped forever
 * - entries are stored under Character keys, matching the keys written by
 *   the constructors and read by getTargetTime()/getYieldTime() etc.;
 *   the old code stored one-character String keys that no reader in this
 *   class could ever find
 * @param s serialized profile string as produced by toString
 * @param time map receiving the 't' fields (Character -&gt; Long)
 * @param count map receiving the 'c' fields (Character -&gt; Integer)
 */
public static void intoMap(String s, HashMap time, HashMap count) {
    int p = 0;
    int p1;
    while ((p < s.length()) && ((p1 = s.indexOf('|', p)) > 0)) {
        char ct = s.charAt(p); // 't' = time field, anything else = count field
        Character phase = new Character(s.charAt(p + 1));
        String v = s.substring(p + 2, p1);
        if (ct == 't') {
            time.put(phase, new Long(Long.parseLong(v)));
        } else {
            count.put(phase, new Integer(Integer.parseInt(v)));
        }
        p = p1 + 1; // advance past the '|' separator (previously missing)
    }
}
// the processes
/**
 * Search process phase 'collection': retrieves one indexContainer per
 * query word hash from the local word index, plus containers for the
 * words to be excluded. Yield time/count for PROCESS_COLLECTION are
 * recorded on this profile.
 * @param query the search query (provides inclusion and exclusion hashes)
 * @param wordIndex the local RWI index to read from
 * @param urlselection optional restriction to a set of url hashes; may be null
 * @return a two-element array: [0] = wordhash-to-indexContainer map for the
 *         inclusion words, [1] = same for the exclusion words; the maps may
 *         be empty but the array is never null
 */
public Map[] localSearchContainers(
plasmaSearchQuery query,
plasmaWordIndex wordIndex,
Set urlselection) {
// search for the set of hashes and return a map of wordhash:indexContainer containing the search result
// retrieve entities that belong to the hashes
startTimer();
long start = System.currentTimeMillis();
// the collection time budget is shared proportionally between inclusion and
// exclusion lookups, by the number of hashes on each side
// NOTE(review): semantics of the two boolean flags passed to getContainers
// are not visible here -- confirm against plasmaWordIndex
Map inclusionContainers = (query.queryHashes.size() == 0) ? new HashMap() : wordIndex.getContainers(
query.queryHashes,
urlselection,
true,
true,
getTargetTime(plasmaSearchProcessing.PROCESS_COLLECTION) * query.queryHashes.size() / (query.queryHashes.size() + query.excludeHashes.size()));
if ((inclusionContainers.size() != 0) && (inclusionContainers.size() < query.queryHashes.size())) inclusionContainers = new HashMap(); // prevent that only a subset is returned
long remaintime = getTargetTime(plasmaSearchProcessing.PROCESS_COLLECTION) - System.currentTimeMillis() + start;
// exclusion containers are only fetched if something was found and time remains
Map exclusionContainers = ((inclusionContainers == null) || (inclusionContainers.size() == 0) || (remaintime <= 0)) ? new HashMap() : wordIndex.getContainers(
query.excludeHashes,
urlselection,
true,
true,
remaintime);
setYieldTime(plasmaSearchProcessing.PROCESS_COLLECTION);
setYieldCount(plasmaSearchProcessing.PROCESS_COLLECTION, inclusionContainers.size());
return new Map[]{inclusionContainers, exclusionContainers};
}
/**
 * Joins the collected inclusion containers (conjunction) and afterwards
 * removes all entries that also appear in the exclusion containers.
 * Since this is a conjunction, an unknown word yields an empty container.
 *
 * @param includeContainers containers of the required words, may be null
 * @param excludeContainers containers of the forbidden words
 * @param time              time budget for the join operation
 * @param maxDistance       maximum allowed word distance during the join
 * @return the joined (and pruned) container; never null
 */
public indexContainer localSearchJoinExclude(
Collection includeContainers,
Collection excludeContainers,
long time, int maxDistance) {
    if (includeContainers == null) return plasmaWordIndex.emptyContainer(null);
    startTimer();
    final long joinStart = System.currentTimeMillis();
    indexContainer joined = indexContainer.joinContainers(includeContainers, time, maxDistance);
    // spend whatever is left of the join budget on the exclusion step
    final long budgetLeft = getTargetTime(plasmaSearchProcessing.PROCESS_JOIN) - (System.currentTimeMillis() - joinStart);
    if ((joined != null) && (budgetLeft > 0)) {
        indexContainer.excludeContainers(joined, excludeContainers, budgetLeft);
    }
    if (joined == null) joined = plasmaWordIndex.emptyContainer(null);
    setYieldTime(plasmaSearchProcessing.PROCESS_JOIN);
    setYieldCount(plasmaSearchProcessing.PROCESS_JOIN, joined.size());
    return joined;
}
/**
 * Final ordering stage of a local search: pre-sorts the joined result
 * container, fetches URL records for the best-ranked entries within the
 * configured time budgets, filters them against the query (exclusions,
 * url mask, constraints, content domain) and finally post-sorts and
 * de-duplicates the accepted pages.
 *
 * @param query       the search query (hashes, url mask, constraint, contentdom)
 * @param ranking     ranking profile used for pre- and post-ordering
 * @param wordIndex   used to load URL entries and to delete index entries of
 *                    pages that violate the indexof-constraint
 * @param postsort    whether the final page sort shall be applied
 * @param resultIndex the joined RWI container from the previous stages; must not be null
 * @return the accumulated, ordered search results
 */
public plasmaSearchPostOrder orderFinal(
plasmaSearchQuery query,
plasmaSearchRankingProfile ranking,
plasmaWordIndex wordIndex,
boolean postsort,
indexContainer resultIndex) {
// we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime
assert (resultIndex != null);
// --- presort phase: sort and de-duplicate the raw RWI container ---
long preorderTime = getTargetTime(plasmaSearchProcessing.PROCESS_PRESORT);
startTimer();
long pst = System.currentTimeMillis();
resultIndex.sort();
resultIndex.uniq(1000);
// subtract the time already spent on sort/uniq from the presort budget;
// fall back to a small fixed budget if it is already exhausted
preorderTime = preorderTime - (System.currentTimeMillis() - pst);
if (preorderTime < 0) preorderTime = 200;
plasmaSearchPreOrder preorder = new plasmaSearchPreOrder(query, ranking, resultIndex, preorderTime);
// more results than wanted: let the preorder drop surplus entries
if (resultIndex.size() > query.wantedResults) preorder.remove(true, true);
setYieldTime(plasmaSearchProcessing.PROCESS_PRESORT);
setYieldCount(plasmaSearchProcessing.PROCESS_PRESORT, resultIndex.size());
// --- url-fetch phase: load URL records for the pre-ordered entries ---
// start url-fetch
long postorderTime = getTargetTime(plasmaSearchProcessing.PROCESS_POSTSORT);
//System.out.println("DEBUG: postorder-final (urlfetch) maxtime = " + postorderTime);
// a negative budget means "no deadline"
long postorderLimitTime = (postorderTime < 0) ? Long.MAX_VALUE : (System.currentTimeMillis() + postorderTime);
startTimer();
plasmaSearchPostOrder acc = new plasmaSearchPostOrder(query, ranking);
indexRWIEntry entry;
indexURLEntry page;
Long preranking;
Object[] preorderEntry;
indexURLEntry.Components comp;
String pagetitle, pageurl, pageauthor;
int minEntries = getTargetCount(plasmaSearchProcessing.PROCESS_POSTSORT);
try {
// iterate the pre-ordered entries until the deadline passes or enough
// pages have been fetched
ordering: while (preorder.hasNext()) {
if ((System.currentTimeMillis() >= postorderLimitTime) || (acc.sizeFetched() >= minEntries)) break;
preorderEntry = preorder.next();
entry = (indexRWIEntry) preorderEntry[0];
// load only urls if there was not yet a root url of that hash
preranking = (Long) preorderEntry[1];
// find the url entry
page = wordIndex.loadedURL.load(entry.urlHash(), entry);
if (page != null) {
comp = page.comp();
pagetitle = comp.title().toLowerCase();
if (comp.url() == null) continue ordering; // rare case where the url is corrupted
pageurl = comp.url().toString().toLowerCase();
pageauthor = comp.author().toLowerCase();
// check exclusion: drop pages that match any excluded word in
// title, url or author
if (plasmaSearchQuery.matches(pagetitle, query.excludeHashes)) continue ordering;
if (plasmaSearchQuery.matches(pageurl, query.excludeHashes)) continue ordering;
if (plasmaSearchQuery.matches(pageauthor, query.excludeHashes)) continue ordering;
// check url mask
if (!(pageurl.matches(query.urlMask))) continue ordering;
// check constraints: the indexof-constraint requires the page title
// to start with "Index of"; violating pages are removed from the
// word index entirely (treated as bad index entries)
if ((!(query.constraint.equals(plasmaSearchQuery.catchall_constraint))) &&
(query.constraint.get(plasmaCondenser.flag_cat_indexof)) &&
(!(comp.title().startsWith("Index of")))) {
serverLog.logFine("PLASMA", "filtered out " + comp.url().toString());
// filter out bad results
Iterator wi = query.queryHashes.iterator();
while (wi.hasNext()) wordIndex.removeEntry((String) wi.next(), page.hash());
} else if (query.contentdom != plasmaSearchQuery.CONTENTDOM_TEXT) {
// non-text content domains: accept the page only if it carries
// links of the requested media type (audio/video/image/app)
if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) && (page.laudio() > 0)) acc.addPage(page, preranking);
else if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) && (page.lvideo() > 0)) acc.addPage(page, preranking);
else if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) && (page.limage() > 0)) acc.addPage(page, preranking);
else if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_APP) && (page.lapp() > 0)) acc.addPage(page, preranking);
} else {
acc.addPage(page, preranking);
}
}
}
} catch (kelondroException ee) {
serverLog.logSevere("PLASMA", "Database Failure during plasmaSearch.order: " + ee.getMessage(), ee);
}
setYieldTime(plasmaSearchProcessing.PROCESS_URLFETCH);
setYieldCount(plasmaSearchProcessing.PROCESS_URLFETCH, acc.sizeFetched());
// --- postsort phase ---
// start postsorting
startTimer();
acc.sortPages(postsort);
setYieldTime(plasmaSearchProcessing.PROCESS_POSTSORT);
setYieldCount(plasmaSearchProcessing.PROCESS_POSTSORT, acc.sizeOrdered());
// --- filter phase: remove redundant results ---
// apply filter
startTimer();
acc.removeRedundant();
setYieldTime(plasmaSearchProcessing.PROCESS_FILTER);
setYieldCount(plasmaSearchProcessing.PROCESS_FILTER, acc.sizeOrdered());
acc.localContributions = (resultIndex == null) ? 0 : resultIndex.size();
acc.filteredResults = preorder.filteredCount();
return acc;
}
}

@ -1,282 +0,0 @@
// plasmaSearchProfile.java
// -----------------------
// part of YACY
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2005
// Created: 17.10.2005
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this software or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notice above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
package de.anomic.plasma;
import java.util.HashMap;
/**
*
* This class provides timing properties for search processes
* It shall be used to initiate a search and also to evaluate
* the real obtained timings after a search is performed
*/
public class plasmaSearchTimingProfile implements Cloneable {
// collection:
// time = time to get a RWI out of RAM cache, assortments and WORDS files
// count = maximum number of RWI-entries that shall be collected
// join
// time = time to perform the join between all collected RWIs
// count = maximum number of entries that shall be joined
// presort:
// time = time to do a sort of the joined URL-records
// count = maximum number of entries that shall be pre-sorted
// urlfetch:
// time = time to fetch the real URLs from the LURL database
// count = maximum number of urls that shall be fetched
// postsort:
// time = time for final sort of URLs
// count = maximum number oof URLs that shall be retrieved during sort
// snippetfetch:
// time = time to fetch snippets for selected URLs
// count = maximum number of snipptes to be fetched
public static final char PROCESS_COLLECTION = 'c';
public static final char PROCESS_JOIN = 'j';
public static final char PROCESS_PRESORT = 'r';
public static final char PROCESS_URLFETCH = 'u';
public static final char PROCESS_POSTSORT = 'o';
public static final char PROCESS_FILTER = 'f';
public static final char PROCESS_SNIPPETFETCH = 's';
private static final long minimumTargetTime = 100;
public static char[] sequence = new char[]{
PROCESS_COLLECTION,
PROCESS_JOIN,
PROCESS_PRESORT,
PROCESS_URLFETCH,
PROCESS_POSTSORT,
PROCESS_FILTER,
PROCESS_SNIPPETFETCH
};
private HashMap targetTime;
private HashMap targetCount;
private HashMap yieldTime;
private HashMap yieldCount;
private long timer;
private plasmaSearchTimingProfile() {
targetTime = new HashMap();
targetCount = new HashMap();
yieldTime = new HashMap();
yieldCount = new HashMap();
timer = 0;
}
public plasmaSearchTimingProfile(long time, int count) {
this(
3 * time / 12, 10 * count,
1 * time / 12, 10 * count,
1 * time / 12, 10 * count,
2 * time / 12, 5 * count,
3 * time / 12, count,
1 * time / 12, count,
1 * time / 12, 1
);
}
public plasmaSearchTimingProfile(
long time_collection, int count_collection,
long time_join, int count_join,
long time_presort, int count_presort,
long time_urlfetch, int count_urlfetch,
long time_postsort, int count_postsort,
long time_filter, int count_filter,
long time_snippetfetch, int count_snippetfetch) {
this();
targetTime.put(new Character(PROCESS_COLLECTION), new Long(time_collection));
targetTime.put(new Character(PROCESS_JOIN), new Long(time_join));
targetTime.put(new Character(PROCESS_PRESORT), new Long(time_presort));
targetTime.put(new Character(PROCESS_URLFETCH), new Long(time_urlfetch));
targetTime.put(new Character(PROCESS_POSTSORT), new Long(time_postsort));
targetTime.put(new Character(PROCESS_FILTER), new Long(time_filter));
targetTime.put(new Character(PROCESS_SNIPPETFETCH), new Long(time_snippetfetch));
targetCount.put(new Character(PROCESS_COLLECTION), new Integer(count_collection));
targetCount.put(new Character(PROCESS_JOIN), new Integer(count_join));
targetCount.put(new Character(PROCESS_PRESORT), new Integer(count_presort));
targetCount.put(new Character(PROCESS_URLFETCH), new Integer(count_urlfetch));
targetCount.put(new Character(PROCESS_POSTSORT), new Integer(count_postsort));
targetCount.put(new Character(PROCESS_FILTER), new Integer(count_filter));
targetCount.put(new Character(PROCESS_SNIPPETFETCH), new Integer(count_snippetfetch));
}
public Object clone() {
plasmaSearchTimingProfile p = new plasmaSearchTimingProfile();
p.targetTime = (HashMap) this.targetTime.clone();
p.targetCount = (HashMap) this.targetCount.clone();
p.yieldTime = (HashMap) this.yieldTime.clone();
p.yieldCount = (HashMap) this.yieldCount.clone();
return p;
}
public plasmaSearchTimingProfile(String s) {
targetTime = new HashMap();
targetCount = new HashMap();
yieldTime = new HashMap();
yieldCount = new HashMap();
intoMap(s, targetTime, targetCount);
}
public long duetime() {
// returns the old duetime value as sum of all waiting times
long d = 0;
for (int i = 0; i < sequence.length; i++) {
d += ((Long) targetTime.get(new Character(sequence[i]))).longValue();
}
return d;
}
public void putYield(String s) {
intoMap(s, yieldTime, yieldCount);
}
public String yieldToString() {
return toString(yieldTime, yieldCount);
}
public String targetToString() {
return toString(targetTime, targetCount);
}
public long getTargetTime(char type) {
// sum up all time that was demanded and subtract all that had been wasted
long sum = 0;
Long t;
Character element;
for (int i = 0; i < sequence.length; i++) {
element = new Character(sequence[i]);
t = (Long) targetTime.get(element);
if (t != null) sum += t.longValue();
if (type == sequence[i]) return (sum < 0) ? minimumTargetTime : sum;
t = (Long) yieldTime.get(element);
if (t != null) sum -= t.longValue();
}
return minimumTargetTime;
}
public int getTargetCount(char type) {
Integer i = (Integer) targetCount.get(new Character(type));
if (i == null) return -1; else return i.intValue();
}
public long getYieldTime(char type) {
Long l = (Long) yieldTime.get(new Character(type));
if (l == null) return -1; else return l.longValue();
}
public int getYieldCount(char type) {
Integer i = (Integer) yieldCount.get(new Character(type));
if (i == null) return -1; else return i.intValue();
}
public void startTimer() {
this.timer = System.currentTimeMillis();
}
public void setYieldTime(char type) {
// sets a time that is computed using the timer
long t = System.currentTimeMillis() - this.timer;
yieldTime.put(new Character(type), new Long(t));
}
public void setYieldCount(char type, int count) {
yieldCount.put(new Character(type), new Integer(count));
}
public String reportToString() {
return "target=" + toString(targetTime, targetCount) + "; yield=" + toString(yieldTime, yieldCount);
}
public static String toString(HashMap time, HashMap count) {
// put this into a format in such a way that it can be send in a http header or post argument
// that means that no '=' or spaces are allowed
StringBuffer sb = new StringBuffer(sequence.length * 10);
Character element;
Integer xi;
Long xl;
for (int i = 0; i < sequence.length; i++) {
element = new Character(sequence[i]);
sb.append("t");
sb.append(element);
xl = (Long) time.get(element);
sb.append((xl == null) ? "0" : xl.toString());
sb.append("|");
sb.append("c");
sb.append(element);
xi = (Integer) count.get(element);
sb.append((xi == null) ? "0" : xi.toString());
sb.append("|");
}
return sb.toString();
}
public static void intoMap(String s, HashMap time, HashMap count) {
// this is the reverse method to toString
int p = 0;
char ct;
String elt;
String v;
int p1;
while ((p < s.length()) && ((p1 = s.indexOf('|', p)) > 0)) {
ct = s.charAt(p);
elt = s.substring(p + 1, p + 2);
v = s.substring(p + 2, p1);
if (ct == 't') {
time.put(elt, new Long(Long.parseLong(v)));
} else {
count.put(elt, new Integer(Integer.parseInt(v)));
}
}
}
}

@ -2628,7 +2628,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
ioLinks[1].intValue(),
condenser.RESULT_FLAGS
);
indexContainer wordIdxContainer = wordIndex.emptyContainer(wordHash);
indexContainer wordIdxContainer = plasmaWordIndex.emptyContainer(wordHash);
wordIdxContainer.add(wordIdxEntry);
tmpContainers.add(wordIdxContainer);
}
@ -2894,8 +2894,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
public plasmaSearchResults searchFromLocal(plasmaSearchQuery query,
plasmaSearchRankingProfile ranking,
plasmaSearchTimingProfile localTiming,
plasmaSearchTimingProfile remoteTiming,
plasmaSearchProcessing localTiming,
plasmaSearchProcessing remoteTiming,
boolean postsort,
String client) {
@ -2924,7 +2924,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
//}
// create a new search event
plasmaSearchEvent theSearch = new plasmaSearchEvent(query, ranking, localTiming, remoteTiming, postsort, log, wordIndex, wordIndex.loadedURL, snippetCache, (isRobinsonMode()) ? this.clusterhashes : null);
plasmaSearchEvent theSearch = new plasmaSearchEvent(query, ranking, localTiming, remoteTiming, postsort, log, wordIndex, snippetCache, (isRobinsonMode()) ? this.clusterhashes : null);
plasmaSearchPostOrder acc = theSearch.search();
// fetch snippets

@ -162,7 +162,7 @@ public final class plasmaWordIndex implements indexRI {
return entries.updated();
}
public indexContainer emptyContainer(String wordHash) {
public static indexContainer emptyContainer(String wordHash) {
return new indexContainer(wordHash, indexRWIEntry.urlEntryRow);
}

@ -34,7 +34,6 @@ import java.util.HashSet;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
public class crawlHandler extends DefaultHandler {
@ -110,7 +109,7 @@ public class crawlHandler extends DefaultHandler {
}
}
public void startElement(String uri, String name, String tag, Attributes atts) throws SAXException {
public void startElement(String uri, String name, String tag, Attributes atts) {
if ("channel".equals(tag)) {
channel = new Startpoint();
parsingAttributes = true;

@ -63,7 +63,7 @@ import de.anomic.net.URL;
import de.anomic.plasma.plasmaCondenser;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSearchTimingProfile;
import de.anomic.plasma.plasmaSearchProcessing;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaWordIndex;
@ -350,7 +350,7 @@ public final class yacyClient {
Map abstractCache,
plasmaURLPattern blacklist,
plasmaSnippetCache snippets,
plasmaSearchTimingProfile timingProfile,
plasmaSearchProcessing timingProfile,
plasmaSearchRankingProfile rankingProfile,
kelondroBitfield constraint
) {
@ -375,7 +375,7 @@ public final class yacyClient {
final String salt = yacyNetwork.enrichRequestPost(post, plasmaSwitchboard.getSwitchboard(), target.hash);
long duetime = timingProfile.duetime();
post.putASIS("myseed", yacyCore.seedDB.mySeed.genSeedStr(salt));
post.put("count", timingProfile.getTargetCount(plasmaSearchTimingProfile.PROCESS_POSTSORT));
post.put("count", timingProfile.getTargetCount(plasmaSearchProcessing.PROCESS_POSTSORT));
post.putASIS("resource", ((global) ? "global" : "local"));
post.put("partitions", partitions);
post.putASIS("query", wordhashes);
@ -452,7 +452,7 @@ public final class yacyClient {
final int words = wordhashes.length() / yacySeedDB.commonHashLength;
indexContainer[] container = new indexContainer[words];
for (int i = 0; i < words; i++) {
container[i] = wordIndex.emptyContainer(wordhashes.substring(i * yacySeedDB.commonHashLength, (i + 1) * yacySeedDB.commonHashLength));
container[i] = plasmaWordIndex.emptyContainer(wordhashes.substring(i * yacySeedDB.commonHashLength, (i + 1) * yacySeedDB.commonHashLength));
}
// insert results to containers

@ -57,7 +57,7 @@ import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSearchTimingProfile;
import de.anomic.plasma.plasmaSearchProcessing;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.plasmaWordIndex;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
@ -77,7 +77,7 @@ public class yacySearch extends Thread {
final private yacySeed targetPeer;
private String[] urls;
private int maxDistance;
final private plasmaSearchTimingProfile timingProfile;
final private plasmaSearchProcessing timingProfile;
final private plasmaSearchRankingProfile rankingProfile;
final private String prefer, filter;
final private kelondroBitfield constraint;
@ -86,7 +86,7 @@ public class yacySearch extends Thread {
boolean global, int partitions, yacySeed targetPeer, plasmaCrawlLURL urlManager, plasmaWordIndex wordIndex,
indexContainer containerCache, Map abstractCache,
plasmaURLPattern blacklist, plasmaSnippetCache snippetCache,
plasmaSearchTimingProfile timingProfile, plasmaSearchRankingProfile rankingProfile,
plasmaSearchProcessing timingProfile, plasmaSearchRankingProfile rankingProfile,
kelondroBitfield constraint) {
super("yacySearch_" + targetPeer.getName());
//System.out.println("DEBUG - yacySearch thread " + this.getName() + " initialized " + ((urlhashes.length() == 0) ? "(primary)" : "(secondary)"));
@ -106,7 +106,7 @@ public class yacySearch extends Thread {
this.targetPeer = targetPeer;
this.urls = null;
this.maxDistance = maxDistance;
this.timingProfile = (plasmaSearchTimingProfile) timingProfile.clone();
this.timingProfile = (plasmaSearchProcessing) timingProfile.clone();
this.rankingProfile = rankingProfile;
this.constraint = constraint;
}
@ -138,7 +138,7 @@ public class yacySearch extends Thread {
return this.urls.length;
}
public plasmaSearchTimingProfile timingProfile() {
public plasmaSearchProcessing timingProfile() {
return this.timingProfile;
}
@ -253,7 +253,7 @@ public class yacySearch extends Thread {
plasmaCrawlLURL urlManager, plasmaWordIndex wordIndex,
indexContainer containerCache, Map abstractCache,
int targets, plasmaURLPattern blacklist, plasmaSnippetCache snippetCache,
plasmaSearchTimingProfile timingProfile, plasmaSearchRankingProfile rankingProfile,
plasmaSearchProcessing timingProfile, plasmaSearchRankingProfile rankingProfile,
kelondroBitfield constraint, TreeMap clusterselection) {
// check own peer status
if (yacyCore.seedDB.mySeed == null || yacyCore.seedDB.mySeed.getPublicAddress() == null) { return null; }
@ -277,7 +277,7 @@ public class yacySearch extends Thread {
plasmaCrawlLURL urlManager, plasmaWordIndex wordIndex,
indexContainer containerCache,
String targethash, plasmaURLPattern blacklist, plasmaSnippetCache snippetCache,
plasmaSearchTimingProfile timingProfile, plasmaSearchRankingProfile rankingProfile,
plasmaSearchProcessing timingProfile, plasmaSearchRankingProfile rankingProfile,
kelondroBitfield constraint, TreeMap clusterselection) {
// check own peer status
if (yacyCore.seedDB.mySeed == null || yacyCore.seedDB.mySeed.getPublicAddress() == null) { return null; }

Loading…
Cancel
Save