git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4030 6c8d7289-2bf4-0310-a012-ef5d649a1542pull/1/head
parent
5dee7e9c29
commit
5605887571
@ -0,0 +1,435 @@
|
||||
// plasmaSearchProcess.java
|
||||
// (C) 2005 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
// first published 17.10.2005 on http://yacy.net
|
||||
//
|
||||
// This is a part of YaCy, a peer-to-peer based web search engine
|
||||
//
|
||||
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
|
||||
// $LastChangedRevision: 1986 $
|
||||
// $LastChangedBy: orbiter $
|
||||
//
|
||||
// LICENSE
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package de.anomic.plasma;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import de.anomic.index.indexContainer;
|
||||
import de.anomic.index.indexRWIEntry;
|
||||
import de.anomic.index.indexURLEntry;
|
||||
import de.anomic.kelondro.kelondroException;
|
||||
import de.anomic.server.logging.serverLog;
|
||||
|
||||
/**
|
||||
*
|
||||
* This class provides search processes and keeps a timing record of the processes
|
||||
* It shall be used to initiate a search and also to evaluate
|
||||
* the real obtained timings after a search is performed
|
||||
*/
|
||||
|
||||
public class plasmaSearchProcessing implements Cloneable {
|
||||
|
||||
// collection:
|
||||
// time = time to get a RWI out of RAM cache, assortments and WORDS files
|
||||
// count = maximum number of RWI-entries that shall be collected
|
||||
|
||||
// join
|
||||
// time = time to perform the join between all collected RWIs
|
||||
// count = maximum number of entries that shall be joined
|
||||
|
||||
// presort:
|
||||
// time = time to do a sort of the joined URL-records
|
||||
// count = maximum number of entries that shall be pre-sorted
|
||||
|
||||
// urlfetch:
|
||||
// time = time to fetch the real URLs from the LURL database
|
||||
// count = maximum number of urls that shall be fetched
|
||||
|
||||
// postsort:
|
||||
// time = time for final sort of URLs
|
||||
// count = maximum number oof URLs that shall be retrieved during sort
|
||||
|
||||
// snippetfetch:
|
||||
// time = time to fetch snippets for selected URLs
|
||||
// count = maximum number of snipptes to be fetched
|
||||
|
||||
public static final char PROCESS_COLLECTION = 'c';
|
||||
public static final char PROCESS_JOIN = 'j';
|
||||
public static final char PROCESS_PRESORT = 'r';
|
||||
public static final char PROCESS_URLFETCH = 'u';
|
||||
public static final char PROCESS_POSTSORT = 'o';
|
||||
public static final char PROCESS_FILTER = 'f';
|
||||
public static final char PROCESS_SNIPPETFETCH = 's';
|
||||
|
||||
private static final long minimumTargetTime = 100;
|
||||
|
||||
public static char[] sequence = new char[]{
|
||||
PROCESS_COLLECTION,
|
||||
PROCESS_JOIN,
|
||||
PROCESS_PRESORT,
|
||||
PROCESS_URLFETCH,
|
||||
PROCESS_POSTSORT,
|
||||
PROCESS_FILTER,
|
||||
PROCESS_SNIPPETFETCH
|
||||
};
|
||||
|
||||
private HashMap targetTime;
|
||||
private HashMap targetCount;
|
||||
private HashMap yieldTime;
|
||||
private HashMap yieldCount;
|
||||
private long timer;
|
||||
|
||||
private plasmaSearchProcessing() {
|
||||
targetTime = new HashMap();
|
||||
targetCount = new HashMap();
|
||||
yieldTime = new HashMap();
|
||||
yieldCount = new HashMap();
|
||||
timer = 0;
|
||||
}
|
||||
|
||||
public plasmaSearchProcessing(long time, int count) {
|
||||
this(
|
||||
3 * time / 12, 10 * count,
|
||||
1 * time / 12, 10 * count,
|
||||
1 * time / 12, 10 * count,
|
||||
2 * time / 12, 5 * count,
|
||||
3 * time / 12, count,
|
||||
1 * time / 12, count,
|
||||
1 * time / 12, 1
|
||||
);
|
||||
}
|
||||
|
||||
public plasmaSearchProcessing(
|
||||
long time_collection, int count_collection,
|
||||
long time_join, int count_join,
|
||||
long time_presort, int count_presort,
|
||||
long time_urlfetch, int count_urlfetch,
|
||||
long time_postsort, int count_postsort,
|
||||
long time_filter, int count_filter,
|
||||
long time_snippetfetch, int count_snippetfetch) {
|
||||
this();
|
||||
|
||||
targetTime.put(new Character(PROCESS_COLLECTION), new Long(time_collection));
|
||||
targetTime.put(new Character(PROCESS_JOIN), new Long(time_join));
|
||||
targetTime.put(new Character(PROCESS_PRESORT), new Long(time_presort));
|
||||
targetTime.put(new Character(PROCESS_URLFETCH), new Long(time_urlfetch));
|
||||
targetTime.put(new Character(PROCESS_POSTSORT), new Long(time_postsort));
|
||||
targetTime.put(new Character(PROCESS_FILTER), new Long(time_filter));
|
||||
targetTime.put(new Character(PROCESS_SNIPPETFETCH), new Long(time_snippetfetch));
|
||||
targetCount.put(new Character(PROCESS_COLLECTION), new Integer(count_collection));
|
||||
targetCount.put(new Character(PROCESS_JOIN), new Integer(count_join));
|
||||
targetCount.put(new Character(PROCESS_PRESORT), new Integer(count_presort));
|
||||
targetCount.put(new Character(PROCESS_URLFETCH), new Integer(count_urlfetch));
|
||||
targetCount.put(new Character(PROCESS_POSTSORT), new Integer(count_postsort));
|
||||
targetCount.put(new Character(PROCESS_FILTER), new Integer(count_filter));
|
||||
targetCount.put(new Character(PROCESS_SNIPPETFETCH), new Integer(count_snippetfetch));
|
||||
|
||||
}
|
||||
|
||||
public Object clone() {
|
||||
plasmaSearchProcessing p = new plasmaSearchProcessing();
|
||||
p.targetTime = (HashMap) this.targetTime.clone();
|
||||
p.targetCount = (HashMap) this.targetCount.clone();
|
||||
p.yieldTime = (HashMap) this.yieldTime.clone();
|
||||
p.yieldCount = (HashMap) this.yieldCount.clone();
|
||||
return p;
|
||||
}
|
||||
|
||||
public plasmaSearchProcessing(String s) {
|
||||
targetTime = new HashMap();
|
||||
targetCount = new HashMap();
|
||||
yieldTime = new HashMap();
|
||||
yieldCount = new HashMap();
|
||||
|
||||
intoMap(s, targetTime, targetCount);
|
||||
}
|
||||
|
||||
public long duetime() {
|
||||
// returns the old duetime value as sum of all waiting times
|
||||
long d = 0;
|
||||
for (int i = 0; i < sequence.length; i++) {
|
||||
d += ((Long) targetTime.get(new Character(sequence[i]))).longValue();
|
||||
}
|
||||
return d;
|
||||
}
|
||||
|
||||
public void putYield(String s) {
|
||||
intoMap(s, yieldTime, yieldCount);
|
||||
}
|
||||
|
||||
public String yieldToString() {
|
||||
return toString(yieldTime, yieldCount);
|
||||
}
|
||||
|
||||
public String targetToString() {
|
||||
return toString(targetTime, targetCount);
|
||||
}
|
||||
|
||||
public long getTargetTime(char type) {
|
||||
// sum up all time that was demanded and subtract all that had been wasted
|
||||
long sum = 0;
|
||||
Long t;
|
||||
Character element;
|
||||
for (int i = 0; i < sequence.length; i++) {
|
||||
element = new Character(sequence[i]);
|
||||
t = (Long) targetTime.get(element);
|
||||
if (t != null) sum += t.longValue();
|
||||
if (type == sequence[i]) return (sum < 0) ? minimumTargetTime : sum;
|
||||
t = (Long) yieldTime.get(element);
|
||||
if (t != null) sum -= t.longValue();
|
||||
}
|
||||
return minimumTargetTime;
|
||||
}
|
||||
|
||||
public int getTargetCount(char type) {
|
||||
Integer i = (Integer) targetCount.get(new Character(type));
|
||||
if (i == null) return -1; else return i.intValue();
|
||||
}
|
||||
|
||||
public long getYieldTime(char type) {
|
||||
Long l = (Long) yieldTime.get(new Character(type));
|
||||
if (l == null) return -1; else return l.longValue();
|
||||
}
|
||||
|
||||
public int getYieldCount(char type) {
|
||||
Integer i = (Integer) yieldCount.get(new Character(type));
|
||||
if (i == null) return -1; else return i.intValue();
|
||||
}
|
||||
|
||||
public void startTimer() {
|
||||
this.timer = System.currentTimeMillis();
|
||||
}
|
||||
|
||||
public void setYieldTime(char type) {
|
||||
// sets a time that is computed using the timer
|
||||
long t = System.currentTimeMillis() - this.timer;
|
||||
yieldTime.put(new Character(type), new Long(t));
|
||||
}
|
||||
|
||||
public void setYieldCount(char type, int count) {
|
||||
yieldCount.put(new Character(type), new Integer(count));
|
||||
}
|
||||
|
||||
public String reportToString() {
|
||||
return "target=" + toString(targetTime, targetCount) + "; yield=" + toString(yieldTime, yieldCount);
|
||||
}
|
||||
|
||||
public static String toString(HashMap time, HashMap count) {
|
||||
// put this into a format in such a way that it can be send in a http header or post argument
|
||||
// that means that no '=' or spaces are allowed
|
||||
StringBuffer sb = new StringBuffer(sequence.length * 10);
|
||||
Character element;
|
||||
Integer xi;
|
||||
Long xl;
|
||||
for (int i = 0; i < sequence.length; i++) {
|
||||
element = new Character(sequence[i]);
|
||||
sb.append("t");
|
||||
sb.append(element);
|
||||
xl = (Long) time.get(element);
|
||||
sb.append((xl == null) ? "0" : xl.toString());
|
||||
sb.append("|");
|
||||
sb.append("c");
|
||||
sb.append(element);
|
||||
xi = (Integer) count.get(element);
|
||||
sb.append((xi == null) ? "0" : xi.toString());
|
||||
sb.append("|");
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public static void intoMap(String s, HashMap time, HashMap count) {
|
||||
// this is the reverse method to toString
|
||||
int p = 0;
|
||||
char ct;
|
||||
String elt;
|
||||
String v;
|
||||
int p1;
|
||||
while ((p < s.length()) && ((p1 = s.indexOf('|', p)) > 0)) {
|
||||
ct = s.charAt(p);
|
||||
elt = s.substring(p + 1, p + 2);
|
||||
v = s.substring(p + 2, p1);
|
||||
if (ct == 't') {
|
||||
time.put(elt, new Long(Long.parseLong(v)));
|
||||
} else {
|
||||
count.put(elt, new Integer(Integer.parseInt(v)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// the processes
|
||||
|
||||
public Map[] localSearchContainers(
|
||||
plasmaSearchQuery query,
|
||||
plasmaWordIndex wordIndex,
|
||||
Set urlselection) {
|
||||
// search for the set of hashes and return a map of of wordhash:indexContainer containing the seach result
|
||||
|
||||
// retrieve entities that belong to the hashes
|
||||
startTimer();
|
||||
long start = System.currentTimeMillis();
|
||||
Map inclusionContainers = (query.queryHashes.size() == 0) ? new HashMap() : wordIndex.getContainers(
|
||||
query.queryHashes,
|
||||
urlselection,
|
||||
true,
|
||||
true,
|
||||
getTargetTime(plasmaSearchProcessing.PROCESS_COLLECTION) * query.queryHashes.size() / (query.queryHashes.size() + query.excludeHashes.size()));
|
||||
if ((inclusionContainers.size() != 0) && (inclusionContainers.size() < query.queryHashes.size())) inclusionContainers = new HashMap(); // prevent that only a subset is returned
|
||||
long remaintime = getTargetTime(plasmaSearchProcessing.PROCESS_COLLECTION) - System.currentTimeMillis() + start;
|
||||
Map exclusionContainers = ((inclusionContainers == null) || (inclusionContainers.size() == 0) || (remaintime <= 0)) ? new HashMap() : wordIndex.getContainers(
|
||||
query.excludeHashes,
|
||||
urlselection,
|
||||
true,
|
||||
true,
|
||||
remaintime);
|
||||
setYieldTime(plasmaSearchProcessing.PROCESS_COLLECTION);
|
||||
setYieldCount(plasmaSearchProcessing.PROCESS_COLLECTION, inclusionContainers.size());
|
||||
|
||||
return new Map[]{inclusionContainers, exclusionContainers};
|
||||
}
|
||||
|
||||
public indexContainer localSearchJoinExclude(
|
||||
Collection includeContainers,
|
||||
Collection excludeContainers,
|
||||
long time, int maxDistance) {
|
||||
// join a search result and return the joincount (number of pages after join)
|
||||
|
||||
// since this is a conjunction we return an empty entity if any word is not known
|
||||
if (includeContainers == null) return plasmaWordIndex.emptyContainer(null);
|
||||
|
||||
// join the result
|
||||
startTimer();
|
||||
long start = System.currentTimeMillis();
|
||||
indexContainer rcLocal = indexContainer.joinContainers(includeContainers, time, maxDistance);
|
||||
long remaining = getTargetTime(plasmaSearchProcessing.PROCESS_JOIN) - System.currentTimeMillis() + start;
|
||||
if ((rcLocal != null) && (remaining > 0)) {
|
||||
indexContainer.excludeContainers(rcLocal, excludeContainers, remaining);
|
||||
}
|
||||
if (rcLocal == null) rcLocal = plasmaWordIndex.emptyContainer(null);
|
||||
setYieldTime(plasmaSearchProcessing.PROCESS_JOIN);
|
||||
setYieldCount(plasmaSearchProcessing.PROCESS_JOIN, rcLocal.size());
|
||||
|
||||
return rcLocal;
|
||||
}
|
||||
|
||||
public plasmaSearchPostOrder orderFinal(
|
||||
plasmaSearchQuery query,
|
||||
plasmaSearchRankingProfile ranking,
|
||||
plasmaWordIndex wordIndex,
|
||||
boolean postsort,
|
||||
indexContainer resultIndex) {
|
||||
// we collect the urlhashes and construct a list with urlEntry objects
|
||||
// attention: if minEntries is too high, this method will not terminate within the maxTime
|
||||
|
||||
assert (resultIndex != null);
|
||||
|
||||
long preorderTime = getTargetTime(plasmaSearchProcessing.PROCESS_PRESORT);
|
||||
|
||||
startTimer();
|
||||
long pst = System.currentTimeMillis();
|
||||
resultIndex.sort();
|
||||
resultIndex.uniq(1000);
|
||||
preorderTime = preorderTime - (System.currentTimeMillis() - pst);
|
||||
if (preorderTime < 0) preorderTime = 200;
|
||||
plasmaSearchPreOrder preorder = new plasmaSearchPreOrder(query, ranking, resultIndex, preorderTime);
|
||||
if (resultIndex.size() > query.wantedResults) preorder.remove(true, true);
|
||||
setYieldTime(plasmaSearchProcessing.PROCESS_PRESORT);
|
||||
setYieldCount(plasmaSearchProcessing.PROCESS_PRESORT, resultIndex.size());
|
||||
|
||||
// start url-fetch
|
||||
long postorderTime = getTargetTime(plasmaSearchProcessing.PROCESS_POSTSORT);
|
||||
//System.out.println("DEBUG: postorder-final (urlfetch) maxtime = " + postorderTime);
|
||||
long postorderLimitTime = (postorderTime < 0) ? Long.MAX_VALUE : (System.currentTimeMillis() + postorderTime);
|
||||
startTimer();
|
||||
plasmaSearchPostOrder acc = new plasmaSearchPostOrder(query, ranking);
|
||||
|
||||
indexRWIEntry entry;
|
||||
indexURLEntry page;
|
||||
Long preranking;
|
||||
Object[] preorderEntry;
|
||||
indexURLEntry.Components comp;
|
||||
String pagetitle, pageurl, pageauthor;
|
||||
int minEntries = getTargetCount(plasmaSearchProcessing.PROCESS_POSTSORT);
|
||||
try {
|
||||
ordering: while (preorder.hasNext()) {
|
||||
if ((System.currentTimeMillis() >= postorderLimitTime) || (acc.sizeFetched() >= minEntries)) break;
|
||||
preorderEntry = preorder.next();
|
||||
entry = (indexRWIEntry) preorderEntry[0];
|
||||
// load only urls if there was not yet a root url of that hash
|
||||
preranking = (Long) preorderEntry[1];
|
||||
// find the url entry
|
||||
page = wordIndex.loadedURL.load(entry.urlHash(), entry);
|
||||
if (page != null) {
|
||||
comp = page.comp();
|
||||
pagetitle = comp.title().toLowerCase();
|
||||
if (comp.url() == null) continue ordering; // rare case where the url is corrupted
|
||||
pageurl = comp.url().toString().toLowerCase();
|
||||
pageauthor = comp.author().toLowerCase();
|
||||
|
||||
// check exclusion
|
||||
if (plasmaSearchQuery.matches(pagetitle, query.excludeHashes)) continue ordering;
|
||||
if (plasmaSearchQuery.matches(pageurl, query.excludeHashes)) continue ordering;
|
||||
if (plasmaSearchQuery.matches(pageauthor, query.excludeHashes)) continue ordering;
|
||||
|
||||
// check url mask
|
||||
if (!(pageurl.matches(query.urlMask))) continue ordering;
|
||||
|
||||
// check constraints
|
||||
if ((!(query.constraint.equals(plasmaSearchQuery.catchall_constraint))) &&
|
||||
(query.constraint.get(plasmaCondenser.flag_cat_indexof)) &&
|
||||
(!(comp.title().startsWith("Index of")))) {
|
||||
serverLog.logFine("PLASMA", "filtered out " + comp.url().toString());
|
||||
// filter out bad results
|
||||
Iterator wi = query.queryHashes.iterator();
|
||||
while (wi.hasNext()) wordIndex.removeEntry((String) wi.next(), page.hash());
|
||||
} else if (query.contentdom != plasmaSearchQuery.CONTENTDOM_TEXT) {
|
||||
if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) && (page.laudio() > 0)) acc.addPage(page, preranking);
|
||||
else if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) && (page.lvideo() > 0)) acc.addPage(page, preranking);
|
||||
else if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) && (page.limage() > 0)) acc.addPage(page, preranking);
|
||||
else if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_APP) && (page.lapp() > 0)) acc.addPage(page, preranking);
|
||||
} else {
|
||||
acc.addPage(page, preranking);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (kelondroException ee) {
|
||||
serverLog.logSevere("PLASMA", "Database Failure during plasmaSearch.order: " + ee.getMessage(), ee);
|
||||
}
|
||||
setYieldTime(plasmaSearchProcessing.PROCESS_URLFETCH);
|
||||
setYieldCount(plasmaSearchProcessing.PROCESS_URLFETCH, acc.sizeFetched());
|
||||
|
||||
// start postsorting
|
||||
startTimer();
|
||||
acc.sortPages(postsort);
|
||||
setYieldTime(plasmaSearchProcessing.PROCESS_POSTSORT);
|
||||
setYieldCount(plasmaSearchProcessing.PROCESS_POSTSORT, acc.sizeOrdered());
|
||||
|
||||
// apply filter
|
||||
startTimer();
|
||||
acc.removeRedundant();
|
||||
setYieldTime(plasmaSearchProcessing.PROCESS_FILTER);
|
||||
setYieldCount(plasmaSearchProcessing.PROCESS_FILTER, acc.sizeOrdered());
|
||||
|
||||
acc.localContributions = (resultIndex == null) ? 0 : resultIndex.size();
|
||||
acc.filteredResults = preorder.filteredCount();
|
||||
return acc;
|
||||
}
|
||||
|
||||
}
|
@ -1,282 +0,0 @@
|
||||
// plasmaSearchProfile.java
|
||||
// -----------------------
|
||||
// part of YACY
|
||||
// (C) by Michael Peter Christen; mc@anomic.de
|
||||
// first published on http://www.anomic.de
|
||||
// Frankfurt, Germany, 2005
|
||||
// Created: 17.10.2005
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
//
|
||||
// Using this software in any meaning (reading, learning, copying, compiling,
|
||||
// running) means that you agree that the Author(s) is (are) not responsible
|
||||
// for cost, loss of data or any harm that may be caused directly or indirectly
|
||||
// by usage of this softare or this documentation. The usage of this software
|
||||
// is on your own risk. The installation and usage (starting/running) of this
|
||||
// software may allow other people or application to access your computer and
|
||||
// any attached devices and is highly dependent on the configuration of the
|
||||
// software which must be done by the user of the software; the author(s) is
|
||||
// (are) also not responsible for proper configuration and usage of the
|
||||
// software, even if provoked by documentation provided together with
|
||||
// the software.
|
||||
//
|
||||
// Any changes to this file according to the GPL as documented in the file
|
||||
// gpl.txt aside this file in the shipment you received can be done to the
|
||||
// lines that follows this copyright notice here, but changes must not be
|
||||
// done inside the copyright notive above. A re-distribution must contain
|
||||
// the intact and unchanged copyright notice.
|
||||
// Contributions and changes to the program code must be marked as such.
|
||||
|
||||
package de.anomic.plasma;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
|
||||
*
|
||||
* This class provides timing properties for search processes
|
||||
* It shall be used to initiate a search and also to evaluate
|
||||
* the real obtained timings after a search is performed
|
||||
*/
|
||||
|
||||
public class plasmaSearchTimingProfile implements Cloneable {

    // For every search process step a target time/count (the budget) and a
    // yield time/count (what was actually consumed/produced) is recorded:
    //
    // collection:
    //   time  = time to get a RWI out of RAM cache, assortments and WORDS files
    //   count = maximum number of RWI-entries that shall be collected
    // join:
    //   time  = time to perform the join between all collected RWIs
    //   count = maximum number of entries that shall be joined
    // presort:
    //   time  = time to do a sort of the joined URL-records
    //   count = maximum number of entries that shall be pre-sorted
    // urlfetch:
    //   time  = time to fetch the real URLs from the LURL database
    //   count = maximum number of urls that shall be fetched
    // postsort:
    //   time  = time for final sort of URLs
    //   count = maximum number of URLs that shall be retrieved during sort
    // snippetfetch:
    //   time  = time to fetch snippets for selected URLs
    //   count = maximum number of snippets to be fetched

    public static final char PROCESS_COLLECTION   = 'c';
    public static final char PROCESS_JOIN         = 'j';
    public static final char PROCESS_PRESORT      = 'r';
    public static final char PROCESS_URLFETCH     = 'u';
    public static final char PROCESS_POSTSORT     = 'o';
    public static final char PROCESS_FILTER       = 'f';
    public static final char PROCESS_SNIPPETFETCH = 's';

    // lower bound that getTargetTime returns when the time budget is used up
    private static final long minimumTargetTime = 100;

    // execution order of the process steps
    public static char[] sequence = new char[]{
        PROCESS_COLLECTION,
        PROCESS_JOIN,
        PROCESS_PRESORT,
        PROCESS_URLFETCH,
        PROCESS_POSTSORT,
        PROCESS_FILTER,
        PROCESS_SNIPPETFETCH
    };

    // all maps are keyed by Character(process id);
    // the time maps hold Long values, the count maps hold Integer values
    private HashMap targetTime;
    private HashMap targetCount;
    private HashMap yieldTime;
    private HashMap yieldCount;
    private long timer; // timestamp set by startTimer(), read by setYieldTime()

    private plasmaSearchTimingProfile() {
        targetTime = new HashMap();
        targetCount = new HashMap();
        yieldTime = new HashMap();
        yieldCount = new HashMap();
        timer = 0;
    }

    /**
     * Distributes a total time and count budget over all process steps
     * using fixed weights (twelfths of the total time).
     */
    public plasmaSearchTimingProfile(long time, int count) {
        this(
            3 * time / 12, 10 * count, // collection
            1 * time / 12, 10 * count, // join
            1 * time / 12, 10 * count, // presort
            2 * time / 12,  5 * count, // urlfetch
            3 * time / 12,      count, // postsort
            1 * time / 12,      count, // filter
            1 * time / 12,          1  // snippetfetch
        );
    }

    public plasmaSearchTimingProfile(
            long time_collection, int count_collection,
            long time_join, int count_join,
            long time_presort, int count_presort,
            long time_urlfetch, int count_urlfetch,
            long time_postsort, int count_postsort,
            long time_filter, int count_filter,
            long time_snippetfetch, int count_snippetfetch) {
        this();

        targetTime.put(new Character(PROCESS_COLLECTION), new Long(time_collection));
        targetTime.put(new Character(PROCESS_JOIN), new Long(time_join));
        targetTime.put(new Character(PROCESS_PRESORT), new Long(time_presort));
        targetTime.put(new Character(PROCESS_URLFETCH), new Long(time_urlfetch));
        targetTime.put(new Character(PROCESS_POSTSORT), new Long(time_postsort));
        targetTime.put(new Character(PROCESS_FILTER), new Long(time_filter));
        targetTime.put(new Character(PROCESS_SNIPPETFETCH), new Long(time_snippetfetch));
        targetCount.put(new Character(PROCESS_COLLECTION), new Integer(count_collection));
        targetCount.put(new Character(PROCESS_JOIN), new Integer(count_join));
        targetCount.put(new Character(PROCESS_PRESORT), new Integer(count_presort));
        targetCount.put(new Character(PROCESS_URLFETCH), new Integer(count_urlfetch));
        targetCount.put(new Character(PROCESS_POSTSORT), new Integer(count_postsort));
        targetCount.put(new Character(PROCESS_FILTER), new Integer(count_filter));
        targetCount.put(new Character(PROCESS_SNIPPETFETCH), new Integer(count_snippetfetch));
    }

    /** Reconstructs target times/counts from the format written by toString. */
    public plasmaSearchTimingProfile(String s) {
        this();
        intoMap(s, targetTime, targetCount);
    }

    public Object clone() {
        plasmaSearchTimingProfile p = new plasmaSearchTimingProfile();
        p.targetTime = (HashMap) this.targetTime.clone();
        p.targetCount = (HashMap) this.targetCount.clone();
        p.yieldTime = (HashMap) this.yieldTime.clone();
        p.yieldCount = (HashMap) this.yieldCount.clone();
        p.timer = this.timer; // FIX: the timer state was not copied before
        return p;
    }

    /** Returns the old duetime value as sum of all target waiting times. */
    public long duetime() {
        long d = 0;
        Long l;
        for (int i = 0; i < sequence.length; i++) {
            l = (Long) targetTime.get(new Character(sequence[i]));
            // guard against missing entries (possible after the String constructor)
            if (l != null) d += l.longValue();
        }
        return d;
    }

    /** Merges yield times/counts encoded by toString into this profile. */
    public void putYield(String s) {
        intoMap(s, yieldTime, yieldCount);
    }

    public String yieldToString() {
        return toString(yieldTime, yieldCount);
    }

    public String targetToString() {
        return toString(targetTime, targetCount);
    }

    /**
     * Computes the remaining time budget for the given process step:
     * sums up all time that was demanded up to (and including) the step
     * and subtracts all that had already been spent on earlier steps.
     * Returns at least minimumTargetTime.
     */
    public long getTargetTime(char type) {
        long sum = 0;
        Long t;
        Character element;
        for (int i = 0; i < sequence.length; i++) {
            element = new Character(sequence[i]);
            t = (Long) targetTime.get(element);
            if (t != null) sum += t.longValue();
            if (type == sequence[i]) return (sum < 0) ? minimumTargetTime : sum;
            t = (Long) yieldTime.get(element);
            if (t != null) sum -= t.longValue();
        }
        return minimumTargetTime;
    }

    /** Returns the target count for the step, or -1 if unknown. */
    public int getTargetCount(char type) {
        Integer i = (Integer) targetCount.get(new Character(type));
        if (i == null) return -1; else return i.intValue();
    }

    /** Returns the measured time for the step, or -1 if not recorded. */
    public long getYieldTime(char type) {
        Long l = (Long) yieldTime.get(new Character(type));
        if (l == null) return -1; else return l.longValue();
    }

    /** Returns the measured count for the step, or -1 if not recorded. */
    public int getYieldCount(char type) {
        Integer i = (Integer) yieldCount.get(new Character(type));
        if (i == null) return -1; else return i.intValue();
    }

    /** Starts the timer that setYieldTime uses as reference. */
    public void startTimer() {
        this.timer = System.currentTimeMillis();
    }

    /** Records the time elapsed since startTimer() as yield time of the step. */
    public void setYieldTime(char type) {
        long t = System.currentTimeMillis() - this.timer;
        yieldTime.put(new Character(type), new Long(t));
    }

    /** Records the result count of the step. */
    public void setYieldCount(char type, int count) {
        yieldCount.put(new Character(type), new Integer(count));
    }

    /** Human-readable summary of target and yield values. */
    public String reportToString() {
        return "target=" + toString(targetTime, targetCount) + "; yield=" + toString(yieldTime, yieldCount);
    }

    /**
     * Serializes the maps in a format that can be sent in a http header or
     * post argument (no '=' or spaces allowed): a sequence of tokens
     * 't'&lt;process-char&gt;&lt;millis&gt;'|' and 'c'&lt;process-char&gt;&lt;count&gt;'|'.
     */
    public static String toString(HashMap time, HashMap count) {
        StringBuffer sb = new StringBuffer(sequence.length * 10);
        Character element;
        Integer xi;
        Long xl;
        for (int i = 0; i < sequence.length; i++) {
            element = new Character(sequence[i]);
            sb.append("t");
            sb.append(element);
            xl = (Long) time.get(element);
            sb.append((xl == null) ? "0" : xl.toString());
            sb.append("|");
            sb.append("c");
            sb.append(element);
            xi = (Integer) count.get(element);
            sb.append((xi == null) ? "0" : xi.toString());
            sb.append("|");
        }
        return sb.toString();
    }

    /**
     * Reverse method to toString: parses 't'/'c' tokens back into the maps.
     * FIX 1: the read position now advances past each '|' separator; before,
     *        p was never updated inside the loop, so any non-empty input
     *        caused an endless loop.
     * FIX 2: keys are stored as Character; before, String keys were stored,
     *        which never matched the Character lookups in the getters and
     *        in toString, making every parsed value unreachable.
     */
    public static void intoMap(String s, HashMap time, HashMap count) {
        int p = 0;
        int p1;
        char ct;
        Character element;
        String v;
        while ((p < s.length()) && ((p1 = s.indexOf('|', p)) > 0)) {
            if (p1 >= p + 2) { // token needs at least type char + process char
                ct = s.charAt(p);
                element = new Character(s.charAt(p + 1));
                v = s.substring(p + 2, p1);
                try {
                    if (ct == 't') {
                        time.put(element, new Long(Long.parseLong(v)));
                    } else {
                        count.put(element, new Integer(Integer.parseInt(v)));
                    }
                } catch (NumberFormatException e) {
                    // skip a malformed token, continue with the next one
                }
            }
            p = p1 + 1; // continue after the '|' separator
        }
    }

}
|
Loading…
Reference in new issue