yacy_search_server/source/de/anomic/plasma/plasmaSearchProcessing.java

// plasmaSearchProcessing.java
// (C) 2005 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 17.10.2005 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
// 
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

package de.anomic.plasma;

import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import de.anomic.index.indexContainer;
import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroException;
import de.anomic.server.logging.serverLog;

/**
 * Provides the individual search processes and keeps a timing record of them.
 *
 * An instance holds, per process, a target (time budget in milliseconds and
 * entry count) and a yield (time actually spent and entries actually produced).
 * It shall be used to initiate a search and also to evaluate the really
 * obtained timings after a search was performed.
 *
 * Targets and yields can be serialized with {@link #toString(HashMap, HashMap)}
 * and parsed back with {@link #intoMap(String, HashMap, HashMap)}.
 */
public class plasmaSearchProcessing implements Cloneable {
    
    // collection:
    // time = time to get a RWI out of RAM cache, assortments and WORDS files
    // count = maximum number of RWI-entries that shall be collected
    
    // join
    // time = time to perform the join between all collected RWIs
    // count = maximum number of entries that shall be joined
    
    // presort:
    // time = time to do a sort of the joined URL-records
    // count = maximum number of entries that shall be pre-sorted
    
    // urlfetch:
    // time = time to fetch the real URLs from the LURL database
    // count = maximum number of urls that shall be fetched
    
    // postsort:
    // time = time for final sort of URLs
    // count = maximum number of URLs that shall be retrieved during sort
    
    // snippetfetch:
    // time = time to fetch snippets for selected URLs
    // count = maximum number of snippets to be fetched
    
    // one-character process identifiers; they are used as map keys and
    // appear verbatim in the serialized form produced by toString()
    public static final char PROCESS_COLLECTION   = 'c';
    public static final char PROCESS_JOIN         = 'j';
    public static final char PROCESS_PRESORT      = 'r';
    public static final char PROCESS_URLFETCH     = 'u';
    public static final char PROCESS_POSTSORT     = 'o';
    public static final char PROCESS_FILTER       = 'f';
    public static final char PROCESS_SNIPPETFETCH = 's';
    
    // value returned by getTargetTime() when the remaining budget is negative
    // or the requested process is unknown
    private static final long minimumTargetTime = 100;
    
    // the order in which the processes are executed; getTargetTime() and
    // toString() both walk the processes in this order
    public static char[] sequence = new char[]{
        PROCESS_COLLECTION,
        PROCESS_JOIN,
        PROCESS_PRESORT,
        PROCESS_URLFETCH,
        PROCESS_POSTSORT,
        PROCESS_FILTER,
        PROCESS_SNIPPETFETCH
    };

    // all four maps are keyed by Character (process id);
    // time maps hold Long values, count maps hold Integer values
    private HashMap targetTime;
    private HashMap targetCount;
    private HashMap yieldTime;
    private HashMap yieldCount;
    private long timer; // start time of the currently measured process, see startTimer()
    
    /**
     * Creates an instance with empty target and yield maps.
     */
    private plasmaSearchProcessing() {
        targetTime = new HashMap();
        targetCount = new HashMap();
        yieldTime = new HashMap();
        yieldCount = new HashMap();
        timer = 0;
    }
    
    /**
     * Creates a profile by distributing an overall time budget and a wanted
     * result count over the processes using fixed ratios (twelfths of the
     * total time, multiples of the count).
     *
     * @param time  overall time budget in milliseconds
     * @param count base entry count
     */
    public plasmaSearchProcessing(long time, int count) {
        this(
          3 * time / 12, 10 * count, 
          1 * time / 12, 10 * count, 
          1 * time / 12, 10 * count, 
          2 * time / 12,  5 * count, 
          3 * time / 12, count,
          1 * time / 12, count, 
          1 * time / 12, 1
        );
    }
    
    /**
     * Creates a profile with an explicit target time and target count for
     * every process. The yield maps start empty.
     */
    public plasmaSearchProcessing(
            long time_collection,   int count_collection,
            long time_join,         int count_join,
            long time_presort,      int count_presort,
            long time_urlfetch,     int count_urlfetch,
            long time_postsort,     int count_postsort,
            long time_filter,       int count_filter,
            long time_snippetfetch, int count_snippetfetch) {
        this();
        
        targetTime.put(new Character(PROCESS_COLLECTION), new Long(time_collection));
        targetTime.put(new Character(PROCESS_JOIN), new Long(time_join));
        targetTime.put(new Character(PROCESS_PRESORT), new Long(time_presort));
        targetTime.put(new Character(PROCESS_URLFETCH), new Long(time_urlfetch));
        targetTime.put(new Character(PROCESS_POSTSORT), new Long(time_postsort));
        targetTime.put(new Character(PROCESS_FILTER), new Long(time_filter));
        targetTime.put(new Character(PROCESS_SNIPPETFETCH), new Long(time_snippetfetch));
        targetCount.put(new Character(PROCESS_COLLECTION), new Integer(count_collection));
        targetCount.put(new Character(PROCESS_JOIN), new Integer(count_join));
        targetCount.put(new Character(PROCESS_PRESORT), new Integer(count_presort));
        targetCount.put(new Character(PROCESS_URLFETCH), new Integer(count_urlfetch));
        targetCount.put(new Character(PROCESS_POSTSORT), new Integer(count_postsort));
        targetCount.put(new Character(PROCESS_FILTER), new Integer(count_filter));
        targetCount.put(new Character(PROCESS_SNIPPETFETCH), new Integer(count_snippetfetch));
        
    }

    /**
     * Returns a copy with independent (shallow-cloned) target and yield maps.
     * The timer is not copied; the copy starts with a timer value of 0.
     */
    public Object clone() {
        plasmaSearchProcessing p = new plasmaSearchProcessing();
        p.targetTime = (HashMap) this.targetTime.clone();
        p.targetCount = (HashMap) this.targetCount.clone();
        p.yieldTime = (HashMap) this.yieldTime.clone();
        p.yieldCount = (HashMap) this.yieldCount.clone();
        return p;
    }
    
    /**
     * Creates a profile whose targets are parsed from a string in the format
     * written by {@link #targetToString()}. Processes that do not appear in
     * the string have no target. The yield maps start empty.
     *
     * @param s serialized targets
     */
    public plasmaSearchProcessing(String s) {
        this();
        intoMap(s, targetTime, targetCount);
    }
    
    /**
     * Returns the old duetime value as sum of all target times.
     * Processes without a target time contribute nothing.
     */
    public long duetime() {
        long d = 0;
        Long t;
        for (int i = 0; i < sequence.length; i++) {
            t = (Long) targetTime.get(new Character(sequence[i]));
            // a profile parsed from a string may lack entries; do not fail on them
            if (t != null) d += t.longValue();
        }
        return d;
    }
    
    /**
     * Parses yield times and counts from a string in the format written by
     * {@link #yieldToString()} and stores them in this instance.
     */
    public void putYield(String s) {
        intoMap(s, yieldTime, yieldCount);
    }

    /** Serializes the yield times and counts, see {@link #toString(HashMap, HashMap)}. */
    public String yieldToString() {
        return toString(yieldTime, yieldCount);
    }
    
    /** Serializes the target times and counts, see {@link #toString(HashMap, HashMap)}. */
    public String targetToString() {
        return toString(targetTime, targetCount);
    }
    
    /**
     * Computes the time budget that is available for a process: the sum of
     * the target times of all processes up to and including the given one,
     * minus the yield times of all processes before it. Time that an earlier
     * process did not use is therefore available to later ones.
     *
     * @param type process identifier, one of the PROCESS_* constants
     * @return the available time; {@code minimumTargetTime} if the computed
     *         value is negative or the type is not part of {@link #sequence}
     */
    public long getTargetTime(char type) {
        // sum up all time that was demanded and subtract all that had been wasted
        long sum = 0;
        Long t;
        Character element;
        for (int i = 0; i < sequence.length; i++) {
            element = new Character(sequence[i]);
            t = (Long) targetTime.get(element);
            if (t != null) sum += t.longValue();
            if (type == sequence[i]) return (sum < 0) ? minimumTargetTime : sum;
            // only processes before the requested one reduce the budget
            t = (Long) yieldTime.get(element);
            if (t != null) sum -= t.longValue();
        }
        return minimumTargetTime;
    }
    
    /**
     * @return the target count of the given process, or -1 if none is set
     */
    public int getTargetCount(char type) {
        Integer i = (Integer) targetCount.get(new Character(type));
        if (i == null) return -1; else return i.intValue();
    }
    
    /**
     * @return the recorded yield time of the given process, or -1 if none is set
     */
    public long getYieldTime(char type) {
        Long l = (Long) yieldTime.get(new Character(type));
        if (l == null) return -1; else return l.longValue();
    }
    
    /**
     * @return the recorded yield count of the given process, or -1 if none is set
     */
    public int getYieldCount(char type) {
        Integer i = (Integer) yieldCount.get(new Character(type));
        if (i == null) return -1; else return i.intValue();
    }
    
    /**
     * Remembers the current time as start of a measured process.
     */
    public void startTimer() {
        this.timer = System.currentTimeMillis();
    }
    
    /**
     * Records the time elapsed since the last {@link #startTimer()} call as
     * yield time of the given process.
     */
    public void setYieldTime(char type) {
        // sets a time that is computed using the timer
        long t = System.currentTimeMillis() - this.timer;
        yieldTime.put(new Character(type), new Long(t));
    }
    
    /**
     * Records the number of entries that the given process produced.
     */
    public void setYieldCount(char type, int count) {
        yieldCount.put(new Character(type), new Integer(count));
    }
    
    /**
     * @return targets and yields in one string, for reporting
     */
    public String reportToString() {
        return "target=" + toString(targetTime, targetCount) + "; yield=" + toString(yieldTime, yieldCount);
    }
    
    /**
     * Serializes a time map and a count map. For every process in
     * {@link #sequence} two tokens are written, each terminated by '|':
     * 't' + process id + time and 'c' + process id + count, for example
     * "tc250|cc100|tj83|cj100|...". Missing values are written as 0.
     *
     * @param time  map from Character (process id) to Long
     * @param count map from Character (process id) to Integer
     */
    public static String toString(HashMap time, HashMap count) {
        // put this into a format in such a way that it can be send in a http header or post argument
        // that means that no '=' or spaces are allowed
        StringBuffer sb = new StringBuffer(sequence.length * 10);
        Character element;
        Integer xi;
        Long xl;
        for (int i = 0; i < sequence.length; i++) {
            element = new Character(sequence[i]);
            sb.append("t");
            sb.append(element);
            xl = (Long) time.get(element);
            sb.append((xl == null) ? "0" : xl.toString());
            sb.append("|");
            sb.append("c");
            sb.append(element);
            xi = (Integer) count.get(element);
            sb.append((xi == null) ? "0" : xi.toString());
            sb.append("|");
        }
        return sb.toString();
    }
    
    /**
     * Reverse method to {@link #toString(HashMap, HashMap)}: parses
     * '|'-terminated tokens of the form kind + process id + number and stores
     * the numbers under a Character key (the process id), which is the key
     * type that all getters of this class use. Tokens of kind 't' go into the
     * time map as Long, tokens of kind 'c' into the count map as Integer.
     * Tokens that are too short to carry a number, and tokens of any other
     * kind, are skipped. A null string is treated like an empty one.
     *
     * @param s     the serialized form, may be null
     * @param time  receives the parsed times
     * @param count receives the parsed counts
     * @throws NumberFormatException if the number part of a token is not numeric
     */
    public static void intoMap(String s, HashMap time, HashMap count) {
        if (s == null) return;
        int p = 0; // start of the current token
        int p1;    // position of the '|' that terminates the current token
        char ct;
        Character elt;
        String v;
        while ((p < s.length()) && ((p1 = s.indexOf('|', p)) >= 0)) {
            // a usable token has a kind char, a process char and at least one digit
            if (p1 - p >= 3) {
                ct = s.charAt(p);
                elt = new Character(s.charAt(p + 1));
                v = s.substring(p + 2, p1);
                if (ct == 't') {
                    time.put(elt, new Long(Long.parseLong(v)));
                } else if (ct == 'c') {
                    count.put(elt, new Integer(Integer.parseInt(v)));
                }
            }
            // continue behind the separator; without this the loop never terminates
            p = p1 + 1;
        }
    }
    
    // the processes

    // collection
    /**
     * Collection process: fetches the index containers for the query words
     * and for the excluded words from the word index and records time and
     * count for PROCESS_COLLECTION.
     *
     * The inclusion lookup gets a share of the collection time budget that is
     * proportional to the number of query hashes; the exclusion lookup gets
     * what is left afterwards. If containers were found for only a subset of
     * the query hashes, the inclusion result is replaced by an empty map.
     * The exclusion lookup is skipped if there is no inclusion result or no
     * time left.
     *
     * @param query        the query, providing queryHashes and excludeHashes
     * @param wordIndex    the index to read from
     * @param urlselection passed through to wordIndex.getContainers
     * @return a two-element array: {inclusionContainers, exclusionContainers}
     */
    public Map[] localSearchContainers(
            plasmaSearchQuery query,
            plasmaWordIndex wordIndex,
            Set urlselection) {
        // search for the set of hashes and return a map of wordhash:indexContainer containing the search result

        // retrieve entities that belong to the hashes
        startTimer();
        long start = System.currentTimeMillis();
        // the divisor cannot be zero here: this branch is only taken if queryHashes is not empty
        Map inclusionContainers = (query.queryHashes.size() == 0) ? new HashMap() : wordIndex.getContainers(
                        query.queryHashes,
                        urlselection,
                        true,
                        true,
                        getTargetTime(plasmaSearchProcessing.PROCESS_COLLECTION) * query.queryHashes.size() / (query.queryHashes.size() + query.excludeHashes.size()));
        // defensive: treat a missing result like an empty one before it is dereferenced
        if (inclusionContainers == null) inclusionContainers = new HashMap();
        if ((inclusionContainers.size() != 0) && (inclusionContainers.size() < query.queryHashes.size())) inclusionContainers = new HashMap(); // prevent that only a subset is returned
        long remaintime =  getTargetTime(plasmaSearchProcessing.PROCESS_COLLECTION) - System.currentTimeMillis() + start;
        Map exclusionContainers = ((inclusionContainers.size() == 0) || (remaintime <= 0)) ? new HashMap() : wordIndex.getContainers(
                query.excludeHashes,
                urlselection,
                true,
                true,
                remaintime);
        setYieldTime(plasmaSearchProcessing.PROCESS_COLLECTION);
        setYieldCount(plasmaSearchProcessing.PROCESS_COLLECTION, inclusionContainers.size());

        return new Map[]{inclusionContainers, exclusionContainers};
    }
    
    // join
    /**
     * Join process: joins the inclusion containers into one container, then
     * applies the exclusion containers if time is left, and records time and
     * count for PROCESS_JOIN.
     *
     * @param includeContainers containers to be joined; if null, an empty
     *                          container is returned and nothing is recorded
     * @param excludeContainers containers passed to indexContainer.excludeContainers
     * @param time              time limit passed to indexContainer.joinContainers
     * @param maxDistance       passed to indexContainer.joinContainers
     * @return the joined container, never null
     */
    public indexContainer localSearchJoinExclude(
            Collection includeContainers,
            Collection excludeContainers,
            long time, int maxDistance) {
        // join a search result and return the joincount (number of pages after join)

        // since this is a conjunction we return an empty entity if any word is not known
        if (includeContainers == null) return plasmaWordIndex.emptyContainer(null);

        // join the result
        startTimer();
        long start = System.currentTimeMillis();
        indexContainer rcLocal = indexContainer.joinContainers(includeContainers, time, maxDistance);
        // the exclusion may only use what the join left over from the join budget
        long remaining = getTargetTime(plasmaSearchProcessing.PROCESS_JOIN) - System.currentTimeMillis() + start;
        if ((rcLocal != null) && (remaining > 0)) {
            indexContainer.excludeContainers(rcLocal, excludeContainers, remaining);
        }
        if (rcLocal == null) rcLocal = plasmaWordIndex.emptyContainer(null);
        setYieldTime(plasmaSearchProcessing.PROCESS_JOIN);
        setYieldCount(plasmaSearchProcessing.PROCESS_JOIN, rcLocal.size());

        return rcLocal;
    }
    
    // presort
    /**
     * Presort process: sorts and uniq's the joined container, builds a
     * plasmaSearchPreOrder from it with the remaining presort time budget and
     * records time and count for PROCESS_PRESORT.
     *
     * @param query       the query; wantedResults decides whether preorder.remove is called
     * @param ranking     ranking profile passed to the pre-order
     * @param resultIndex the joined container, must not be null; it is modified in place
     * @return the pre-order built from resultIndex
     */
    public plasmaSearchPreOrder preSort(
            plasmaSearchQuery query,
            plasmaSearchRankingProfile ranking,
            indexContainer resultIndex) {
        // we collect the urlhashes and construct a list with urlEntry objects
        // attention: if minEntries is too high, this method will not terminate within the maxTime

        assert (resultIndex != null);
        
        long preorderTime = getTargetTime(plasmaSearchProcessing.PROCESS_PRESORT);
        
        startTimer();
        long pst = System.currentTimeMillis();
        resultIndex.sort();
        resultIndex.uniq(1000);
        // the pre-order gets what sort and uniq left over; if they overran the budget, grant a fixed 200 ms
        preorderTime = preorderTime - (System.currentTimeMillis() - pst);
        if (preorderTime < 0) preorderTime = 200;
        plasmaSearchPreOrder preorder = new plasmaSearchPreOrder(query, ranking, resultIndex, preorderTime);
        // NOTE(review): remove(true, true) presumably drops surplus entries when more than wanted were found - confirm in plasmaSearchPreOrder
        if (resultIndex.size() > query.wantedResults) preorder.remove(true, true);
        setYieldTime(plasmaSearchProcessing.PROCESS_PRESORT);
        setYieldCount(plasmaSearchProcessing.PROCESS_PRESORT, resultIndex.size());
        
        return preorder;
    }
    
    // urlfetch
    /**
     * Urlfetch process: walks through the pre-ordered entries, loads the url
     * entry for each of them, filters the pages and collects the remaining
     * ones in a plasmaSearchPostOrder. Time and count are recorded for
     * PROCESS_URLFETCH.
     *
     * The loop ends when the pre-order is exhausted, the time limit is
     * reached, or five times the target count was fetched. A
     * kelondroException ends the loop as well; it is logged and the pages
     * collected so far are returned.
     *
     * @param query     the query, providing exclusion hashes, url mask, constraint and content domain
     * @param ranking   ranking profile passed to the post-order
     * @param wordIndex gives access to the loaded-url database; entries are removed from it for filtered pages
     * @param preorder  the pre-ordered entries, consumed by this method
     * @return the collected pages
     */
    public plasmaSearchPostOrder urlFetch(
            plasmaSearchQuery query,
            plasmaSearchRankingProfile ranking,
            plasmaWordIndex wordIndex,
            plasmaSearchPreOrder preorder) {

        // start url-fetch
        // NOTE(review): time and count limits are taken from PROCESS_POSTSORT, not PROCESS_URLFETCH - confirm that this is intended
        long postorderTime = getTargetTime(plasmaSearchProcessing.PROCESS_POSTSORT);
        //System.out.println("DEBUG: postorder-final (urlfetch) maxtime = " + postorderTime);
        long postorderLimitTime = (postorderTime < 0) ? Long.MAX_VALUE : (System.currentTimeMillis() + postorderTime);
        startTimer();
        plasmaSearchPostOrder acc = new plasmaSearchPostOrder(query, ranking);
        
        indexRWIEntry entry;
        indexURLEntry page;
        Long preranking;
        Object[] preorderEntry;
        indexURLEntry.Components comp;
        String pagetitle, pageurl, pageauthor;
        // getTargetCount returns -1 if no target is set; the size check below then ends the loop immediately
        int minEntries = getTargetCount(plasmaSearchProcessing.PROCESS_POSTSORT);
        try {
            ordering: while (preorder.hasNext()) {
                if ((System.currentTimeMillis() >= postorderLimitTime) || (acc.sizeFetched() >= 5 * minEntries)) break;
                preorderEntry = preorder.next();
                entry = (indexRWIEntry) preorderEntry[0];
                // load only urls if there was not yet a root url of that hash
                preranking = (Long) preorderEntry[1];
                // find the url entry
                page = wordIndex.loadedURL.load(entry.urlHash(), entry);
                if (page != null) {
                    comp = page.comp();
                    pagetitle = comp.title().toLowerCase();
                    if (comp.url() == null) continue ordering; // rare case where the url is corrupted
                    pageurl = comp.url().toString().toLowerCase();
                    pageauthor = comp.author().toLowerCase();
                    
                    // check exclusion
                    if (plasmaSearchQuery.matches(pagetitle, query.excludeHashes)) continue ordering;
                    if (plasmaSearchQuery.matches(pageurl, query.excludeHashes)) continue ordering;
                    if (plasmaSearchQuery.matches(pageauthor, query.excludeHashes)) continue ordering;
                    
                    // check url mask
                    if (!(pageurl.matches(query.urlMask))) continue ordering;
                    
                    // check constraints
                    if ((!(query.constraint.equals(plasmaSearchQuery.catchall_constraint))) &&
                        (query.constraint.get(plasmaCondenser.flag_cat_indexof)) &&
                        (!(comp.title().startsWith("Index of")))) {
                        // an "index of" page was demanded, but this page is none
                        serverLog.logFine("PLASMA", "filtered out " + comp.url().toString());
                        // filter out bad results
                        Iterator wi = query.queryHashes.iterator();
                        while (wi.hasNext()) wordIndex.removeEntry((String) wi.next(), page.hash());
                    } else if (query.contentdom != plasmaSearchQuery.CONTENTDOM_TEXT) {
                        // media search: accept only pages that have a non-zero counter for the wanted media type
                        if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) && (page.laudio() > 0)) acc.addPage(page, preranking);
                        else if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) && (page.lvideo() > 0)) acc.addPage(page, preranking);
                        else if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) && (page.limage() > 0)) acc.addPage(page, preranking);
                        else if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_APP) && (page.lapp() > 0)) acc.addPage(page, preranking);
                    } else {
                        acc.addPage(page, preranking);
                    }
                }
            }
        } catch (kelondroException ee) {
            serverLog.logSevere("PLASMA", "Database Failure during plasmaSearch.order: " + ee.getMessage(), ee);
        }
        setYieldTime(plasmaSearchProcessing.PROCESS_URLFETCH);
        setYieldCount(plasmaSearchProcessing.PROCESS_URLFETCH, acc.sizeFetched());

        acc.filteredResults = preorder.filteredCount();
        
        return acc;
    }

    //acc.localContributions = (resultIndex == null) ? 0 : resultIndex.size();
    
    // postsort
    /**
     * Postsort process: sorts the collected pages and records time and count
     * for PROCESS_POSTSORT.
     *
     * @param postsort passed through to acc.sortPages
     * @param acc      the collected pages, sorted in place
     */
    public void postSort(
            boolean postsort,
            plasmaSearchPostOrder acc) {

        // start postsorting
        startTimer();
        acc.sortPages(postsort);
        setYieldTime(plasmaSearchProcessing.PROCESS_POSTSORT);
        setYieldCount(plasmaSearchProcessing.PROCESS_POSTSORT, acc.sizeOrdered());
    }
    
    // filter
    /**
     * Filter process: calls acc.removeRedundant() and records time and count
     * for PROCESS_FILTER.
     *
     * @param acc the ordered pages, filtered in place
     */
    public void applyFilter(
            plasmaSearchPostOrder acc) {

        // apply filter
        startTimer();
        acc.removeRedundant();
        setYieldTime(plasmaSearchProcessing.PROCESS_FILTER);
        setYieldCount(plasmaSearchProcessing.PROCESS_FILTER, acc.sizeOrdered());
    }
}
refactoring git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4031 6c8d7289-2bf4-0310-a012-ef5d649a1542 18 years ago			`// plasmaSearchProcessing.java`
refactoring of search processes git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4030 6c8d7289-2bf4-0310-a012-ef5d649a1542 18 years ago			`// (C) 2005 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany`
			`// first published 17.10.2005 on http://yacy.net`
			`//`
			`// This is a part of YaCy, a peer-to-peer based web search engine`
			`//`
			`// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $`
			`// $LastChangedRevision: 1986 $`
			`// $LastChangedBy: orbiter $`
			`//`
			`// LICENSE`
			`//`
			`// This program is free software; you can redistribute it and/or modify`
			`// it under the terms of the GNU General Public License as published by`
			`// the Free Software Foundation; either version 2 of the License, or`
			`// (at your option) any later version.`
			`//`
			`// This program is distributed in the hope that it will be useful,`
			`// but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`// GNU General Public License for more details.`
			`//`
			`// You should have received a copy of the GNU General Public License`
			`// along with this program; if not, write to the Free Software`
			`// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA`

			`package de.anomic.plasma;`

			`import java.util.Collection;`
			`import java.util.HashMap;`
			`import java.util.Iterator;`
			`import java.util.Map;`
			`import java.util.Set;`

			`import de.anomic.index.indexContainer;`
			`import de.anomic.index.indexRWIEntry;`
			`import de.anomic.index.indexURLEntry;`
			`import de.anomic.kelondro.kelondroException;`
			`import de.anomic.server.logging.serverLog;`

			`/**`
			`*`
			`* This class provides search processes and keeps a timing record of the processes`
			`* It shall be used to initiate a search and also to evaluate`
			`* the real obtained timings after a search is performed`
			`*/`

			`public class plasmaSearchProcessing implements Cloneable {`

			`// collection:`
			`// time = time to get a RWI out of RAM cache, assortments and WORDS files`
			`// count = maximum number of RWI-entries that shall be collected`

			`// join`
			`// time = time to perform the join between all collected RWIs`
			`// count = maximum number of entries that shall be joined`

			`// presort:`
			`// time = time to do a sort of the joined URL-records`
			`// count = maximum number of entries that shall be pre-sorted`

			`// urlfetch:`
			`// time = time to fetch the real URLs from the LURL database`
			`// count = maximum number of urls that shall be fetched`

			`// postsort:`
			`// time = time for final sort of URLs`
			`// count = maximum number oof URLs that shall be retrieved during sort`

			`// snippetfetch:`
			`// time = time to fetch snippets for selected URLs`
			`// count = maximum number of snipptes to be fetched`

			`public static final char PROCESS_COLLECTION = 'c';`
			`public static final char PROCESS_JOIN = 'j';`
			`public static final char PROCESS_PRESORT = 'r';`
			`public static final char PROCESS_URLFETCH = 'u';`
			`public static final char PROCESS_POSTSORT = 'o';`
			`public static final char PROCESS_FILTER = 'f';`
			`public static final char PROCESS_SNIPPETFETCH = 's';`

			`private static final long minimumTargetTime = 100;`

			`public static char[] sequence = new char[]{`
			`PROCESS_COLLECTION,`
			`PROCESS_JOIN,`
			`PROCESS_PRESORT,`
			`PROCESS_URLFETCH,`
			`PROCESS_POSTSORT,`
			`PROCESS_FILTER,`
			`PROCESS_SNIPPETFETCH`
			`};`

			`private HashMap targetTime;`
			`private HashMap targetCount;`
			`private HashMap yieldTime;`
			`private HashMap yieldCount;`
			`private long timer;`

			`private plasmaSearchProcessing() {`
			`targetTime = new HashMap();`
			`targetCount = new HashMap();`
			`yieldTime = new HashMap();`
			`yieldCount = new HashMap();`
			`timer = 0;`
			`}`

			`public plasmaSearchProcessing(long time, int count) {`
			`this(`
			`3 * time / 12, 10 * count,`
			`1 * time / 12, 10 * count,`
			`1 * time / 12, 10 * count,`
			`2 * time / 12, 5 * count,`
			`3 * time / 12, count,`
			`1 * time / 12, count,`
			`1 * time / 12, 1`
			`);`
			`}`

			`public plasmaSearchProcessing(`
			`long time_collection, int count_collection,`
			`long time_join, int count_join,`
			`long time_presort, int count_presort,`
			`long time_urlfetch, int count_urlfetch,`
			`long time_postsort, int count_postsort,`
			`long time_filter, int count_filter,`
			`long time_snippetfetch, int count_snippetfetch) {`
			`this();`

			`targetTime.put(new Character(PROCESS_COLLECTION), new Long(time_collection));`
			`targetTime.put(new Character(PROCESS_JOIN), new Long(time_join));`
			`targetTime.put(new Character(PROCESS_PRESORT), new Long(time_presort));`
			`targetTime.put(new Character(PROCESS_URLFETCH), new Long(time_urlfetch));`
			`targetTime.put(new Character(PROCESS_POSTSORT), new Long(time_postsort));`
			`targetTime.put(new Character(PROCESS_FILTER), new Long(time_filter));`
			`targetTime.put(new Character(PROCESS_SNIPPETFETCH), new Long(time_snippetfetch));`
			`targetCount.put(new Character(PROCESS_COLLECTION), new Integer(count_collection));`
			`targetCount.put(new Character(PROCESS_JOIN), new Integer(count_join));`
			`targetCount.put(new Character(PROCESS_PRESORT), new Integer(count_presort));`
			`targetCount.put(new Character(PROCESS_URLFETCH), new Integer(count_urlfetch));`
			`targetCount.put(new Character(PROCESS_POSTSORT), new Integer(count_postsort));`
			`targetCount.put(new Character(PROCESS_FILTER), new Integer(count_filter));`
			`targetCount.put(new Character(PROCESS_SNIPPETFETCH), new Integer(count_snippetfetch));`

			`}`

			`public Object clone() {`
			`plasmaSearchProcessing p = new plasmaSearchProcessing();`
			`p.targetTime = (HashMap) this.targetTime.clone();`
			`p.targetCount = (HashMap) this.targetCount.clone();`
			`p.yieldTime = (HashMap) this.yieldTime.clone();`
			`p.yieldCount = (HashMap) this.yieldCount.clone();`
			`return p;`
			`}`

			`public plasmaSearchProcessing(String s) {`
			`targetTime = new HashMap();`
			`targetCount = new HashMap();`
			`yieldTime = new HashMap();`
			`yieldCount = new HashMap();`

			`intoMap(s, targetTime, targetCount);`
			`}`

			`public long duetime() {`
			`// returns the old duetime value as sum of all waiting times`
			`long d = 0;`
			`for (int i = 0; i < sequence.length; i++) {`
			`d += ((Long) targetTime.get(new Character(sequence[i]))).longValue();`
			`}`
			`return d;`
			`}`

			`public void putYield(String s) {`
			`intoMap(s, yieldTime, yieldCount);`
			`}`

			`public String yieldToString() {`
			`return toString(yieldTime, yieldCount);`
			`}`

			`public String targetToString() {`
			`return toString(targetTime, targetCount);`
			`}`

			`public long getTargetTime(char type) {`
			`// sum up all time that was demanded and subtract all that had been wasted`
			`long sum = 0;`
			`Long t;`
			`Character element;`
			`for (int i = 0; i < sequence.length; i++) {`
			`element = new Character(sequence[i]);`
			`t = (Long) targetTime.get(element);`
			`if (t != null) sum += t.longValue();`
			`if (type == sequence[i]) return (sum < 0) ? minimumTargetTime : sum;`
			`t = (Long) yieldTime.get(element);`
			`if (t != null) sum -= t.longValue();`
			`}`
			`return minimumTargetTime;`
			`}`

			`public int getTargetCount(char type) {`
			`Integer i = (Integer) targetCount.get(new Character(type));`
			`if (i == null) return -1; else return i.intValue();`
			`}`

			`public long getYieldTime(char type) {`
			`Long l = (Long) yieldTime.get(new Character(type));`
			`if (l == null) return -1; else return l.longValue();`
			`}`

			`public int getYieldCount(char type) {`
			`Integer i = (Integer) yieldCount.get(new Character(type));`
			`if (i == null) return -1; else return i.intValue();`
			`}`

			`public void startTimer() {`
			`this.timer = System.currentTimeMillis();`
			`}`

			`public void setYieldTime(char type) {`
			`// sets a time that is computed using the timer`
			`long t = System.currentTimeMillis() - this.timer;`
			`yieldTime.put(new Character(type), new Long(t));`
			`}`

			`public void setYieldCount(char type, int count) {`
			`yieldCount.put(new Character(type), new Integer(count));`
			`}`

			`public String reportToString() {`
			`return "target=" + toString(targetTime, targetCount) + "; yield=" + toString(yieldTime, yieldCount);`
			`}`

			`public static String toString(HashMap time, HashMap count) {`
			`// put this into a format in such a way that it can be send in a http header or post argument`
			`// that means that no '=' or spaces are allowed`
			`StringBuffer sb = new StringBuffer(sequence.length * 10);`
			`Character element;`
			`Integer xi;`
			`Long xl;`
			`for (int i = 0; i < sequence.length; i++) {`
			`element = new Character(sequence[i]);`
			`sb.append("t");`
			`sb.append(element);`
			`xl = (Long) time.get(element);`
			`sb.append((xl == null) ? "0" : xl.toString());`
			`sb.append("\|");`
			`sb.append("c");`
			`sb.append(element);`
			`xi = (Integer) count.get(element);`
			`sb.append((xi == null) ? "0" : xi.toString());`
			`sb.append("\|");`
			`}`
			`return sb.toString();`
			`}`

			`public static void intoMap(String s, HashMap time, HashMap count) {`
			`// this is the reverse method to toString`
			`int p = 0;`
			`char ct;`
			`String elt;`
			`String v;`
			`int p1;`
			`while ((p < s.length()) && ((p1 = s.indexOf('\|', p)) > 0)) {`
			`ct = s.charAt(p);`
			`elt = s.substring(p + 1, p + 2);`
			`v = s.substring(p + 2, p1);`
			`if (ct == 't') {`
			`time.put(elt, new Long(Long.parseLong(v)));`
			`} else {`
			`count.put(elt, new Integer(Integer.parseInt(v)));`
			`}`
			`}`
			`}`

			`// the processes`

refactoring git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4031 6c8d7289-2bf4-0310-a012-ef5d649a1542 18 years ago			`// collection`
refactoring of search processes git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4030 6c8d7289-2bf4-0310-a012-ef5d649a1542 18 years ago			`public Map[] localSearchContainers(`
			`plasmaSearchQuery query,`
			`plasmaWordIndex wordIndex,`
			`Set urlselection) {`
			`// search for the set of hashes and return a map of of wordhash:indexContainer containing the seach result`

			`// retrieve entities that belong to the hashes`
			`startTimer();`
			`long start = System.currentTimeMillis();`
			`Map inclusionContainers = (query.queryHashes.size() == 0) ? new HashMap() : wordIndex.getContainers(`
			`query.queryHashes,`
			`urlselection,`
			`true,`
			`true,`
			`getTargetTime(plasmaSearchProcessing.PROCESS_COLLECTION) * query.queryHashes.size() / (query.queryHashes.size() + query.excludeHashes.size()));`
			`if ((inclusionContainers.size() != 0) && (inclusionContainers.size() < query.queryHashes.size())) inclusionContainers = new HashMap(); // prevent that only a subset is returned`
			`long remaintime = getTargetTime(plasmaSearchProcessing.PROCESS_COLLECTION) - System.currentTimeMillis() + start;`
			`Map exclusionContainers = ((inclusionContainers == null) \|\| (inclusionContainers.size() == 0) \|\| (remaintime <= 0)) ? new HashMap() : wordIndex.getContainers(`
			`query.excludeHashes,`
			`urlselection,`
			`true,`
			`true,`
			`remaintime);`
			`setYieldTime(plasmaSearchProcessing.PROCESS_COLLECTION);`
			`setYieldCount(plasmaSearchProcessing.PROCESS_COLLECTION, inclusionContainers.size());`

			`return new Map[]{inclusionContainers, exclusionContainers};`
			`}`

refactoring git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4031 6c8d7289-2bf4-0310-a012-ef5d649a1542 18 years ago			`// join`
refactoring of search processes git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4030 6c8d7289-2bf4-0310-a012-ef5d649a1542 18 years ago			`public indexContainer localSearchJoinExclude(`
			`Collection includeContainers,`
			`Collection excludeContainers,`
			`long time, int maxDistance) {`
			`// join a search result and return the joincount (number of pages after join)`

			`// since this is a conjunction we return an empty entity if any word is not known`
			`if (includeContainers == null) return plasmaWordIndex.emptyContainer(null);`

			`// join the result`
			`startTimer();`
			`long start = System.currentTimeMillis();`
			`indexContainer rcLocal = indexContainer.joinContainers(includeContainers, time, maxDistance);`
			`long remaining = getTargetTime(plasmaSearchProcessing.PROCESS_JOIN) - System.currentTimeMillis() + start;`
			`if ((rcLocal != null) && (remaining > 0)) {`
			`indexContainer.excludeContainers(rcLocal, excludeContainers, remaining);`
			`}`
			`if (rcLocal == null) rcLocal = plasmaWordIndex.emptyContainer(null);`
			`setYieldTime(plasmaSearchProcessing.PROCESS_JOIN);`
			`setYieldCount(plasmaSearchProcessing.PROCESS_JOIN, rcLocal.size());`

			`return rcLocal;`
			`}`

refactoring git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4031 6c8d7289-2bf4-0310-a012-ef5d649a1542 18 years ago			`// presort`
			`public plasmaSearchPreOrder preSort(`
refactoring of search processes git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4030 6c8d7289-2bf4-0310-a012-ef5d649a1542 18 years ago			`plasmaSearchQuery query,`
			`plasmaSearchRankingProfile ranking,`
			`indexContainer resultIndex) {`
			`// we collect the urlhashes and construct a list with urlEntry objects`
			`// attention: if minEntries is too high, this method will not terminate within the maxTime`

			`assert (resultIndex != null);`

			`long preorderTime = getTargetTime(plasmaSearchProcessing.PROCESS_PRESORT);`

			`startTimer();`
			`long pst = System.currentTimeMillis();`
			`resultIndex.sort();`
			`resultIndex.uniq(1000);`
			`preorderTime = preorderTime - (System.currentTimeMillis() - pst);`
			`if (preorderTime < 0) preorderTime = 200;`
			`plasmaSearchPreOrder preorder = new plasmaSearchPreOrder(query, ranking, resultIndex, preorderTime);`
			`if (resultIndex.size() > query.wantedResults) preorder.remove(true, true);`
			`setYieldTime(plasmaSearchProcessing.PROCESS_PRESORT);`
			`setYieldCount(plasmaSearchProcessing.PROCESS_PRESORT, resultIndex.size());`

refactoring git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4031 6c8d7289-2bf4-0310-a012-ef5d649a1542 18 years ago			`return preorder;`
			`}`

			`// urlfetch`
			`public plasmaSearchPostOrder urlFetch(`
			`plasmaSearchQuery query,`
			`plasmaSearchRankingProfile ranking,`
			`plasmaWordIndex wordIndex,`
			`plasmaSearchPreOrder preorder) {`

refactoring of search processes git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4030 6c8d7289-2bf4-0310-a012-ef5d649a1542 18 years ago			`// start url-fetch`
			`long postorderTime = getTargetTime(plasmaSearchProcessing.PROCESS_POSTSORT);`
			`//System.out.println("DEBUG: postorder-final (urlfetch) maxtime = " + postorderTime);`
			`long postorderLimitTime = (postorderTime < 0) ? Long.MAX_VALUE : (System.currentTimeMillis() + postorderTime);`
			`startTimer();`
			`plasmaSearchPostOrder acc = new plasmaSearchPostOrder(query, ranking);`

			`indexRWIEntry entry;`
			`indexURLEntry page;`
			`Long preranking;`
			`Object[] preorderEntry;`
			`indexURLEntry.Components comp;`
			`String pagetitle, pageurl, pageauthor;`
			`int minEntries = getTargetCount(plasmaSearchProcessing.PROCESS_POSTSORT);`
			`try {`
			`ordering: while (preorder.hasNext()) {`
some redesign of min/max and normalization computation during search result ordering this saves about 1 millisecond for each URL reference, which has some good effect on the search result computation if a word is searched that appears very often (speed-up of 1 second and more) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4033 6c8d7289-2bf4-0310-a012-ef5d649a1542 18 years ago			`if ((System.currentTimeMillis() >= postorderLimitTime) \|\| (acc.sizeFetched() >= 5 * minEntries)) break;`
refactoring of search processes git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4030 6c8d7289-2bf4-0310-a012-ef5d649a1542 18 years ago			`preorderEntry = preorder.next();`
			`entry = (indexRWIEntry) preorderEntry[0];`
			`// load only urls if there was not yet a root url of that hash`
			`preranking = (Long) preorderEntry[1];`
			`// find the url entry`
			`page = wordIndex.loadedURL.load(entry.urlHash(), entry);`
			`if (page != null) {`
			`comp = page.comp();`
			`pagetitle = comp.title().toLowerCase();`
			`if (comp.url() == null) continue ordering; // rare case where the url is corrupted`
			`pageurl = comp.url().toString().toLowerCase();`
			`pageauthor = comp.author().toLowerCase();`

			`// check exclusion`
			`if (plasmaSearchQuery.matches(pagetitle, query.excludeHashes)) continue ordering;`
			`if (plasmaSearchQuery.matches(pageurl, query.excludeHashes)) continue ordering;`
			`if (plasmaSearchQuery.matches(pageauthor, query.excludeHashes)) continue ordering;`

			`// check url mask`
			`if (!(pageurl.matches(query.urlMask))) continue ordering;`

			`// check constraints`
			`if ((!(query.constraint.equals(plasmaSearchQuery.catchall_constraint))) &&`
			`(query.constraint.get(plasmaCondenser.flag_cat_indexof)) &&`
			`(!(comp.title().startsWith("Index of")))) {`
			`serverLog.logFine("PLASMA", "filtered out " + comp.url().toString());`
			`// filter out bad results`
			`Iterator wi = query.queryHashes.iterator();`
			`while (wi.hasNext()) wordIndex.removeEntry((String) wi.next(), page.hash());`
			`} else if (query.contentdom != plasmaSearchQuery.CONTENTDOM_TEXT) {`
			`if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) && (page.laudio() > 0)) acc.addPage(page, preranking);`
			`else if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) && (page.lvideo() > 0)) acc.addPage(page, preranking);`
			`else if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) && (page.limage() > 0)) acc.addPage(page, preranking);`
			`else if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_APP) && (page.lapp() > 0)) acc.addPage(page, preranking);`
			`} else {`
			`acc.addPage(page, preranking);`
			`}`
			`}`
			`}`
			`} catch (kelondroException ee) {`
			`serverLog.logSevere("PLASMA", "Database Failure during plasmaSearch.order: " + ee.getMessage(), ee);`
			`}`
			`setYieldTime(plasmaSearchProcessing.PROCESS_URLFETCH);`
			`setYieldCount(plasmaSearchProcessing.PROCESS_URLFETCH, acc.sizeFetched());`

refactoring git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4031 6c8d7289-2bf4-0310-a012-ef5d649a1542 18 years ago			`acc.filteredResults = preorder.filteredCount();`

			`return acc;`
			`}`

			`//acc.localContributions = (resultIndex == null) ? 0 : resultIndex.size();`

			`// postsort`
			`public void postSort(`
			`boolean postsort,`
			`plasmaSearchPostOrder acc) {`

refactoring of search processes git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4030 6c8d7289-2bf4-0310-a012-ef5d649a1542 18 years ago			`// start postsorting`
			`startTimer();`
			`acc.sortPages(postsort);`
			`setYieldTime(plasmaSearchProcessing.PROCESS_POSTSORT);`
			`setYieldCount(plasmaSearchProcessing.PROCESS_POSTSORT, acc.sizeOrdered());`
refactoring git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4031 6c8d7289-2bf4-0310-a012-ef5d649a1542 18 years ago			`}`

			`// filter`
			`public void applyFilter(`
			`plasmaSearchPostOrder acc) {`

refactoring of search processes git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4030 6c8d7289-2bf4-0310-a012-ef5d649a1542 18 years ago			`// apply filter`
			`startTimer();`
			`acc.removeRedundant();`
			`setYieldTime(plasmaSearchProcessing.PROCESS_FILTER);`
			`setYieldCount(plasmaSearchProcessing.PROCESS_FILTER, acc.sizeOrdered());`
			`}`
			`}`