- some refactoring to search process git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4222 6c8d7289-2bf4-0310-a012-ef5d649a1542pull/1/head
parent
bf9a9e4e5e
commit
6f1308da2f
@ -1,253 +0,0 @@
|
||||
// plasmaSearchProcessing.java
|
||||
// (C) 2005 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
// first published 17.10.2005 on http://yacy.net
|
||||
//
|
||||
// This is a part of YaCy, a peer-to-peer based web search engine
|
||||
//
|
||||
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
|
||||
// $LastChangedRevision: 1986 $
|
||||
// $LastChangedBy: orbiter $
|
||||
//
|
||||
// LICENSE
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package de.anomic.plasma;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import de.anomic.index.indexContainer;
|
||||
import de.anomic.index.indexRWIEntry;
|
||||
import de.anomic.server.serverByteBuffer;
|
||||
|
||||
/**
|
||||
*
|
||||
* This class provides search processes and keeps a timing record of the processes
|
||||
* It shall be used to initiate a search and also to evaluate
|
||||
* the real obtained timings after a search is performed
|
||||
*/
|
||||
|
||||
public class plasmaSearchProcessing implements Cloneable {
|
||||
|
||||
// collection:
|
||||
// time = time to get a RWI out of RAM cache, assortments and WORDS files
|
||||
// count = maximum number of RWI-entries that shall be collected
|
||||
|
||||
// join
|
||||
// time = time to perform the join between all collected RWIs
|
||||
// count = maximum number of entries that shall be joined
|
||||
|
||||
// presort:
|
||||
// time = time to do a sort of the joined URL-records
|
||||
// count = maximum number of entries that shall be pre-sorted
|
||||
|
||||
// urlfetch:
|
||||
// time = time to fetch the real URLs from the LURL database
|
||||
// count = maximum number of urls that shall be fetched
|
||||
|
||||
// postsort:
|
||||
// time = time for final sort of URLs
|
||||
// count = maximum number oof URLs that shall be retrieved during sort
|
||||
|
||||
// snippetfetch:
|
||||
// time = time to fetch snippets for selected URLs
|
||||
// count = maximum number of snipptes to be fetched
|
||||
|
||||
public static final String COLLECTION = "collection";
|
||||
public static final String JOIN = "join";
|
||||
public static final String PRESORT = "presort";
|
||||
public static final String URLFETCH = "urlfetch";
|
||||
|
||||
private static final long minimumTargetTime = 100;
|
||||
|
||||
private long targetTime;
|
||||
private int targetCount;
|
||||
private ArrayList yield;
|
||||
private long timer;
|
||||
|
||||
private plasmaSearchProcessing() {
|
||||
targetTime = minimumTargetTime;
|
||||
targetCount = 10;
|
||||
yield = new ArrayList();
|
||||
timer = 0;
|
||||
}
|
||||
|
||||
public plasmaSearchProcessing(long time, int count) {
|
||||
this();
|
||||
this.targetTime = time;
|
||||
this.targetCount = count;
|
||||
}
|
||||
|
||||
public static class Entry {
|
||||
public String process;
|
||||
public int count;
|
||||
public long time;
|
||||
public Entry(String process, int count, long time) {
|
||||
this.process = process;
|
||||
this.count = count;
|
||||
this.time = time;
|
||||
}
|
||||
}
|
||||
|
||||
public int getTargetCount() {
|
||||
return this.targetCount;
|
||||
}
|
||||
|
||||
public long getTargetTime() {
|
||||
return this.targetTime;
|
||||
}
|
||||
|
||||
public void startTimer() {
|
||||
this.timer = System.currentTimeMillis();
|
||||
}
|
||||
|
||||
public void yield(String s, int count) {
|
||||
long t = System.currentTimeMillis() - this.timer;
|
||||
Entry e = new Entry(s, count, t);
|
||||
yield.add(e);
|
||||
}
|
||||
|
||||
public Iterator events() {
|
||||
// iteratese Entry-type Objects
|
||||
return yield.iterator();
|
||||
}
|
||||
|
||||
public int size() {
|
||||
// returns number of events / Entry-Objects in yield array
|
||||
return yield.size();
|
||||
}
|
||||
|
||||
public Map[] localSearchContainers(
|
||||
plasmaSearchQuery query,
|
||||
plasmaWordIndex wordIndex,
|
||||
Set urlselection) {
|
||||
// search for the set of hashes and return a map of of wordhash:indexContainer containing the seach result
|
||||
|
||||
// retrieve entities that belong to the hashes
|
||||
startTimer();
|
||||
Map inclusionContainers = (query.queryHashes.size() == 0) ? new HashMap() : wordIndex.getContainers(
|
||||
query.queryHashes,
|
||||
urlselection,
|
||||
true,
|
||||
true);
|
||||
if ((inclusionContainers.size() != 0) && (inclusionContainers.size() < query.queryHashes.size())) inclusionContainers = new HashMap(); // prevent that only a subset is returned
|
||||
Map exclusionContainers = ((inclusionContainers == null) || (inclusionContainers.size() == 0)) ? new HashMap() : wordIndex.getContainers(
|
||||
query.excludeHashes,
|
||||
urlselection,
|
||||
true,
|
||||
true);
|
||||
yield(plasmaSearchProcessing.COLLECTION, inclusionContainers.size());
|
||||
|
||||
return new Map[]{inclusionContainers, exclusionContainers};
|
||||
}
|
||||
|
||||
public indexContainer localSearchJoinExclude(
|
||||
Collection includeContainers,
|
||||
Collection excludeContainers,
|
||||
int maxDistance) {
|
||||
// join a search result and return the joincount (number of pages after join)
|
||||
|
||||
// since this is a conjunction we return an empty entity if any word is not known
|
||||
if (includeContainers == null) return plasmaWordIndex.emptyContainer(null, 0);
|
||||
|
||||
// join the result
|
||||
startTimer();
|
||||
indexContainer rcLocal = indexContainer.joinContainers(includeContainers, maxDistance);
|
||||
if (rcLocal != null) {
|
||||
indexContainer.excludeContainers(rcLocal, excludeContainers);
|
||||
}
|
||||
if (rcLocal == null) rcLocal = plasmaWordIndex.emptyContainer(null, 0);
|
||||
yield(plasmaSearchProcessing.JOIN, rcLocal.size());
|
||||
|
||||
return rcLocal;
|
||||
}
|
||||
|
||||
|
||||
|
||||
public static final serverByteBuffer compressIndex(indexContainer inputContainer, indexContainer excludeContainer, long maxtime) {
|
||||
// collect references according to domains
|
||||
long timeout = (maxtime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxtime;
|
||||
TreeMap doms = new TreeMap();
|
||||
synchronized (inputContainer) {
|
||||
Iterator i = inputContainer.entries();
|
||||
indexRWIEntry iEntry;
|
||||
String dom, paths;
|
||||
while (i.hasNext()) {
|
||||
iEntry = (indexRWIEntry) i.next();
|
||||
if ((excludeContainer != null) && (excludeContainer.get(iEntry.urlHash()) != null)) continue; // do not include urls that are in excludeContainer
|
||||
dom = iEntry.urlHash().substring(6);
|
||||
if ((paths = (String) doms.get(dom)) == null) {
|
||||
doms.put(dom, iEntry.urlHash().substring(0, 6));
|
||||
} else {
|
||||
doms.put(dom, paths + iEntry.urlHash().substring(0, 6));
|
||||
}
|
||||
if (System.currentTimeMillis() > timeout)
|
||||
break;
|
||||
}
|
||||
}
|
||||
// construct a result string
|
||||
serverByteBuffer bb = new serverByteBuffer(inputContainer.size() * 6);
|
||||
bb.append('{');
|
||||
Iterator i = doms.entrySet().iterator();
|
||||
Map.Entry entry;
|
||||
while (i.hasNext()) {
|
||||
entry = (Map.Entry) i.next();
|
||||
bb.append((String) entry.getKey());
|
||||
bb.append(':');
|
||||
bb.append((String) entry.getValue());
|
||||
if (System.currentTimeMillis() > timeout)
|
||||
break;
|
||||
if (i.hasNext())
|
||||
bb.append(',');
|
||||
}
|
||||
bb.append('}');
|
||||
return bb;
|
||||
}
|
||||
|
||||
public static final void decompressIndex(TreeMap target, serverByteBuffer ci, String peerhash) {
|
||||
// target is a mapping from url-hashes to a string of peer-hashes
|
||||
if ((ci.byteAt(0) == '{') && (ci.byteAt(ci.length() - 1) == '}')) {
|
||||
//System.out.println("DEBUG-DECOMPRESS: input is " + ci.toString());
|
||||
ci = ci.trim(1, ci.length() - 2);
|
||||
String dom, url, peers;
|
||||
while ((ci.length() >= 13) && (ci.byteAt(6) == ':')) {
|
||||
assert ci.length() >= 6 : "ci.length() = " + ci.length();
|
||||
dom = ci.toString(0, 6);
|
||||
ci.trim(7);
|
||||
while ((ci.length() > 0) && (ci.byteAt(0) != ',')) {
|
||||
assert ci.length() >= 6 : "ci.length() = " + ci.length();
|
||||
url = ci.toString(0, 6) + dom;
|
||||
ci.trim(6);
|
||||
peers = (String) target.get(url);
|
||||
if (peers == null) {
|
||||
target.put(url, peerhash);
|
||||
} else {
|
||||
target.put(url, peers + peerhash);
|
||||
}
|
||||
//System.out.println("DEBUG-DECOMPRESS: " + url + ":" + target.get(url));
|
||||
}
|
||||
if (ci.byteAt(0) == ',') ci.trim(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -0,0 +1,93 @@
|
||||
// serverProfiling.java
|
||||
// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
// first published 17.11.2007 on http://yacy.net
|
||||
//
|
||||
// This is a part of YaCy, a peer-to-peer based web search engine
|
||||
//
|
||||
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
|
||||
// $LastChangedRevision: 1986 $
|
||||
// $LastChangedBy: orbiter $
|
||||
//
|
||||
// LICENSE
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package de.anomic.server;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
 * This class keeps a timing record of process steps.
 *
 * Usage: call {@link #startTimer()} before a process step and
 * {@link #yield(String, int)} after it; every such pair appends one {@link Entry}
 * that can be read back through {@link #events()}.
 *
 * Instances are not synchronized.
 */
public class serverProfiling implements Cloneable {

    // default target time used by the private constructor
    private static final long minimumTargetTime = 100;

    private long targetTime;
    private int targetCount;
    private ArrayList yield; // list of Entry objects, in the order they were recorded
    private long timer;      // start time of the running measurement; 0 = not started

    private serverProfiling() {
        this.targetTime = minimumTargetTime;
        this.targetCount = 10;
        this.yield = new ArrayList();
        this.timer = 0;
    }

    /**
     * Creates a profiling record with the given targets and an empty event list.
     *
     * @param time  the target time, returned by {@link #getTargetTime()}
     * @param count the target count, returned by {@link #getTargetCount()}
     */
    public serverProfiling(long time, int count) {
        this();
        this.targetTime = time;
        this.targetCount = count;
    }

    /**
     * Returns a copy of this record that owns its own event list, so that events
     * recorded on one instance afterwards do not show up in the other.
     * The Entry objects already recorded are shared between both instances.
     *
     * @return the copy, an instance of this class
     */
    public Object clone() {
        try {
            serverProfiling copy = (serverProfiling) super.clone();
            copy.yield = new ArrayList(this.yield);
            return copy;
        } catch (CloneNotSupportedException e) {
            // cannot happen: this class implements Cloneable
            throw new InternalError(e.toString());
        }
    }

    /**
     * One recorded event: the name of a process step, the count reported for it,
     * and the measured time in milliseconds.
     */
    public static class Entry {
        public String process;
        public int count;
        public long time;

        public Entry(String process, int count, long time) {
            this.process = process;
            this.count = count;
            this.time = time;
        }
    }

    /**
     * @return the target count given to the constructor
     */
    public int getTargetCount() {
        return this.targetCount;
    }

    /**
     * @return the target time given to the constructor
     */
    public long getTargetTime() {
        return this.targetTime;
    }

    /**
     * Marks the current time as the start of the next measurement.
     */
    public void startTimer() {
        this.timer = System.currentTimeMillis();
    }

    /**
     * Records an event with the time that has passed since the last call of
     * {@link #startTimer()}. If the timer was never started, a time of 0 is
     * recorded instead of the number of milliseconds since the epoch.
     *
     * Note: since Java 14 'yield' is a restricted identifier, therefore this
     * method must always be invoked with a qualifier (e.g. this.yield(...)).
     *
     * @param s     the name of the process step
     * @param count the count to be stored with the event
     */
    public void yield(String s, int count) {
        long elapsed = (this.timer == 0) ? 0 : System.currentTimeMillis() - this.timer;
        this.yield.add(new Entry(s, count, elapsed));
    }

    /**
     * @return an iterator over the recorded Entry-type objects, in recording order
     */
    public Iterator events() {
        return this.yield.iterator();
    }

    /**
     * @return the number of events / Entry objects in the yield list
     */
    public int size() {
        return this.yield.size();
    }

}
|
Loading…
Reference in new issue