introduction of search profiles; very experimental

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@976 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 20 years ago
parent efd11c13b6
commit 4dcbc26ef1

@ -151,7 +151,7 @@ public class IndexControl_p {
// generate an urlx array
plasmaWordIndexEntity index = null;
try {
index = switchboard.wordIndex.getEntity(keyhash, true);
index = switchboard.wordIndex.getEntity(keyhash, true, -1);
Iterator en = index.elements(true);
int i = 0;
urlx = new String[index.size()];
@ -258,7 +258,7 @@ public class IndexControl_p {
plasmaWordIndexEntity[] indexes = new plasmaWordIndexEntity[1];
String result;
long starttime = System.currentTimeMillis();
indexes[0] = switchboard.wordIndex.getEntity(keyhash, true);
indexes[0] = switchboard.wordIndex.getEntity(keyhash, true, -1);
// built urlCache
Iterator urlIter = indexes[0].elements(true);
HashMap knownURLs = new HashMap();
@ -436,7 +436,7 @@ public class IndexControl_p {
// search for a word hash and generate a list of url links
plasmaWordIndexEntity index = null;
try {
index = switchboard.wordIndex.getEntity(keyhash, true);
index = switchboard.wordIndex.getEntity(keyhash, true, -1);
final StringBuffer result = new StringBuffer(1024);
if (index.size() == 0) {

@ -69,12 +69,14 @@ public class NetworkPicture {
int height = 480;
int passiveLimit = 300;
int potentialLimit = 300;
int maxCount = 1000;
if (post != null) {
width = post.getInt("width", 640);
height = post.getInt("height", 420);
passiveLimit = post.getInt("pal", 300);
potentialLimit = post.getInt("pol", 300);
maxCount = post.getInt("max", 1000);
}
int innerradius = Math.min(width, height) / 5;
@ -98,7 +100,6 @@ public class NetworkPicture {
//System.out.println("Seed Maximum distance is " + yacySeed.maxDHTDistance);
//System.out.println("Seed Minimum distance is " + yacySeed.minDHTNumber);
final int maxCount = 300;
yacySeed seed;
int angle;
long lastseen;

@ -88,7 +88,7 @@ public final class query {
// <env> shall contain a word hash, the number of assigned lurls to this hash is returned
de.anomic.plasma.plasmaWordIndexEntity entity = null;
try {
entity = sb.wordIndex.getEntity(env, true);
entity = sb.wordIndex.getEntity(env, true, -1);
prop.put("response", entity.size());
entity.close();
} catch (IOException e) {

@ -158,7 +158,7 @@ public class plasmaDbImporter extends Thread {
try {
wordCounter++;
wordHash = (String) importWordHashIterator.next();
importWordIdxEntity = importWordIndex.getEntity(wordHash, true);
importWordIdxEntity = importWordIndex.getEntity(wordHash, true, -1);
if (importWordIdxEntity.size() == 0) {
importWordIdxEntity.deleteComplete();

@ -63,6 +63,7 @@ public final class plasmaSearchEvent {
private plasmaCrawlLURL urlStore;
private plasmaSnippetCache snippetCache;
private plasmaWordIndexEntity rcLocal, rcGlobal; // caches for results
private plasmaSearchProfile profileLocal, profileGlobal;
private yacySearch[] searchThreads;
public plasmaSearchEvent(plasmaSearchQuery query, serverLog log, plasmaWordIndex wordIndex, plasmaCrawlLURL urlStore, plasmaSnippetCache snippetCache) {
@ -73,6 +74,13 @@ public final class plasmaSearchEvent {
this.snippetCache = snippetCache;
this.rcLocal = new plasmaWordIndexEntity(null);
this.rcGlobal = new plasmaWordIndexEntity(null);
if (query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) {
this.profileLocal = new plasmaSearchProfile(4 * query.maximumTime / 10, query.wantedResults);
this.profileGlobal = new plasmaSearchProfile(6 * query.maximumTime / 10, query.wantedResults);
} else {
this.profileLocal = new plasmaSearchProfile(query.maximumTime, query.wantedResults);
this.profileGlobal = null;
}
this.searchThreads = null;
}
@ -80,9 +88,8 @@ public final class plasmaSearchEvent {
// combine all threads
if (query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) {
int fetchcount = ((int) (query.maximumTime / 1000L)) * 5; // number of wanted results until break in search
int fetchpeers = ((int) (query.maximumTime / 1000L)) * 2; // number of target peers; means 30 peers in 10 seconds
long fetchtime = query.maximumTime * 6 / 10; // time to waste
int fetchpeers = (int) (query.maximumTime / 1000L); // number of target peers; means 10 peers in 10 seconds
if (fetchpeers > 10) fetchpeers = 10;
// remember time
long start = System.currentTimeMillis();
@ -91,16 +98,12 @@ public final class plasmaSearchEvent {
serverInstantThread.oneTimeJob(this, "localSearch", log, 0);
// do a global search
int globalContributions = globalSearch(fetchcount, fetchpeers, fetchtime);
int globalContributions = globalSearch(fetchpeers);
log.logFine("SEARCH TIME AFTER GLOBAL-TRIGGER TO " + fetchpeers + " PEERS: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
try {
// combine the result and order
long remainingTime = query.maximumTime - (System.currentTimeMillis() - start);
if (remainingTime < 500) remainingTime = 500;
if (remainingTime > 3000) remainingTime = 3000;
plasmaSearchResult result = order(remainingTime, query.wantedResults);
plasmaSearchResult result = order();
result.globalContributions = globalContributions;
result.localContributions = rcLocal.size();
@ -112,6 +115,7 @@ public final class plasmaSearchEvent {
rcLocal = null;
// return search result
log.logFine("SEARCHRESULT: " + profileLocal.reportToString());
return result;
} catch (IOException e) {
return null;
@ -120,14 +124,16 @@ public final class plasmaSearchEvent {
// do a local search
long start = System.currentTimeMillis();
try {
localSearch(query.maximumTime);
plasmaSearchResult result = order(query.maximumTime - (System.currentTimeMillis() - start), query.wantedResults);
localSearch();
plasmaSearchResult result = order();
result.localContributions = rcLocal.size();
// clean up
if ((rcLocal != null) && (!(rcLocal.isTMPEntity()))) rcLocal.close();
rcLocal = null;
// return search result
log.logFine("SEARCHRESULT: " + profileLocal.reportToString());
return result;
} catch (IOException e) {
return null;
@ -135,19 +141,14 @@ public final class plasmaSearchEvent {
}
}
public void localSearch() throws IOException {
// method called by a one-time
localSearch(query.maximumTime * 6 / 10);
}
public int localSearch(long time) throws IOException {
public int localSearch() throws IOException {
// search for the set of hashes and return an array of urlEntry elements
long stamp = System.currentTimeMillis();
// retrieve entities that belong to the hashes
Set entities = wordIndex.getEntities(query.queryHashes, true, true);
profileLocal.startTimer();
Set entities = wordIndex.getEntities(query.queryHashes, true, true, profileLocal.getTargetTime(plasmaSearchProfile.PROCESS_COLLECTION));
profileLocal.setYieldTime(plasmaSearchProfile.PROCESS_COLLECTION);
profileLocal.setYieldCount(plasmaSearchProfile.PROCESS_COLLECTION, (entities == null) ? 0 : entities.size());
// since this is a conjunction we return an empty entity if any word is not known
if (entities == null) {
@ -156,31 +157,28 @@ public final class plasmaSearchEvent {
}
// join the result
long remainingTime = time - (System.currentTimeMillis() - stamp);
if (remainingTime < 1000) remainingTime = 1000;
rcLocal = plasmaWordIndexEntity.joinEntities(entities, remainingTime);
log.logFine("SEARCH TIME FOR FINDING " + rcLocal.size() + " ELEMENTS: " + ((System.currentTimeMillis() - stamp) / 1000) + " seconds");
profileLocal.startTimer();
rcLocal = plasmaWordIndexEntity.joinEntities(entities, profileLocal.getTargetTime(plasmaSearchProfile.PROCESS_JOIN));
profileLocal.setYieldTime(plasmaSearchProfile.PROCESS_JOIN);
profileLocal.setYieldCount(plasmaSearchProfile.PROCESS_JOIN, rcLocal.size());
return rcLocal.size();
}
public int globalSearch(int fetchcount, int fetchpeers, long timelimit) {
public int globalSearch(int fetchpeers) {
// do global fetching
// the result of the fetch is then in the rcGlobal
if (fetchpeers < 10) fetchpeers = 10;
if (fetchcount > query.wantedResults * 10) fetchcount = query.wantedResults * 10;
// set a duetime for clients
long duetime = timelimit - 4000; // subtract network traffic overhead, guessed 4 seconds
if (duetime < 1000) { duetime = 1000; }
log.logFine("STARTING " + fetchpeers + " THREADS TO CATCH EACH " + profileGlobal.getTargetCount(plasmaSearchProfile.PROCESS_POSTSORT) + " URLs WITHIN " + (profileGlobal.duetime() / 1000) + " SECONDS");
long timeout = System.currentTimeMillis() + timelimit;
searchThreads = yacySearch.searchHashes(query.queryHashes, urlStore, rcGlobal, fetchcount, fetchpeers, plasmaSwitchboard.urlBlacklist, snippetCache, duetime);
long timeout = System.currentTimeMillis() + profileGlobal.duetime() + 4000;
searchThreads = yacySearch.searchHashes(query.queryHashes, urlStore, rcGlobal, fetchpeers, plasmaSwitchboard.urlBlacklist, snippetCache, profileGlobal);
// wait until wanted delay passed or wanted result appeared
while (System.currentTimeMillis() < timeout) {
// check if all threads have been finished or results so far are enough
if (rcGlobal.size() >= fetchcount * 3) break; // we have enough
if (rcGlobal.size() >= profileGlobal.getTargetCount(plasmaSearchProfile.PROCESS_POSTSORT) * 3) break; // we have enough
if (yacySearch.remainingWaiting(searchThreads) == 0) break; // we cannot expect more
// wait a little time ..
try {Thread.currentThread().sleep(100);} catch (InterruptedException e) {}
@ -189,7 +187,7 @@ public final class plasmaSearchEvent {
return rcGlobal.size();
}
public plasmaSearchResult order(long maxTime, int minEntries) throws IOException {
public plasmaSearchResult order() throws IOException {
// we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime
@ -197,19 +195,29 @@ public final class plasmaSearchEvent {
searchResult.merge(rcLocal, -1);
searchResult.merge(rcGlobal, -1);
long preorderTime = profileLocal.getTargetTime(plasmaSearchProfile.PROCESS_PRESORT);
long postorderTime = profileLocal.getTargetTime(plasmaSearchProfile.PROCESS_POSTSORT);
profileLocal.startTimer();
plasmaSearchPreOrder preorder = new plasmaSearchPreOrder(query);
preorder.addEntity(searchResult, preorderTime);
profileLocal.setYieldTime(plasmaSearchProfile.PROCESS_PRESORT);
profileLocal.setYieldCount(plasmaSearchProfile.PROCESS_PRESORT, rcLocal.size());
profileLocal.startTimer();
plasmaSearchResult acc = new plasmaSearchResult(query);
if (searchResult == null) return acc; // strange case where searchResult is not proper: acc is then empty
if (searchResult.size() == 0) return acc; // case that we have nothing to do
Iterator e = searchResult.elements(true);
// start url-fetch
plasmaWordIndexEntry entry;
long startCreateTime = System.currentTimeMillis();
long postorderLimitTime = (postorderTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + postorderTime;
plasmaCrawlLURL.Entry page;
int minEntries = profileLocal.getTargetCount(plasmaSearchProfile.PROCESS_POSTSORT);
try {
while (e.hasNext()) {
if ((acc.sizeFetched() >= minEntries) &&
(System.currentTimeMillis() - startCreateTime >= maxTime)) break;
entry = (plasmaWordIndexEntry) e.next();
while (preorder.hasNext()) {
if ((acc.sizeFetched() >= minEntries) && (System.currentTimeMillis() >= postorderLimitTime)) break;
entry = (plasmaWordIndexEntry) preorder.next();
// find the url entry
page = urlStore.getEntry(entry.getUrlHash());
// add a result
@ -218,10 +226,15 @@ public final class plasmaSearchEvent {
} catch (kelondroException ee) {
serverLog.logSevere("PLASMA", "Database Failure during plasmaSearch.order: " + ee.getMessage(), ee);
}
long startSortTime = System.currentTimeMillis();
profileLocal.setYieldTime(plasmaSearchProfile.PROCESS_URLFETCH);
profileLocal.setYieldCount(plasmaSearchProfile.PROCESS_URLFETCH, acc.sizeFetched());
// start postsorting
profileLocal.startTimer();
acc.sortResults();
serverLog.logFine("PLASMA", "plasmaSearchEvent.order: minEntries = " + minEntries + ", effectiveEntries = " + acc.sizeOrdered() + ", demanded Time = " + maxTime + ", effectiveTime = " + (System.currentTimeMillis() - startCreateTime) + ", createTime = " + (startSortTime - startCreateTime) + ", sortTime = " + (System.currentTimeMillis() - startSortTime));
return acc;
profileLocal.setYieldTime(plasmaSearchProfile.PROCESS_POSTSORT);
profileLocal.setYieldCount(plasmaSearchProfile.PROCESS_POSTSORT, acc.sizeOrdered());
return acc;
}
public void flushResults() {
@ -230,31 +243,38 @@ public final class plasmaSearchEvent {
// it is wise to call this within a separate thread because this method waits untill all
if (searchThreads == null) return;
// wait untill all threads are finished
// wait until all threads are finished
int remaining;
int count = 0;
String wordHash;
long starttime = System.currentTimeMillis();
while ((remaining = yacySearch.remainingWaiting(searchThreads)) > 0) {
try {Thread.currentThread().sleep(5000);} catch (InterruptedException e) {}
// flush the rcGlobal as much as is there so far
synchronized (rcGlobal) {
Iterator hashi = query.queryHashes.iterator();
while (hashi.hasNext()) {
wordHash = (String) hashi.next();
Iterator i = rcGlobal.elements(true);
plasmaWordIndexEntry entry;
while (i.hasNext()) {
entry = (plasmaWordIndexEntry) i.next();
wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry), false);
}
}
// the rcGlobal was flushed, empty it
count += rcGlobal.size();
rcGlobal.deleteComplete();
}
// wait a little bit before trying again
try {Thread.currentThread().sleep(3000);} catch (InterruptedException e) {}
if (System.currentTimeMillis() - starttime > 90000) {
yacySearch.interruptAlive(searchThreads);
serverLog.logFine("PLASMA", "SEARCH FLUSH: " + remaining + " PEERS STILL BUSY; ABANDONED");
serverLog.logFine("PLASMA", "SEARCH FLUSH: " + remaining + " PEERS STILL BUSY; ABANDONED; SEARCH WAS " + query.queryWords);
break;
}
}
// now flush the rcGlobal into wordIndex
Iterator hashi = query.queryHashes.iterator();
String wordHash;
while (hashi.hasNext()) {
wordHash = (String) hashi.next();
Iterator i = rcGlobal.elements(true);
plasmaWordIndexEntry entry;
while (i.hasNext()) {
entry = (plasmaWordIndexEntry) i.next();
wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry), false);
}
}
serverLog.logFine("PLASMA", "FINISHED FLUSHING " + rcGlobal.size() + " GLOBAL SEARCH RESULTS");
serverLog.logFine("PLASMA", "FINISHED FLUSHING " + count + " GLOBAL SEARCH RESULTS FOR SEARCH " + query.queryWords);
// finally delete the temporary index
rcGlobal = null;

@ -0,0 +1,103 @@
// plasmaSearchPreOrder.java
// -----------------------
// part of YACY
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2005
// Created: 23.10.2005
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this software or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notice above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
package de.anomic.plasma;
import java.util.TreeMap;
import java.util.Set;
import java.util.HashSet;
import java.util.ArrayList;
import java.util.Iterator;
import java.net.URL;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.server.serverCodings;
/**
 * Accumulates reverse-word-index entries in ranking order before the
 * final (post-) ordering of search results takes place.
 *
 * Entries are kept in a TreeMap whose key is a fixed-width hex encoding
 * of the ranking value concatenated with the url hash; iteration via
 * next() therefore delivers the highest-ranked entry first.
 */
public final class plasmaSearchPreOrder {

    private TreeMap pageAcc;          // key = ranking hex + url hash; value = plasmaWordIndexEntry
    private plasmaSearchQuery query;  // supplies the primary/secondary order attributes

    public plasmaSearchPreOrder(plasmaSearchQuery query) {
        this.pageAcc = new TreeMap();
        this.query = query;
    }

    /**
     * Shallow clone: duplicates only the sort structure; the contained
     * index entries are shared with this instance.
     */
    public plasmaSearchPreOrder cloneSmart() {
        plasmaSearchPreOrder copy = new plasmaSearchPreOrder(this.query);
        copy.pageAcc = (TreeMap) this.pageAcc.clone();
        return copy;
    }

    public boolean hasNext() {
        return !pageAcc.isEmpty();
    }

    /**
     * Removes and returns the entry with the highest ranking key.
     */
    public plasmaWordIndexEntry next() {
        return (plasmaWordIndexEntry) pageAcc.remove(pageAcc.lastKey());
    }

    /**
     * Feeds all entries of the given entity into the pre-order.
     *
     * @param entity  source of index entries
     * @param maxTime time budget in milliseconds; negative means unlimited
     */
    public void addEntity(plasmaWordIndexEntity entity, long maxTime) {
        long deadline = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
        Iterator it = entity.elements(true);
        while (it.hasNext()) {
            if (System.currentTimeMillis() > deadline) break;
            addEntry((plasmaWordIndexEntry) it.next());
        }
    }

    /**
     * Inserts a single entry; the primary order attribute is weighted by
     * 4096 so the secondary attribute only breaks ties.
     */
    public void addEntry(plasmaWordIndexEntry indexEntry) {
        long ranking = 0;
        if (query.order[0].equals(plasmaSearchQuery.ORDER_QUALITY)) ranking = 4096 * indexEntry.getQuality();
        else if (query.order[0].equals(plasmaSearchQuery.ORDER_DATE)) ranking = 4096 * indexEntry.getVirtualAge();
        if (query.order[1].equals(plasmaSearchQuery.ORDER_QUALITY)) ranking += indexEntry.getQuality();
        else if (query.order[1].equals(plasmaSearchQuery.ORDER_DATE)) ranking += indexEntry.getVirtualAge();
        pageAcc.put(serverCodings.encodeHex(ranking, 16) + indexEntry.getUrlHash(), indexEntry);
    }

}

@ -0,0 +1,276 @@
// plasmaSearchProfile.java
// -----------------------
// part of YACY
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2005
// Created: 17.10.2005
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this software or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notice above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
package de.anomic.plasma;
import java.util.HashMap;
import java.lang.StringBuffer;
import java.lang.Cloneable;
/**
 * Timing and quota profile for search processes.
 *
 * A profile assigns to each search stage (collection, join, presort,
 * url-fetch, postsort, snippet-fetch) a target time and a target count.
 * While the search runs, the actually used times and produced counts are
 * recorded as 'yield' values. The profile can be serialized into a compact
 * string (no '=' or spaces, so it can travel in a http header or post
 * argument) and parsed back; it shall be used to initiate a search and
 * also to evaluate the real obtained timings after a search is performed.
 */
public class plasmaSearchProfile implements Cloneable {

    // stage identifiers; for each stage the profile holds:
    //   time  = milliseconds granted to / used by the stage
    //   count = maximum / actual number of entries handled by the stage
    public static final char PROCESS_COLLECTION   = 'c'; // get RWIs out of RAM cache, assortments and WORDS files
    public static final char PROCESS_JOIN         = 'j'; // join between all collected RWIs
    public static final char PROCESS_PRESORT      = 'r'; // sort of the joined URL-records
    public static final char PROCESS_URLFETCH     = 'u'; // fetch the real URLs from the LURL database
    public static final char PROCESS_POSTSORT     = 'o'; // final sort of URLs
    public static final char PROCESS_SNIPPETFETCH = 's'; // fetch snippets for selected URLs

    // execution order of the stages; getTargetTime() sums along this sequence
    public static char[] sequence = new char[]{
        PROCESS_COLLECTION,
        PROCESS_JOIN,
        PROCESS_PRESORT,
        PROCESS_URLFETCH,
        PROCESS_POSTSORT,
        PROCESS_SNIPPETFETCH
    };

    private HashMap targetTime;  // Character(stage) -> Long(milliseconds granted)
    private HashMap targetCount; // Character(stage) -> Integer(maximum entries)
    private HashMap yieldTime;   // Character(stage) -> Long(milliseconds actually used)
    private HashMap yieldCount;  // Character(stage) -> Integer(entries actually produced)
    private long timer;          // set by startTimer(), read by setYieldTime()

    private plasmaSearchProfile() {
        targetTime = new HashMap();
        targetCount = new HashMap();
        yieldTime = new HashMap();
        yieldCount = new HashMap();
        timer = 0;
    }

    /**
     * Creates a profile by distributing a total time budget and a wanted
     * result count over the six stages with fixed weights (time in twelfths;
     * counts scaled per stage). The stage times sum up to the given time.
     *
     * @param time  total time budget in milliseconds
     * @param count number of wanted results
     */
    public plasmaSearchProfile(long time, int count) {
        this(
            3 * time / 12, 10 * count,
            1 * time / 12, 10 * count,
            1 * time / 12, 10 * count,
            2 * time / 12,  5 * count,
            4 * time / 12,      count,
            1 * time / 12,          1
        );
    }

    /**
     * Creates a profile with explicit per-stage targets.
     */
    public plasmaSearchProfile(
            long time_collection,   int count_collection,
            long time_join,         int count_join,
            long time_presort,      int count_presort,
            long time_urlfetch,     int count_urlfetch,
            long time_postsort,     int count_postsort,
            long time_snippetfetch, int count_snippetfetch) {
        this();
        targetTime.put(Character.valueOf(PROCESS_COLLECTION),   Long.valueOf(time_collection));
        targetTime.put(Character.valueOf(PROCESS_JOIN),         Long.valueOf(time_join));
        targetTime.put(Character.valueOf(PROCESS_PRESORT),      Long.valueOf(time_presort));
        targetTime.put(Character.valueOf(PROCESS_URLFETCH),     Long.valueOf(time_urlfetch));
        targetTime.put(Character.valueOf(PROCESS_POSTSORT),     Long.valueOf(time_postsort));
        targetTime.put(Character.valueOf(PROCESS_SNIPPETFETCH), Long.valueOf(time_snippetfetch));
        targetCount.put(Character.valueOf(PROCESS_COLLECTION),   Integer.valueOf(count_collection));
        targetCount.put(Character.valueOf(PROCESS_JOIN),         Integer.valueOf(count_join));
        targetCount.put(Character.valueOf(PROCESS_PRESORT),      Integer.valueOf(count_presort));
        targetCount.put(Character.valueOf(PROCESS_URLFETCH),     Integer.valueOf(count_urlfetch));
        targetCount.put(Character.valueOf(PROCESS_POSTSORT),     Integer.valueOf(count_postsort));
        targetCount.put(Character.valueOf(PROCESS_SNIPPETFETCH), Integer.valueOf(count_snippetfetch));
    }

    /**
     * Creates a profile by parsing a string produced by targetToString().
     */
    public plasmaSearchProfile(String s) {
        this();
        intoMap(s, targetTime, targetCount);
    }

    public Object clone() {
        plasmaSearchProfile p = new plasmaSearchProfile();
        p.targetTime = (HashMap) this.targetTime.clone();
        p.targetCount = (HashMap) this.targetCount.clone();
        p.yieldTime = (HashMap) this.yieldTime.clone();
        p.yieldCount = (HashMap) this.yieldCount.clone();
        p.timer = this.timer; // fix: timer was not copied before
        return p;
    }

    /**
     * @return the total duetime as sum of all stage target times
     */
    public long duetime() {
        long d = 0;
        for (int i = 0; i < sequence.length; i++) {
            Long t = (Long) targetTime.get(Character.valueOf(sequence[i]));
            if (t != null) d += t.longValue();
        }
        return d;
    }

    /**
     * Merges serialized yield values (as produced by yieldToString())
     * into this profile's yield maps.
     */
    public void putYield(String s) {
        intoMap(s, yieldTime, yieldCount);
    }

    public String yieldToString() {
        return toString(yieldTime, yieldCount);
    }

    public String targetToString() {
        return toString(targetTime, targetCount);
    }

    /**
     * Computes the time still available for the given stage: the sum of all
     * target times up to and including the stage, minus the yield times
     * already spent by the earlier stages. Never negative; 0 for an unknown
     * stage identifier.
     */
    public long getTargetTime(char type) {
        long sum = 0;
        Long t;
        Character element;
        for (int i = 0; i < sequence.length; i++) {
            element = Character.valueOf(sequence[i]);
            t = (Long) targetTime.get(element);
            if (t != null) sum += t.longValue();
            if (type == sequence[i]) return (sum < 0) ? 0 : sum;
            t = (Long) yieldTime.get(element);
            if (t != null) sum -= t.longValue();
        }
        return 0;
    }

    /**
     * @return the target count for the stage, or -1 if not set
     */
    public int getTargetCount(char type) {
        Integer i = (Integer) targetCount.get(Character.valueOf(type));
        return (i == null) ? -1 : i.intValue();
    }

    /**
     * @return the yield time for the stage in milliseconds, or -1 if not set
     */
    public long getYieldTime(char type) {
        Long l = (Long) yieldTime.get(Character.valueOf(type));
        return (l == null) ? -1 : l.longValue();
    }

    /**
     * @return the yield count for the stage, or -1 if not set
     */
    public int getYieldCount(char type) {
        Integer i = (Integer) yieldCount.get(Character.valueOf(type));
        return (i == null) ? -1 : i.intValue();
    }

    /** Starts the stopwatch used by setYieldTime(). */
    public void startTimer() {
        this.timer = System.currentTimeMillis();
    }

    /** Records the elapsed time since startTimer() as yield time of the stage. */
    public void setYieldTime(char type) {
        long t = System.currentTimeMillis() - this.timer;
        yieldTime.put(Character.valueOf(type), Long.valueOf(t));
    }

    /** Records the number of entries produced by the stage. */
    public void setYieldCount(char type, int count) {
        yieldCount.put(Character.valueOf(type), Integer.valueOf(count));
    }

    /** Human-readable report of targets and yields, for logging. */
    public String reportToString() {
        return "target=" + toString(targetTime, targetCount) + "; yield=" + toString(yieldTime, yieldCount);
    }

    /**
     * Serializes a time/count map pair as "t&lt;stage&gt;&lt;millis&gt;|c&lt;stage&gt;&lt;count&gt;|..."
     * for every stage in sequence order. Contains no '=' or spaces, so the
     * result can be sent in a http header or post argument. Missing values
     * are emitted as 0.
     */
    public static String toString(HashMap time, HashMap count) {
        StringBuffer sb = new StringBuffer(sequence.length * 10);
        Character element;
        Integer xi;
        Long xl;
        for (int i = 0; i < sequence.length; i++) {
            element = Character.valueOf(sequence[i]);
            sb.append('t');
            sb.append(element);
            xl = (Long) time.get(element);
            sb.append((xl == null) ? "0" : xl.toString());
            sb.append('|');
            sb.append('c');
            sb.append(element);
            xi = (Integer) count.get(element);
            sb.append((xi == null) ? "0" : xi.toString());
            sb.append('|');
        }
        return sb.toString();
    }

    /**
     * Reverse of toString(HashMap, HashMap): parses the serialized form and
     * fills the given time and count maps, keyed by Character(stage) so the
     * getters can find the values. Malformed elements are skipped; parsing
     * stops at the first structurally broken element.
     *
     * Fixes of the original version: the scan position is now advanced past
     * each '|' (the old code never moved it, causing an endless loop), and
     * the maps are keyed with Character objects instead of Strings (String
     * keys could never be retrieved by the Character-keyed getters).
     */
    public static void intoMap(String s, HashMap time, HashMap count) {
        if (s == null) return;
        int p = 0;
        int p1;
        while ((p < s.length()) && ((p1 = s.indexOf('|', p)) > 0)) {
            if (p1 < p + 2) break; // element too short to contain type + stage + value
            char ct = s.charAt(p);                                  // 't' = time, 'c' = count
            Character element = Character.valueOf(s.charAt(p + 1)); // stage identifier
            String v = s.substring(p + 2, p1);
            try {
                if (ct == 't') {
                    time.put(element, Long.valueOf(Long.parseLong(v)));
                } else {
                    count.put(element, Integer.valueOf(Integer.parseInt(v)));
                }
            } catch (NumberFormatException e) {
                // skip malformed value, continue with the next element
            }
            p = p1 + 1; // advance past the '|' separator
        }
    }

}

@ -135,7 +135,6 @@ public final class plasmaSearchResult {
String[] urlcomps;
String[] descrcomps;
long ranking;
long inc = 4096 * 4096;
String queryhash;
for (int i = 0; i < results.size(); i++) {
// take out values from result array
@ -147,14 +146,10 @@ public final class plasmaSearchResult {
// apply pre-calculated order attributes
ranking = 0;
if (query.order[0].equals(plasmaSearchQuery.ORDER_QUALITY)) ranking = 4096 * indexEntry.getQuality();
else if (query.order[0].equals(plasmaSearchQuery.ORDER_DATE)) ranking = 4096 * indexEntry.getVirtualAge();
if (query.order[1].equals(plasmaSearchQuery.ORDER_QUALITY)) ranking += indexEntry.getQuality();
else if (query.order[1].equals(plasmaSearchQuery.ORDER_DATE)) ranking += indexEntry.getVirtualAge();
// apply 'common-sense' heuristic using references
for (int j = 0; j < urlcomps.length; j++) if (commonSense.contains(urlcomps[j])) ranking += inc;
for (int j = 0; j < descrcomps.length; j++) if (commonSense.contains(descrcomps[j])) ranking += inc;
for (int j = 0; j < urlcomps.length; j++) if (commonSense.contains(urlcomps[j])) ranking++;
for (int j = 0; j < descrcomps.length; j++) if (commonSense.contains(descrcomps[j])) ranking++;
// apply query-in-result matching
Set urlcomph = plasmaSearchQuery.words2hashes(urlcomps);
@ -162,8 +157,8 @@ public final class plasmaSearchResult {
Iterator shi = query.queryHashes.iterator();
while (shi.hasNext()) {
queryhash = (String) shi.next();
if (urlcomph.contains(queryhash)) ranking += 10 * inc;
if (descrcomph.contains(queryhash)) ranking += 100 * inc;
if (urlcomph.contains(queryhash)) ranking += 10;
if (descrcomph.contains(queryhash)) ranking += 100;
}
// insert value

@ -168,7 +168,7 @@ public class plasmaSnippetCache {
return new result(line, source, null);
}
public synchronized void storeToCache(String wordhashes, String urlhash, String snippet) {
public void storeToCache(String wordhashes, String urlhash, String snippet) {
// generate key
String key = urlhash + wordhashes;
@ -371,13 +371,14 @@ public class plasmaSnippetCache {
this.log);
}
public void fetch(plasmaSearchResult acc, Set queryhashes, String urlmask, int fetchcount) {
public void fetch(plasmaSearchResult acc, Set queryhashes, String urlmask, int fetchcount, long maxTime) {
// fetch snippets
int i = 0;
plasmaCrawlLURL.Entry urlentry;
String urlstring;
plasmaSnippetCache.result snippet;
while ((acc.hasMoreElements()) && (i < fetchcount)) {
long limitTime = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
while ((acc.hasMoreElements()) && (i < fetchcount) && (System.currentTimeMillis() < limitTime)) {
urlentry = acc.nextElement();
if (urlentry.url().getHost().endsWith(".yacyh")) continue;
urlstring = htmlFilterContentScraper.urlNormalform(urlentry.url());

@ -1443,55 +1443,21 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if (date == null) return ""; else return DateFormatter.format(date);
}
/*
public class presearch extends Thread {
Set queryhashes;
char[] order;
String urlmask;
long time;
int searchcount, fetchcount;
public presearch(Set queryhashes, char[] order, long time, String urlmask, int searchcount, int fetchcount) {
this.queryhashes = queryhashes;
this.order = order;
this.urlmask = urlmask;
this.time = time;
this.searchcount = searchcount;
this.fetchcount = fetchcount;
}
public void run() {
plasmaWordIndexEntity idx = null;
try {
// search the database locally
log.logFine("presearch: started job");
idx = searchManager.searchHashes(queryhashes, time);
log.logFine("presearch: found " + idx.size() + " results");
plasmaSearchResult acc = searchManager.order(idx, queryhashes, stopwords, order, time, searchcount);
if (acc == null) return;
log.logFine("presearch: ordered results, now " + acc.sizeOrdered() + " URLs ready for fetch");
// take some elements and fetch the snippets
snippetCache.fetch(acc, queryhashes, urlmask, fetchcount);
} catch (IOException e) {
log.logSevere("presearch: failed", e);
} finally {
if (idx != null) try { idx.close(); } catch (Exception e){}
}
log.logFine("presearch: job terminated");
}
}
*/
//public serverObjects searchFromLocal(Set querywords, String order1, String order2, int count, boolean global, long time /*milliseconds*/, String urlmask) {
public serverObjects searchFromLocal(plasmaSearchQuery query) {
// tell all threads to do nothing for a specific time
//log.logInfo("A");
wordIndex.intermission(2 * query.maximumTime);
//log.logInfo("B");
intermissionAllThreads(2 * query.maximumTime);
//log.logInfo("C");
serverObjects prop = new serverObjects();
try {
//log.logInfo("D");
try {
// filter out words that appear in bluelist
//log.logInfo("E");
query.filterOut(blueList);
// log
@ -1510,8 +1476,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
plasmaSearchResult acc = theSearch.search();
// fetch snippets
if (query.domType != plasmaSearchQuery.SEARCHDOM_GLOBALDHT)
snippetCache.fetch(acc.cloneSmart(), query.queryHashes, query.urlMask, 10);
//if (query.domType != plasmaSearchQuery.SEARCHDOM_GLOBALDHT) snippetCache.fetch(acc.cloneSmart(), query.queryHashes, query.urlMask, 10, 1000);
log.logFine("SEARCH TIME AFTER ORDERING OF SEARCH RESULT: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds");
// result is a List of urlEntry elements: prepare answer
@ -1531,8 +1496,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
String host, hash, address, descr = "";
yacySeed seed;
plasmaSnippetCache.result snippet;
long targetTime = timestamp + query.maximumTime;
if (targetTime < System.currentTimeMillis()) targetTime = System.currentTimeMillis() + 5000;
//kelondroMScoreCluster ref = new kelondroMScoreCluster();
while ((acc.hasMoreElements()) && (i < query.wantedResults)) {
while ((acc.hasMoreElements()) && (i < query.wantedResults) && (System.currentTimeMillis() < targetTime)) {
urlentry = acc.nextElement();
url = urlentry.url();
urlhash = urlentry.hash();
@ -1639,15 +1606,15 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
wordIndex.intermission(2 * query.maximumTime);
intermissionAllThreads(2 * query.maximumTime);
query.domType = plasmaSearchQuery.SEARCHDOM_LOCAL;
serverObjects prop = new serverObjects();
try {
log.logInfo("INIT HASH SEARCH: " + query.queryHashes + " - " + query.wantedResults + " links");
long timestamp = System.currentTimeMillis();
plasmaSearchEvent theSearch = new plasmaSearchEvent(query, log, wordIndex, urlPool.loadedURL, snippetCache);
int idxc = theSearch.localSearch(query.maximumTime * 8 / 10);
long remainingTime = query.maximumTime - (System.currentTimeMillis() - timestamp);
if (remainingTime < 500) remainingTime = 500;
plasmaSearchResult acc = theSearch.order(remainingTime, 10);
int idxc = theSearch.localSearch();
plasmaSearchResult acc = theSearch.order();
// result is a List of urlEntry elements
if (acc == null) {

@ -148,23 +148,29 @@ public final class plasmaWordIndex {
return condenser.getWords().size();
}
public plasmaWordIndexEntity getEntity(String wordHash, boolean deleteIfEmpty) {
return ramCache.getIndex(wordHash, deleteIfEmpty);
public plasmaWordIndexEntity getEntity(String wordHash, boolean deleteIfEmpty, long maxTime) {
return ramCache.getIndex(wordHash, deleteIfEmpty, maxTime);
}
public Set getEntities(Set wordHashes, boolean deleteIfEmpty, boolean interruptIfEmpty) {
public Set getEntities(Set wordHashes, boolean deleteIfEmpty, boolean interruptIfEmpty, long maxTime) {
// retrieve entities that belong to the hashes
HashSet entities = new HashSet();
String singleHash;
plasmaWordIndexEntity singleEntity;
Iterator i = wordHashes.iterator();
long start = System.currentTimeMillis();
long remaining;
while (i.hasNext()) {
// check time
remaining = maxTime - (System.currentTimeMillis() - start);
if ((maxTime > 0) && (remaining <= 0)) break;
// get next hash:
singleHash = (String) i.next();
// retrieve index
singleEntity = getEntity(singleHash, true);
singleEntity = getEntity(singleHash, true, (maxTime < 0) ? -1 : remaining / (wordHashes.size() - entities.size()));
// check result
if (((singleEntity == null) || (singleEntity.size() == 0)) && (interruptIfEmpty)) return null;

@ -158,18 +158,20 @@ public final class plasmaWordIndexAssortmentCluster {
if (newContainer.size() > clusterCapacity) return newContainer; // it will not fit
if (newContainer.size() <= clusterCount) newContainer = storeSingular(wordHash, newContainer);
if (newContainer == null) return null;
newContainer.add(removeFromAll(wordHash));
newContainer.add(removeFromAll(wordHash, -1));
if (newContainer.size() > clusterCapacity) return newContainer;
storeStretched(wordHash, newContainer);
return null;
}
public plasmaWordIndexEntryContainer removeFromAll(String wordHash) {
/**
 * Removes the entries for the given word hash from every assortment in the
 * cluster and collects them into a single container.
 *
 * @param wordHash the word hash whose entries shall be removed
 * @param maxTime  soft time budget in milliseconds; a negative value means
 *                 "no time limit". The deadline is only checked after each
 *                 assortment, so at least one assortment is always processed
 *                 and the budget may be slightly overrun.
 * @return container with all entries gathered so far; possibly a partial
 *         result if the time budget ran out before all assortments were seen
 */
public plasmaWordIndexEntryContainer removeFromAll(String wordHash, long maxTime) {
// collect all records from all the assortments and return them
plasmaWordIndexEntryContainer buffer, record = new plasmaWordIndexEntryContainer(wordHash);
// translate the relative budget into an absolute deadline (MAX_VALUE = never)
long limitTime = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
for (int i = 0; i < clusterCount; i++) {
buffer = assortments[i].remove(wordHash);
if (buffer != null) record.add(buffer);
// deadline check comes after the removal, so partial results are kept
if (System.currentTimeMillis() > limitTime) break;
}
return record;
}

@ -416,9 +416,10 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
return ((long) intTime) * ((long) 1000) + startTime;
}
private boolean flushFromAssortmentCluster(String key) {
private boolean flushFromAssortmentCluster(String key, long maxTime) {
// this should only be called if the assortment shall be deleted or returned in an index entity
plasmaWordIndexEntryContainer container = assortmentCluster.removeFromAll(key);
maxTime = 8 * maxTime / 10; // reserve time for later adding to backend
plasmaWordIndexEntryContainer container = assortmentCluster.removeFromAll(key, maxTime);
if (container == null) {
return false;
} else {
@ -428,12 +429,19 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
}
}
public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty) {
public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty, long maxTime) {
flushThread.pause();
long start = System.currentTimeMillis();
flushFromMem(wordHash);
flushFromAssortmentCluster(wordHash);
if (maxTime < 0) {
flushFromAssortmentCluster(wordHash, -1);
} else {
long remaining = maxTime - (System.currentTimeMillis() - start);
if (remaining > 0) flushFromAssortmentCluster(wordHash, remaining);
}
flushThread.proceed();
return backend.getIndex(wordHash, deleteIfEmpty);
long r = maxTime - (System.currentTimeMillis() - start);
return backend.getIndex(wordHash, deleteIfEmpty, (r < 0) ? 0 : r);
}
public long getUpdateTime(String wordHash) {
@ -454,7 +462,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
hashScore.deleteScore(wordHash);
hashDate.deleteScore(wordHash);
}
assortmentCluster.removeFromAll(wordHash);
assortmentCluster.removeFromAll(wordHash, -1);
backend.deleteIndex(wordHash);
flushThread.proceed();
}
@ -462,7 +470,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
public synchronized int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) {
flushThread.pause();
flushFromMem(wordHash);
flushFromAssortmentCluster(wordHash);
flushFromAssortmentCluster(wordHash, -1);
int removed = backend.removeEntries(wordHash, urlHashes, deleteComplete);
flushThread.proceed();
return removed;
@ -562,7 +570,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
} else {
// take out all words from the assortment to see if it fits
// together with the extracted assortment
plasmaWordIndexEntryContainer container = assortmentCluster.removeFromAll(wordhash);
plasmaWordIndexEntryContainer container = assortmentCluster.removeFromAll(wordhash, -1);
if (size + container.size() > assortmentCluster.clusterCapacity) {
// this will also be too big to integrate, add to entity
entity.addEntries(container);

@ -181,7 +181,7 @@ public class plasmaWordIndexClassicDB implements plasmaWordIndexInterface {
}
}
public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty) {
public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty, long maxTime) {
try {
return new plasmaWordIndexEntity(databaseRoot, wordHash, deleteIfEmpty);
} catch (IOException e) {
@ -210,7 +210,7 @@ public class plasmaWordIndexClassicDB implements plasmaWordIndexInterface {
plasmaWordIndexEntity pi = null;
int count = 0;
try {
pi = getIndex(wordHash, true);
pi = getIndex(wordHash, true, -1);
for (int i = 0; i < urlHashes.length; i++)
if (pi.removeEntry(urlHashes[i], deleteComplete)) count++;
int size = pi.size();

@ -314,7 +314,7 @@ public final class plasmaWordIndexDistribution {
((nexthash = (String) wordHashIterator.next()) != null) &&
(nexthash.trim().length() > 0)
) {
indexEntity = this.wordIndex.getEntity(nexthash, true);
indexEntity = this.wordIndex.getEntity(nexthash, true, -1);
if (indexEntity.size() == 0) {
indexEntity.deleteComplete();
} else if ((indexEntity.size() <= count)|| // if we havn't exceeded the limit
@ -355,7 +355,7 @@ public final class plasmaWordIndexDistribution {
}
} catch (kelondroException e) {
this.log.logSevere("plasmaWordIndexDistribution/1: deleted DB for word " + indexEntity.wordHash(), e);
try {indexEntity.deleteComplete();} catch (IOException ee) {}
indexEntity.deleteComplete();
}
} else {
// make an on-the-fly entity and insert values
@ -389,7 +389,7 @@ public final class plasmaWordIndexDistribution {
tmpEntities.add(tmpEntity);
} catch (kelondroException e) {
this.log.logSevere("plasmaWordIndexDistribution/2: deleted DB for word " + indexEntity.wordHash(), e);
try {indexEntity.deleteComplete();} catch (IOException ee) {}
indexEntity.deleteComplete();
}
indexEntity.close(); // important: is not closed elswhere and cannot be deleted afterwards
indexEntity = null;
@ -427,7 +427,7 @@ public final class plasmaWordIndexDistribution {
urlHashes[c++] = indexEntry.getUrlHash();
}
wordIndex.removeEntries(indexEntities[i].wordHash(), urlHashes, true);
indexEntity = wordIndex.getEntity(indexEntities[i].wordHash(), true);
indexEntity = wordIndex.getEntity(indexEntities[i].wordHash(), true, -1);
sz = indexEntity.size();
indexEntity.close();
log.logFine("Deleted partial index (" + c + " URLs) for word " + indexEntities[i].wordHash() + "; " + sz + " entries left");

@ -50,6 +50,7 @@ import java.util.Set;
import de.anomic.kelondro.kelondroRecords;
import de.anomic.kelondro.kelondroTree;
import de.anomic.kelondro.kelondroException;
import de.anomic.server.logging.serverLog;
public final class plasmaWordIndexEntity {
@ -128,11 +129,7 @@ public final class plasmaWordIndexEntity {
if (theTmpMap == null) {
int size = theIndex.size();
if ((size == 0) && (delete)) {
try {
deleteComplete();
} catch (IOException e) {
delete = false;
}
deleteComplete();
return 0;
} else {
return size;
@ -164,6 +161,7 @@ public final class plasmaWordIndexEntity {
}
public boolean addEntry(plasmaWordIndexEntry entry) throws IOException {
if (entry == null) return false;
if (theTmpMap == null) {
return (theIndex.put(entry.getUrlHash().getBytes(), entry.toEncodedForm(false).getBytes()) == null);
} else {
@ -191,9 +189,9 @@ public final class plasmaWordIndexEntity {
return count;
}
public boolean deleteComplete() throws IOException {
public boolean deleteComplete() {
if (theTmpMap == null) {
theIndex.close();
try {theIndex.close();} catch (IOException e) {}
// remove file
boolean success = theLocation.delete();
// and also the paren directory if that is empty
@ -257,10 +255,7 @@ public final class plasmaWordIndexEntity {
} catch (IOException e) {
i = null;
throw new RuntimeException("dbenum: " + e.getMessage());
} catch (kelondroException e) {
i = null;
throw new RuntimeException("dbenum: " + e.getMessage());
}
}
}
public void remove() {
throw new UnsupportedOperationException();
@ -305,9 +300,13 @@ public final class plasmaWordIndexEntity {
// a time=-1 means: no timeout
Iterator i = otherEntity.elements(true);
long timeout = (time == -1) ? Long.MAX_VALUE : System.currentTimeMillis() + time;
try {
while ((i.hasNext()) && (System.currentTimeMillis() < timeout)) {
addEntry((plasmaWordIndexEntry) i.next());
}
} catch (kelondroException e) {
serverLog.logSevere("PLASMA", "plasmaWordIndexEntity.merge: " + e.getMessage());
}
}
public static plasmaWordIndexEntity joinEntities(Set entities, long time) throws IOException {

@ -50,7 +50,7 @@ public interface plasmaWordIndexInterface {
public Iterator wordHashes(String startWordHash, boolean up);
public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty);
public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty, long maxTime);
public long getUpdateTime(String wordHash);
public void deleteIndex(String wordHash);

@ -182,4 +182,12 @@ public final class serverLog {
if (fileIn != null) try {fileIn.close();}catch(Exception e){}
}
}
/**
 * Left-pads <code>s</code> with <code>fillChar</code> to a total length of
 * at least <code>n</code>. If <code>s</code> already has <code>n</code> or
 * more characters it is returned unchanged.
 *
 * Fixes over the previous version: the old loop decremented <code>n</code>
 * while its bound <code>i</code> never changed (it only terminated once
 * <code>n</code> wrapped past <code>Integer.MIN_VALUE</code>), it never
 * appended <code>s</code> itself, and <code>StringBuffer.insert(0, int)</code>
 * inserted the decimal string of the fill character's code point instead of
 * the character.
 *
 * @param s        the string to pad (must not be null)
 * @param n        the desired minimum length of the result
 * @param fillChar the padding character, given as an int and cast to char
 * @return s left-padded with fillChar up to length n
 */
public static final String format(String s, int n, int fillChar) {
    int l = s.length();
    if (l >= n) return s;
    StringBuffer sb = new StringBuffer(n);
    // emit the padding first, then the original string (left padding)
    for (int i = 0; i < n - l; i++) sb.append((char) fillChar);
    sb.append(s);
    return sb.toString();
}
}

@ -58,6 +58,7 @@ import de.anomic.plasma.plasmaWordIndexEntry;
import de.anomic.plasma.plasmaWordIndexEntryContainer;
import de.anomic.plasma.plasmaURLPattern;
import de.anomic.plasma.plasmaWordIndex;
import de.anomic.plasma.plasmaSearchProfile;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.tools.crypt;
@ -337,9 +338,9 @@ public final class yacyClient {
}
}
public static int search(String wordhashes, int count, boolean global, yacySeed targetPeer,
public static int search(String wordhashes, boolean global, yacySeed targetPeer,
plasmaCrawlLURL urlManager, plasmaWordIndexEntity entityCache,
plasmaURLPattern blacklist, plasmaSnippetCache snippets, long duetime) {
plasmaURLPattern blacklist, plasmaSnippetCache snippets, plasmaSearchProfile profile) {
// send a search request to peer with remote Hash
// this mainly converts the words into word hashes
@ -376,15 +377,17 @@ public final class yacyClient {
"&query=" + wordhashes;
*/
final serverObjects obj = new serverObjects(9);
obj.put("myseed", yacyCore.seedDB.mySeed.genSeedStr(key));
obj.put("youare", targetPeer.hash);
obj.put("key", key);
obj.put("count", count);
obj.put("resource", ((global) ? "global" : "local"));
obj.put("query", wordhashes);
obj.put("ttl", "0");
obj.put("duetime", Long.toString(duetime));
obj.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date()));
long duetime = profile.duetime();
obj.put("myseed", yacyCore.seedDB.mySeed.genSeedStr(key));
obj.put("youare", targetPeer.hash);
obj.put("key", key);
obj.put("count", profile.getTargetCount(plasmaSearchProfile.PROCESS_POSTSORT));
obj.put("resource", ((global) ? "global" : "local"));
obj.put("query", wordhashes);
obj.put("ttl", "0");
obj.put("duetime", Long.toString(duetime));
obj.put("profile", profile.targetToString()); // new duetimes splitted by specific search tasks
obj.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date()));
//yacyCore.log.logDebug("yacyClient.search url=" + url);
final long timestamp = System.currentTimeMillis();
@ -400,7 +403,11 @@ public final class yacyClient {
obj
)
);
// compute all computation times
final long totalrequesttime = System.currentTimeMillis() - timestamp;
String returnProfile = (String) result.get("profile");
if (returnProfile != null) profile.putYield(returnProfile);
/*
HashMap result = nxTools.table(httpc.wget(new URL(url),
@ -464,7 +471,7 @@ public final class yacyClient {
} catch (NumberFormatException e) {
searchtime = totalrequesttime;
}
yacyCore.log.logFine("yacyClient.search: processed " + results + " links from peer " + targetPeer.hash + ":" + targetPeer.getName() + ", score=" + targetPeer.selectscore + ", DHTdist=" + yacyDHTAction.dhtDistance(targetPeer.hash, wordhashes) + ", duetime=" + duetime + ", searchtime=" + searchtime + ", netdelay=" + (totalrequesttime - searchtime) + ", references=" + result.get("references"));
yacyCore.log.logFine("SEARCH " + results + " URLS FROM " + targetPeer.hash + ":" + targetPeer.getName() + ", score=" + targetPeer.selectscore + ", DHTdist=" + yacyDHTAction.dhtDistance(targetPeer.hash, wordhashes) + ", duetime=" + duetime + ", searchtime=" + searchtime + ", netdelay=" + (totalrequesttime - searchtime) + ", references=" + result.get("references"));
return results;
} catch (Exception e) {
yacyCore.log.logSevere("yacyClient.search error: '" + targetPeer.get(yacySeed.NAME, "anonymous") + "' failed - " + e);

@ -53,12 +53,12 @@ import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaURLPattern;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.plasmaWordIndexEntity;
import de.anomic.plasma.plasmaSearchProfile;
import de.anomic.server.logging.serverLog;
public class yacySearch extends Thread {
final private Set wordhashes;
final private int count;
final private boolean global;
final private plasmaCrawlLURL urlManager;
final private plasmaWordIndexEntity entityCache;
@ -66,13 +66,12 @@ public class yacySearch extends Thread {
final private plasmaSnippetCache snippetCache;
final private yacySeed targetPeer;
private int links;
final private long duetime;
final private plasmaSearchProfile profile;
public yacySearch(Set wordhashes, int count, boolean global, yacySeed targetPeer,
plasmaCrawlLURL urlManager, plasmaWordIndexEntity entityCache, plasmaURLPattern blacklist, plasmaSnippetCache snippetCache, long duetime) {
public yacySearch(Set wordhashes, boolean global, yacySeed targetPeer,
plasmaCrawlLURL urlManager, plasmaWordIndexEntity entityCache, plasmaURLPattern blacklist, plasmaSnippetCache snippetCache, plasmaSearchProfile profile) {
super("yacySearch_" + targetPeer.getName());
this.wordhashes = wordhashes;
this.count = count;
this.global = global;
this.urlManager = urlManager;
this.entityCache = entityCache;
@ -80,11 +79,11 @@ public class yacySearch extends Thread {
this.snippetCache = snippetCache;
this.targetPeer = targetPeer;
this.links = -1;
this.duetime = duetime;
this.profile = (plasmaSearchProfile) profile.clone();
}
public void run() {
this.links = yacyClient.search(set2string(wordhashes), count, global, targetPeer, urlManager, entityCache, blacklist, snippetCache, duetime);
this.links = yacyClient.search(set2string(wordhashes), global, targetPeer, urlManager, entityCache, blacklist, snippetCache, profile);
if (links != 0) {
//yacyCore.log.logInfo("REMOTE SEARCH - remote peer " + targetPeer.hash + ":" + targetPeer.getName() + " contributed " + links + " links for word hash " + wordhashes);
yacyCore.seedDB.mySeed.incRI(links);
@ -103,6 +102,10 @@ public class yacySearch extends Thread {
return this.links;
}
/**
 * Returns the search profile held by this search thread.
 * NOTE(review): this appears to be a per-thread copy of the caller's profile
 * (the constructor seems to clone it) so the remote peer's timing yields can
 * be read back from it after the thread finishes — confirm against the
 * constructor before relying on this.
 *
 * @return the profile instance used for this remote search
 */
public plasmaSearchProfile profile() {
return this.profile;
}
private static yacySeed[] selectPeers(Set wordhashes, int seedcount) {
// find out a specific number of seeds, that would be relevant for the given word hash(es)
// the result is ordered by relevance: [0] is most relevant
@ -166,7 +169,7 @@ public class yacySearch extends Thread {
}
public static yacySearch[] searchHashes(Set wordhashes, plasmaCrawlLURL urlManager, plasmaWordIndexEntity entityCache,
int count, int targets, plasmaURLPattern blacklist, plasmaSnippetCache snippetCache, long duetime) {
int targets, plasmaURLPattern blacklist, plasmaSnippetCache snippetCache, plasmaSearchProfile profile) {
// check own peer status
if (yacyCore.seedDB.mySeed == null || yacyCore.seedDB.mySeed.getAddress() == null) { return null; }
@ -178,8 +181,8 @@ public class yacySearch extends Thread {
if (targets == 0) return null;
yacySearch[] searchThreads = new yacySearch[targets];
for (int i = 0; i < targets; i++) {
searchThreads[i]= new yacySearch(wordhashes, count, true, targetPeers[i],
urlManager, entityCache, blacklist, snippetCache, duetime);
searchThreads[i]= new yacySearch(wordhashes, true, targetPeers[i],
urlManager, entityCache, blacklist, snippetCache, profile);
searchThreads[i].start();
try {Thread.currentThread().sleep(20);} catch (InterruptedException e) {}

@ -755,7 +755,7 @@ public final class yacy {
try {
wordCounter++;
wordHash = (String) importWordHashIterator.next();
importWordIdxEntity = importWordIndex.getEntity(wordHash, true);
importWordIdxEntity = importWordIndex.getEntity(wordHash, true, -1);
if (importWordIdxEntity.size() == 0) {
importWordIdxEntity.deleteComplete();
@ -878,7 +878,7 @@ public final class yacy {
try {
wordCounter++;
wordhash = (String) wordHashIterator.next();
wordIdxEntity = wordIndex.getEntity(wordhash, true);
wordIdxEntity = wordIndex.getEntity(wordhash, true, -1);
// the combined container will fit, read the container
Iterator wordIdxEntries = wordIdxEntity.elements(true);

Loading…
Cancel
Save