diff --git a/htroot/IndexControl_p.java b/htroot/IndexControl_p.java index 6a5d40995..37516bf74 100644 --- a/htroot/IndexControl_p.java +++ b/htroot/IndexControl_p.java @@ -151,7 +151,7 @@ public class IndexControl_p { // generate an urlx array plasmaWordIndexEntity index = null; try { - index = switchboard.wordIndex.getEntity(keyhash, true); + index = switchboard.wordIndex.getEntity(keyhash, true, -1); Iterator en = index.elements(true); int i = 0; urlx = new String[index.size()]; @@ -258,7 +258,7 @@ public class IndexControl_p { plasmaWordIndexEntity[] indexes = new plasmaWordIndexEntity[1]; String result; long starttime = System.currentTimeMillis(); - indexes[0] = switchboard.wordIndex.getEntity(keyhash, true); + indexes[0] = switchboard.wordIndex.getEntity(keyhash, true, -1); // built urlCache Iterator urlIter = indexes[0].elements(true); HashMap knownURLs = new HashMap(); @@ -436,7 +436,7 @@ public class IndexControl_p { // search for a word hash and generate a list of url links plasmaWordIndexEntity index = null; try { - index = switchboard.wordIndex.getEntity(keyhash, true); + index = switchboard.wordIndex.getEntity(keyhash, true, -1); final StringBuffer result = new StringBuffer(1024); if (index.size() == 0) { diff --git a/htroot/NetworkPicture.java b/htroot/NetworkPicture.java index dbd9c9035..e6d0d905b 100644 --- a/htroot/NetworkPicture.java +++ b/htroot/NetworkPicture.java @@ -69,12 +69,14 @@ public class NetworkPicture { int height = 480; int passiveLimit = 300; int potentialLimit = 300; + int maxCount = 1000; if (post != null) { width = post.getInt("width", 640); height = post.getInt("height", 420); passiveLimit = post.getInt("pal", 300); potentialLimit = post.getInt("pol", 300); + maxCount = post.getInt("max", 1000); } int innerradius = Math.min(width, height) / 5; @@ -98,7 +100,6 @@ public class NetworkPicture { //System.out.println("Seed Maximum distance is " + yacySeed.maxDHTDistance); //System.out.println("Seed Minimum distance is " + 
yacySeed.minDHTNumber); - final int maxCount = 300; yacySeed seed; int angle; long lastseen; diff --git a/htroot/yacy/query.java b/htroot/yacy/query.java index 6a725a9ea..90128c2e6 100644 --- a/htroot/yacy/query.java +++ b/htroot/yacy/query.java @@ -88,7 +88,7 @@ public final class query { // shall contain a word hash, the number of assigned lurls to this hash is returned de.anomic.plasma.plasmaWordIndexEntity entity = null; try { - entity = sb.wordIndex.getEntity(env, true); + entity = sb.wordIndex.getEntity(env, true, -1); prop.put("response", entity.size()); entity.close(); } catch (IOException e) { diff --git a/source/de/anomic/plasma/plasmaDbImporter.java b/source/de/anomic/plasma/plasmaDbImporter.java index d69bcba40..5faddd78d 100644 --- a/source/de/anomic/plasma/plasmaDbImporter.java +++ b/source/de/anomic/plasma/plasmaDbImporter.java @@ -158,7 +158,7 @@ public class plasmaDbImporter extends Thread { try { wordCounter++; wordHash = (String) importWordHashIterator.next(); - importWordIdxEntity = importWordIndex.getEntity(wordHash, true); + importWordIdxEntity = importWordIndex.getEntity(wordHash, true, -1); if (importWordIdxEntity.size() == 0) { importWordIdxEntity.deleteComplete(); diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java index 984646ccd..74acfcaff 100644 --- a/source/de/anomic/plasma/plasmaSearchEvent.java +++ b/source/de/anomic/plasma/plasmaSearchEvent.java @@ -63,6 +63,7 @@ public final class plasmaSearchEvent { private plasmaCrawlLURL urlStore; private plasmaSnippetCache snippetCache; private plasmaWordIndexEntity rcLocal, rcGlobal; // caches for results + private plasmaSearchProfile profileLocal, profileGlobal; private yacySearch[] searchThreads; public plasmaSearchEvent(plasmaSearchQuery query, serverLog log, plasmaWordIndex wordIndex, plasmaCrawlLURL urlStore, plasmaSnippetCache snippetCache) { @@ -73,6 +74,13 @@ public final class plasmaSearchEvent { this.snippetCache = 
snippetCache; this.rcLocal = new plasmaWordIndexEntity(null); this.rcGlobal = new plasmaWordIndexEntity(null); + if (query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) { + this.profileLocal = new plasmaSearchProfile(4 * query.maximumTime / 10, query.wantedResults); + this.profileGlobal = new plasmaSearchProfile(6 * query.maximumTime / 10, query.wantedResults); + } else { + this.profileLocal = new plasmaSearchProfile(query.maximumTime, query.wantedResults); + this.profileGlobal = null; + } this.searchThreads = null; } @@ -80,9 +88,8 @@ public final class plasmaSearchEvent { // combine all threads if (query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) { - int fetchcount = ((int) (query.maximumTime / 1000L)) * 5; // number of wanted results until break in search - int fetchpeers = ((int) (query.maximumTime / 1000L)) * 2; // number of target peers; means 30 peers in 10 seconds - long fetchtime = query.maximumTime * 6 / 10; // time to waste + int fetchpeers = (int) (query.maximumTime / 1000L); // number of target peers; means 10 peers in 10 seconds + if (fetchpeers > 10) fetchpeers = 10; // remember time long start = System.currentTimeMillis(); @@ -91,16 +98,12 @@ public final class plasmaSearchEvent { serverInstantThread.oneTimeJob(this, "localSearch", log, 0); // do a global search - int globalContributions = globalSearch(fetchcount, fetchpeers, fetchtime); + int globalContributions = globalSearch(fetchpeers); log.logFine("SEARCH TIME AFTER GLOBAL-TRIGGER TO " + fetchpeers + " PEERS: " + ((System.currentTimeMillis() - start) / 1000) + " seconds"); try { // combine the result and order - long remainingTime = query.maximumTime - (System.currentTimeMillis() - start); - if (remainingTime < 500) remainingTime = 500; - if (remainingTime > 3000) remainingTime = 3000; - - plasmaSearchResult result = order(remainingTime, query.wantedResults); + plasmaSearchResult result = order(); result.globalContributions = globalContributions; result.localContributions = 
rcLocal.size(); @@ -112,6 +115,7 @@ public final class plasmaSearchEvent { rcLocal = null; // return search result + log.logFine("SEARCHRESULT: " + profileLocal.reportToString()); return result; } catch (IOException e) { return null; @@ -120,14 +124,16 @@ public final class plasmaSearchEvent { // do a local search long start = System.currentTimeMillis(); try { - localSearch(query.maximumTime); - plasmaSearchResult result = order(query.maximumTime - (System.currentTimeMillis() - start), query.wantedResults); + localSearch(); + plasmaSearchResult result = order(); result.localContributions = rcLocal.size(); // clean up if ((rcLocal != null) && (!(rcLocal.isTMPEntity()))) rcLocal.close(); rcLocal = null; + // return search result + log.logFine("SEARCHRESULT: " + profileLocal.reportToString()); return result; } catch (IOException e) { return null; @@ -135,19 +141,14 @@ public final class plasmaSearchEvent { } } - - public void localSearch() throws IOException { - // method called by a one-time - localSearch(query.maximumTime * 6 / 10); - } - - public int localSearch(long time) throws IOException { + public int localSearch() throws IOException { // search for the set of hashes and return an array of urlEntry elements - long stamp = System.currentTimeMillis(); - // retrieve entities that belong to the hashes - Set entities = wordIndex.getEntities(query.queryHashes, true, true); + profileLocal.startTimer(); + Set entities = wordIndex.getEntities(query.queryHashes, true, true, profileLocal.getTargetTime(plasmaSearchProfile.PROCESS_COLLECTION)); + profileLocal.setYieldTime(plasmaSearchProfile.PROCESS_COLLECTION); + profileLocal.setYieldCount(plasmaSearchProfile.PROCESS_COLLECTION, (entities == null) ? 
0 : entities.size()); // since this is a conjunction we return an empty entity if any word is not known if (entities == null) { @@ -156,31 +157,28 @@ public final class plasmaSearchEvent { } // join the result - long remainingTime = time - (System.currentTimeMillis() - stamp); - if (remainingTime < 1000) remainingTime = 1000; - rcLocal = plasmaWordIndexEntity.joinEntities(entities, remainingTime); - log.logFine("SEARCH TIME FOR FINDING " + rcLocal.size() + " ELEMENTS: " + ((System.currentTimeMillis() - stamp) / 1000) + " seconds"); - + profileLocal.startTimer(); + rcLocal = plasmaWordIndexEntity.joinEntities(entities, profileLocal.getTargetTime(plasmaSearchProfile.PROCESS_JOIN)); + profileLocal.setYieldTime(plasmaSearchProfile.PROCESS_JOIN); + profileLocal.setYieldCount(plasmaSearchProfile.PROCESS_JOIN, rcLocal.size()); + return rcLocal.size(); } - public int globalSearch(int fetchcount, int fetchpeers, long timelimit) { + public int globalSearch(int fetchpeers) { // do global fetching // the result of the fetch is then in the rcGlobal if (fetchpeers < 10) fetchpeers = 10; - if (fetchcount > query.wantedResults * 10) fetchcount = query.wantedResults * 10; - - // set a duetime for clients - long duetime = timelimit - 4000; // subtract network traffic overhead, guessed 4 seconds - if (duetime < 1000) { duetime = 1000; } + + log.logFine("STARTING " + fetchpeers + " THREADS TO CATCH EACH " + profileGlobal.getTargetCount(plasmaSearchProfile.PROCESS_POSTSORT) + " URLs WITHIN " + (profileGlobal.duetime() / 1000) + " SECONDS"); - long timeout = System.currentTimeMillis() + timelimit; - searchThreads = yacySearch.searchHashes(query.queryHashes, urlStore, rcGlobal, fetchcount, fetchpeers, plasmaSwitchboard.urlBlacklist, snippetCache, duetime); + long timeout = System.currentTimeMillis() + profileGlobal.duetime() + 4000; + searchThreads = yacySearch.searchHashes(query.queryHashes, urlStore, rcGlobal, fetchpeers, plasmaSwitchboard.urlBlacklist, snippetCache, profileGlobal); // 
wait until wanted delay passed or wanted result appeared while (System.currentTimeMillis() < timeout) { // check if all threads have been finished or results so far are enough - if (rcGlobal.size() >= fetchcount * 3) break; // we have enough + if (rcGlobal.size() >= profileGlobal.getTargetCount(plasmaSearchProfile.PROCESS_POSTSORT) * 3) break; // we have enough if (yacySearch.remainingWaiting(searchThreads) == 0) break; // we cannot expect more // wait a little time .. try {Thread.currentThread().sleep(100);} catch (InterruptedException e) {} @@ -189,7 +187,7 @@ public final class plasmaSearchEvent { return rcGlobal.size(); } - public plasmaSearchResult order(long maxTime, int minEntries) throws IOException { + public plasmaSearchResult order() throws IOException { // we collect the urlhashes and construct a list with urlEntry objects // attention: if minEntries is too high, this method will not terminate within the maxTime @@ -197,19 +195,29 @@ public final class plasmaSearchEvent { searchResult.merge(rcLocal, -1); searchResult.merge(rcGlobal, -1); + long preorderTime = profileLocal.getTargetTime(plasmaSearchProfile.PROCESS_PRESORT); + long postorderTime = profileLocal.getTargetTime(plasmaSearchProfile.PROCESS_POSTSORT); + + profileLocal.startTimer(); + plasmaSearchPreOrder preorder = new plasmaSearchPreOrder(query); + preorder.addEntity(searchResult, preorderTime); + profileLocal.setYieldTime(plasmaSearchProfile.PROCESS_PRESORT); + profileLocal.setYieldCount(plasmaSearchProfile.PROCESS_PRESORT, rcLocal.size()); + + profileLocal.startTimer(); plasmaSearchResult acc = new plasmaSearchResult(query); if (searchResult == null) return acc; // strange case where searchResult is not proper: acc is then empty if (searchResult.size() == 0) return acc; // case that we have nothing to do - Iterator e = searchResult.elements(true); + // start url-fetch plasmaWordIndexEntry entry; - long startCreateTime = System.currentTimeMillis(); + long postorderLimitTime = (postorderTime < 
0) ? Long.MAX_VALUE : System.currentTimeMillis() + postorderTime; plasmaCrawlLURL.Entry page; + int minEntries = profileLocal.getTargetCount(plasmaSearchProfile.PROCESS_POSTSORT); try { - while (e.hasNext()) { - if ((acc.sizeFetched() >= minEntries) && - (System.currentTimeMillis() - startCreateTime >= maxTime)) break; - entry = (plasmaWordIndexEntry) e.next(); + while (preorder.hasNext()) { + if ((acc.sizeFetched() >= minEntries) && (System.currentTimeMillis() >= postorderLimitTime)) break; + entry = (plasmaWordIndexEntry) preorder.next(); // find the url entry page = urlStore.getEntry(entry.getUrlHash()); // add a result @@ -218,10 +226,15 @@ public final class plasmaSearchEvent { } catch (kelondroException ee) { serverLog.logSevere("PLASMA", "Database Failure during plasmaSearch.order: " + ee.getMessage(), ee); } - long startSortTime = System.currentTimeMillis(); + profileLocal.setYieldTime(plasmaSearchProfile.PROCESS_URLFETCH); + profileLocal.setYieldCount(plasmaSearchProfile.PROCESS_URLFETCH, acc.sizeFetched()); + + // start postsorting + profileLocal.startTimer(); acc.sortResults(); - serverLog.logFine("PLASMA", "plasmaSearchEvent.order: minEntries = " + minEntries + ", effectiveEntries = " + acc.sizeOrdered() + ", demanded Time = " + maxTime + ", effectiveTime = " + (System.currentTimeMillis() - startCreateTime) + ", createTime = " + (startSortTime - startCreateTime) + ", sortTime = " + (System.currentTimeMillis() - startSortTime)); - return acc; + profileLocal.setYieldTime(plasmaSearchProfile.PROCESS_POSTSORT); + profileLocal.setYieldCount(plasmaSearchProfile.PROCESS_POSTSORT, acc.sizeOrdered()); + return acc; } public void flushResults() { @@ -229,32 +242,39 @@ public final class plasmaSearchEvent { // this must be called after search results had been computed // it is wise to call this within a separate thread because this method waits untill all if (searchThreads == null) return; - - // wait untill all threads are finished + + // wait until all threads 
are finished int remaining; + int count = 0; + String wordHash; long starttime = System.currentTimeMillis(); while ((remaining = yacySearch.remainingWaiting(searchThreads)) > 0) { - try {Thread.currentThread().sleep(5000);} catch (InterruptedException e) {} + // flush the rcGlobal as much as is there so far + synchronized (rcGlobal) { + Iterator hashi = query.queryHashes.iterator(); + while (hashi.hasNext()) { + wordHash = (String) hashi.next(); + Iterator i = rcGlobal.elements(true); + plasmaWordIndexEntry entry; + while (i.hasNext()) { + entry = (plasmaWordIndexEntry) i.next(); + wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry), false); + } + } + // the rcGlobal was flushed, empty it + count += rcGlobal.size(); + rcGlobal.deleteComplete(); + } + // wait a little bit before trying again + try {Thread.currentThread().sleep(3000);} catch (InterruptedException e) {} if (System.currentTimeMillis() - starttime > 90000) { yacySearch.interruptAlive(searchThreads); - serverLog.logFine("PLASMA", "SEARCH FLUSH: " + remaining + " PEERS STILL BUSY; ABANDONED"); + serverLog.logFine("PLASMA", "SEARCH FLUSH: " + remaining + " PEERS STILL BUSY; ABANDONED; SEARCH WAS " + query.queryWords); break; } } - // now flush the rcGlobal into wordIndex - Iterator hashi = query.queryHashes.iterator(); - String wordHash; - while (hashi.hasNext()) { - wordHash = (String) hashi.next(); - Iterator i = rcGlobal.elements(true); - plasmaWordIndexEntry entry; - while (i.hasNext()) { - entry = (plasmaWordIndexEntry) i.next(); - wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry), false); - } - } - serverLog.logFine("PLASMA", "FINISHED FLUSHING " + rcGlobal.size() + " GLOBAL SEARCH RESULTS"); + serverLog.logFine("PLASMA", "FINISHED FLUSHING " + count + " GLOBAL SEARCH RESULTS FOR SEARCH " + query.queryWords); // finally delete the temporary index rcGlobal = null; diff --git 
a/source/de/anomic/plasma/plasmaSearchPreOrder.java b/source/de/anomic/plasma/plasmaSearchPreOrder.java new file mode 100644 index 000000000..5bc4f8de2 --- /dev/null +++ b/source/de/anomic/plasma/plasmaSearchPreOrder.java @@ -0,0 +1,103 @@ +// plasmaSearchPreOder.java +// ----------------------- +// part of YACY +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2005 +// Created: 23.10.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + + +package de.anomic.plasma; + +import java.util.TreeMap; +import java.util.Set; +import java.util.HashSet; +import java.util.ArrayList; +import java.util.Iterator; +import java.net.URL; + +import de.anomic.kelondro.kelondroMScoreCluster; +import de.anomic.server.serverCodings; + +public final class plasmaSearchPreOrder { + + private TreeMap pageAcc; // key = order hash; value = plasmaLURL.entry + private plasmaSearchQuery query; + + public plasmaSearchPreOrder(plasmaSearchQuery query) { + this.pageAcc = new TreeMap(); + this.query = query; + } + + public plasmaSearchPreOrder cloneSmart() { + // clones only the top structure + plasmaSearchPreOrder theClone = new plasmaSearchPreOrder(query); + theClone.pageAcc = (TreeMap) this.pageAcc.clone(); + return theClone; + } + + + public boolean hasNext() { + return pageAcc.size() > 0; + } + + public plasmaWordIndexEntry next() { + Object top = pageAcc.lastKey(); + return (plasmaWordIndexEntry) pageAcc.remove(top); + } + + public void addEntity(plasmaWordIndexEntity entity, long maxTime) { + Iterator i = entity.elements(true); + long limitTime = (maxTime < 0) ? 
Long.MAX_VALUE : System.currentTimeMillis() + maxTime; + plasmaWordIndexEntry entry; + while (i.hasNext()) { + if (System.currentTimeMillis() > limitTime) break; + entry = (plasmaWordIndexEntry) i.next(); + addEntry(entry); + } + } + + public void addEntry(plasmaWordIndexEntry indexEntry) { + long ranking = 0; + if (query.order[0].equals(plasmaSearchQuery.ORDER_QUALITY)) ranking = 4096 * indexEntry.getQuality(); + else if (query.order[0].equals(plasmaSearchQuery.ORDER_DATE)) ranking = 4096 * indexEntry.getVirtualAge(); + if (query.order[1].equals(plasmaSearchQuery.ORDER_QUALITY)) ranking += indexEntry.getQuality(); + else if (query.order[1].equals(plasmaSearchQuery.ORDER_DATE)) ranking += indexEntry.getVirtualAge(); + pageAcc.put(serverCodings.encodeHex(ranking, 16) + indexEntry.getUrlHash(), indexEntry); + } + + +} \ No newline at end of file diff --git a/source/de/anomic/plasma/plasmaSearchProfile.java b/source/de/anomic/plasma/plasmaSearchProfile.java new file mode 100644 index 000000000..5b910d002 --- /dev/null +++ b/source/de/anomic/plasma/plasmaSearchProfile.java @@ -0,0 +1,276 @@ +// plasmaSearchProfile.java +// ----------------------- +// part of YACY +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2005 +// Created: 17.10.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ +package de.anomic.plasma; + +import java.util.HashMap; +import java.lang.StringBuffer; +import java.lang.Cloneable; + +/** + * + * This class provides timing properties for search processes + * It shall be used to initiate a search and also to evaluate + * the real obtained timings after a search is performed + */ + +public class plasmaSearchProfile implements Cloneable { + + // collection: + // time = time to get a RWI out of RAM cache, assortments and WORDS files + // count = maximum number of RWI-entries that shall be collected + + // join + // time = time to perform the join between all collected RWIs + // count = maximum number of entries that shall be joined + + // presort: + // time = time to do a sort of the joined URL-records + // count = maximum number of entries that shall be pre-sorted + + // urlfetch: + // time = time to fetch the real URLs from the LURL database + // count = maximum number of urls that shall be fetched + + // postsort: + // time = time for final sort of URLs + // count = maximum number oof URLs that shall be retrieved during sort + + // snippetfetch: + // time = time to fetch snippets for selected URLs + // count = maximum number of snipptes to be fetched + + public static final char PROCESS_COLLECTION = 'c'; + public static final char PROCESS_JOIN = 'j'; + public static final char PROCESS_PRESORT = 'r'; + public static final char PROCESS_URLFETCH = 'u'; + public static final char PROCESS_POSTSORT = 'o'; + public static final char PROCESS_SNIPPETFETCH = 's'; + + public static char[] sequence = new char[]{ + PROCESS_COLLECTION, + PROCESS_JOIN, + PROCESS_PRESORT, + PROCESS_URLFETCH, + PROCESS_POSTSORT, + PROCESS_SNIPPETFETCH + }; + + private HashMap targetTime; + private HashMap targetCount; + private HashMap yieldTime; + private HashMap yieldCount; + private long timer; + + private plasmaSearchProfile() { + targetTime = new HashMap(); + targetCount = new HashMap(); + yieldTime = new HashMap(); + yieldCount = new HashMap(); + timer = 
0; + } + + public plasmaSearchProfile(long time, int count) { + this( + 3 * time / 12, 10 * count, + 1 * time / 12, 10 * count, + 1 * time / 12, 10 * count, + 2 * time / 12, 5 * count, + 4 * time / 12, count, + 1 * time / 12, 1 + ); + } + + public plasmaSearchProfile( + long time_collection, int count_collection, + long time_join, int count_join, + long time_presort, int count_presort, + long time_urlfetch, int count_urlfetch, + long time_postsort, int count_postsort, + long time_snippetfetch, int count_snippetfetch) { + this(); + + targetTime.put(new Character(PROCESS_COLLECTION), new Long(time_collection)); + targetTime.put(new Character(PROCESS_JOIN), new Long(time_join)); + targetTime.put(new Character(PROCESS_PRESORT), new Long(time_presort)); + targetTime.put(new Character(PROCESS_URLFETCH), new Long(time_urlfetch)); + targetTime.put(new Character(PROCESS_POSTSORT), new Long(time_postsort)); + targetTime.put(new Character(PROCESS_SNIPPETFETCH), new Long(time_snippetfetch)); + targetCount.put(new Character(PROCESS_COLLECTION), new Integer(count_collection)); + targetCount.put(new Character(PROCESS_JOIN), new Integer(count_join)); + targetCount.put(new Character(PROCESS_PRESORT), new Integer(count_presort)); + targetCount.put(new Character(PROCESS_URLFETCH), new Integer(count_urlfetch)); + targetCount.put(new Character(PROCESS_POSTSORT), new Integer(count_postsort)); + targetCount.put(new Character(PROCESS_SNIPPETFETCH), new Integer(count_snippetfetch)); + + } + + public Object clone() { + plasmaSearchProfile p = new plasmaSearchProfile(); + p.targetTime = (HashMap) this.targetTime.clone(); + p.targetCount = (HashMap) this.targetCount.clone(); + p.yieldTime = (HashMap) this.yieldTime.clone(); + p.yieldCount = (HashMap) this.yieldCount.clone(); + return (Object) p; + } + + public plasmaSearchProfile(String s) { + targetTime = new HashMap(); + targetCount = new HashMap(); + yieldTime = new HashMap(); + yieldCount = new HashMap(); + + intoMap(s, targetTime, 
targetCount); + } + + public long duetime() { + // returns the old duetime value as sum of all waiting times + long d = 0; + for (int i = 0; i < sequence.length; i++) { + d += ((Long) targetTime.get(new Character(sequence[i]))).longValue(); + } + return d; + } + + public void putYield(String s) { + intoMap(s, yieldTime, yieldCount); + } + + public String yieldToString() { + return toString(yieldTime, yieldCount); + } + + public String targetToString() { + return toString(targetTime, targetCount); + } + + public long getTargetTime(char type) { + // sum up all time that was demanded and subtract all that had been wasted + long sum = 0; + Long t; + Character element; + for (int i = 0; i < sequence.length; i++) { + element = new Character(sequence[i]); + t = (Long) targetTime.get(element); + if (t != null) sum += t.longValue(); + if (type == sequence[i]) return (sum < 0) ? 0 : sum; + t = (Long) yieldTime.get(element); + if (t != null) sum -= t.longValue(); + } + return 0; + } + + public int getTargetCount(char type) { + Integer i = (Integer) targetCount.get(new Character(type)); + if (i == null) return -1; else return i.intValue(); + } + + public long getYieldTime(char type) { + Long l = (Long) yieldTime.get(new Character(type)); + if (l == null) return -1; else return l.longValue(); + } + + public int getYieldCount(char type) { + Integer i = (Integer) yieldCount.get(new Character(type)); + if (i == null) return -1; else return i.intValue(); + } + + public void startTimer() { + this.timer = System.currentTimeMillis(); + } + + public void setYieldTime(char type) { + // sets a time that is computed using the timer + long t = System.currentTimeMillis() - this.timer; + yieldTime.put(new Character(type), new Long(t)); + } + + public void setYieldCount(char type, int count) { + yieldCount.put(new Character(type), new Integer(count)); + } + + public String reportToString() { + return "target=" + toString(targetTime, targetCount) + "; yield=" + toString(yieldTime, yieldCount); 
+ } + + public static String toString(HashMap time, HashMap count) { + // put this into a format in such a way that it can be send in a http header or post argument + // that means that no '=' or spaces are allowed + StringBuffer sb = new StringBuffer(sequence.length * 10); + Character element; + Integer xi; + Long xl; + for (int i = 0; i < sequence.length; i++) { + element = new Character(sequence[i]); + sb.append("t"); + sb.append(element); + xl = (Long) time.get(element); + sb.append((xl == null) ? "0" : xl.toString()); + sb.append("|"); + sb.append("c"); + sb.append(element); + xi = (Integer) count.get(element); + sb.append((xi == null) ? "0" : xi.toString()); + sb.append("|"); + } + return sb.toString(); + } + + public static void intoMap(String s, HashMap time, HashMap count) { + // this is the reverse method to toString + int p = 0; + char ct; + String elt; + String v; + int p1; + while ((p < s.length()) && ((p1 = s.indexOf('|', p)) > 0)) { + ct = s.charAt(p); + elt = s.substring(p + 1, p + 2); + v = s.substring(p + 2, p1); + if (ct == 't') { + time.put(elt, new Long(Long.parseLong(v))); + } else { + count.put(elt, new Integer(Integer.parseInt(v))); + } + } + } + +} diff --git a/source/de/anomic/plasma/plasmaSearchResult.java b/source/de/anomic/plasma/plasmaSearchResult.java index 1b7b14e78..16760812b 100644 --- a/source/de/anomic/plasma/plasmaSearchResult.java +++ b/source/de/anomic/plasma/plasmaSearchResult.java @@ -135,7 +135,6 @@ public final class plasmaSearchResult { String[] urlcomps; String[] descrcomps; long ranking; - long inc = 4096 * 4096; String queryhash; for (int i = 0; i < results.size(); i++) { // take out values from result array @@ -147,14 +146,10 @@ public final class plasmaSearchResult { // apply pre-calculated order attributes ranking = 0; - if (query.order[0].equals(plasmaSearchQuery.ORDER_QUALITY)) ranking = 4096 * indexEntry.getQuality(); - else if (query.order[0].equals(plasmaSearchQuery.ORDER_DATE)) ranking = 4096 * 
indexEntry.getVirtualAge(); - if (query.order[1].equals(plasmaSearchQuery.ORDER_QUALITY)) ranking += indexEntry.getQuality(); - else if (query.order[1].equals(plasmaSearchQuery.ORDER_DATE)) ranking += indexEntry.getVirtualAge(); // apply 'common-sense' heuristic using references - for (int j = 0; j < urlcomps.length; j++) if (commonSense.contains(urlcomps[j])) ranking += inc; - for (int j = 0; j < descrcomps.length; j++) if (commonSense.contains(descrcomps[j])) ranking += inc; + for (int j = 0; j < urlcomps.length; j++) if (commonSense.contains(urlcomps[j])) ranking++; + for (int j = 0; j < descrcomps.length; j++) if (commonSense.contains(descrcomps[j])) ranking++; // apply query-in-result matching Set urlcomph = plasmaSearchQuery.words2hashes(urlcomps); @@ -162,8 +157,8 @@ public final class plasmaSearchResult { Iterator shi = query.queryHashes.iterator(); while (shi.hasNext()) { queryhash = (String) shi.next(); - if (urlcomph.contains(queryhash)) ranking += 10 * inc; - if (descrcomph.contains(queryhash)) ranking += 100 * inc; + if (urlcomph.contains(queryhash)) ranking += 10; + if (descrcomph.contains(queryhash)) ranking += 100; } // insert value diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java index b31cef964..cb1be2de0 100644 --- a/source/de/anomic/plasma/plasmaSnippetCache.java +++ b/source/de/anomic/plasma/plasmaSnippetCache.java @@ -168,7 +168,7 @@ public class plasmaSnippetCache { return new result(line, source, null); } - public synchronized void storeToCache(String wordhashes, String urlhash, String snippet) { + public void storeToCache(String wordhashes, String urlhash, String snippet) { // generate key String key = urlhash + wordhashes; @@ -371,13 +371,14 @@ public class plasmaSnippetCache { this.log); } - public void fetch(plasmaSearchResult acc, Set queryhashes, String urlmask, int fetchcount) { + public void fetch(plasmaSearchResult acc, Set queryhashes, String urlmask, int fetchcount, 
long maxTime) { // fetch snippets int i = 0; plasmaCrawlLURL.Entry urlentry; String urlstring; plasmaSnippetCache.result snippet; - while ((acc.hasMoreElements()) && (i < fetchcount)) { + long limitTime = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime; + while ((acc.hasMoreElements()) && (i < fetchcount) && (System.currentTimeMillis() < limitTime)) { urlentry = acc.nextElement(); if (urlentry.url().getHost().endsWith(".yacyh")) continue; urlstring = htmlFilterContentScraper.urlNormalform(urlentry.url()); diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index d8e9a6853..221143904 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -1443,55 +1443,21 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser if (date == null) return ""; else return DateFormatter.format(date); } - /* - public class presearch extends Thread { - Set queryhashes; - char[] order; - String urlmask; - long time; - int searchcount, fetchcount; - public presearch(Set queryhashes, char[] order, long time, String urlmask, int searchcount, int fetchcount) { - this.queryhashes = queryhashes; - this.order = order; - this.urlmask = urlmask; - this.time = time; - this.searchcount = searchcount; - this.fetchcount = fetchcount; - } - public void run() { - plasmaWordIndexEntity idx = null; - try { - // search the database locally - log.logFine("presearch: started job"); - idx = searchManager.searchHashes(queryhashes, time); - log.logFine("presearch: found " + idx.size() + " results"); - plasmaSearchResult acc = searchManager.order(idx, queryhashes, stopwords, order, time, searchcount); - if (acc == null) return; - log.logFine("presearch: ordered results, now " + acc.sizeOrdered() + " URLs ready for fetch"); - - // take some elements and fetch the snippets - snippetCache.fetch(acc, queryhashes, urlmask, fetchcount); - } catch 
(IOException e) { - log.logSevere("presearch: failed", e); - } finally { - if (idx != null) try { idx.close(); } catch (Exception e){} - } - log.logFine("presearch: job terminated"); - } - } - - */ - //public serverObjects searchFromLocal(Set querywords, String order1, String order2, int count, boolean global, long time /*milliseconds*/, String urlmask) { public serverObjects searchFromLocal(plasmaSearchQuery query) { // tell all threads to do nothing for a specific time + //log.logInfo("A"); wordIndex.intermission(2 * query.maximumTime); + //log.logInfo("B"); intermissionAllThreads(2 * query.maximumTime); - + //log.logInfo("C"); + serverObjects prop = new serverObjects(); - try { + //log.logInfo("D"); + try { // filter out words that appear in bluelist + //log.logInfo("E"); query.filterOut(blueList); // log @@ -1510,8 +1476,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser plasmaSearchResult acc = theSearch.search(); // fetch snippets - if (query.domType != plasmaSearchQuery.SEARCHDOM_GLOBALDHT) - snippetCache.fetch(acc.cloneSmart(), query.queryHashes, query.urlMask, 10); + //if (query.domType != plasmaSearchQuery.SEARCHDOM_GLOBALDHT) snippetCache.fetch(acc.cloneSmart(), query.queryHashes, query.urlMask, 10, 1000); log.logFine("SEARCH TIME AFTER ORDERING OF SEARCH RESULT: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds"); // result is a List of urlEntry elements: prepare answer @@ -1531,8 +1496,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser String host, hash, address, descr = ""; yacySeed seed; plasmaSnippetCache.result snippet; + long targetTime = timestamp + query.maximumTime; + if (targetTime < System.currentTimeMillis()) targetTime = System.currentTimeMillis() + 5000; //kelondroMScoreCluster ref = new kelondroMScoreCluster(); - while ((acc.hasMoreElements()) && (i < query.wantedResults)) { + while ((acc.hasMoreElements()) && (i < query.wantedResults) && 
(System.currentTimeMillis() < targetTime)) { urlentry = acc.nextElement(); url = urlentry.url(); urlhash = urlentry.hash(); @@ -1639,15 +1606,15 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser wordIndex.intermission(2 * query.maximumTime); intermissionAllThreads(2 * query.maximumTime); + query.domType = plasmaSearchQuery.SEARCHDOM_LOCAL; + serverObjects prop = new serverObjects(); try { log.logInfo("INIT HASH SEARCH: " + query.queryHashes + " - " + query.wantedResults + " links"); long timestamp = System.currentTimeMillis(); plasmaSearchEvent theSearch = new plasmaSearchEvent(query, log, wordIndex, urlPool.loadedURL, snippetCache); - int idxc = theSearch.localSearch(query.maximumTime * 8 / 10); - long remainingTime = query.maximumTime - (System.currentTimeMillis() - timestamp); - if (remainingTime < 500) remainingTime = 500; - plasmaSearchResult acc = theSearch.order(remainingTime, 10); + int idxc = theSearch.localSearch(); + plasmaSearchResult acc = theSearch.order(); // result is a List of urlEntry elements if (acc == null) { diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index 82adbdc26..9827dcdb3 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -148,23 +148,29 @@ public final class plasmaWordIndex { return condenser.getWords().size(); } - public plasmaWordIndexEntity getEntity(String wordHash, boolean deleteIfEmpty) { - return ramCache.getIndex(wordHash, deleteIfEmpty); + public plasmaWordIndexEntity getEntity(String wordHash, boolean deleteIfEmpty, long maxTime) { + return ramCache.getIndex(wordHash, deleteIfEmpty, maxTime); } - public Set getEntities(Set wordHashes, boolean deleteIfEmpty, boolean interruptIfEmpty) { + public Set getEntities(Set wordHashes, boolean deleteIfEmpty, boolean interruptIfEmpty, long maxTime) { // retrieve entities that belong to the hashes HashSet entities = new HashSet(); 
String singleHash; plasmaWordIndexEntity singleEntity; Iterator i = wordHashes.iterator(); + long start = System.currentTimeMillis(); + long remaining; while (i.hasNext()) { + // check time + remaining = maxTime - (System.currentTimeMillis() - start); + if ((maxTime > 0) && (remaining <= 0)) break; + // get next hash: singleHash = (String) i.next(); // retrieve index - singleEntity = getEntity(singleHash, true); + singleEntity = getEntity(singleHash, true, (maxTime < 0) ? -1 : remaining / (wordHashes.size() - entities.size())); // check result if (((singleEntity == null) || (singleEntity.size() == 0)) && (interruptIfEmpty)) return null; diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java index 3265261f0..86ae0b05c 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java @@ -158,18 +158,20 @@ public final class plasmaWordIndexAssortmentCluster { if (newContainer.size() > clusterCapacity) return newContainer; // it will not fit if (newContainer.size() <= clusterCount) newContainer = storeSingular(wordHash, newContainer); if (newContainer == null) return null; - newContainer.add(removeFromAll(wordHash)); + newContainer.add(removeFromAll(wordHash, -1)); if (newContainer.size() > clusterCapacity) return newContainer; storeStretched(wordHash, newContainer); return null; } - public plasmaWordIndexEntryContainer removeFromAll(String wordHash) { + public plasmaWordIndexEntryContainer removeFromAll(String wordHash, long maxTime) { // collect all records from all the assortments and return them plasmaWordIndexEntryContainer buffer, record = new plasmaWordIndexEntryContainer(wordHash); + long limitTime = (maxTime < 0) ? 
Long.MAX_VALUE : System.currentTimeMillis() + maxTime; for (int i = 0; i < clusterCount; i++) { buffer = assortments[i].remove(wordHash); if (buffer != null) record.add(buffer); + if (System.currentTimeMillis() > limitTime) break; } return record; } diff --git a/source/de/anomic/plasma/plasmaWordIndexCache.java b/source/de/anomic/plasma/plasmaWordIndexCache.java index f7d1a27ba..fec4861ca 100644 --- a/source/de/anomic/plasma/plasmaWordIndexCache.java +++ b/source/de/anomic/plasma/plasmaWordIndexCache.java @@ -416,9 +416,10 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { return ((long) intTime) * ((long) 1000) + startTime; } - private boolean flushFromAssortmentCluster(String key) { + private boolean flushFromAssortmentCluster(String key, long maxTime) { // this should only be called if the assortment shall be deleted or returned in an index entity - plasmaWordIndexEntryContainer container = assortmentCluster.removeFromAll(key); + if (maxTime > 0) maxTime = 8 * maxTime / 10; // reserve time for later adding to backend; a negative maxTime means "no limit" and must stay negative (8 * -1 / 10 would truncate to 0, i.e. a zero time budget) + plasmaWordIndexEntryContainer container = assortmentCluster.removeFromAll(key, maxTime); if (container == null) { return false; } else { @@ -428,12 +429,19 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { } } - public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty) { + public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty, long maxTime) { flushThread.pause(); + long start = System.currentTimeMillis(); flushFromMem(wordHash); - flushFromAssortmentCluster(wordHash); + if (maxTime < 0) { + flushFromAssortmentCluster(wordHash, -1); + } else { + long remaining = maxTime - (System.currentTimeMillis() - start); + if (remaining > 0) flushFromAssortmentCluster(wordHash, remaining); + } flushThread.proceed(); - return backend.getIndex(wordHash, deleteIfEmpty); + long r = maxTime - (System.currentTimeMillis() - start); + return backend.getIndex(wordHash, deleteIfEmpty,
(r < 0) ? 0 : r); } public long getUpdateTime(String wordHash) { @@ -454,7 +462,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { hashScore.deleteScore(wordHash); hashDate.deleteScore(wordHash); } - assortmentCluster.removeFromAll(wordHash); + assortmentCluster.removeFromAll(wordHash, -1); backend.deleteIndex(wordHash); flushThread.proceed(); } @@ -462,7 +470,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { public synchronized int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) { flushThread.pause(); flushFromMem(wordHash); - flushFromAssortmentCluster(wordHash); + flushFromAssortmentCluster(wordHash, -1); int removed = backend.removeEntries(wordHash, urlHashes, deleteComplete); flushThread.proceed(); return removed; @@ -562,7 +570,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { } else { // take out all words from the assortment to see if it fits // together with the extracted assortment - plasmaWordIndexEntryContainer container = assortmentCluster.removeFromAll(wordhash); + plasmaWordIndexEntryContainer container = assortmentCluster.removeFromAll(wordhash, -1); if (size + container.size() > assortmentCluster.clusterCapacity) { // this will also be too big to integrate, add to entity entity.addEntries(container); diff --git a/source/de/anomic/plasma/plasmaWordIndexClassicDB.java b/source/de/anomic/plasma/plasmaWordIndexClassicDB.java index 033d3b3c3..d21c20c61 100644 --- a/source/de/anomic/plasma/plasmaWordIndexClassicDB.java +++ b/source/de/anomic/plasma/plasmaWordIndexClassicDB.java @@ -181,7 +181,7 @@ public class plasmaWordIndexClassicDB implements plasmaWordIndexInterface { } } - public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty) { + public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty, long maxTime) { try { return new plasmaWordIndexEntity(databaseRoot, wordHash, deleteIfEmpty); } 
catch (IOException e) { @@ -210,7 +210,7 @@ public class plasmaWordIndexClassicDB implements plasmaWordIndexInterface { plasmaWordIndexEntity pi = null; int count = 0; try { - pi = getIndex(wordHash, true); + pi = getIndex(wordHash, true, -1); for (int i = 0; i < urlHashes.length; i++) if (pi.removeEntry(urlHashes[i], deleteComplete)) count++; int size = pi.size(); diff --git a/source/de/anomic/plasma/plasmaWordIndexDistribution.java b/source/de/anomic/plasma/plasmaWordIndexDistribution.java index 5d491d141..3ad53e287 100644 --- a/source/de/anomic/plasma/plasmaWordIndexDistribution.java +++ b/source/de/anomic/plasma/plasmaWordIndexDistribution.java @@ -314,7 +314,7 @@ public final class plasmaWordIndexDistribution { ((nexthash = (String) wordHashIterator.next()) != null) && (nexthash.trim().length() > 0) ) { - indexEntity = this.wordIndex.getEntity(nexthash, true); + indexEntity = this.wordIndex.getEntity(nexthash, true, -1); if (indexEntity.size() == 0) { indexEntity.deleteComplete(); } else if ((indexEntity.size() <= count)|| // if we havn't exceeded the limit @@ -355,7 +355,7 @@ public final class plasmaWordIndexDistribution { } } catch (kelondroException e) { this.log.logSevere("plasmaWordIndexDistribution/1: deleted DB for word " + indexEntity.wordHash(), e); - try {indexEntity.deleteComplete();} catch (IOException ee) {} + indexEntity.deleteComplete(); } } else { // make an on-the-fly entity and insert values @@ -389,7 +389,7 @@ public final class plasmaWordIndexDistribution { tmpEntities.add(tmpEntity); } catch (kelondroException e) { this.log.logSevere("plasmaWordIndexDistribution/2: deleted DB for word " + indexEntity.wordHash(), e); - try {indexEntity.deleteComplete();} catch (IOException ee) {} + indexEntity.deleteComplete(); } indexEntity.close(); // important: is not closed elswhere and cannot be deleted afterwards indexEntity = null; @@ -427,7 +427,7 @@ public final class plasmaWordIndexDistribution { urlHashes[c++] = indexEntry.getUrlHash(); } 
wordIndex.removeEntries(indexEntities[i].wordHash(), urlHashes, true); - indexEntity = wordIndex.getEntity(indexEntities[i].wordHash(), true); + indexEntity = wordIndex.getEntity(indexEntities[i].wordHash(), true, -1); sz = indexEntity.size(); indexEntity.close(); log.logFine("Deleted partial index (" + c + " URLs) for word " + indexEntities[i].wordHash() + "; " + sz + " entries left"); diff --git a/source/de/anomic/plasma/plasmaWordIndexEntity.java b/source/de/anomic/plasma/plasmaWordIndexEntity.java index 0947f9aba..a1827969f 100644 --- a/source/de/anomic/plasma/plasmaWordIndexEntity.java +++ b/source/de/anomic/plasma/plasmaWordIndexEntity.java @@ -50,6 +50,7 @@ import java.util.Set; import de.anomic.kelondro.kelondroRecords; import de.anomic.kelondro.kelondroTree; import de.anomic.kelondro.kelondroException; +import de.anomic.server.logging.serverLog; public final class plasmaWordIndexEntity { @@ -128,11 +129,7 @@ public final class plasmaWordIndexEntity { if (theTmpMap == null) { int size = theIndex.size(); if ((size == 0) && (delete)) { - try { - deleteComplete(); - } catch (IOException e) { - delete = false; - } + deleteComplete(); return 0; } else { return size; @@ -164,6 +161,7 @@ public final class plasmaWordIndexEntity { } public boolean addEntry(plasmaWordIndexEntry entry) throws IOException { + if (entry == null) return false; if (theTmpMap == null) { return (theIndex.put(entry.getUrlHash().getBytes(), entry.toEncodedForm(false).getBytes()) == null); } else { @@ -191,9 +189,9 @@ public final class plasmaWordIndexEntity { return count; } - public boolean deleteComplete() throws IOException { + public boolean deleteComplete() { if (theTmpMap == null) { - theIndex.close(); + try {theIndex.close();} catch (IOException e) {} // remove file boolean success = theLocation.delete(); // and also the paren directory if that is empty @@ -257,10 +255,7 @@ public final class plasmaWordIndexEntity { } catch (IOException e) { i = null; throw new 
RuntimeException("dbenum: " + e.getMessage()); - } catch (kelondroException e) { - i = null; - throw new RuntimeException("dbenum: " + e.getMessage()); - } + } } public void remove() { throw new UnsupportedOperationException(); @@ -305,9 +300,13 @@ public final class plasmaWordIndexEntity { // a time=-1 means: no timeout Iterator i = otherEntity.elements(true); long timeout = (time == -1) ? Long.MAX_VALUE : System.currentTimeMillis() + time; + try { while ((i.hasNext()) && (System.currentTimeMillis() < timeout)) { addEntry((plasmaWordIndexEntry) i.next()); } + } catch (kelondroException e) { + serverLog.logSevere("PLASMA", "plasmaWordIndexEntity.merge: " + e.getMessage()); + } } public static plasmaWordIndexEntity joinEntities(Set entities, long time) throws IOException { diff --git a/source/de/anomic/plasma/plasmaWordIndexInterface.java b/source/de/anomic/plasma/plasmaWordIndexInterface.java index 86ea3a854..078518d2a 100644 --- a/source/de/anomic/plasma/plasmaWordIndexInterface.java +++ b/source/de/anomic/plasma/plasmaWordIndexInterface.java @@ -50,7 +50,7 @@ public interface plasmaWordIndexInterface { public Iterator wordHashes(String startWordHash, boolean up); - public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty); + public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty, long maxTime); public long getUpdateTime(String wordHash); public void deleteIndex(String wordHash); diff --git a/source/de/anomic/server/logging/serverLog.java b/source/de/anomic/server/logging/serverLog.java index 707715a66..2ef6c69ef 100644 --- a/source/de/anomic/server/logging/serverLog.java +++ b/source/de/anomic/server/logging/serverLog.java @@ -182,4 +182,12 @@ public final class serverLog { if (fileIn != null) try {fileIn.close();}catch(Exception e){} } } + + public static final String format(String s, int n, int fillChar) { // pads s on the left with fillChar up to a total length of n; NOTE(review): left-padding is assumed from the original insert(0, ...) - confirm + int l = s.length(); + if (l >= n) return s; // already long enough: returned unchanged + StringBuffer sb = new StringBuffer(n); + for (int i = l; i < n; i++) sb.append((char) fillChar); // exactly n - l fill characters; the cast is needed because appending an int writes its decimal digits + return sb.append(s).toString(); + } }
diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index 7f89fe62c..b37820275 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -58,6 +58,7 @@ import de.anomic.plasma.plasmaWordIndexEntry; import de.anomic.plasma.plasmaWordIndexEntryContainer; import de.anomic.plasma.plasmaURLPattern; import de.anomic.plasma.plasmaWordIndex; +import de.anomic.plasma.plasmaSearchProfile; import de.anomic.server.serverCore; import de.anomic.server.serverObjects; import de.anomic.tools.crypt; @@ -337,9 +338,9 @@ public final class yacyClient { } } - public static int search(String wordhashes, int count, boolean global, yacySeed targetPeer, + public static int search(String wordhashes, boolean global, yacySeed targetPeer, plasmaCrawlLURL urlManager, plasmaWordIndexEntity entityCache, - plasmaURLPattern blacklist, plasmaSnippetCache snippets, long duetime) { + plasmaURLPattern blacklist, plasmaSnippetCache snippets, plasmaSearchProfile profile) { // send a search request to peer with remote Hash // this mainly converts the words into word hashes @@ -376,16 +377,18 @@ public final class yacyClient { "&query=" + wordhashes; */ final serverObjects obj = new serverObjects(9); - obj.put("myseed", yacyCore.seedDB.mySeed.genSeedStr(key)); - obj.put("youare", targetPeer.hash); - obj.put("key", key); - obj.put("count", count); - obj.put("resource", ((global) ? "global" : "local")); - obj.put("query", wordhashes); - obj.put("ttl", "0"); - obj.put("duetime", Long.toString(duetime)); - obj.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date())); - + long duetime = profile.duetime(); + obj.put("myseed", yacyCore.seedDB.mySeed.genSeedStr(key)); + obj.put("youare", targetPeer.hash); + obj.put("key", key); + obj.put("count", profile.getTargetCount(plasmaSearchProfile.PROCESS_POSTSORT)); + obj.put("resource", ((global) ?
"global" : "local")); + obj.put("query", wordhashes); + obj.put("ttl", "0"); + obj.put("duetime", Long.toString(duetime)); + obj.put("profile", profile.targetToString()); // new duetimes splitted by specific search tasks + obj.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date())); + //yacyCore.log.logDebug("yacyClient.search url=" + url); final long timestamp = System.currentTimeMillis(); @@ -400,7 +403,11 @@ public final class yacyClient { obj ) ); + + // compute all computation times final long totalrequesttime = System.currentTimeMillis() - timestamp; + String returnProfile = (String) result.get("profile"); + if (returnProfile != null) profile.putYield(returnProfile); /* HashMap result = nxTools.table(httpc.wget(new URL(url), @@ -464,7 +471,7 @@ public final class yacyClient { } catch (NumberFormatException e) { searchtime = totalrequesttime; } - yacyCore.log.logFine("yacyClient.search: processed " + results + " links from peer " + targetPeer.hash + ":" + targetPeer.getName() + ", score=" + targetPeer.selectscore + ", DHTdist=" + yacyDHTAction.dhtDistance(targetPeer.hash, wordhashes) + ", duetime=" + duetime + ", searchtime=" + searchtime + ", netdelay=" + (totalrequesttime - searchtime) + ", references=" + result.get("references")); + yacyCore.log.logFine("SEARCH " + results + " URLS FROM " + targetPeer.hash + ":" + targetPeer.getName() + ", score=" + targetPeer.selectscore + ", DHTdist=" + yacyDHTAction.dhtDistance(targetPeer.hash, wordhashes) + ", duetime=" + duetime + ", searchtime=" + searchtime + ", netdelay=" + (totalrequesttime - searchtime) + ", references=" + result.get("references")); return results; } catch (Exception e) { yacyCore.log.logSevere("yacyClient.search error: '" + targetPeer.get(yacySeed.NAME, "anonymous") + "' failed - " + e); diff --git a/source/de/anomic/yacy/yacySearch.java b/source/de/anomic/yacy/yacySearch.java index 0b111b1c0..17da66c8f 100644 --- a/source/de/anomic/yacy/yacySearch.java +++ 
b/source/de/anomic/yacy/yacySearch.java @@ -53,12 +53,12 @@ import de.anomic.plasma.plasmaCrawlLURL; import de.anomic.plasma.plasmaURLPattern; import de.anomic.plasma.plasmaSnippetCache; import de.anomic.plasma.plasmaWordIndexEntity; +import de.anomic.plasma.plasmaSearchProfile; import de.anomic.server.logging.serverLog; public class yacySearch extends Thread { final private Set wordhashes; - final private int count; final private boolean global; final private plasmaCrawlLURL urlManager; final private plasmaWordIndexEntity entityCache; @@ -66,13 +66,12 @@ public class yacySearch extends Thread { final private plasmaSnippetCache snippetCache; final private yacySeed targetPeer; private int links; - final private long duetime; + final private plasmaSearchProfile profile; - public yacySearch(Set wordhashes, int count, boolean global, yacySeed targetPeer, - plasmaCrawlLURL urlManager, plasmaWordIndexEntity entityCache, plasmaURLPattern blacklist, plasmaSnippetCache snippetCache, long duetime) { + public yacySearch(Set wordhashes, boolean global, yacySeed targetPeer, + plasmaCrawlLURL urlManager, plasmaWordIndexEntity entityCache, plasmaURLPattern blacklist, plasmaSnippetCache snippetCache, plasmaSearchProfile profile) { super("yacySearch_" + targetPeer.getName()); this.wordhashes = wordhashes; - this.count = count; this.global = global; this.urlManager = urlManager; this.entityCache = entityCache; @@ -80,11 +79,11 @@ public class yacySearch extends Thread { this.snippetCache = snippetCache; this.targetPeer = targetPeer; this.links = -1; - this.duetime = duetime; + this.profile = (plasmaSearchProfile) profile.clone(); } public void run() { - this.links = yacyClient.search(set2string(wordhashes), count, global, targetPeer, urlManager, entityCache, blacklist, snippetCache, duetime); + this.links = yacyClient.search(set2string(wordhashes), global, targetPeer, urlManager, entityCache, blacklist, snippetCache, profile); if (links != 0) { //yacyCore.log.logInfo("REMOTE SEARCH 
- remote peer " + targetPeer.hash + ":" + targetPeer.getName() + " contributed " + links + " links for word hash " + wordhashes); yacyCore.seedDB.mySeed.incRI(links); @@ -102,7 +101,11 @@ public class yacySearch extends Thread { public int links() { return this.links; } - + + public plasmaSearchProfile profile() { + return this.profile; + } + private static yacySeed[] selectPeers(Set wordhashes, int seedcount) { // find out a specific number of seeds, that would be relevant for the given word hash(es) // the result is ordered by relevance: [0] is most relevant @@ -166,7 +169,7 @@ public class yacySearch extends Thread { } public static yacySearch[] searchHashes(Set wordhashes, plasmaCrawlLURL urlManager, plasmaWordIndexEntity entityCache, - int count, int targets, plasmaURLPattern blacklist, plasmaSnippetCache snippetCache, long duetime) { + int targets, plasmaURLPattern blacklist, plasmaSnippetCache snippetCache, plasmaSearchProfile profile) { // check own peer status if (yacyCore.seedDB.mySeed == null || yacyCore.seedDB.mySeed.getAddress() == null) { return null; } @@ -178,8 +181,8 @@ public class yacySearch extends Thread { if (targets == 0) return null; yacySearch[] searchThreads = new yacySearch[targets]; for (int i = 0; i < targets; i++) { - searchThreads[i]= new yacySearch(wordhashes, count, true, targetPeers[i], - urlManager, entityCache, blacklist, snippetCache, duetime); + searchThreads[i]= new yacySearch(wordhashes, true, targetPeers[i], + urlManager, entityCache, blacklist, snippetCache, profile); searchThreads[i].start(); try {Thread.currentThread().sleep(20);} catch (InterruptedException e) {} diff --git a/source/yacy.java b/source/yacy.java index de7cfb3dd..6fa881845 100644 --- a/source/yacy.java +++ b/source/yacy.java @@ -755,7 +755,7 @@ public final class yacy { try { wordCounter++; wordHash = (String) importWordHashIterator.next(); - importWordIdxEntity = importWordIndex.getEntity(wordHash, true); + importWordIdxEntity = 
importWordIndex.getEntity(wordHash, true, -1); if (importWordIdxEntity.size() == 0) { importWordIdxEntity.deleteComplete(); @@ -878,7 +878,7 @@ public final class yacy { try { wordCounter++; wordhash = (String) wordHashIterator.next(); - wordIdxEntity = wordIndex.getEntity(wordhash, true); + wordIdxEntity = wordIndex.getEntity(wordhash, true, -1); // the combined container will fit, read the container Iterator wordIdxEntries = wordIdxEntity.elements(true);