diff --git a/htroot/index.java b/htroot/index.java index c4d3171e5..423d7a01f 100644 --- a/htroot/index.java +++ b/htroot/index.java @@ -51,10 +51,12 @@ import java.net.MalformedURLException; import java.net.URL; import java.util.HashMap; import java.util.TreeSet; + import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.http.httpHeader; import de.anomic.kelondro.kelondroMSetTools; import de.anomic.plasma.plasmaSwitchboard; +import de.anomic.plasma.plasmaSearchQuery; import de.anomic.server.serverByteBuffer; import de.anomic.server.serverCore; import de.anomic.server.serverDate; @@ -73,11 +75,11 @@ public class index { if (!indexDistributeGranted || !indexReceiveGranted) { global = false; } // case if no values are requested + final String referer = (String) header.get("Referer"); if (post == null || env == null) { // save referrer // System.out.println("HEADER=" + header.toString()); - final String referer = (String) header.get("Referer"); if (referer != null) { URL url; try { url = new URL(referer); } catch (MalformedURLException e) { url = null; } @@ -122,7 +124,7 @@ public class index { // process search words final String querystring = (String) post.get("search", ""); if (sb.facilityDB != null) try { sb.facilityDB.update("zeitgeist", querystring, post); } catch (Exception e) {} - final TreeSet query = cleanQuery(querystring); + final TreeSet query = plasmaSearchQuery.cleanQuery(querystring); // filter out stopwords final TreeSet filtered = kelondroMSetTools.joinConstructive(query, plasmaSwitchboard.stopwords); if (filtered.size() > 0) { @@ -147,10 +149,15 @@ public class index { } // do the search - final serverObjects prop = ((plasmaSwitchboard) env).searchFromLocal(query, order1, order2, count, + plasmaSearchQuery thisSearch = new plasmaSearchQuery(query, referer, new String[]{order1, order2}, count, searchtime, urlmask, + ((global) && (yacyonline) && (!(env.getConfig("last-search","").equals(querystring)))) ? plasmaSearchQuery.SEARCHDOM_GLOBALDHT : plasmaSearchQuery.SEARCHDOM_LOCAL, + "", 20); + final serverObjects prop = sb.searchFromLocal(thisSearch); + /* + final serverObjects prop = sb.searchFromLocal(query, order1, order2, count, ((global) && (yacyonline) && (!(env.getConfig("last-search","").equals(querystring)))), searchtime, urlmask); - + */ // remember the last search expression env.setConfig("last-search", querystring); // process result of search @@ -264,23 +271,6 @@ public class index { return prop; } - public static TreeSet cleanQuery(String words) { - // convert Umlaute - words = htmlFilterContentScraper.convertUmlaute(new serverByteBuffer(words.getBytes())).toString(); - // remove funny symbols - final String seps = "' .,:/-&"; - words = words.toLowerCase().trim(); - int c; - for (int i = 0; i < seps.length(); i++) { - if ((c = words.indexOf(seps.charAt(i))) >= 0) { words = words.substring(0, c) + (((c + 1) < words.length()) ? (" " + words.substring(c + 1)) : ""); } - } - - // the string is clean now, but we must generate a set out of it - final String[] a = words.split(" "); - final TreeSet query = new TreeSet(kelondroMSetTools.fastStringComparator); - for (int i = 0; i < a.length; i++) { query.add(a[i]); } - return query; - } } \ No newline at end of file diff --git a/source/de/anomic/plasma/plasmaSearch.java b/source/de/anomic/plasma/plasmaSearch.java index 50070a07a..99e4c6b6e 100644 --- a/source/de/anomic/plasma/plasmaSearch.java +++ b/source/de/anomic/plasma/plasmaSearch.java @@ -58,10 +58,6 @@ import de.anomic.server.serverCodings; import de.anomic.server.logging.serverLog; public final class plasmaSearch { - - public static final char O_QUALITY = 'q'; - public static final char O_AGE = 'a'; - public static final String splitrex = " |/|\\(|\\)|-|\\:|_|\\.|,|\\?|!|'|" + '"'; private final plasmaCrawlLURL urlStore; private final plasmaWordIndex wordIndex; @@ -118,23 +114,9 @@ public final class plasmaSearch { return condenser.getWords().size(); } - - public static Set words2hashes(String[] words) { - HashSet hashes = new HashSet(); - for (int i = 0; i < words.length; i++) hashes.add(plasmaWordIndexEntry.word2hash(words[i])); - return hashes; - } - - public static Set words2hashes(Set words) { - Iterator i = words.iterator(); - HashSet hashes = new HashSet(); - while (i.hasNext()) hashes.add(plasmaWordIndexEntry.word2hash((String) i.next())); - return hashes; - } - public plasmaWordIndexEntity searchWords(Set words, long time) throws IOException { // search for the set of words and return an array of urlEntry elements - return searchHashes(words2hashes(words), time); + return searchHashes(plasmaSearchQuery.words2hashes(words), time); } public plasmaWordIndexEntity searchHashes(Set hashes, long time) throws IOException { @@ -296,23 +278,27 @@ public final class plasmaSearch { return conj; } - public plasmaSearch.result order(plasmaWordIndexEntity searchResult, Set searchhashes, Set stopwords, char[] priority, long maxTime, int minEntries) throws IOException { + public plasmaSearchResult order(plasmaWordIndexEntity searchResult, Set searchhashes, Set stopwords, char[] priority, long maxTime, int minEntries) throws IOException { // we collect the urlhashes from it and construct a List with urlEntry objects // attention: if minEntries is too high, this method will not terminate within the maxTime - plasmaSearch.result acc = new result(searchhashes, stopwords, priority); + plasmaSearchResult acc = new plasmaSearchResult(searchhashes, stopwords, priority); if (searchResult == null) return acc; // strange case where searchResult is not proper: acc is then empty if (searchResult.size() == 0) return acc; // case that we have nothing to do Enumeration e = searchResult.elements(true); plasmaWordIndexEntry entry; long startCreateTime = System.currentTimeMillis(); + plasmaCrawlLURL.Entry page; try { while (e.hasMoreElements()) { if ((acc.sizeFetched() >= minEntries) && (System.currentTimeMillis() - startCreateTime >= maxTime)) break; entry = (plasmaWordIndexEntry) e.nextElement(); - acc.addResult(entry); + // find the url entry + page = urlStore.getEntry(entry.getUrlHash()); + // add a result + acc.addResult(entry, page); } } catch (kelondroException ee) { serverLog.logSevere("PLASMA", "Database Failure during plasmaSearch.order: " + ee.getMessage(), ee); @@ -323,153 +309,4 @@ public final class plasmaSearch { return acc; } - public class result /*implements Enumeration*/ { - - TreeMap pageAcc; // key = order hash; value = plasmaLURL.entry - kelondroMScoreCluster ref; // reference score computation for the commonSense heuristic - Set searchhashes; // hashes that are searched here - Set stopwords; // words that are excluded from the commonSense heuristic - char[] order; // order of heuristics - ArrayList results; // this is a buffer for plasmaWordIndexEntry + plasmaCrawlLURL.entry - objects - - public result(Set searchhashes, Set stopwords, char[] order) { - this.pageAcc = new TreeMap(); - ref = new kelondroMScoreCluster(); - this.searchhashes = searchhashes; - this.stopwords = stopwords; - this.order = order; - this.results = new ArrayList(); - } - - public result cloneSmart() { - // clones only the top structure - result theClone = new result(this.searchhashes, this.stopwords, this.order); - theClone.pageAcc = (TreeMap) this.pageAcc.clone(); - theClone.ref = this.ref; - theClone.results = this.results; - return theClone; - } - - public int sizeOrdered() { - return pageAcc.size(); - } - - public int sizeFetched() { - return results.size(); - } - - public boolean hasMoreElements() { - return pageAcc.size() > 0; - } - - public plasmaCrawlLURL.Entry nextElement() { - Object top = pageAcc.lastKey(); - return (plasmaCrawlLURL.Entry) pageAcc.remove(top); - } - - protected void addResult(plasmaWordIndexEntry indexEntry) { - // this does 3 things: - // 1. simply store indexEntry and page to a cache - // 2. calculate references and store them to cache - // 2. add reference to reference sorting table - - // find the url entry - plasmaCrawlLURL.Entry page = urlStore.getEntry(indexEntry.getUrlHash()); - - // take out relevant information for reference computation - URL url = page.url(); - String descr = page.descr(); - if ((url == null) || (descr == null)) return; - String[] urlcomps = url.toString().split(splitrex); // word components of the url - String[] descrcomps = descr.split(splitrex); // words in the description - - // store everything - Object[] resultVector = new Object[] {indexEntry, page, urlcomps, descrcomps}; - results.add(resultVector); - - // add references - addScoreFiltered(urlcomps); - addScoreFiltered(descrcomps); - } - - protected void sortResults() { - // finally sort the results - - // create a commonSense - set that represents a set of words that is - // treated as 'typical' for this search request - Object[] references = getReferences(16); - Set commonSense = new HashSet(); - for (int i = 0; i < references.length; i++) commonSense.add((String) references[i]); - - Object[] resultVector; - plasmaWordIndexEntry indexEntry; - plasmaCrawlLURL.Entry page; - String[] urlcomps; - String[] descrcomps; - long ranking; - long inc = 4096 * 4096; - String queryhash; - for (int i = 0; i < results.size(); i++) { - // take out values from result array - resultVector = (Object[]) results.get(i); - indexEntry = (plasmaWordIndexEntry) resultVector[0]; - page = (plasmaCrawlLURL.Entry) resultVector[1]; - urlcomps = (String[]) resultVector[2]; - descrcomps = (String[]) resultVector[3]; - - // apply pre-calculated order attributes - ranking = 0; - if (order[0] == O_QUALITY) ranking = 4096 * indexEntry.getQuality(); - else if (order[0] == O_AGE) ranking = 4096 * indexEntry.getVirtualAge(); - if (order[1] == O_QUALITY) ranking += indexEntry.getQuality(); - else if (order[1] == O_AGE) ranking += indexEntry.getVirtualAge(); - - // apply 'common-sense' heuristic using references - for (int j = 0; j < urlcomps.length; j++) if (commonSense.contains(urlcomps[j])) ranking += inc; - for (int j = 0; j < descrcomps.length; j++) if (commonSense.contains(descrcomps[j])) ranking += inc; - - // apply query-in-result matching - Set urlcomph = words2hashes(urlcomps); - Set descrcomph = words2hashes(descrcomps); - Iterator shi = searchhashes.iterator(); - while (shi.hasNext()) { - queryhash = (String) shi.next(); - if (urlcomph.contains(queryhash)) ranking += 10 * inc; - if (descrcomph.contains(queryhash)) ranking += 100 * inc; - } - - // insert value - //System.out.println("Ranking " + ranking + " for URL " + url.toString()); - pageAcc.put(serverCodings.encodeHex(ranking, 16) + indexEntry.getUrlHash(), page); - } - // flush memory - results = null; - } - - public Object[] getReferences(int count) { - // create a list of words that had been computed by statistics over all - // words that appeared in the url or the description of all urls - return ref.getScores(count, false, 2, Integer.MAX_VALUE); - } - - private void addScoreFiltered(String[] words) { - String word; - for (int i = 0; i < words.length; i++) { - word = words[i].toLowerCase(); - if ((word.length() > 2) && - (!(stopwords.contains(word))) && - ("http_html_php_ftp_www_com_org_net_gov_edu_index_home_page_for_usage_the_and_".indexOf(word) < 0) && - (!(searchhashes.contains(plasmaWordIndexEntry.word2hash(word))))) - ref.incScore(word); - } - } - - private void printSplitLog(String x, String[] y) { - String s = ""; - for (int i = 0; i < y.length; i++) s = s + ", " + y[i]; - if (s.length() > 0) s = s.substring(2); - System.out.println("Split '" + x + "' = {" + s + "}"); - } - } - } diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java new file mode 100644 index 000000000..eff1493ee --- /dev/null +++ b/source/de/anomic/plasma/plasmaSearchEvent.java @@ -0,0 +1,52 @@ +// plasmaSearchEvent.java +// ----------------------- +// part of YACY +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2005 +// Created: 10.10.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + + +package de.anomic.plasma; + + +public final class plasmaSearchEvent { + + + public plasmaSearchEvent() { + } + +} diff --git a/source/de/anomic/plasma/plasmaSearchResult.java b/source/de/anomic/plasma/plasmaSearchResult.java new file mode 100644 index 000000000..07d97833a --- /dev/null +++ b/source/de/anomic/plasma/plasmaSearchResult.java @@ -0,0 +1,205 @@ +// plasmaSearchResult.java +// ----------------------- +// part of YACY +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2005 +// Created: 10.10.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + + +package de.anomic.plasma; + +import java.util.TreeMap; +import java.util.Set; +import java.util.HashSet; +import java.util.ArrayList; +import java.util.Iterator; +import java.net.URL; + +import de.anomic.kelondro.kelondroMScoreCluster; +import de.anomic.server.serverCodings; + +public final class plasmaSearchResult { + + public static final char O_QUALITY = 'q'; + public static final char O_AGE = 'a'; + public static final String splitrex = " |/|\\(|\\)|-|\\:|_|\\.|,|\\?|!|'|" + '"'; + + private TreeMap pageAcc; // key = order hash; value = plasmaLURL.entry + private kelondroMScoreCluster ref; // reference score computation for the commonSense heuristic + private Set searchhashes; // hashes that are searched here + private Set stopwords; // words that are excluded from the commonSense heuristic + private char[] order; // order of heuristics + private ArrayList results; // this is a buffer for plasmaWordIndexEntry + plasmaCrawlLURL.entry - objects + + public plasmaSearchResult(Set searchhashes, Set stopwords, char[] order) { + this.pageAcc = new TreeMap(); + ref = new kelondroMScoreCluster(); + this.searchhashes = searchhashes; + this.stopwords = stopwords; + this.order = order; + this.results = new ArrayList(); + } + + public plasmaSearchResult cloneSmart() { + // clones only the top structure + plasmaSearchResult theClone = new plasmaSearchResult(this.searchhashes, this.stopwords, this.order); + theClone.pageAcc = (TreeMap) this.pageAcc.clone(); + theClone.ref = this.ref; + theClone.results = this.results; + return theClone; + } + + public int sizeOrdered() { + return pageAcc.size(); + } + + public int sizeFetched() { + return results.size(); + } + + public boolean hasMoreElements() { + return pageAcc.size() > 0; + } + + public plasmaCrawlLURL.Entry nextElement() { + Object top = pageAcc.lastKey(); + return (plasmaCrawlLURL.Entry) pageAcc.remove(top); + } + + protected void addResult(plasmaWordIndexEntry indexEntry, plasmaCrawlLURL.Entry page) { + // this does 3 things: + // 1. simply store indexEntry and page to a cache + // 2. calculate references and store them to cache + // 2. add reference to reference sorting table + + // take out relevant information for reference computation + URL url = page.url(); + String descr = page.descr(); + if ((url == null) || (descr == null)) return; + String[] urlcomps = url.toString().split(splitrex); // word components of the url + String[] descrcomps = descr.split(splitrex); // words in the description + + // store everything + Object[] resultVector = new Object[] {indexEntry, page, urlcomps, descrcomps}; + results.add(resultVector); + + // add references + addScoreFiltered(urlcomps); + addScoreFiltered(descrcomps); + } + + protected void sortResults() { + // finally sort the results + + // create a commonSense - set that represents a set of words that is + // treated as 'typical' for this search request + Object[] references = getReferences(16); + Set commonSense = new HashSet(); + for (int i = 0; i < references.length; i++) commonSense.add((String) references[i]); + + Object[] resultVector; + plasmaWordIndexEntry indexEntry; + plasmaCrawlLURL.Entry page; + String[] urlcomps; + String[] descrcomps; + long ranking; + long inc = 4096 * 4096; + String queryhash; + for (int i = 0; i < results.size(); i++) { + // take out values from result array + resultVector = (Object[]) results.get(i); + indexEntry = (plasmaWordIndexEntry) resultVector[0]; + page = (plasmaCrawlLURL.Entry) resultVector[1]; + urlcomps = (String[]) resultVector[2]; + descrcomps = (String[]) resultVector[3]; + + // apply pre-calculated order attributes + ranking = 0; + if (order[0] == O_QUALITY) ranking = 4096 * indexEntry.getQuality(); + else if (order[0] == O_AGE) ranking = 4096 * indexEntry.getVirtualAge(); + if (order[1] == O_QUALITY) ranking += indexEntry.getQuality(); + else if (order[1] == O_AGE) ranking += indexEntry.getVirtualAge(); + + // apply 'common-sense' heuristic using references + for (int j = 0; j < urlcomps.length; j++) if (commonSense.contains(urlcomps[j])) ranking += inc; + for (int j = 0; j < descrcomps.length; j++) if (commonSense.contains(descrcomps[j])) ranking += inc; + + // apply query-in-result matching + Set urlcomph = plasmaSearchQuery.words2hashes(urlcomps); + Set descrcomph = plasmaSearchQuery.words2hashes(descrcomps); + Iterator shi = searchhashes.iterator(); + while (shi.hasNext()) { + queryhash = (String) shi.next(); + if (urlcomph.contains(queryhash)) ranking += 10 * inc; + if (descrcomph.contains(queryhash)) ranking += 100 * inc; + } + + // insert value + //System.out.println("Ranking " + ranking + " for URL " + url.toString()); + pageAcc.put(serverCodings.encodeHex(ranking, 16) + indexEntry.getUrlHash(), page); + } + // flush memory + results = null; + } + + public Object[] getReferences(int count) { + // create a list of words that had been computed by statistics over all + // words that appeared in the url or the description of all urls + return ref.getScores(count, false, 2, Integer.MAX_VALUE); + } + + public void addScoreFiltered(String[] words) { + String word; + for (int i = 0; i < words.length; i++) { + word = words[i].toLowerCase(); + if ((word.length() > 2) && + (!(stopwords.contains(word))) && + ("http_html_php_ftp_www_com_org_net_gov_edu_index_home_page_for_usage_the_and_".indexOf(word) < 0) && + (!(searchhashes.contains(plasmaWordIndexEntry.word2hash(word))))) + ref.incScore(word); + } + } + + private void printSplitLog(String x, String[] y) { + String s = ""; + for (int i = 0; i < y.length; i++) s = s + ", " + y[i]; + if (s.length() > 0) s = s.substring(2); + System.out.println("Split '" + x + "' = {" + s + "}"); + } + + +} \ No newline at end of file diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java index 1c0310a34..0c4736152 100644 --- a/source/de/anomic/plasma/plasmaSnippetCache.java +++ b/source/de/anomic/plasma/plasmaSnippetCache.java @@ -374,7 +374,7 @@ public class plasmaSnippetCache { log); } - public void fetch(plasmaSearch.result acc, Set queryhashes, String urlmask, int fetchcount) { + public void fetch(plasmaSearchResult acc, Set queryhashes, String urlmask, int fetchcount) { // fetch snippets int i = 0; plasmaCrawlLURL.Entry urlentry; diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 7deb49303..98b02414d 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -1372,7 +1372,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser log.logFine("presearch: started job"); idx = searchManager.searchHashes(queryhashes, time); log.logFine("presearch: found " + idx.size() + " results"); - plasmaSearch.result acc = searchManager.order(idx, queryhashes, stopwords, order, time, searchcount); + plasmaSearchResult acc = searchManager.order(idx, queryhashes, stopwords, order, time, searchcount); if (acc == null) return; log.logFine("presearch: ordered results, now " + acc.sizeOrdered() + " URLs ready for fetch"); @@ -1387,63 +1387,64 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } } - public serverObjects searchFromLocal(Set querywords, String order1, String order2, int count, boolean global, long time /*milliseconds*/, String urlmask) { + //public serverObjects searchFromLocal(Set querywords, String order1, String order2, int count, boolean global, long time /*milliseconds*/, String urlmask) { + public serverObjects searchFromLocal(plasmaSearchQuery query) { // tell all threads to do nothing for a specific time - wordIndex.intermission(time); - intermissionAllThreads(time); + wordIndex.intermission(query.maximumTime); + intermissionAllThreads(query.maximumTime); serverObjects prop = new serverObjects(); try { char[] order = new char[2]; - if (order1.equals("quality")) order[0] = plasmaSearch.O_QUALITY; else order[0] = plasmaSearch.O_AGE; - if (order2.equals("quality")) order[1] = plasmaSearch.O_QUALITY; else order[1] = plasmaSearch.O_AGE; + if (query.order[0].equals("quality")) order[0] = plasmaSearchResult.O_QUALITY; else order[0] = plasmaSearchResult.O_AGE; + if (query.order[1].equals("quality")) order[1] = plasmaSearchResult.O_QUALITY; else order[1] = plasmaSearchResult.O_AGE; // filter out words that appear in bluelist - Iterator it = querywords.iterator(); + Iterator it = query.queryWords.iterator(); String word, gs = ""; while (it.hasNext()) { word = (String) it.next(); if (blueList.contains(word)) it.remove(); else gs += "+" + word; } if (gs.length() > 0) gs = gs.substring(1); - Set queryhashes = plasmaSearch.words2hashes(querywords); // log - log.logInfo("INIT WORD SEARCH: " + gs + ":" + queryhashes + " - " + count + " links, " + (time / 1000) + " seconds"); + log.logInfo("INIT WORD SEARCH: " + gs + ":" + query.queryHashes + " - " + query.wantedResults + " links, " + (query.maximumTime / 1000) + " seconds"); long timestamp = System.currentTimeMillis(); // start a presearch, which makes only sense if we idle afterwards. // this is especially the case if we start a global search and idle until search - if (global) { - Thread preselect = new presearch(queryhashes, order, time / 10, urlmask, 10, 3); + if (query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) { + Thread preselect = new presearch(query.queryHashes, order, query.maximumTime / 10, query.urlMask, 10, 3); preselect.start(); } // do global fetching int globalresults = 0; - if (global) { - int fetchcount = ((int) time / 1000) * 5; // number of wanted results until break in search - int fetchpeers = ((int) time / 1000) * 2; // number of target peers; means 30 peers in 10 seconds - long fetchtime = time * 6 / 10; // time to waste + if (query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) { + int fetchcount = ((int) (query.maximumTime / 1000L)) * 5; // number of wanted results until break in search + int fetchpeers = ((int) (query.maximumTime / 1000L)) * 2; // number of target peers; means 30 peers in 10 seconds + long fetchtime = query.maximumTime * 6 / 10; // time to waste if (fetchpeers < 10) fetchpeers = 10; - if (fetchcount > count * 10) fetchcount = count * 10; - globalresults = yacySearch.searchHashes(queryhashes, urlPool.loadedURL, searchManager, fetchcount, fetchpeers, urlBlacklist, snippetCache, fetchtime); + if (fetchcount > query.wantedResults * 10) fetchcount = query.wantedResults * 10; + globalresults = yacySearch.searchHashes(query.queryHashes, urlPool.loadedURL, searchManager, fetchcount, fetchpeers, urlBlacklist, snippetCache, fetchtime); log.logFine("SEARCH TIME AFTER GLOBAL-TRIGGER TO " + fetchpeers + " PEERS: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds"); } prop.put("globalresults", globalresults); // the result are written to the local DB // now search locally (the global results should be now in the local db) - long remainingTime = time - (System.currentTimeMillis() - timestamp); - plasmaWordIndexEntity idx = searchManager.searchHashes(queryhashes, remainingTime * 8 / 10); // the search + long remainingTime = query.maximumTime - (System.currentTimeMillis() - timestamp); + plasmaWordIndexEntity idx = searchManager.searchHashes(query.queryHashes, remainingTime * 8 / 10); // the search log.logFine("SEARCH TIME AFTER FINDING " + idx.size() + " ELEMENTS: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds"); - remainingTime = time - (System.currentTimeMillis() - timestamp); + remainingTime = query.maximumTime - (System.currentTimeMillis() - timestamp); if (remainingTime < 500) remainingTime = 500; if (remainingTime > 3000) remainingTime = 3000; - plasmaSearch.result acc = searchManager.order(idx, queryhashes, stopwords, order, remainingTime, 10); - if (!(global)) snippetCache.fetch(acc.cloneSmart(), queryhashes, urlmask, 10); + plasmaSearchResult acc = searchManager.order(idx, query.queryHashes, stopwords, order, remainingTime, 10); + if (query.domType != plasmaSearchQuery.SEARCHDOM_GLOBALDHT) + snippetCache.fetch(acc.cloneSmart(), query.queryHashes, query.urlMask, 10); log.logFine("SEARCH TIME AFTER ORDERING OF SEARCH RESULT: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds"); // result is a List of urlEntry elements: prepare answer @@ -1463,7 +1464,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser yacySeed seed; plasmaSnippetCache.result snippet; //kelondroMScoreCluster ref = new kelondroMScoreCluster(); - while ((acc.hasMoreElements()) && (i < count)) { + while ((acc.hasMoreElements()) && (i < query.wantedResults)) { urlentry = acc.nextElement(); url = urlentry.url(); host = url.getHost(); @@ -1500,8 +1501,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser */ //addScoreForked(ref, gs, descr.split(" ")); //addScoreForked(ref, gs, urlstring.split("/")); - if (urlstring.matches(urlmask)) { //.* is default - snippet = snippetCache.retrieve(url, queryhashes, false, 260); + if (urlstring.matches(query.urlMask)) { //.* is default + snippet = snippetCache.retrieve(url, query.queryHashes, false, 260); if (snippet.source == plasmaSnippetCache.ERROR_NO_MATCH) { // suppress line: there is no match in that resource } else { @@ -1524,7 +1525,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser log.logFine("SEARCH TIME AFTER RESULT PREPARATION: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds"); // calc some more cross-reference - remainingTime = time - (System.currentTimeMillis() - timestamp); + remainingTime = query.maximumTime - (System.currentTimeMillis() - timestamp); if (remainingTime < 0) remainingTime = 1000; /* while ((acc.hasMoreElements()) && (((time + timestamp) < System.currentTimeMillis()))) { @@ -1577,7 +1578,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser plasmaWordIndexEntity idx = searchManager.searchHashes(hashes, duetime * 8 / 10); // a nameless temporary index, not sorted by special order but by hash long remainingTime = duetime - (System.currentTimeMillis() - timestamp); if (remainingTime < 500) remainingTime = 500; - plasmaSearch.result acc = searchManager.order(idx, hashes, stopwords, new char[]{plasmaSearch.O_QUALITY, plasmaSearch.O_AGE}, remainingTime, 10); + plasmaSearchResult acc = searchManager.order(idx, hashes, stopwords, new char[]{plasmaSearchResult.O_QUALITY, plasmaSearchResult.O_AGE}, remainingTime, 10); // result is a List of urlEntry elements if (acc == null) {