From 6c1b14c8e1e9013ad74a1a06e16d6881cc135e2b Mon Sep 17 00:00:00 2001 From: orbiter Date: Tue, 11 Jan 2011 22:58:14 +0000 Subject: [PATCH] - more control in access tracker: count number of returned search results (not only info how much is in the index) - extended query params for this - enhanced cora git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7430 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/AccessTracker_p.html | 3 ++ htroot/AccessTracker_p.java | 23 +++++++++------ htroot/AccessTracker_p.xml | 1 + htroot/yacy/search.java | 1 + htroot/yacysearch.rss | 2 -- htroot/yacysearch_location.java | 2 +- htroot/yacysearchitem.java | 4 ++- source/de/anomic/search/QueryParams.java | 3 ++ source/de/anomic/yacy/yacyClient.java | 2 +- .../yacy/cora/protocol/http/HTTPClient.java | 2 +- source/net/yacy/cora/services/SearchHub.java | 16 +++++++---- .../net/yacy/cora/services/SearchSRURSS.java | 28 +++++++++++++------ .../net/yacy/repository/LoaderDispatcher.java | 2 +- 13 files changed, 59 insertions(+), 30 deletions(-) diff --git a/htroot/AccessTracker_p.html b/htroot/AccessTracker_p.html index 772559fde..48aa6870a 100644 --- a/htroot/AccessTracker_p.html +++ b/htroot/AccessTracker_p.html @@ -75,6 +75,7 @@ Offset Expected Results Returned Results + Known Results Used Time (ms) URL fetch (ms) Snippet comp (ms) @@ -88,6 +89,7 @@ Ø #[querycount_avg]# + Ø #[transmitcount_avg]# Ø #[resultcount_avg]# Ø #[resulttime_avg]# Ø #[urltime_avg]# @@ -103,6 +105,7 @@ #[date]# #[offset]# #[querycount]# + #[transmitcount]# #[resultcount]# #[resulttime]# #[urltime]# diff --git a/htroot/AccessTracker_p.java b/htroot/AccessTracker_p.java index 28e093182..e8a522207 100644 --- a/htroot/AccessTracker_p.java +++ b/htroot/AccessTracker_p.java @@ -144,6 +144,7 @@ public class AccessTracker_p { QueryParams query; long qcountSum = 0; long rcountSum = 0; + long tcountSum = 0; long rcount = 0; long utimeSum = 0; long stimeSum = 0; @@ -175,6 +176,7 @@ public class AccessTracker_p { prop.put("page_list_" + m + "_queryhashes", QueryParams.anonymizedQueryHashes(query.queryHashes)); } prop.putNum("page_list_" + m + "_querycount", query.itemsPerPage); + prop.putNum("page_list_" + m + "_transmitcount", query.transmitcount); prop.putNum("page_list_" + m + "_resultcount", query.resultcount); prop.putNum("page_list_" + m + "_urltime", query.urlretrievaltime); prop.putNum("page_list_" + m + "_snippettime", query.snippetcomputationtime); @@ -182,6 +184,7 @@ public class AccessTracker_p { prop.putHTML("page_list_" + m + "_userAgent", query.userAgent); qcountSum += query.itemsPerPage; rcountSum += query.resultcount; + tcountSum += query.transmitcount; utimeSum += query.urlretrievaltime; stimeSum += query.snippetcomputationtime; rtimeSum += query.searchtime; @@ -203,24 +206,25 @@ public class AccessTracker_p { // return empty values to not break the table view prop.put("page_list", 1); prop.put("page_list_0_dark", 1 ); - prop.put("page_list_0_host", " "); - prop.put("page_list_0_date", " "); - prop.put("page_list_0_timestamp", " "); + prop.put("page_list_0_host", ""); + prop.put("page_list_0_date", ""); + prop.put("page_list_0_timestamp", ""); if (page == 2) { // local search prop.putNum("page_list_0_offset", ""); prop.put("page_list_0_querystring", ""); } else { // remote search - prop.put("page_list_0_peername", " "); - prop.put("page_list_0_queryhashes", " "); + prop.put("page_list_0_peername", ""); + prop.put("page_list_0_queryhashes", ""); } prop.putNum("page_list_0_querycount", ""); + prop.putNum("page_list_0_transmitcount", ""); prop.putNum("page_list_0_resultcount", ""); prop.putNum("page_list_0_urltime", ""); prop.putNum("page_list_0_snippettime", ""); prop.putNum("page_list_0_resulttime", ""); - prop.put("page_list_0_userAgent", " "); + prop.put("page_list_0_userAgent", ""); } if (rcount == 0) rcount = -1; prop.putNum("page_querycount_avg", (double) qcountSum / m); @@ -228,6 +232,7 @@ public class AccessTracker_p { prop.putNum("page_urltime_avg", (double) utimeSum / m); prop.putNum("page_snippettime_avg", (double) stimeSum / m); prop.putNum("page_resulttime_avg", (double) rtimeSum / m); + prop.putNum("page_transmitcount_avg", (double) tcountSum / rcount); prop.putNum("page_resultcount_avg1", (double) rcountSum / rcount); prop.putNum("page_urltime_avg1", (double) utimeSum1 / rcount); prop.putNum("page_snippettime_avg1", (double) stimeSum1 / rcount); @@ -274,12 +279,12 @@ public class AccessTracker_p { // return empty values to not break the table view if no results can be listed if (m==0) { prop.put("page_list", 1); - prop.put("page_list_0_dates_0_date", " "); + prop.put("page_list_0_dates_0_date", ""); prop.put("page_list_0_dates", 1); prop.putNum("page_list_0_qph", ""); prop.put("page_list_0_dark", 1 ); - prop.put("page_list_0_peername", " "); - prop.put("page_list_0_host", " "); + prop.put("page_list_0_peername", ""); + prop.put("page_list_0_host", ""); prop.putNum("page_list_0_count", ""); } else { prop.put("page_list", m); diff --git a/htroot/AccessTracker_p.xml b/htroot/AccessTracker_p.xml index 05af4b87e..c5c74d160 100644 --- a/htroot/AccessTracker_p.xml +++ b/htroot/AccessTracker_p.xml @@ -25,6 +25,7 @@ #[date]# #[offset]# #[querycount]# + #[transmitcount]# #[resultcount]# #[resulttime]# #[urltime]# diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index 775f6ace8..99c947dbc 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -382,6 +382,7 @@ public final class search { links.append("resource").append(i).append('=').append(resource).append(serverCore.CRLF_STRING); } } + theQuery.transmitcount = accu.size() + 1; prop.put("links", links.toString()); prop.put("linkcount", accu.size()); EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(theQuery.id(true), SearchEvent.Type.RESULTLIST, "", accu.size(), System.currentTimeMillis() - timer), false); diff --git a/htroot/yacysearch.rss b/htroot/yacysearch.rss index 78be4eb27..1a4f0132d 100644 --- a/htroot/yacysearch.rss +++ b/htroot/yacysearch.rss @@ -22,11 +22,9 @@ #[num-results_itemsPerPage]# - #{results}# #{/results}# - \ No newline at end of file diff --git a/htroot/yacysearch_location.java b/htroot/yacysearch_location.java index 74d9bbe92..c1d0e2c05 100644 --- a/htroot/yacysearch_location.java +++ b/htroot/yacysearch_location.java @@ -93,7 +93,7 @@ public class yacysearch_location { // get a queue of search results String rssSearchServiceURL = "http://localhost:" + sb.getConfig("port", "8080") + "/yacysearch.rss"; BlockingQueue results = new LinkedBlockingQueue(); - SearchSRURSS.searchSRURSS(results, rssSearchServiceURL, query, maximumTime, Integer.MAX_VALUE, false, false); + SearchSRURSS.searchSRURSS(results, rssSearchServiceURL, query, maximumTime, Integer.MAX_VALUE, false, false, null); // take the results and compute some locations RSSMessage message; diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java index 5e833b877..231c0811e 100644 --- a/htroot/yacysearchitem.java +++ b/htroot/yacysearchitem.java @@ -172,7 +172,7 @@ public class yacysearchitem { } else { prop.put("content_code", ""); } - + theQuery.transmitcount = item + 1; return prop; } @@ -203,6 +203,7 @@ public class yacysearchitem { prop.put("content_item_nl", (item == 0) ? 0 : 1); prop.put("content_item", 1); } + theQuery.transmitcount = item + 1; return prop; } @@ -232,6 +233,7 @@ public class yacysearchitem { } else { prop.put("content_items", "0"); } + theQuery.transmitcount = item + 1; return prop; } diff --git a/source/de/anomic/search/QueryParams.java b/source/de/anomic/search/QueryParams.java index 35b78d413..7c0d5f1e4 100644 --- a/source/de/anomic/search/QueryParams.java +++ b/source/de/anomic/search/QueryParams.java @@ -99,6 +99,7 @@ public final class QueryParams { public final Long time; // values that are set after a search: public int resultcount; // number of found results + public int transmitcount; // number of results that had been shown to the user public long searchtime, urlretrievaltime, snippetcomputationtime; // time to perform the search, to get all the urls, and to compute the snippets public boolean specialRights; // is true if the user has a special authorization and my use more database-extensive options public final String userAgent; @@ -152,6 +153,7 @@ public final class QueryParams { this.navigators = "all"; this.indexSegment = indexSegment; this.userAgent = userAgent; + this.transmitcount = 0; } public QueryParams( @@ -206,6 +208,7 @@ public final class QueryParams { this.specialRights = specialRights; this.indexSegment = indexSegment; this.userAgent = userAgent; + this.transmitcount = 0; } public Segment getSegment() { diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index 2c9ba9002..9d68bb4f3 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -372,7 +372,7 @@ public final class yacyClient { public static RSSFeed search(final yacySeed targetSeed, String query, boolean verify, boolean global, long timeout, int startRecord, int maximumRecords) throws IOException { String address = (targetSeed == null || targetSeed == Switchboard.getSwitchboard().peers.mySeed()) ? "localhost:" + Switchboard.getSwitchboard().getConfig("port", "8080") : targetSeed.getClusterAddress(); String urlBase = "http://" + address + "/yacysearch.rss"; - return SearchSRURSS.loadSRURSS(urlBase, query, timeout, startRecord, maximumRecords, verify, global); + return SearchSRURSS.loadSRURSS(urlBase, query, timeout, startRecord, maximumRecords, verify, global, null); } @SuppressWarnings("unchecked") diff --git a/source/net/yacy/cora/protocol/http/HTTPClient.java b/source/net/yacy/cora/protocol/http/HTTPClient.java index 7d3864cd6..f18112b94 100644 --- a/source/net/yacy/cora/protocol/http/HTTPClient.java +++ b/source/net/yacy/cora/protocol/http/HTTPClient.java @@ -114,7 +114,7 @@ public class HTTPClient { HttpProtocolParams.setUserAgent(httpClient.getParams(), defaultAgent); } - private static HttpClient initConnectionManager() { + public static HttpClient initConnectionManager() { // Create and initialize HTTP parameters final HttpParams httpParams = new BasicHttpParams(); /** diff --git a/source/net/yacy/cora/services/SearchHub.java b/source/net/yacy/cora/services/SearchHub.java index fd3e27511..ed08073cf 100644 --- a/source/net/yacy/cora/services/SearchHub.java +++ b/source/net/yacy/cora/services/SearchHub.java @@ -33,10 +33,13 @@ import net.yacy.cora.storage.ScoreMap; public class SearchHub { private static final String[] SRURSSServicesList = { + //"http://192.168.1.51:8000/yacysearch.rss"//, + "http://localhost:8008/yacysearch.rss"//, + /* "http://yacy.dyndns.org:8000/yacysearch.rss", "http://yacy.caloulinux.net:8085/yacysearch.rss", "http://algire.dyndns.org:8085/yacysearch.rss", - "http://breyvogel.dyndns.org:8002/yacysearch.rss" + "http://breyvogel.dyndns.org:8002/yacysearch.rss"*/ }; public final static SearchHub EMPTY = new SearchHub("", 0); @@ -136,20 +139,23 @@ public class SearchHub { * @param verify * @param global */ - public static void addSRURSSServices(SearchHub search, String[] rssServices, int count, boolean verify, boolean global) { + public static void addSRURSSServices(SearchHub search, String[] rssServices, int count, boolean verify, boolean global, String userAgent) { for (String service: rssServices) { - SearchSRURSS accumulator = new SearchSRURSS(search, service, count, verify, global); + SearchSRURSS accumulator = new SearchSRURSS(search, service, count, verify, global, userAgent); accumulator.start(); search.addAccumulator(accumulator); } } public static void main(String[] args) { + HTTPClient.setDefaultUserAgent("searchhub"); + HTTPClient.initConnectionManager(); + StringBuilder sb = new StringBuilder(); for (String s: args) sb.append(s).append(' '); String query = sb.toString().trim(); SearchHub search = new SearchHub(query, 10000); - addSRURSSServices(search, SRURSSServicesList, 100, false, false); + addSRURSSServices(search, SRURSSServicesList, 100, false, false, "searchhub"); try {Thread.sleep(100);} catch (InterruptedException e1) {} search.waitTermination(); ScoreMap result = search.getResults(); @@ -159,6 +165,6 @@ public class SearchHub { u = i.next(); System.out.println("[" + result.get(u) + "] " + u); } - try {HTTPClient.closeConnectionManager();} catch (InterruptedException e) {} + try {HTTPClient.closeConnectionManager();} catch (InterruptedException e) { e.printStackTrace(); } } } diff --git a/source/net/yacy/cora/services/SearchSRURSS.java b/source/net/yacy/cora/services/SearchSRURSS.java index a208468e1..5da00952e 100644 --- a/source/net/yacy/cora/services/SearchSRURSS.java +++ b/source/net/yacy/cora/services/SearchSRURSS.java @@ -50,6 +50,7 @@ public class SearchSRURSS extends Thread implements SearchAccumulator { final boolean verify; final boolean global; final Map> result; + final String userAgent; private final BlockingQueue results; @@ -60,7 +61,8 @@ public class SearchSRURSS extends Thread implements SearchAccumulator { final String urlBase, final int maximumRecordsInit, final boolean verify, - final boolean global) { + final boolean global, + final String userAgent) { this.results = new LinkedBlockingQueue(); this.result = result; this.query = query; @@ -69,6 +71,7 @@ public class SearchSRURSS extends Thread implements SearchAccumulator { this.maximumRecordsInit = maximumRecordsInit; this.verify = verify; this.global = global; + this.userAgent = userAgent; } public SearchSRURSS( @@ -76,7 +79,8 @@ public class SearchSRURSS extends Thread implements SearchAccumulator { final String urlBase, final int maximumRecordsInit, final boolean verify, - final boolean global) { + final boolean global, + final String userAgent) { this.results = new LinkedBlockingQueue(); this.result = search.getAccumulation(); this.query = search.getQuery(); @@ -85,10 +89,11 @@ public class SearchSRURSS extends Thread implements SearchAccumulator { this.maximumRecordsInit = maximumRecordsInit; this.verify = verify; this.global = global; + this.userAgent = userAgent; } public void run() { - searchSRURSS(results, urlBase, query, timeoutInit, maximumRecordsInit, verify, global); + searchSRURSS(results, urlBase, query, timeoutInit, maximumRecordsInit, verify, global, userAgent); int p = 1; RSSMessage message; try { @@ -111,7 +116,8 @@ public class SearchSRURSS extends Thread implements SearchAccumulator { final long timeoutInit, final int maximumRecordsInit, final boolean verify, - final boolean global) { + final boolean global, + final String userAgent) { Thread job = new Thread() { public void run() { int startRecord = 0; @@ -122,8 +128,9 @@ public class SearchSRURSS extends Thread implements SearchAccumulator { long st = System.currentTimeMillis(); RSSFeed feed; try { - feed = loadSRURSS(urlBase, query, timeout, startRecord, recordsPerSession, verify, global); + feed = loadSRURSS(urlBase, query, timeout, startRecord, recordsPerSession, verify, global, userAgent); } catch (IOException e1) { + e1.printStackTrace(); break mainloop; } if (feed == null || feed.isEmpty()) break mainloop; @@ -134,13 +141,14 @@ public class SearchSRURSS extends Thread implements SearchAccumulator { try { queue.put(message); } catch (InterruptedException e) { + e.printStackTrace(); break innerloop; } } startRecord += recordsPerSession; timeout -= System.currentTimeMillis() - st; } - try { queue.put(RSSMessage.POISON); } catch (InterruptedException e) {} + try { queue.put(RSSMessage.POISON); } catch (InterruptedException e) { e.printStackTrace(); } } }; job.start(); @@ -165,7 +173,8 @@ public class SearchSRURSS extends Thread implements SearchAccumulator { int startRecord, int maximumRecords, boolean verify, - boolean global) throws IOException { + boolean global, + String userAgent) throws IOException { MultiProtocolURI uri = null; try { uri = new MultiProtocolURI(rssSearchServiceURL); @@ -181,8 +190,9 @@ public class SearchSRURSS extends Thread implements SearchAccumulator { parts.put("maximumRecords", new StringBody(Long.toString(maximumRecords))); parts.put("verify", new StringBody(verify ? "true" : "false")); parts.put("resource", new StringBody(global ? "global" : "local")); - final byte[] result = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI(rssSearchServiceURL), (int) timeout, uri.getHost(), parts); - //String debug = new String(result); System.out.println("*** DEBUG: " + debug); + parts.put("nav", new StringBody("none")); + final byte[] result = HTTPConnector.getConnector(userAgent == null ? MultiProtocolURI.yacybotUserAgent : userAgent).post(new MultiProtocolURI(rssSearchServiceURL), (int) timeout, uri.getHost(), parts); + String debug = new String(result); System.out.println("*** DEBUG: " + debug); final RSSReader reader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result); if (reader == null) { throw new IOException("cora.Search failed asking peer '" + uri.getHost() + "': probably bad response from remote peer (1), reader == null"); diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java index 1271e9f78..c42f6d7ea 100644 --- a/source/net/yacy/repository/LoaderDispatcher.java +++ b/source/net/yacy/repository/LoaderDispatcher.java @@ -265,7 +265,7 @@ public final class LoaderDispatcher { if (protocol.equals("ftp")) response = ftpLoader.load(request, true); if (protocol.equals("smb")) response = smbLoader.load(request, true); if (protocol.equals("file")) response = fileLoader.load(request, true); - if (response != null) { + if (response != null && response.getContent() != null) { // we got something. Now check if we want to store that to the cache // first check looks if we want to store the content to the cache if (!crawlProfile.storeHTCache()) {