diff --git a/htroot/AccessTracker_p.html b/htroot/AccessTracker_p.html
index 772559fde..48aa6870a 100644
--- a/htroot/AccessTracker_p.html
+++ b/htroot/AccessTracker_p.html
@@ -75,6 +75,7 @@
Offset |
Expected Results |
Returned Results |
+ Known Results |
Used Time (ms) |
URL fetch (ms) |
Snippet comp (ms) |
@@ -88,6 +89,7 @@
|
|
Ø #[querycount_avg]# |
+ Ø #[transmitcount_avg]# |
Ø #[resultcount_avg]# |
Ø #[resulttime_avg]# |
Ø #[urltime_avg]# |
@@ -103,6 +105,7 @@
#[date]# |
#[offset]# |
#[querycount]# |
+ #[transmitcount]# |
#[resultcount]# |
#[resulttime]# |
#[urltime]# |
diff --git a/htroot/AccessTracker_p.java b/htroot/AccessTracker_p.java
index 28e093182..e8a522207 100644
--- a/htroot/AccessTracker_p.java
+++ b/htroot/AccessTracker_p.java
@@ -144,6 +144,7 @@ public class AccessTracker_p {
QueryParams query;
long qcountSum = 0;
long rcountSum = 0;
+ long tcountSum = 0;
long rcount = 0;
long utimeSum = 0;
long stimeSum = 0;
@@ -175,6 +176,7 @@ public class AccessTracker_p {
prop.put("page_list_" + m + "_queryhashes", QueryParams.anonymizedQueryHashes(query.queryHashes));
}
prop.putNum("page_list_" + m + "_querycount", query.itemsPerPage);
+ prop.putNum("page_list_" + m + "_transmitcount", query.transmitcount);
prop.putNum("page_list_" + m + "_resultcount", query.resultcount);
prop.putNum("page_list_" + m + "_urltime", query.urlretrievaltime);
prop.putNum("page_list_" + m + "_snippettime", query.snippetcomputationtime);
@@ -182,6 +184,7 @@ public class AccessTracker_p {
prop.putHTML("page_list_" + m + "_userAgent", query.userAgent);
qcountSum += query.itemsPerPage;
rcountSum += query.resultcount;
+ tcountSum += query.transmitcount;
utimeSum += query.urlretrievaltime;
stimeSum += query.snippetcomputationtime;
rtimeSum += query.searchtime;
@@ -203,24 +206,25 @@ public class AccessTracker_p {
// return empty values to not break the table view
prop.put("page_list", 1);
prop.put("page_list_0_dark", 1 );
- prop.put("page_list_0_host", " ");
- prop.put("page_list_0_date", " ");
- prop.put("page_list_0_timestamp", " ");
+ prop.put("page_list_0_host", "");
+ prop.put("page_list_0_date", "");
+ prop.put("page_list_0_timestamp", "");
if (page == 2) {
// local search
prop.putNum("page_list_0_offset", "");
prop.put("page_list_0_querystring", "");
} else {
// remote search
- prop.put("page_list_0_peername", " ");
- prop.put("page_list_0_queryhashes", " ");
+ prop.put("page_list_0_peername", "");
+ prop.put("page_list_0_queryhashes", "");
}
prop.putNum("page_list_0_querycount", "");
+ prop.putNum("page_list_0_transmitcount", "");
prop.putNum("page_list_0_resultcount", "");
prop.putNum("page_list_0_urltime", "");
prop.putNum("page_list_0_snippettime", "");
prop.putNum("page_list_0_resulttime", "");
- prop.put("page_list_0_userAgent", " ");
+ prop.put("page_list_0_userAgent", "");
}
if (rcount == 0) rcount = -1;
prop.putNum("page_querycount_avg", (double) qcountSum / m);
@@ -228,6 +232,7 @@ public class AccessTracker_p {
prop.putNum("page_urltime_avg", (double) utimeSum / m);
prop.putNum("page_snippettime_avg", (double) stimeSum / m);
prop.putNum("page_resulttime_avg", (double) rtimeSum / m);
+ prop.putNum("page_transmitcount_avg", (double) tcountSum / m); // tcountSum is summed over all m listed queries, so average over m like page_querycount_avg (rcount may be -1 here)
prop.putNum("page_resultcount_avg1", (double) rcountSum / rcount);
prop.putNum("page_urltime_avg1", (double) utimeSum1 / rcount);
prop.putNum("page_snippettime_avg1", (double) stimeSum1 / rcount);
@@ -274,12 +279,12 @@ public class AccessTracker_p {
// return empty values to not break the table view if no results can be listed
if (m==0) {
prop.put("page_list", 1);
- prop.put("page_list_0_dates_0_date", " ");
+ prop.put("page_list_0_dates_0_date", "");
prop.put("page_list_0_dates", 1);
prop.putNum("page_list_0_qph", "");
prop.put("page_list_0_dark", 1 );
- prop.put("page_list_0_peername", " ");
- prop.put("page_list_0_host", " ");
+ prop.put("page_list_0_peername", "");
+ prop.put("page_list_0_host", "");
prop.putNum("page_list_0_count", "");
} else {
prop.put("page_list", m);
diff --git a/htroot/AccessTracker_p.xml b/htroot/AccessTracker_p.xml
index 05af4b87e..c5c74d160 100644
--- a/htroot/AccessTracker_p.xml
+++ b/htroot/AccessTracker_p.xml
@@ -25,6 +25,7 @@
#[date]#
#[offset]#
#[querycount]#
+ #[transmitcount]#
#[resultcount]#
#[resulttime]#
#[urltime]#
diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java
index 775f6ace8..99c947dbc 100644
--- a/htroot/yacy/search.java
+++ b/htroot/yacy/search.java
@@ -382,6 +382,7 @@ public final class search {
links.append("resource").append(i).append('=').append(resource).append(serverCore.CRLF_STRING);
}
}
+ theQuery.transmitcount = accu.size(); // accu.size() is already a count (also reported as "linkcount"); no +1 as for the 0-based item index in yacysearchitem
prop.put("links", links.toString());
prop.put("linkcount", accu.size());
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(theQuery.id(true), SearchEvent.Type.RESULTLIST, "", accu.size(), System.currentTimeMillis() - timer), false);
diff --git a/htroot/yacysearch.rss b/htroot/yacysearch.rss
index 78be4eb27..1a4f0132d 100644
--- a/htroot/yacysearch.rss
+++ b/htroot/yacysearch.rss
@@ -22,11 +22,9 @@
#[num-results_itemsPerPage]#
-
#{results}#
#{/results}#
-
\ No newline at end of file
diff --git a/htroot/yacysearch_location.java b/htroot/yacysearch_location.java
index 74d9bbe92..c1d0e2c05 100644
--- a/htroot/yacysearch_location.java
+++ b/htroot/yacysearch_location.java
@@ -93,7 +93,7 @@ public class yacysearch_location {
// get a queue of search results
String rssSearchServiceURL = "http://localhost:" + sb.getConfig("port", "8080") + "/yacysearch.rss";
BlockingQueue results = new LinkedBlockingQueue();
- SearchSRURSS.searchSRURSS(results, rssSearchServiceURL, query, maximumTime, Integer.MAX_VALUE, false, false);
+ SearchSRURSS.searchSRURSS(results, rssSearchServiceURL, query, maximumTime, Integer.MAX_VALUE, false, false, null);
// take the results and compute some locations
RSSMessage message;
diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java
index 5e833b877..231c0811e 100644
--- a/htroot/yacysearchitem.java
+++ b/htroot/yacysearchitem.java
@@ -172,7 +172,7 @@ public class yacysearchitem {
} else {
prop.put("content_code", "");
}
-
+ theQuery.transmitcount = item + 1;
return prop;
}
@@ -203,6 +203,7 @@ public class yacysearchitem {
prop.put("content_item_nl", (item == 0) ? 0 : 1);
prop.put("content_item", 1);
}
+ theQuery.transmitcount = item + 1;
return prop;
}
@@ -232,6 +233,7 @@ public class yacysearchitem {
} else {
prop.put("content_items", "0");
}
+ theQuery.transmitcount = item + 1;
return prop;
}
diff --git a/source/de/anomic/search/QueryParams.java b/source/de/anomic/search/QueryParams.java
index 35b78d413..7c0d5f1e4 100644
--- a/source/de/anomic/search/QueryParams.java
+++ b/source/de/anomic/search/QueryParams.java
@@ -99,6 +99,7 @@ public final class QueryParams {
public final Long time;
// values that are set after a search:
public int resultcount; // number of found results
+ public int transmitcount; // number of results handed out in a response so far; 0 after construction, written by search and yacysearchitem once their result properties are assembled
public long searchtime, urlretrievaltime, snippetcomputationtime; // time to perform the search, to get all the urls, and to compute the snippets
public boolean specialRights; // is true if the user has a special authorization and my use more database-extensive options
public final String userAgent;
@@ -152,6 +153,7 @@ public final class QueryParams {
this.navigators = "all";
this.indexSegment = indexSegment;
this.userAgent = userAgent;
+ this.transmitcount = 0;
}
public QueryParams(
@@ -206,6 +208,7 @@ public final class QueryParams {
this.specialRights = specialRights;
this.indexSegment = indexSegment;
this.userAgent = userAgent;
+ this.transmitcount = 0;
}
public Segment getSegment() {
diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java
index 2c9ba9002..9d68bb4f3 100644
--- a/source/de/anomic/yacy/yacyClient.java
+++ b/source/de/anomic/yacy/yacyClient.java
@@ -372,7 +372,7 @@ public final class yacyClient {
public static RSSFeed search(final yacySeed targetSeed, String query, boolean verify, boolean global, long timeout, int startRecord, int maximumRecords) throws IOException {
String address = (targetSeed == null || targetSeed == Switchboard.getSwitchboard().peers.mySeed()) ? "localhost:" + Switchboard.getSwitchboard().getConfig("port", "8080") : targetSeed.getClusterAddress();
String urlBase = "http://" + address + "/yacysearch.rss";
- return SearchSRURSS.loadSRURSS(urlBase, query, timeout, startRecord, maximumRecords, verify, global);
+ return SearchSRURSS.loadSRURSS(urlBase, query, timeout, startRecord, maximumRecords, verify, global, null);
}
@SuppressWarnings("unchecked")
diff --git a/source/net/yacy/cora/protocol/http/HTTPClient.java b/source/net/yacy/cora/protocol/http/HTTPClient.java
index 7d3864cd6..f18112b94 100644
--- a/source/net/yacy/cora/protocol/http/HTTPClient.java
+++ b/source/net/yacy/cora/protocol/http/HTTPClient.java
@@ -114,7 +114,7 @@ public class HTTPClient {
HttpProtocolParams.setUserAgent(httpClient.getParams(), defaultAgent);
}
- private static HttpClient initConnectionManager() {
+ public static HttpClient initConnectionManager() {
// Create and initialize HTTP parameters
final HttpParams httpParams = new BasicHttpParams();
/**
diff --git a/source/net/yacy/cora/services/SearchHub.java b/source/net/yacy/cora/services/SearchHub.java
index fd3e27511..ed08073cf 100644
--- a/source/net/yacy/cora/services/SearchHub.java
+++ b/source/net/yacy/cora/services/SearchHub.java
@@ -33,10 +33,13 @@ import net.yacy.cora.storage.ScoreMap;
public class SearchHub {
private static final String[] SRURSSServicesList = {
+ //"http://192.168.1.51:8000/yacysearch.rss"//,
+ "http://localhost:8008/yacysearch.rss"//,
+ /*
"http://yacy.dyndns.org:8000/yacysearch.rss",
"http://yacy.caloulinux.net:8085/yacysearch.rss",
"http://algire.dyndns.org:8085/yacysearch.rss",
- "http://breyvogel.dyndns.org:8002/yacysearch.rss"
+ "http://breyvogel.dyndns.org:8002/yacysearch.rss"*/
};
public final static SearchHub EMPTY = new SearchHub("", 0);
@@ -136,20 +139,23 @@ public class SearchHub {
* @param verify
* @param global
*/
- public static void addSRURSSServices(SearchHub search, String[] rssServices, int count, boolean verify, boolean global) {
+ public static void addSRURSSServices(SearchHub search, String[] rssServices, int count, boolean verify, boolean global, String userAgent) {
for (String service: rssServices) {
- SearchSRURSS accumulator = new SearchSRURSS(search, service, count, verify, global);
+ SearchSRURSS accumulator = new SearchSRURSS(search, service, count, verify, global, userAgent);
accumulator.start();
search.addAccumulator(accumulator);
}
}
public static void main(String[] args) {
+ HTTPClient.setDefaultUserAgent("searchhub");
+ HTTPClient.initConnectionManager();
+
StringBuilder sb = new StringBuilder();
for (String s: args) sb.append(s).append(' ');
String query = sb.toString().trim();
SearchHub search = new SearchHub(query, 10000);
- addSRURSSServices(search, SRURSSServicesList, 100, false, false);
+ addSRURSSServices(search, SRURSSServicesList, 100, false, false, "searchhub");
try {Thread.sleep(100);} catch (InterruptedException e1) {}
search.waitTermination();
ScoreMap result = search.getResults();
@@ -159,6 +165,6 @@ public class SearchHub {
u = i.next();
System.out.println("[" + result.get(u) + "] " + u);
}
- try {HTTPClient.closeConnectionManager();} catch (InterruptedException e) {}
+ try {HTTPClient.closeConnectionManager();} catch (InterruptedException e) { e.printStackTrace(); }
}
}
diff --git a/source/net/yacy/cora/services/SearchSRURSS.java b/source/net/yacy/cora/services/SearchSRURSS.java
index a208468e1..5da00952e 100644
--- a/source/net/yacy/cora/services/SearchSRURSS.java
+++ b/source/net/yacy/cora/services/SearchSRURSS.java
@@ -50,6 +50,7 @@ public class SearchSRURSS extends Thread implements SearchAccumulator {
final boolean verify;
final boolean global;
final Map> result;
+ final String userAgent;
private final BlockingQueue results;
@@ -60,7 +61,8 @@ public class SearchSRURSS extends Thread implements SearchAccumulator {
final String urlBase,
final int maximumRecordsInit,
final boolean verify,
- final boolean global) {
+ final boolean global,
+ final String userAgent) {
this.results = new LinkedBlockingQueue();
this.result = result;
this.query = query;
@@ -69,6 +71,7 @@ public class SearchSRURSS extends Thread implements SearchAccumulator {
this.maximumRecordsInit = maximumRecordsInit;
this.verify = verify;
this.global = global;
+ this.userAgent = userAgent;
}
public SearchSRURSS(
@@ -76,7 +79,8 @@ public class SearchSRURSS extends Thread implements SearchAccumulator {
final String urlBase,
final int maximumRecordsInit,
final boolean verify,
- final boolean global) {
+ final boolean global,
+ final String userAgent) {
this.results = new LinkedBlockingQueue();
this.result = search.getAccumulation();
this.query = search.getQuery();
@@ -85,10 +89,11 @@ public class SearchSRURSS extends Thread implements SearchAccumulator {
this.maximumRecordsInit = maximumRecordsInit;
this.verify = verify;
this.global = global;
+ this.userAgent = userAgent;
}
public void run() {
- searchSRURSS(results, urlBase, query, timeoutInit, maximumRecordsInit, verify, global);
+ searchSRURSS(results, urlBase, query, timeoutInit, maximumRecordsInit, verify, global, userAgent);
int p = 1;
RSSMessage message;
try {
@@ -111,7 +116,8 @@ public class SearchSRURSS extends Thread implements SearchAccumulator {
final long timeoutInit,
final int maximumRecordsInit,
final boolean verify,
- final boolean global) {
+ final boolean global,
+ final String userAgent) {
Thread job = new Thread() {
public void run() {
int startRecord = 0;
@@ -122,8 +128,9 @@ public class SearchSRURSS extends Thread implements SearchAccumulator {
long st = System.currentTimeMillis();
RSSFeed feed;
try {
- feed = loadSRURSS(urlBase, query, timeout, startRecord, recordsPerSession, verify, global);
+ feed = loadSRURSS(urlBase, query, timeout, startRecord, recordsPerSession, verify, global, userAgent);
} catch (IOException e1) {
+ e1.printStackTrace();
break mainloop;
}
if (feed == null || feed.isEmpty()) break mainloop;
@@ -134,13 +141,14 @@ public class SearchSRURSS extends Thread implements SearchAccumulator {
try {
queue.put(message);
} catch (InterruptedException e) {
+ e.printStackTrace();
break innerloop;
}
}
startRecord += recordsPerSession;
timeout -= System.currentTimeMillis() - st;
}
- try { queue.put(RSSMessage.POISON); } catch (InterruptedException e) {}
+ try { queue.put(RSSMessage.POISON); } catch (InterruptedException e) { e.printStackTrace(); }
}
};
job.start();
@@ -165,7 +173,8 @@ public class SearchSRURSS extends Thread implements SearchAccumulator {
int startRecord,
int maximumRecords,
boolean verify,
- boolean global) throws IOException {
+ boolean global,
+ String userAgent) throws IOException {
MultiProtocolURI uri = null;
try {
uri = new MultiProtocolURI(rssSearchServiceURL);
@@ -181,8 +190,9 @@ public class SearchSRURSS extends Thread implements SearchAccumulator {
parts.put("maximumRecords", new StringBody(Long.toString(maximumRecords)));
parts.put("verify", new StringBody(verify ? "true" : "false"));
parts.put("resource", new StringBody(global ? "global" : "local"));
- final byte[] result = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI(rssSearchServiceURL), (int) timeout, uri.getHost(), parts);
+ parts.put("nav", new StringBody("none"));
+ final byte[] result = HTTPConnector.getConnector(userAgent == null ? MultiProtocolURI.yacybotUserAgent : userAgent).post(new MultiProtocolURI(rssSearchServiceURL), (int) timeout, uri.getHost(), parts);
//String debug = new String(result); System.out.println("*** DEBUG: " + debug);
final RSSReader reader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result);
if (reader == null) {
throw new IOException("cora.Search failed asking peer '" + uri.getHost() + "': probably bad response from remote peer (1), reader == null");
diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java
index 1271e9f78..c42f6d7ea 100644
--- a/source/net/yacy/repository/LoaderDispatcher.java
+++ b/source/net/yacy/repository/LoaderDispatcher.java
@@ -265,7 +265,7 @@ public final class LoaderDispatcher {
if (protocol.equals("ftp")) response = ftpLoader.load(request, true);
if (protocol.equals("smb")) response = smbLoader.load(request, true);
if (protocol.equals("file")) response = fileLoader.load(request, true);
- if (response != null) {
+ if (response != null && response.getContent() != null) {
// we got something. Now check if we want to store that to the cache
// first check looks if we want to store the content to the cache
if (!crawlProfile.storeHTCache()) {