|
|
|
@ -1,4 +1,4 @@
|
|
|
|
|
// yacyClient.java
|
|
|
|
|
// Protocol.java
|
|
|
|
|
// -------------------------------------
|
|
|
|
|
// (C) by Michael Peter Christen; mc@yacy.net
|
|
|
|
|
// first published on http://www.anomic.de
|
|
|
|
@ -405,7 +405,8 @@ public final class Protocol {
|
|
|
|
|
final SeedDB seedDB,
|
|
|
|
|
final Seed target,
|
|
|
|
|
final int maxCount,
|
|
|
|
|
final long maxTime) {
|
|
|
|
|
final long maxTime,
|
|
|
|
|
final boolean preferHttps) {
|
|
|
|
|
// returns a list of
|
|
|
|
|
if ( target == null ) {
|
|
|
|
|
return null;
|
|
|
|
@ -430,15 +431,37 @@ public final class Protocol {
|
|
|
|
|
// final byte[] result = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/urls.xml"), (int) maxTime, target.getHexHash() + ".yacyh", parts);
|
|
|
|
|
final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, (int) maxTime);
|
|
|
|
|
RSSReader reader = null;
|
|
|
|
|
for (String ip: target.getIPs()) {
|
|
|
|
|
for (final String ip: target.getIPs()) {
|
|
|
|
|
MultiProtocolURL targetBaseURL = null;
|
|
|
|
|
try {
|
|
|
|
|
final byte[] result = httpClient.POSTbytes(new MultiProtocolURL("http://" + target.getPublicAddress(ip) + "/yacy/urls.xml"), target.getHexHash() + ".yacyh", parts, false, true);
|
|
|
|
|
targetBaseURL = target.getPublicMultiprotocolURL(ip, preferHttps);
|
|
|
|
|
byte[] result;
|
|
|
|
|
try {
|
|
|
|
|
result = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/urls.xml"), target.getHexHash() + ".yacyh", parts, false, true);
|
|
|
|
|
} catch(final IOException e) {
|
|
|
|
|
if(targetBaseURL.isHTTPS()) {
|
|
|
|
|
/* Failed with https : retry with http */
|
|
|
|
|
targetBaseURL = target.getPublicMultiprotocolURL(ip, false);
|
|
|
|
|
result = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/urls.xml"), target.getHexHash() + ".yacyh", parts, false, true);
|
|
|
|
|
if(result != null) {
|
|
|
|
|
/* Got something with http : mark peer SSL as unavailable on target peer */
|
|
|
|
|
markSSLUnavailableOnPeer(seedDB, target, ip, "yacyClient.queryRemoteCrawlURLs");
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
throw e;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
reader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result);
|
|
|
|
|
} catch(MalformedURLException e) {
|
|
|
|
|
Network.log.warn("yacyClient.queryRemoteCrawlURLs malformed target URL for peer '" + target.getName()
|
|
|
|
|
+ "' on address : " + ip);
|
|
|
|
|
} catch (final IOException e ) {
|
|
|
|
|
reader = null;
|
|
|
|
|
Network.log.warn("yacyClient.queryRemoteCrawlURLs failed asking peer '" + target.getName() + "': probably bad response from remote peer (1), reader == null");
|
|
|
|
|
}
|
|
|
|
|
if (reader != null) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (reader != null) break;
|
|
|
|
|
Network.log.warn("yacyClient.queryRemoteCrawlURLs failed asking peer '" + target.getName() + "': probably bad response from remote peer (1), reader == null");
|
|
|
|
|
target.put(Seed.RCOUNT, "0");
|
|
|
|
|
seedDB.peerActions.interfaceDeparture(target, ip);
|
|
|
|
|
}
|
|
|
|
@ -1218,13 +1241,13 @@ public final class Protocol {
|
|
|
|
|
/* Thread still running : try also with interrupt*/
|
|
|
|
|
remoteRequest.interrupt();
|
|
|
|
|
}
|
|
|
|
|
Network.log.info("SEARCH failed (solr), remote Peer: " + target.getName() + "/" + target.getPublicAddress(ip) + " does not answer (time-out)");
|
|
|
|
|
Network.log.info("SEARCH failed (solr), remote Peer: " + target.getName() + "/" + targetBaseURL + " does not answer (time-out)");
|
|
|
|
|
target.setFlagSolrAvailable(false || myseed);
|
|
|
|
|
return -1; // give up, leave remoteRequest abandoned.
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (rsp[0] == null || docList[0] == null) {
|
|
|
|
|
Network.log.info("SEARCH failed (solr), remote Peer: " + target.getName() + "/" + target.getPublicAddress(ip) + " returned null");
|
|
|
|
|
Network.log.info("SEARCH failed (solr), remote Peer: " + target.getName() + "/" + targetBaseURL + " returned null");
|
|
|
|
|
if(!myseed) {
|
|
|
|
|
if(targetBaseURL.startsWith("https")) {
|
|
|
|
|
/* First mark https unavailable on this peer before removing anything else */
|
|
|
|
@ -1515,6 +1538,7 @@ public final class Protocol {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static Map<String, String> crawlReceipt(
|
|
|
|
|
final Switchboard sb,
|
|
|
|
|
final Seed mySeed,
|
|
|
|
|
final Seed target,
|
|
|
|
|
final String process,
|
|
|
|
@ -1548,49 +1572,60 @@ public final class Protocol {
|
|
|
|
|
|
|
|
|
|
// prepare request
|
|
|
|
|
final String salt = crypt.randomSalt();
|
|
|
|
|
|
|
|
|
|
final boolean preferHttps = sb.getConfigBool(SwitchboardConstants.NETWORK_PROTOCOL_HTTPS_PREFERRED,
|
|
|
|
|
SwitchboardConstants.NETWORK_PROTOCOL_HTTPS_PREFERRED_DEFAULT);
|
|
|
|
|
|
|
|
|
|
// determining target address
|
|
|
|
|
final String address = target.getPublicAddress(target.getIP());
|
|
|
|
|
if ( address == null ) {
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// send request
|
|
|
|
|
try {
|
|
|
|
|
// prepare request
|
|
|
|
|
final Map<String, ContentBody> parts = basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
|
|
|
|
|
parts.put("process", UTF8.StringBody(process));
|
|
|
|
|
parts.put("urlhash", UTF8.StringBody(((entry == null) ? "" : ASCII.String(entry.hash()))));
|
|
|
|
|
parts.put("result", UTF8.StringBody(result));
|
|
|
|
|
parts.put("reason", UTF8.StringBody(reason));
|
|
|
|
|
parts.put("wordh", UTF8.StringBody(wordhashes));
|
|
|
|
|
final String lurlstr;
|
|
|
|
|
if (entry == null) {
|
|
|
|
|
lurlstr = "";
|
|
|
|
|
} else {
|
|
|
|
|
final ArrayList<String> ldesc = entry.getDescription();
|
|
|
|
|
if (ldesc.isEmpty()) {
|
|
|
|
|
lurlstr = entry.toString();
|
|
|
|
|
} else { // add document abstract/description as snippet (remotely stored in description_txt)
|
|
|
|
|
lurlstr = entry.toString(ldesc.get(0));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
parts.put("lurlEntry", UTF8.StringBody(crypt.simpleEncode(lurlstr, salt)));
|
|
|
|
|
// send request
|
|
|
|
|
// final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/crawlReceipt.html"), 10000, target.getHexHash() + ".yacyh", parts);
|
|
|
|
|
final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, 10000);
|
|
|
|
|
final byte[] content =
|
|
|
|
|
httpClient.POSTbytes(
|
|
|
|
|
new MultiProtocolURL("http://" + address + "/yacy/crawlReceipt.html"),
|
|
|
|
|
target.getHexHash() + ".yacyh",
|
|
|
|
|
parts,
|
|
|
|
|
false, true);
|
|
|
|
|
return FileUtils.table(content);
|
|
|
|
|
} catch (final Exception e ) {
|
|
|
|
|
// most probably a network time-out exception
|
|
|
|
|
Network.log.warn("yacyClient.crawlReceipt error:" + e.getMessage());
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
for (final String ip : target.getIPs()) {
|
|
|
|
|
// send request
|
|
|
|
|
try {
|
|
|
|
|
// prepare request
|
|
|
|
|
final Map<String, ContentBody> parts = basicRequestParts(sb, target.hash, salt);
|
|
|
|
|
parts.put("process", UTF8.StringBody(process));
|
|
|
|
|
parts.put("urlhash", UTF8.StringBody(((entry == null) ? "" : ASCII.String(entry.hash()))));
|
|
|
|
|
parts.put("result", UTF8.StringBody(result));
|
|
|
|
|
parts.put("reason", UTF8.StringBody(reason));
|
|
|
|
|
parts.put("wordh", UTF8.StringBody(wordhashes));
|
|
|
|
|
final String lurlstr;
|
|
|
|
|
if (entry == null) {
|
|
|
|
|
lurlstr = "";
|
|
|
|
|
} else {
|
|
|
|
|
final ArrayList<String> ldesc = entry.getDescription();
|
|
|
|
|
if (ldesc.isEmpty()) {
|
|
|
|
|
lurlstr = entry.toString();
|
|
|
|
|
} else { // add document abstract/description as snippet (remotely stored in description_txt)
|
|
|
|
|
lurlstr = entry.toString(ldesc.get(0));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
parts.put("lurlEntry", UTF8.StringBody(crypt.simpleEncode(lurlstr, salt)));
|
|
|
|
|
// send request
|
|
|
|
|
final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, 10000);
|
|
|
|
|
MultiProtocolURL targetBaseURL = target.getPublicMultiprotocolURL(ip, preferHttps);
|
|
|
|
|
byte[] content;
|
|
|
|
|
try {
|
|
|
|
|
content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/crawlReceipt.html"),
|
|
|
|
|
target.getHexHash() + ".yacyh", parts, false, true);
|
|
|
|
|
} catch(final IOException e) {
|
|
|
|
|
if(targetBaseURL.isHTTPS()) {
|
|
|
|
|
/* Failed using https : retry with http */
|
|
|
|
|
targetBaseURL = target.getPublicMultiprotocolURL(ip, false);
|
|
|
|
|
content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/crawlReceipt.html"),
|
|
|
|
|
target.getHexHash() + ".yacyh", parts, false, true);
|
|
|
|
|
if(content != null) {
|
|
|
|
|
/* Success with http : mark SSL as unavailable on the target peer */
|
|
|
|
|
markSSLUnavailableOnPeer(sb.peers, target, ip, "yacyClient.crawlReceipt");
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
throw e;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return FileUtils.table(content);
|
|
|
|
|
} catch (final Exception e ) {
|
|
|
|
|
// most probably a network time-out exception
|
|
|
|
|
Network.log.warn("yacyClient.crawlReceipt error:" + e.getMessage());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static AtomicInteger metadataRetrievalRunning = new AtomicInteger(0);
|
|
|
|
@ -1798,9 +1833,7 @@ public final class Protocol {
|
|
|
|
|
targetSeed.getHexHash() + ".yacyh", parts, gzipBody, true);
|
|
|
|
|
if(content != null) {
|
|
|
|
|
/* Success with http : mark SSL as unavailable on the target peer */
|
|
|
|
|
Network.log.info("yacyClient.transferRWI SSL unavailable on address " + ip);
|
|
|
|
|
targetSeed.setFlagSSLAvailable(false);
|
|
|
|
|
Switchboard.getSwitchboard().peers.updateConnected(targetSeed);
|
|
|
|
|
markSSLUnavailableOnPeer(Switchboard.getSwitchboard().peers, targetSeed, ip, "yacyClient.transferRWI");
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
throw e;
|
|
|
|
@ -1923,29 +1956,42 @@ public final class Protocol {
|
|
|
|
|
/**
|
|
|
|
|
* Receive remote peers profile data
|
|
|
|
|
*
|
|
|
|
|
* @param sb a Switchboard instance holding server environment
|
|
|
|
|
* @param targetSeed
|
|
|
|
|
* @return profile or null
|
|
|
|
|
*/
|
|
|
|
|
public static Map<String, String> getProfile(final Seed targetSeed) {
|
|
|
|
|
public static Map<String, String> getProfile(final Switchboard sb, final Seed targetSeed) {
|
|
|
|
|
|
|
|
|
|
// this post a message to the remote message board
|
|
|
|
|
final String salt = crypt.randomSalt();
|
|
|
|
|
|
|
|
|
|
final boolean preferHttps = sb.getConfigBool(SwitchboardConstants.NETWORK_PROTOCOL_HTTPS_PREFERRED,
|
|
|
|
|
SwitchboardConstants.NETWORK_PROTOCOL_HTTPS_PREFERRED_DEFAULT);
|
|
|
|
|
|
|
|
|
|
for (String ip : targetSeed.getIPs()) {
|
|
|
|
|
String address = targetSeed.getPublicAddress(ip);
|
|
|
|
|
if ( address == null ) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
for (final String ip : targetSeed.getIPs()) {
|
|
|
|
|
try {
|
|
|
|
|
final Map<String, ContentBody> parts =
|
|
|
|
|
basicRequestParts(Switchboard.getSwitchboard(), targetSeed.hash, salt);
|
|
|
|
|
basicRequestParts(sb, targetSeed.hash, salt);
|
|
|
|
|
final HTTPClient httpclient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, 15000);
|
|
|
|
|
final byte[] content =
|
|
|
|
|
httpclient.POSTbytes(
|
|
|
|
|
new MultiProtocolURL("http://" + address + "/yacy/profile.html"),
|
|
|
|
|
targetSeed.getHexHash() + ".yacyh",
|
|
|
|
|
parts,
|
|
|
|
|
false, true);
|
|
|
|
|
MultiProtocolURL targetBaseURL = targetSeed.getPublicMultiprotocolURL(ip, preferHttps);
|
|
|
|
|
byte[] content;
|
|
|
|
|
try {
|
|
|
|
|
content = httpclient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/profile.html"),
|
|
|
|
|
targetSeed.getHexHash() + ".yacyh", parts, false, true);
|
|
|
|
|
} catch(final IOException e) {
|
|
|
|
|
if(targetBaseURL.isHTTPS()) {
|
|
|
|
|
/* Failed with https : retry using http */
|
|
|
|
|
targetBaseURL = targetSeed.getPublicMultiprotocolURL(ip, false);
|
|
|
|
|
content = httpclient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/profile.html"),
|
|
|
|
|
targetSeed.getHexHash() + ".yacyh", parts, false, true);
|
|
|
|
|
if(content != null) {
|
|
|
|
|
/* Got something with http : mark peer SSL as unavailable on target peer */
|
|
|
|
|
markSSLUnavailableOnPeer(sb.peers, targetSeed, ip, "yacyClient.getProfile");
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
throw e;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return FileUtils.table(content);
|
|
|
|
|
} catch (final Exception e ) {
|
|
|
|
|
Network.log.warn("yacyClient.getProfile error:" + e.getMessage());
|
|
|
|
@ -2115,4 +2161,26 @@ public final class Protocol {
|
|
|
|
|
return "?" + sb.toString().substring(1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Mark a SSL/TLS as unavailable on a connected peer and log an information
|
|
|
|
|
* level message. Use when http is successful whereas https is not on the target
|
|
|
|
|
* peer. All parameters must not be null.
|
|
|
|
|
*
|
|
|
|
|
* @param seedDB
|
|
|
|
|
* the seeds database to update
|
|
|
|
|
* @param peer
|
|
|
|
|
* the peer to update
|
|
|
|
|
* @param address
|
|
|
|
|
* the address on peer where http is successful but https fails.
|
|
|
|
|
* @param logPrefix
|
|
|
|
|
* a prefix to the log message
|
|
|
|
|
*/
|
|
|
|
|
private static void markSSLUnavailableOnPeer(final SeedDB seedDB, final Seed peer, final String address,
|
|
|
|
|
final String logPrefix) {
|
|
|
|
|
Network.log.info(logPrefix + " SSL/TLS unavailable on peer '" + peer.getName()
|
|
|
|
|
+ "' : can be reached using http but not https on address " + address);
|
|
|
|
|
peer.setFlagSSLAvailable(false);
|
|
|
|
|
seedDB.updateConnected(peer);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|