From 7c1707872bd62ffa3becaa9d7112d288b48aa9c8 Mon Sep 17 00:00:00 2001 From: reger Date: Tue, 30 Sep 2014 22:22:13 +0200 Subject: [PATCH 01/17] search result showPicture update search parameter used parameter &cat=image is obsolete and returns no results - remove &cat=image and &cat=href references - remove &tenant= references (unused) Use contentdom=image and inurl: parameter to make showPicture link display something (open in new window because of used inurl modifier changes original query) --- htroot/index.java | 2 -- htroot/yacysearch.html | 1 - htroot/yacysearch.java | 5 +---- htroot/yacysearchitem.html | 2 +- htroot/yacysearchtrailer.java | 4 ++-- 5 files changed, 4 insertions(+), 10 deletions(-) diff --git a/htroot/index.java b/htroot/index.java index c35928d25..49bf3e8ce 100644 --- a/htroot/index.java +++ b/htroot/index.java @@ -79,7 +79,6 @@ public class index { final int maximumRecords = sb.getConfigInt(SwitchboardConstants.SEARCH_ITEMS, 10); final String prefermaskfilter = (post == null) ? "" : post.get("prefermaskfilter", ""); final String constraint = (post == null) ? "" : post.get("constraint", ""); - final String cat = (post == null) ? "href" : post.get("cat", "href"); final int type = (post == null) ? 0 : post.getInt("type", 0); //final boolean indexDistributeGranted = sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW, true); @@ -118,7 +117,6 @@ public class index { prop.putHTML("searchoptions_prefermaskoptions_prefermaskfilter", prefermaskfilter); prop.put("searchoptions_indexofChecked", ""); prop.put("results", ""); - prop.putHTML("cat", cat); prop.put("type", type); prop.put("depth", "0"); prop.put("topmenu", sb.getConfigBool("publicTopmenu", true) ? 1 : 0); diff --git a/htroot/yacysearch.html b/htroot/yacysearch.html index 188ed91fa..4c78087a6 100644 --- a/htroot/yacysearch.html +++ b/htroot/yacysearch.html @@ -106,7 +106,6 @@ To see a list of all APIs, please visit the - diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 9a81fb857..5107927fd 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -168,10 +168,8 @@ public class yacysearch { prop.put("resource", "global"); prop.put("urlmaskfilter", (post == null) ? ".*" : post.get("urlmaskfilter", ".*")); prop.put("prefermaskfilter", (post == null) ? "" : post.get("prefermaskfilter", "")); - prop.put("tenant", (post == null) ? "" : post.get("tenant", "")); prop.put("indexof", "off"); prop.put("constraint", ""); - prop.put("cat", "href"); prop.put("depth", "0"); prop.put( "search.verify", @@ -348,7 +346,7 @@ public class yacysearch { } } - if ( !block && (post == null || post.get("cat", "href").equals("href")) ) { + if ( !block ) { String urlmask = null; String tld = null; String inlink = null; @@ -858,7 +856,6 @@ public class yacysearch { prop.put("num-results", "3"); } - prop.put("cat", "href"); prop.put("depth", "0"); } diff --git a/htroot/yacysearchitem.html b/htroot/yacysearchitem.html index 2f1a2b430..a8e452804 100644 --- a/htroot/yacysearchitem.html +++ b/htroot/yacysearchitem.html @@ -29,7 +29,7 @@ #(showMetadata)#:: | Metadata#(/showMetadata)# #(showParser)#:: | Parser#(/showParser)# #(showCitation)#:: | Citations#(/showCitation)# - #(showPictures)#:: | Pictures#(/showPictures)# + #(showPictures)#:: | Pictures#(/showPictures)# #(showCache)#:: | Cache#(/showCache)# #(showProxy)#:: | Augmented Browsing#(/showProxy)# #(showHostBrowser)#:: | #(/showHostBrowser)# diff --git a/htroot/yacysearchtrailer.java b/htroot/yacysearchtrailer.java index 6dd8cabe6..8e1558a5f 100644 --- a/htroot/yacysearchtrailer.java +++ b/htroot/yacysearchtrailer.java @@ -525,5 +525,5 @@ public class yacysearchtrailer { } } -//http://localhost:8090/yacysearch.html?query=java+&maximumRecords=10&resource=local&verify=cacheonly&nav=hosts,authors,namespace,topics,filetype,protocol&urlmaskfilter=ftp://.*&prefermaskfilter=&cat=href&constraint=&contentdom=text&former=java+%2Fftp&startRecord=0 -//http://localhost:8090/yacysearch.html?query=java+&maximumRecords=10&resource=local&verify=cacheonly&nav=hosts,authors,namespace,topics,filetype,protocol&urlmaskfilter=.*&prefermaskfilter=&cat=href&constraint=&contentdom=text&former=java+%2Fvocabulary%2FGewerke%2FTore&startRecord=0 +//http://localhost:8090/yacysearch.html?query=java+&maximumRecords=10&resource=local&verify=cacheonly&nav=hosts,authors,namespace,topics,filetype,protocol&urlmaskfilter=ftp://.*&prefermaskfilter=&constraint=&contentdom=text&former=java+%2Fftp&startRecord=0 +//http://localhost:8090/yacysearch.html?query=java+&maximumRecords=10&resource=local&verify=cacheonly&nav=hosts,authors,namespace,topics,filetype,protocol&urlmaskfilter=.*&prefermaskfilter=&constraint=&contentdom=text&former=java+%2Fvocabulary%2FGewerke%2FTore&startRecord=0 From 65e6ae52fb6df83920b6bb667a4486c0ca67e532 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Wed, 1 Oct 2014 03:10:39 +0200 Subject: [PATCH 02/17] IPv6-enhanced Network monitoring page --- htroot/Network.csv | 1 - htroot/Network.html | 65 ++++++------- htroot/Network.java | 62 +++++++----- htroot/Network.xml | 11 +-- .../{RootStateNo.gif => NodeDisqualified.gif} | Bin htroot/env/grafics/NodeDisqualifiedIPv4.gif | Bin 0 -> 95 bytes htroot/env/grafics/NodeDisqualifiedIPv6.gif | Bin 0 -> 94 bytes .../{RootStateYes.gif => NodeQualified.gif} | Bin htroot/env/grafics/NodeQualifiedIPv4.gif | Bin 0 -> 124 bytes htroot/env/grafics/NodeQualifiedIPv6.gif | Bin 0 -> 94 bytes source/net/yacy/peers/Network.java | 92 ++++-------------- source/net/yacy/peers/Protocol.java | 20 ++-- 12 files changed, 99 insertions(+), 152 deletions(-) delete mode 100644 htroot/Network.csv rename htroot/env/grafics/{RootStateNo.gif => NodeDisqualified.gif} (100%) create mode 100644 htroot/env/grafics/NodeDisqualifiedIPv4.gif create mode 100644 htroot/env/grafics/NodeDisqualifiedIPv6.gif rename htroot/env/grafics/{RootStateYes.gif => NodeQualified.gif} (100%) create mode 100644 htroot/env/grafics/NodeQualifiedIPv4.gif create mode 100644 htroot/env/grafics/NodeQualifiedIPv6.gif diff --git a/htroot/Network.csv b/htroot/Network.csv deleted file mode 100644 index edcd00696..000000000 --- a/htroot/Network.csv +++ /dev/null @@ -1 +0,0 @@ -#(table)#:: ::#[active-count]#;#[active-links]#;#[active-words]#;#[passive-count]#;#[passive-links]#;#[passive-words]#;#[potential-count]#;#[potential-links]#;#[potential-words]#;#[all-count]#;#[all-links]#;#[all-words]#;#[my-links]#;#[my-words]##(/table)# diff --git a/htroot/Network.html b/htroot/Network.html index a96c2888c..cc745c8d2 100644 --- a/htroot/Network.html +++ b/htroot/Network.html @@ -104,54 +104,43 @@ document.getElementById("apilink").setAttribute("href", "Network.xml?" + window. - + + + + - - - + + - + - #(complete)#:: - - - - - - - - #(/complete)# + + #(c)#::#(/c)# + #{list}# - - + + + #(type)##(direct)#Junior passive::Junior direct::Junior offline#(/direct)#::#(direct)#senior passive::Senior direct::Senior offline#(/direct)#::#(direct)#Principal passive::Principal active::Principal offline#(/direct)##(/type)##(acceptcrawl)#no crawl::crawl possible::crawl possible#(/acceptcrawl)##(dhtreceive)#no DHT receive::DHT receive enabled::DHT receive enabled#(/dhtreceive)##{ips}##{/ips}# + + + - @@ -160,16 +149,18 @@ document.getElementById("apilink").setAttribute("href", "Network.xml?" + window. - #(complete)# - :: - - - - - - - - #(/complete)# + + #(c)#::#(/c)# + #{/list}#
send Message/
show Profile/
edit Wiki/
browse Blog
Hash Name
Info ReleaseAge#Seedscon/h
PPM QPH Last
Seen
UTC
Offset
Location
Uptime Links RWIsURLs for
Remote
Crawl
Sent
DHT Word Chunks
URLs
for
Remote
Crawl
Sent DHT
Word Chunks
Sent
URLs
Received
DHT Word Chunks
Received DHT
Word Chunks
Received
URLs
AddressHashAge#Seedscon/h
user agent
Peer
Ping
Location
user agent
send Message/
show Profile/
edit Wiki/
browse Blog
- m  - p  - w  - b  - #(updatedProfile)#::Profile updated#(/updatedProfile)# - #(updatedWiki)#::Wiki updated#(/updatedWiki)# - #(updatedBlog)#::Blog updated#(/updatedBlog)# - #(isCrawling)#::Crawl#(/isCrawling)# - #[shortname]##(ssl)#::https supported#(/ssl)##[hash]##[shortname]##(ssl)#::https supported#(/ssl)# - #(type)##(direct)#Junior passive::Junior direct::Junior offline#(/direct)#::#(direct)#senior passive::Senior direct::Senior offline#(/direct)#::#(direct)#Principal passive::Principal active::Principal offline#(/direct)##(/type)##(acceptcrawl)#no crawl::crawl possible::crawl possible#(/acceptcrawl)##(dhtreceive)#no DHT receive::DHT receive enabled::DHT receive enabled#(/dhtreceive)##(nodestate)#no node candidate::node candidate#(/nodestate)# #[version]##[age]##[seeds]##[connects]# #[ppm]# #[qph]# #[lastSeen]# #[utc]##[location]# #[uptime]# #[LCount]# #[ICount]##[sU]# #[rI]# #[rU]#http://#[ip]#:#[port]# #[ips]##[hash]##[age]##[seeds]##[connects]##[userAgent]#[Ping]#[location]##[userAgent]# + m  + p  + w  + b  + #(updatedProfile)#::Profile updated#(/updatedProfile)# + #(updatedWiki)#::Wiki updated#(/updatedWiki)# + #(updatedBlog)#::Blog updated#(/updatedBlog)# + #(isCrawling)#::Crawl#(/isCrawling)# +
diff --git a/htroot/Network.java b/htroot/Network.java index 8b070dd22..38c5599cd 100644 --- a/htroot/Network.java +++ b/htroot/Network.java @@ -32,6 +32,7 @@ import java.util.HashSet; import java.util.Iterator; import java.util.Locale; import java.util.Map; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.regex.Matcher; @@ -210,19 +211,21 @@ public class Network { Seed peer = new Seed(post.get("peerHash"), map); sb.updateMySeed(); - final int added = Protocol.hello(sb.peers.mySeed(), sb.peers.peerActions, peer); + final Map response = Protocol.hello(sb.peers.mySeed(), sb.peers.peerActions, peer); - if (added <= 0) { + if (response == null) { prop.put("table_comment",1); - prop.putHTML("table_comment_status","publish: disconnected peer '" + peer.getName() + "/" + post.get("peerHash") + "' from " + peer.getIPs()); + prop.putHTML("table_comment_status","publish: no response from peer '" + peer.getName() + "/" + post.get("peerHash") + "' from " + peer.getIPs()); } else { + String yourtype = response.get("yourtype"); + String yourip = response.get("yourip"); peer = sb.peers.getConnected(peer.hash); if (peer == null) { prop.put("table_comment",1); - prop.putHTML("table_comment_status","publish: disconnected peer 'UNKNOWN/" + post.get("peerHash") + "' from UNKNOWN"); + prop.putHTML("table_comment_status","publish: disconnected peer 'UNKNOWN/" + post.get("peerHash") + "' from UNKNOWN, yourtype = " + yourtype + ", yourip = " + yourip); } else { prop.put("table_comment",2); - prop.putHTML("table_comment_status","publish: handshaked " + peer.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR) + " peer '" + peer.getName() + "' at " + peer.getIPs()); + prop.putHTML("table_comment_status","publish: handshaked " + peer.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR) + " peer '" + peer.getName() + "' at " + peer.getIPs() +", yourtype = " + yourtype + ", yourip = " + yourip); prop.putHTML("table_comment_details",peer.toString()); } } @@ -285,7 +288,7 @@ public class Network { boolean dark = true; Seed seed; - final boolean complete = (post != null && post.containsKey("ip")); + final boolean c = (post != null && post.containsKey("ip")); final boolean onlyIncomingDHT = (post != null && post.containsKey("onlydhtin")); final boolean onlyNode = (post != null && post.containsKey("onlynode")); final long onlyAgeOverDays = post == null ? 0 : post.getLong("onlyageoverdays", 0); @@ -341,6 +344,7 @@ public class Network { prop.put(STR_TABLE_LIST + conCount + "_updatedWikiPage", 0); prop.put(STR_TABLE_LIST + conCount + "_updatedBlog", 0); prop.put(STR_TABLE_LIST + conCount + "_isCrawling", 0); + String ip = seed.getIP(); if (conCount >= maxCount) { break; } if (sb.peers != null && sb.peers.mySeed() != null && seed.hash != null && seed.hash.equals(sb.peers.mySeed().hash)) { prop.put(STR_TABLE_LIST + conCount + "_dark", 2); @@ -356,7 +360,7 @@ public class Network { } else { prop.put(STR_TABLE_LIST + conCount + "_updatedWiki", 1); prop.putHTML(STR_TABLE_LIST + conCount + "_updatedWiki_page", wikiMap.get("page")); - prop.put(STR_TABLE_LIST + conCount + "_updatedWiki_address", seed.getPublicAddress()); + prop.put(STR_TABLE_LIST + conCount + "_updatedWiki_address", seed.getPublicAddress(ip)); } if ((blogMap = updatedBlog.get(seed.hash)) == null) { prop.put(STR_TABLE_LIST + conCount + "_updatedBlog", 0); @@ -364,7 +368,7 @@ public class Network { prop.put(STR_TABLE_LIST + conCount + "_updatedBlog", 1); prop.putHTML(STR_TABLE_LIST + conCount + "_updatedBlog_page", blogMap.get("page")); prop.putHTML(STR_TABLE_LIST + conCount + "_updatedBlog_subject", blogMap.get("subject")); - prop.put(STR_TABLE_LIST + conCount + "_updatedBlog_address", seed.getPublicAddress()); + prop.put(STR_TABLE_LIST + conCount + "_updatedBlog_address", seed.getPublicAddress(ip)); } PPM = seed.getPPM(); QPM = seed.getQPM(); @@ -386,30 +390,39 @@ public class Network { userAgent = ClientIdentification.yacyInternetCrawlerAgent.userAgent; location = ClientIdentification.generateLocation(); } else { - userAgent = sb.peers.peerActions.getUserAgent(seed.getIP()); + userAgent = sb.peers.peerActions.getUserAgent(ip); location = ClientIdentification.parseLocationInUserAgent(userAgent); } if (location.length() > 10) location = location.substring(0, 10); if (location.length() == 0) { - Locale l = Domains.getLocale(seed.getIP()); + Locale l = Domains.getLocale(ip); if (l != null) location = l.toString(); } prop.putHTML(STR_TABLE_LIST + conCount + "_location", location); - if (complete) { - prop.put(STR_TABLE_LIST + conCount + "_complete", 1); - prop.putHTML(STR_TABLE_LIST + conCount + "_complete_ip", seed.getIP() ); - prop.putHTML(STR_TABLE_LIST + conCount + "_complete_ips", seed.getIPs().toString() ); - prop.put(STR_TABLE_LIST + conCount + "_complete_port", seed.get(Seed.PORT, "-") ); - prop.put(STR_TABLE_LIST + conCount + "_complete_hash", seed.hash); - prop.put(STR_TABLE_LIST + conCount + "_complete_age", seed.getAge()); - prop.putNum(STR_TABLE_LIST + conCount + "_complete_seeds", seed.getLong(Seed.SCOUNT, 0L)); - prop.putNum(STR_TABLE_LIST + conCount + "_complete_connects", seed.getFloat(Seed.CCOUNT, 0F)); - prop.putHTML(STR_TABLE_LIST + conCount + "_complete_userAgent", userAgent); + String port = seed.get(Seed.PORT, "-"); + Set ips = seed.getIPs(); + int ipsc = 0; + for (String s: ips) { + prop.put(STR_TABLE_LIST + conCount + "_ips_" + ipsc + "_nodestate", seed.getFlagRootNode() ? 1 : 0); + prop.put(STR_TABLE_LIST + conCount + "_ips_" + ipsc + "_c", c ? 1 : 0); + prop.putHTML(STR_TABLE_LIST + conCount + "_ips_" + ipsc + "_c_hash", seed.hash); + prop.putHTML(STR_TABLE_LIST + conCount + "_ips_" + ipsc + "_c_ip", s); + prop.putHTML(STR_TABLE_LIST + conCount + "_ips_" + ipsc + "_c_port", port); + prop.put(STR_TABLE_LIST + conCount + "_ips_" + ipsc++ + "_c_ipv6", s.indexOf(':') >= 0 ? 1 : 0); + } + prop.put(STR_TABLE_LIST + conCount + "_ips", ipsc); + prop.put(STR_TABLE_LIST + conCount + "_port", port); + prop.put(STR_TABLE_LIST + conCount + "_hash", seed.hash); + prop.put(STR_TABLE_LIST + conCount + "_age", seed.getAge()); + prop.putNum(STR_TABLE_LIST + conCount + "_seeds", seed.getLong(Seed.SCOUNT, 0L)); + prop.putNum(STR_TABLE_LIST + conCount + "_connects", seed.getFloat(Seed.CCOUNT, 0F)); + if (c) { + prop.put(STR_TABLE_LIST + conCount + "_c", 1); + prop.putHTML(STR_TABLE_LIST + conCount + "_c_userAgent", userAgent); } else { - prop.put(STR_TABLE_LIST + conCount + "_complete", 0); + prop.put(STR_TABLE_LIST + conCount + "_c", 0); } - if (seed.isJunior()) { prop.put(STR_TABLE_LIST + conCount + "_type", 0); } else if(seed.isSenior()){ @@ -447,9 +460,6 @@ public class Network { prop.put(STR_TABLE_LIST + conCount + "_dhtreceive", 0); // red/red; offline was off } } - prop.put(STR_TABLE_LIST + conCount + "_nodestate", seed.getFlagRootNode() ? 1 : 0); - prop.put(STR_TABLE_LIST + conCount + "_nodestate_ip", seed.getIP()); - prop.put(STR_TABLE_LIST + conCount + "_nodestate_port", seed.get(Seed.PORT, "-") ); if (seed.getFlagAcceptRemoteIndex()) { prop.put(STR_TABLE_LIST + conCount + "_dhtreceive_peertags", ""); } else { @@ -479,7 +489,7 @@ public class Network { prop.put("table", 1); prop.putNum("table_num", conCount); prop.putNum("table_total", ((page == 1) && (iAmActive)) ? (size + 1) : size ); - prop.put("table_complete", ((complete)? 1 : 0) ); + prop.put("table_c", ((c)? 1 : 0) ); } } prop.put("page", page); diff --git a/htroot/Network.xml b/htroot/Network.xml index eb7d0838f..a01561ea5 100644 --- a/htroot/Network.xml +++ b/htroot/Network.xml @@ -21,17 +21,16 @@ #(type_direct)#direct::passive#(/type_direct)# #[acceptcrawl]# #[dhtreceive]# - #[nodestate]# + #(nodestate)#false::true#(/nodestate)# #[location]# #[type_url]# - #(complete)# - :: #[age]# #[seeds]# #[connects]# -
#[ip]#:#[port]#
- #[userAgent]# - #(/complete)# + #{ips}##(c)##[ip]#::#[ip]##(/c)##{/ips}# + #{ips}#
#(c)##(ipv6)#::[#(/ipv6)##[ip]##(ipv6)#::]#(/ipv6)#:#[port]#::#(/c)#
#{/ips}# + #[port]# + #(c)#::#[userAgent]##(/c)# #{/list}# :: diff --git a/htroot/env/grafics/RootStateNo.gif b/htroot/env/grafics/NodeDisqualified.gif similarity index 100% rename from htroot/env/grafics/RootStateNo.gif rename to htroot/env/grafics/NodeDisqualified.gif diff --git a/htroot/env/grafics/NodeDisqualifiedIPv4.gif b/htroot/env/grafics/NodeDisqualifiedIPv4.gif new file mode 100644 index 0000000000000000000000000000000000000000..1b2921e4d2c954caa4019cb60d846445ed0ec267 GIT binary patch literal 95 zcmZ?wbhEHb#e6&5G_v$9+zdj(!v$McnyLP5Y*mjxuOrO(ylGNjrr;Fjl0{z+w3#wUzh;~~&sFWX??eU$YXFR~Bnbcj literal 0 HcmV?d00001 diff --git a/htroot/env/grafics/RootStateYes.gif b/htroot/env/grafics/NodeQualified.gif similarity index 100% rename from htroot/env/grafics/RootStateYes.gif rename to htroot/env/grafics/NodeQualified.gif diff --git a/htroot/env/grafics/NodeQualifiedIPv4.gif b/htroot/env/grafics/NodeQualifiedIPv4.gif new file mode 100644 index 0000000000000000000000000000000000000000..8da5bb63fbc8d99f5dc3dbd4baa971f4866a7d54 GIT binary patch literal 124 zcmZ?wbhEHbn1B;=-iNrq1O)P~1Ob--2?y@tT5af}|sA~CmZtCqtD+}K}IQqWD V;pMbR1vY9EU4%BXs0uPz0{~t?EHeNA literal 0 HcmV?d00001 diff --git a/htroot/env/grafics/NodeQualifiedIPv6.gif b/htroot/env/grafics/NodeQualifiedIPv6.gif new file mode 100644 index 0000000000000000000000000000000000000000..556914c1bd4f524c238c51f2ae084917b7f295e7 GIT binary patch literal 94 zcmZ?wbhEHb7UYze-t 0 ) { - log.info("received " - + newSeeds - + " new peer(s), know a total of " - + this.sb.peers.sizeConnected() - + " different peers"); - } + publishMySeed(false); } // use our own formatter to prevent concurrency locks with other processes @@ -202,7 +195,7 @@ public class Network protected class publishThread extends Thread { - int added; + private Map result; private final Seed seed; private final Semaphore sync; private final List syncList; @@ -219,14 +212,14 @@ public class Network this.syncList = syncList; this.seed = seed; - this.added = 0; + this.result = null; } @Override public final void run() { try { - this.added = Protocol.hello(Network.this.sb.peers.mySeed(), Network.this.sb.peers.peerActions, this.seed); - if ( this.added < 0 ) { + this.result = Protocol.hello(Network.this.sb.peers.mySeed(), Network.this.sb.peers.peerActions, this.seed); + if ( this.result == null ) { // no or wrong response, delete that address final String cause = "peer ping to peer resulted in error response (added < 0)"; log.info("publish: disconnected " @@ -324,7 +317,7 @@ public class Network } } - private int publishMySeed(final boolean force) { + private boolean publishMySeed(final boolean force) { try { // call this after the httpd was started up @@ -349,9 +342,7 @@ public class Network // getting a list of peers to contact if ( this.sb.peers.mySeed().get(Seed.PEERTYPE, Seed.PEERTYPE_VIRGIN).equals(Seed.PEERTYPE_VIRGIN) ) { - if ( attempts > PING_INITIAL ) { - attempts = PING_INITIAL; - } + if (attempts > PING_INITIAL) attempts = PING_INITIAL; final Set ch = Switchboard.getSwitchboard().clusterhashes; seeds = DHTSelection.seedsByAge(this.sb.peers, true, attempts - ((ch == null) ? 0 : ch.size())); // best for fast connection // add also all peers from cluster if this is a public robinson cluster @@ -363,9 +354,7 @@ public class Network seed = seeds.get(hash); if (seed == null) { seed = this.sb.peers.get(hash); - if ( seed == null ) { - continue; - } + if (seed == null) continue; } seeds.put(hash, seed); } @@ -385,12 +374,8 @@ public class Network seeds = DHTSelection.seedsByAge(this.sb.peers, false, attempts); // best for seed list maintenance/cleaning } - if ( seeds == null || seeds.isEmpty() ) { - return 0; - } - if ( seeds.size() < attempts ) { - attempts = seeds.size(); - } + if (seeds == null || seeds.isEmpty()) return false; + if (seeds.size() < attempts) attempts = seeds.size(); // This will try to get Peers that are not currently in amIAccessibleDB final Iterator si = seeds.values().iterator(); @@ -408,14 +393,13 @@ public class Network log.severe("publishMySeed: problem with news encoding", e); } this.sb.peers.mySeed().setUnusedFlags(); - int newSeeds = -1; //if (seeds.length > 1) { // holding a reference to all started threads int contactedSeedCount = 0; final List syncList = Collections.synchronizedList(new LinkedList()); // memory for threads final Semaphore sync = new Semaphore(attempts); - // going through the peer list and starting a new publisher thread for each peer + // go through the peer list and starting a new publisher thread for each peer int i = 0; while ( si.hasNext() ) { seed = si.next(); @@ -426,9 +410,7 @@ public class Network i++; final String address = seed.getPublicAddress(seed.getIP()); - if ( log.isFine() ) { - log.fine("HELLO #" + i + " to peer '" + seed.get(Seed.NAME, "") + "' at " + address); // debug - } + if ( log.isFine() ) log.fine("HELLO #" + i + " to peer '" + seed.get(Seed.NAME, "") + "' at " + address); // debug final String seederror = seed.isProper(false); if ( (address == null) || (seederror != null) ) { // we don't like that address, delete it @@ -456,15 +438,6 @@ public class Network // getting a reference to the finished thread final publishThread t = (publishThread) syncList.remove(0); - - // getting the amount of new reported seeds - if ( t.added >= 0 ) { - if ( newSeeds == -1 ) { - newSeeds = t.added; - } else { - newSeeds += t.added; - } - } } int accessible = 0; @@ -486,19 +459,9 @@ public class Network } } } - if ( log.isFine() ) { - log - .fine("DBSize before -> after Cleanup: " - + dbSize - + " -> " - + amIAccessibleDB.size()); - } + if (log.isFine()) log.fine("DBSize before -> after Cleanup: " + dbSize + " -> " + amIAccessibleDB.size()); } - log.info("PeerPing: I am accessible for " - + accessible - + " peer(s), not accessible for " - + notaccessible - + " peer(s)."); + log.info("PeerPing: I am accessible for " + accessible + " peer(s), not accessible for " + notaccessible + " peer(s)."); if ( (accessible + notaccessible) > 0 ) { final String newPeerType; @@ -517,11 +480,7 @@ public class Network if ( this.sb.peers.mySeed().orVirgin().equals(newPeerType) ) { log.info("PeerPing: myType is " + this.sb.peers.mySeed().orVirgin()); } else { - log.info("PeerPing: changing myType from '" - + this.sb.peers.mySeed().orVirgin() - + "' to '" - + newPeerType - + "'"); + log.info("PeerPing: changing myType from '" + this.sb.peers.mySeed().orVirgin() + "' to '" + newPeerType + "'"); this.sb.peers.mySeed().put(Seed.PEERTYPE, newPeerType); } } else { @@ -534,20 +493,11 @@ public class Network this.sb.peers.saveMySeed(); // if we have an address, we do nothing - if ( this.sb.peers.mySeed().isProper(true) == null && !force ) { - return 0; - } - if ( newSeeds > 0 ) { - return newSeeds; - } + if (this.sb.peers.mySeed().isProper(true) == null) return true; - // still no success: ask own NAT or internet responder - //final boolean DI604use = switchboard.getConfig("DI604use", "false").equals("true"); - //final String DI604pw = switchboard.getConfig("DI604pw", ""); + // still no success final String ip = this.sb.getConfig("staticIP", ""); - //if (ip.equals("")) ip = natLib.retrieveIP(DI604use, DI604pw); - // yacyCore.log.logDebug("DEBUG: new IP=" + ip); if (Seed.isProperIP(ip)) { this.sb.peers.mySeed().setIP(ip); } @@ -556,7 +506,7 @@ public class Network } log.info("publish: no recipient found, our address is " + this.sb.peers.mySeed().getIPs()); this.sb.peers.saveMySeed(); - return 0; + return false; } catch (final InterruptedException e ) { try { log.info("publish: Interruption detected while publishing my seed."); @@ -606,12 +556,10 @@ public class Network log.info("publish: Shutdown off all remaining publishing thread finished."); } catch (final Exception ee ) { - log.warn( - "publish: Unexpected error while trying to shutdown all remaining publishing threads.", - e); + log.warn("publish: Unexpected error while trying to shutdown all remaining publishing threads.", e); } - return 0; + return false; } } diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index e2a79fa6a..03f885fdf 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -167,7 +167,7 @@ public final class Protocol { * * @return the number of new seeds */ - public static int hello( + public static Map hello( final Seed mySeed, final PeerActions peerActions, final Seed otherSeed) { @@ -201,7 +201,7 @@ public final class Protocol { Network.log.info("yacyClient.hello thread '" + Thread.currentThread().getName() + "' interrupted."); - return -1; + return null; } Network.log.info("yacyClient.hello thread '" + Thread.currentThread().getName() + "', peer " + address + "; exception: " + e.getMessage()); // try again (go into loop) @@ -211,7 +211,7 @@ public final class Protocol { if (result == null || result.size() == 0) { Network.log.info("yacyClient.hello result error: " + ((result == null) ? "result null" : ("result=" + result.toString()))); - return -1; + return null; } Network.log.info("yacyClient.hello thread '" + Thread.currentThread().getName() @@ -233,7 +233,7 @@ public final class Protocol { try { // patch the remote peer address to avoid that remote peers spoof the network with wrong addresses final int p = address.lastIndexOf(':'); - if ( p < 0 ) return -1; + if ( p < 0 ) return null; String h = address.substring(0, p); if (h.charAt(0) == '[') h = h.substring(1); if (h.charAt(h.length() - 1) == ']') h = h.substring(0, h.length() - 1); @@ -241,11 +241,11 @@ public final class Protocol { otherPeer = Seed.genRemoteSeed(seed, false, ie.getHostAddress()); if ( !otherPeer.hash.equals(otherSeed.hash) ) { Network.log.info("yacyClient.hello: consistency error: otherPeer.hash = " + otherPeer.hash + ", otherHash = " + otherSeed.hash); - return -1; // no success + return null; // no success } } catch (final IOException e ) { Network.log.info("yacyClient.hello: consistency error: other seed bad:" + e.getMessage() + ", seed=" + seed); - return -1; // no success + return null; // no success } } } @@ -313,7 +313,7 @@ public final class Protocol { + mytype + ", rejecting other peer."); } - return -1; + return null; } if ( mySeed.orVirgin().equals(Seed.PEERTYPE_VIRGIN) ) { mySeed.put(Seed.PEERTYPE, mytype); @@ -322,7 +322,7 @@ public final class Protocol { final String error = mySeed.isProper(true); if ( error != null ) { Network.log.warn("yacyClient.hello mySeed error - not proper: " + error); - return -1; + return null; } //final Date remoteTime = yacyCore.parseUniversalDate((String) result.get(yacySeed.MYTIME)); // read remote time @@ -347,7 +347,7 @@ public final class Protocol { if ( i == 1 ) { final int p = address.indexOf(':'); if ( p < 0 ) { - return -1; + return null; } InetAddress ia = Domains.dnsResolve(address.substring(0, p)); if (ia == null) continue; @@ -371,7 +371,7 @@ public final class Protocol { // update event tracker EventTracker.update(EventTracker.EClass.PEERPING, new ProfilingGraph.EventPing(mySeed.getName(), otherSeed.getName(), true, connectedAfter - connectedBefore), false); - return count; + return result; } public static Seed querySeed(final Seed target, final String seedHash) { From fe917deb2d7668d59496053ff08f90d78a2b888b Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Wed, 1 Oct 2014 03:47:57 +0200 Subject: [PATCH 03/17] when pinging other peers, be able to select the right IP option --- htroot/Network.java | 4 +-- source/net/yacy/peers/Network.java | 5 ++- source/net/yacy/peers/Protocol.java | 47 ++++++++++++----------------- 3 files changed, 25 insertions(+), 31 deletions(-) diff --git a/htroot/Network.java b/htroot/Network.java index 38c5599cd..0c4a87865 100644 --- a/htroot/Network.java +++ b/htroot/Network.java @@ -208,10 +208,10 @@ public class Network { final ConcurrentMap map = new ConcurrentHashMap(); map.put(Seed.IP, post.get("peerIP")); map.put(Seed.PORT, post.get("peerPort")); - Seed peer = new Seed(post.get("peerHash"), map); + Seed peer = post.get("peerHash") == null ? null : new Seed(post.get("peerHash"), map); sb.updateMySeed(); - final Map response = Protocol.hello(sb.peers.mySeed(), sb.peers.peerActions, peer); + final Map response = Protocol.hello(sb.peers.mySeed(), sb.peers.peerActions, peer.getPublicAddress(post.get("peerIP")), peer.hash); if (response == null) { prop.put("table_comment",1); diff --git a/source/net/yacy/peers/Network.java b/source/net/yacy/peers/Network.java index 39b13eb20..ba1e2184f 100644 --- a/source/net/yacy/peers/Network.java +++ b/source/net/yacy/peers/Network.java @@ -218,7 +218,10 @@ public class Network @Override public final void run() { try { - this.result = Protocol.hello(Network.this.sb.peers.mySeed(), Network.this.sb.peers.peerActions, this.seed); + for (String ip: this.seed.getIPs()) { + this.result = Protocol.hello(Network.this.sb.peers.mySeed(), Network.this.sb.peers.peerActions, this.seed.getPublicAddress(ip), this.seed.hash); + if (this.result != null) break; + } if ( this.result == null ) { // no or wrong response, delete that address final String cause = "peer ping to peer resulted in error response (added < 0)"; diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index 03f885fdf..88aad5073 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -170,8 +170,8 @@ public final class Protocol { public static Map hello( final Seed mySeed, final PeerActions peerActions, - final Seed otherSeed) { - final String address = otherSeed.getPublicAddress(otherSeed.getIP()); + final String targetAddress, + final String targetHash) { Map result = null; final String salt = crypt.randomSalt(); @@ -190,8 +190,8 @@ public final class Protocol { final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, 30000); content = httpClient.POSTbytes( - new MultiProtocolURL("http://" + address + "/yacy/hello.html"), - Seed.b64Hash2hexHash(otherSeed.hash) + ".yacyh", + new MultiProtocolURL("http://" + targetAddress + "/yacy/hello.html"), + Seed.b64Hash2hexHash(targetHash) + ".yacyh", parts, false, true); responseTime = System.currentTimeMillis() - start; @@ -203,7 +203,7 @@ public final class Protocol { + "' interrupted."); return null; } - Network.log.info("yacyClient.hello thread '" + Thread.currentThread().getName() + "', peer " + address + "; exception: " + e.getMessage()); + Network.log.info("yacyClient.hello thread '" + Thread.currentThread().getName() + "', peer " + targetAddress + "; exception: " + e.getMessage()); // try again (go into loop) result = null; } @@ -216,7 +216,7 @@ public final class Protocol { Network.log.info("yacyClient.hello thread '" + Thread.currentThread().getName() + "' contacted peer at " - + address + + targetAddress + ", received " + ((content == null) ? "null" : content.length) + " bytes, time = " @@ -226,21 +226,21 @@ public final class Protocol { // check consistency with expectation Seed otherPeer = null; String seed; - if ( (otherSeed.hash != null) && (otherSeed.hash.length() > 0) && ((seed = result.get("seed0")) != null) ) { + if ( (targetHash != null) && (targetHash.length() > 0) && ((seed = result.get("seed0")) != null) ) { if ( seed.length() > Seed.maxsize ) { Network.log.info("hello/client 0: rejected contacting seed; too large (" + seed.length() + " > " + Seed.maxsize + ")"); } else { try { // patch the remote peer address to avoid that remote peers spoof the network with wrong addresses - final int p = address.lastIndexOf(':'); + final int p = targetAddress.lastIndexOf(':'); if ( p < 0 ) return null; - String h = address.substring(0, p); + String h = targetAddress.substring(0, p); if (h.charAt(0) == '[') h = h.substring(1); if (h.charAt(h.length() - 1) == ']') h = h.substring(0, h.length() - 1); InetAddress ie = Domains.dnsResolve(h); otherPeer = Seed.genRemoteSeed(seed, false, ie.getHostAddress()); - if ( !otherPeer.hash.equals(otherSeed.hash) ) { - Network.log.info("yacyClient.hello: consistency error: otherPeer.hash = " + otherPeer.hash + ", otherHash = " + otherSeed.hash); + if ( !otherPeer.hash.equals(targetHash) ) { + Network.log.info("yacyClient.hello: consistency error: otherPeer.hash = " + otherPeer.hash + ", otherHash = " + targetHash); return null; // no success } } catch (final IOException e ) { @@ -285,7 +285,7 @@ public final class Protocol { accessible.IWasAccessed = false; } accessible.lastUpdated = System.currentTimeMillis(); - Network.amIAccessibleDB.put(otherSeed.hash, accessible); + Network.amIAccessibleDB.put(targetHash, accessible); /* * If we were reported as junior we have to check if your port forwarding channel is broken @@ -329,7 +329,6 @@ public final class Protocol { // read the seeds that the peer returned and integrate them into own database int i = 0; - int count = 0; String seedStr; Seed s; final int connectedBefore = peerActions.sizeConnected(); @@ -337,39 +336,31 @@ public final class Protocol { // integrate new seed into own database // the first seed, "seed0" is the seed of the responding peer if ( seedStr.length() > Seed.maxsize ) { - Network.log.info("hello/client: rejected contacting seed; too large (" - + seedStr.length() - + " > " - + Seed.maxsize - + ")"); + Network.log.info("hello/client: rejected contacting seed; too large ("+ seedStr.length() + " > " + Seed.maxsize + ")"); } else { try { if ( i == 1 ) { - final int p = address.indexOf(':'); + final int p = targetAddress.lastIndexOf(':'); if ( p < 0 ) { return null; } - InetAddress ia = Domains.dnsResolve(address.substring(0, p)); + InetAddress ia = Domains.dnsResolve(targetAddress.substring(0, p)); if (ia == null) continue; - final String host = ia.getHostAddress(); + final String host = ia.getHostAddress(); // the actual address of the target as we had been successful when contacting them is patched here s = Seed.genRemoteSeed(seedStr, false, host); } else { s = Seed.genRemoteSeed(seedStr, false, null); } - if ( peerActions.peerArrival(s, (i == 1)) ) { - count++; - } + peerActions.peerArrival(s, (i == 1)); } catch (final IOException e ) { - Network.log.info("hello/client: rejected contacting seed; bad (" - + e.getMessage() - + ")"); + Network.log.info("hello/client: rejected contacting seed; bad (" + e.getMessage() + ")"); } } } final int connectedAfter = peerActions.sizeConnected(); // update event tracker - EventTracker.update(EventTracker.EClass.PEERPING, new ProfilingGraph.EventPing(mySeed.getName(), otherSeed.getName(), true, connectedAfter - connectedBefore), false); + EventTracker.update(EventTracker.EClass.PEERPING, new ProfilingGraph.EventPing(mySeed.getName(), targetHash, true, connectedAfter - connectedBefore), false); return result; } From fb1fcc2b03617cd3c8ddb17b5b5e59bacf7d3692 Mon Sep 17 00:00:00 2001 From: reger Date: Wed, 1 Oct 2014 04:35:34 +0200 Subject: [PATCH 04/17] handle noarchive tag, skip writing page to cache http://mantis.tokeek.de/view.php?id=44 --- source/net/yacy/crawler/data/Cache.java | 1 + .../schema/CollectionConfiguration.java | 10 ++++---- .../yacy/search/schema/CollectionSchema.java | 24 ++++++++++++------- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/source/net/yacy/crawler/data/Cache.java b/source/net/yacy/crawler/data/Cache.java index 683b3b17d..77c2131f4 100644 --- a/source/net/yacy/crawler/data/Cache.java +++ b/source/net/yacy/crawler/data/Cache.java @@ -201,6 +201,7 @@ public final class Cache { public static void store(final DigestURL url, final ResponseHeader responseHeader, final byte[] file) throws IOException { if (maxCacheSize == 0) return; + if (responseHeader.getXRobotsTag().contains("noarchive")) return; // don't cache, see http://noarchive.net/ if (responseHeader == null) throw new IOException("Cache.store of url " + url.toNormalform(false) + " not possible: responseHeader == null"); if (file == null) throw new IOException("Cache.store of url " + url.toNormalform(false) + " not possible: file == null"); log.info("storing content of url " + url.toNormalform(false) + ", " + file.length + " bytes"); diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java index 657b419ee..4b5e10b42 100644 --- a/source/net/yacy/search/schema/CollectionConfiguration.java +++ b/source/net/yacy/search/schema/CollectionConfiguration.java @@ -557,6 +557,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri // bit 2: "follow" contained in html header meta // bit 3: "noindex" contained in html header meta // bit 4: "nofollow" contained in html header meta + // bit 5: "noarchive" contained in html header meta // bit 8: "all" contained in http header X-Robots-Tag // bit 9: "noindex" contained in http header X-Robots-Tag // bit 10: "nofollow" contained in http header X-Robots-Tag @@ -576,6 +577,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri if (robots_meta.indexOf("follow",0) == 0 || robots_meta.indexOf(" follow",0) >= 0 || robots_meta.indexOf(",follow",0) >= 0 ) b += 4; // set bit 2 if (robots_meta.indexOf("noindex",0) >= 0) b += 8; // set bit 3 if (robots_meta.indexOf("nofollow",0) >= 0) b += 16; // set bit 4 + if (robots_meta.indexOf("noarchive",0) >= 0) b += 32; // set bit 5 } String x_robots_tag = responseHeader == null ? "" : responseHeader.getXRobotsTag(); if (!x_robots_tag.isEmpty()) { @@ -1494,10 +1496,10 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri try { String doccountquery = CollectionSchema.host_id_s.getSolrFieldName() + ":\"" + hostid + "\" AND " + - "-" + CollectionSchema.robots_i.getSolrFieldName() + ":8 AND " + // bit 3 - "-" + CollectionSchema.robots_i.getSolrFieldName() + ":24 AND " + // bit 3 + 4 - "-" + CollectionSchema.robots_i.getSolrFieldName() + ":512 AND " + // bit 9 - "-" + CollectionSchema.robots_i.getSolrFieldName() + ":1536 AND " + // bit 9 + 10 + "-" + CollectionSchema.robots_i.getSolrFieldName() + ":8 AND " + // bit 3 (noindex) + "-" + CollectionSchema.robots_i.getSolrFieldName() + ":24 AND " + // bit 3 + 4 (noindex + nofollow) + "-" + CollectionSchema.robots_i.getSolrFieldName() + ":512 AND " + // bit 9 (noindex) + "-" + CollectionSchema.robots_i.getSolrFieldName() + ":1536 AND " + // bit 9 + 10 (noindex + nofollow) "((-" + CollectionSchema.canonical_equal_sku_b.getSolrFieldName() + ":" + AbstractSolrConnector.CATCHALL_TERM + ") OR (" + CollectionSchema.canonical_equal_sku_b.getSolrFieldName() + ":true)) AND " + CollectionSchema.httpstatus_i.getSolrFieldName() + ":200 AND " + "-" + CollectionSchema.id.getSolrFieldName() + ":\"" + urlhash + "\" AND " + diff --git a/source/net/yacy/search/schema/CollectionSchema.java b/source/net/yacy/search/schema/CollectionSchema.java index 0db00542d..8ce136d27 100644 --- a/source/net/yacy/search/schema/CollectionSchema.java +++ b/source/net/yacy/search/schema/CollectionSchema.java @@ -111,15 +111,21 @@ public enum CollectionSchema implements SchemaDeclaration { scripts_sxt(SolrType.string, true, true, true, false, false, "normalized urls within a scripts tag"), scriptscount_i(SolrType.num_integer, true, true, false, false, false, "number of entries in scripts_sxt"), // encoded as binary value into an integer: - // bit 0: "all" contained in html header meta - // bit 1: "index" contained in html header meta - // bit 2: "noindex" contained in html header meta - // bit 3: "nofollow" contained in html header meta - // bit 8: "noarchive" contained in http header properties - // bit 9: "nosnippet" contained in http header properties - // bit 10: "noindex" contained in http header properties - // bit 11: "nofollow" contained in http header properties - // bit 12: "unavailable_after" contained in http header properties + // bit 0: "all" contained in html header meta + // bit 1: "index" contained in html header meta + // bit 2: "follow" contained in html header meta + // bit 3: "noindex" contained in html header meta + // bit 4: "nofollow" contained in html header meta + // bit 5: "noarchive" contained in html header meta + // bit 8: "all" contained in http header X-Robots-Tag + // bit 9: "noindex" contained in http header X-Robots-Tag + // bit 10: "nofollow" contained in http header X-Robots-Tag + // bit 11: "noarchive" contained in http header X-Robots-Tag + // bit 12: "nosnippet" contained in http header X-Robots-Tag + // bit 13: "noodp" contained in http header X-Robots-Tag + // bit 14: "notranslate" contained in http header X-Robots-Tag + // bit 15: "noimageindex" contained in http header X-Robots-Tag + // bit 16: "unavailable_after" contained in http header X-Robots-Tag robots_i(SolrType.num_integer, true, true, false, false, false, "content of tag and the \"X-Robots-Tag\" HTTP property"), metagenerator_t(SolrType.text_general, true, true, false, false, false, "content of tag"), inboundlinks_protocol_sxt(SolrType.string, true, true, true, false, false, "internal links, only the protocol"), From 247e626083ffe58d2b82ecf24372960134f5dfc7 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Wed, 1 Oct 2014 10:21:03 +0200 Subject: [PATCH 05/17] IPv6 host parsing bugfixes --- htroot/IndexCreateQueues_p.java | 4 +- htroot/Settings_p.java | 2 +- .../cora/document/id/MultiProtocolURL.java | 2 +- source/net/yacy/cora/protocol/Domains.java | 66 +++++++++++++++++++ .../yacy/cora/protocol/HeaderFramework.java | 2 +- .../net/yacy/http/AbstractRemoteHandler.java | 4 +- source/net/yacy/http/YacyDomainHandler.java | 12 +--- source/net/yacy/peers/Protocol.java | 17 ++--- .../yacy/server/http/HTTPDProxyHandler.java | 24 +++---- source/net/yacy/server/http/HTTPDemon.java | 10 +-- 10 files changed, 89 insertions(+), 54 deletions(-) diff --git a/htroot/IndexCreateQueues_p.java b/htroot/IndexCreateQueues_p.java index 3adf3fa04..393889daf 100644 --- a/htroot/IndexCreateQueues_p.java +++ b/htroot/IndexCreateQueues_p.java @@ -14,6 +14,7 @@ import java.util.regex.PatternSyntaxException; import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.id.DigestURL; +import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.util.ConcurrentLog; import net.yacy.crawler.CrawlSwitchboard; @@ -141,8 +142,7 @@ public class IndexCreateQueues_p { int hc = 0; for (Map.Entry host: hosts.entrySet()) { String hostnameport = host.getKey(); - int p = hostnameport.lastIndexOf(':'); - String hostname = p < 0 ? hostnameport : hostnameport.substring(0, p); + String hostname = Domains.stripToHostName(hostnameport); prop.putHTML("crawler_host_" + hc + "_hostnameport", hostnameport); prop.putHTML("crawler_host_" + hc + "_hostname", hostname); prop.put("crawler_host_" + hc + "_embed", embed ? 1 : 0); diff --git a/htroot/Settings_p.java b/htroot/Settings_p.java index 6e1adb458..3b8eab40c 100644 --- a/htroot/Settings_p.java +++ b/htroot/Settings_p.java @@ -108,7 +108,7 @@ public final class Settings_p { } else { prop.put("use_proxyAccounts", "1"); //checked /*s = env.getConfig("proxyAccount", "proxy:void"); - pos = s.indexOf(":"); + pos = s.indexOf(':'); if (pos < 0) { prop.put("proxyuser","proxy"); } else { diff --git a/source/net/yacy/cora/document/id/MultiProtocolURL.java b/source/net/yacy/cora/document/id/MultiProtocolURL.java index 9caa078c2..d1077d0ec 100644 --- a/source/net/yacy/cora/document/id/MultiProtocolURL.java +++ b/source/net/yacy/cora/document/id/MultiProtocolURL.java @@ -155,7 +155,7 @@ public class MultiProtocolURL implements Serializable, Comparable 0) target = target.substring(p + 3); + p = target.indexOf('/'); + if (p > 0) target = target.substring(0, p); + + // IPv4 / host heuristics + p = target.lastIndexOf(':'); + if ( p < 0 ) { + // may be IPv4 or IPv6, we chop off brackets if exist + if (target.charAt(0) == '[') target = target.substring(1); + if (target.charAt(target.length() - 1) == ']') target = target.substring(0, target.length() - 1); + return target; + } + + // the ':' at pos p may be either a port divider or a part of an IPv6 address + if (target.charAt(p - 1) == ']') { + target = target.substring(1, p - 1); + return target; + } + + // the ':' must be a port divider + target = target.substring(0, p); + return target; + } + + public static int stripToPort(String target) { + int port = 80; // default port + + // normalize + if (target == null || target.isEmpty()) return port; + target = target.toLowerCase().trim(); // we can lowercase this because host names are case-insensitive + + // extract the address (host:port) part (applies if this is an url) + int p = target.indexOf("://"); + if (p > 0) { + String protocol = target.substring(0, p); + target = target.substring(p + 3); + if ("https".equals(protocol)) port = 443; + if ("ftp".equals(protocol)) port = 21; + if ("smb".equals(protocol)) port = 445; + } + p = target.indexOf('/'); + if (p > 0) target = target.substring(0, p); + + // IPv4 / host heuristics + p = target.lastIndexOf(':'); + if ( p < 0 ) return port; + + // the ':' must be a port divider + port = Integer.parseInt(target.substring(p + 1)); + return port; + } + /** * resolve a host address using a local DNS cache and a DNS lookup if necessary * @param clienthost * @return the hosts InetAddress or null if the address cannot be resolved */ public static InetAddress dnsResolve(final String host0) { + // consider to call stripToHostName() before calling this if (host0 == null || host0.isEmpty()) return null; final String host = host0.toLowerCase().trim(); diff --git a/source/net/yacy/cora/protocol/HeaderFramework.java b/source/net/yacy/cora/protocol/HeaderFramework.java index 969ccaff2..e7886b444 100644 --- a/source/net/yacy/cora/protocol/HeaderFramework.java +++ b/source/net/yacy/cora/protocol/HeaderFramework.java @@ -574,7 +574,7 @@ public class HeaderFramework extends TreeMap implements Map= 0) { - newHost = hostPort.substring(0, posPort); - newPort = Integer.parseInt(hostPort.substring(posPort + 1)); - } else { - newHost = hostPort; - newPort = 80; - } + int newPort = Domains.stripToPort(hostPort); + String newHost = Domains.stripToHostName(hostPort); if (alternativeResolvers.myIPs().contains(newHost)) return; if (Domains.isLocal(newHost, null)) return; RequestDispatcher dispatcher = request.getRequestDispatcher(path + target); diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index 88aad5073..b465e5c95 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -232,12 +232,8 @@ public final class Protocol { } else { try { // patch the remote peer address to avoid that remote peers spoof the network with wrong addresses - final int p = targetAddress.lastIndexOf(':'); - if ( p < 0 ) return null; - String h = targetAddress.substring(0, p); - if (h.charAt(0) == '[') h = h.substring(1); - if (h.charAt(h.length() - 1) == ']') h = h.substring(0, h.length() - 1); - InetAddress ie = Domains.dnsResolve(h); + String host = Domains.stripToHostName(targetAddress); + InetAddress ie = Domains.dnsResolve(host); otherPeer = Seed.genRemoteSeed(seed, false, ie.getHostAddress()); if ( !otherPeer.hash.equals(targetHash) ) { Network.log.info("yacyClient.hello: consistency error: otherPeer.hash = " + otherPeer.hash + ", otherHash = " + targetHash); @@ -340,13 +336,10 @@ public final class Protocol { } else { try { if ( i == 1 ) { - final int p = targetAddress.lastIndexOf(':'); - if ( p < 0 ) { - return null; - } - InetAddress ia = Domains.dnsResolve(targetAddress.substring(0, p)); + String host = Domains.stripToHostName(targetAddress); + InetAddress ia = Domains.dnsResolve(host); if (ia == null) continue; - final String host = ia.getHostAddress(); // the actual address of the target as we had been successful when contacting them is patched here + host = ia.getHostAddress(); // the actual address of the target as we had been successful when contacting them is patched here s = Seed.genRemoteSeed(seedStr, false, host); } else { s = Seed.genRemoteSeed(seedStr, false, null); diff --git a/source/net/yacy/server/http/HTTPDProxyHandler.java b/source/net/yacy/server/http/HTTPDProxyHandler.java index 65e5760e6..ff8dcdfa8 100644 --- a/source/net/yacy/server/http/HTTPDProxyHandler.java +++ b/source/net/yacy/server/http/HTTPDProxyHandler.java @@ -423,13 +423,8 @@ public final class HTTPDProxyHandler { final String ip = (String) conProp.get(HeaderFramework.CONNECTION_PROP_CLIENTIP); // the ip from the connecting peer final String httpVer = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HTTP_VER); // the ip from the connecting peer - int port, pos; - if ((pos = host.indexOf(':')) < 0) { - port = 80; - } else { - port = Integer.parseInt(host.substring(pos + 1)); - host = host.substring(0, pos); - } + int port = Domains.stripToPort(host); + host = Domains.stripToHostName(host); // resolve yacy and yacyh domains String yAddress = resolveYacyDomains(host); @@ -438,10 +433,10 @@ public final class HTTPDProxyHandler { final String remotePath = (args == null) ? path : (path + "?" + args); // with leading '/' // remove yacy-subdomain-path, when accessing /env - if ( (yAddress != null) + if ((yAddress != null) && (remotePath.startsWith("/env")) - && ((pos = yAddress.indexOf('/')) != -1) - ) yAddress = yAddress.substring(0, yAddress.indexOf('/')); + && (yAddress.indexOf('/') != -1) + ) yAddress = yAddress.substring(0, yAddress.indexOf('/')); modifyProxyHeaders(requestHeader, httpVer); @@ -1050,11 +1045,8 @@ public final class HTTPDProxyHandler { String orgHostName = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HOST); if (orgHostName == null) orgHostName = "unknown"; orgHostName = orgHostName.toLowerCase(); - int pos = orgHostName.indexOf(':'); - if (pos != -1) { - orgHostPort = orgHostName.substring(pos+1); - orgHostName = orgHostName.substring(0,pos); - } + orgHostPort = Integer.toString(Domains.stripToPort(orgHostName)); + orgHostName = Domains.stripToHostName(orgHostName); String orgHostPath = (String) conProp.get(HeaderFramework.CONNECTION_PROP_PATH); if (orgHostPath == null) orgHostPath = ""; String orgHostArgs = (String) conProp.get(HeaderFramework.CONNECTION_PROP_ARGS); if (orgHostArgs == null) orgHostArgs = ""; if (orgHostArgs.length() > 0) orgHostArgs = "?" + orgHostArgs; @@ -1078,7 +1070,7 @@ public final class HTTPDProxyHandler { if (addr != null) if (addr != null) testHostNames.add(testHostName); } - pos = orgHostName.lastIndexOf('.'); + int pos = orgHostName.lastIndexOf('.'); if (pos != -1) { final Iterator iter = topLevelDomains.iterator(); while (iter.hasNext()) { diff --git a/source/net/yacy/server/http/HTTPDemon.java b/source/net/yacy/server/http/HTTPDemon.java index d2b606aa8..51d51b9cb 100644 --- a/source/net/yacy/server/http/HTTPDemon.java +++ b/source/net/yacy/server/http/HTTPDemon.java @@ -144,14 +144,8 @@ public final class HTTPDemon { final String args = (String) conProp.get(HeaderFramework.CONNECTION_PROP_ARGS); final String method = (String) conProp.get(HeaderFramework.CONNECTION_PROP_METHOD); - final int port; - final int pos = host.indexOf(':'); - if (pos != -1) { - port = NumberTools.parseIntDecSubstring(host, pos + 1); - host = host.substring(0, pos); - } else { - port = 80; - } + final int port = Domains.stripToPort(host); + host = Domains.stripToHostName(host); String urlString; try { From e4ccca9497458c7c476b23b73333cb87700919f5 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Wed, 1 Oct 2014 12:22:55 +0200 Subject: [PATCH 06/17] fix for xss bugs found by CTF365 --- htroot/yacyinteractive.java | 4 ++-- htroot/yacysearch.java | 15 +++++---------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/htroot/yacyinteractive.java b/htroot/yacyinteractive.java index 595c2ac23..9a91763b9 100644 --- a/htroot/yacyinteractive.java +++ b/htroot/yacyinteractive.java @@ -53,8 +53,8 @@ public class yacyinteractive { final String maximumRecords = (post == null) ? sb.getConfig(SwitchboardConstants.SEARCH_ITEMS, "10") : post.get("maximumRecords", ""); final boolean focus = (post == null) ? true : post.get("focus", "1").equals("1"); prop.putHTML("query", query); - prop.put("startRecord", startRecord); - prop.put("maximumRecords", maximumRecords); + prop.putHTML("startRecord", startRecord); + prop.putHTML("maximumRecords", maximumRecords); prop.putHTML("querys", query.replaceAll(" ", "+")); prop.put("serverlist", query.isEmpty() ? 1 : 0); prop.put("focus", focus ? 1 : 0); diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 5107927fd..c856421ae 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -28,7 +28,6 @@ // if the shell's current path is HTROOT import java.io.IOException; -import java.net.InetAddress; import java.util.ArrayList; import java.util.Collection; import java.util.ConcurrentModificationException; @@ -868,13 +867,9 @@ public class yacysearch { prop.putHTML("prefermaskfilter", prefermask); prop.put("indexof", (indexof) ? "on" : "off"); prop.put("constraint", (constraint == null) ? "" : constraint.exportB64()); - prop.put("search.verify", snippetFetchStrategy == null - ? sb.getConfig("search.verify", "iffresh") - : snippetFetchStrategy.toName()); - prop.put( - "search.navigation", - (post == null) ? sb.getConfig("search.navigation", "all") : post.get("nav", "all")); - prop.put("contentdom", (post == null ? "text" : post.get("contentdom", "text"))); + prop.put("search.verify", snippetFetchStrategy == null ? sb.getConfig("search.verify", "iffresh") : snippetFetchStrategy.toName()); + prop.put("search.navigation", (post == null) ? sb.getConfig("search.navigation", "all") : post.get("nav", "all")); + prop.putHTML("contentdom", (post == null ? "text" : post.get("contentdom", "text"))); // for RSS: don't HTML encode some elements prop.putXML("rss_query", originalquerystring); @@ -883,8 +878,8 @@ public class yacysearch { sb.localSearchLastAccess = System.currentTimeMillis(); // hostname and port (assume locahost if nothing helps) - final InetAddress hostIP = Domains.myPublicLocalIP(); - prop.put("myhost", hostIP != null ? hostIP.getHostAddress() : Domains.LOCALHOST); + final String hostIP = sb.peers.mySeed().getIP(); + prop.put("myhost", hostIP != null ? hostIP : Domains.LOCALHOST); prop.put("myport", sb.getConfig("port", "8090")); // return rewrite properties From 528f583d72db29b4c25869ce84513c0863d6e5d3 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Wed, 1 Oct 2014 15:32:10 +0200 Subject: [PATCH 07/17] ipv6 fixes --- htroot/Network.html | 2 +- htroot/Network.java | 18 +++--- .../env/templates/submenuComputation.template | 2 +- source/net/yacy/cora/protocol/Domains.java | 62 +++++++++++++------ .../net/yacy/http/AbstractRemoteHandler.java | 6 +- source/net/yacy/peers/Protocol.java | 10 +-- 6 files changed, 59 insertions(+), 41 deletions(-) diff --git a/htroot/Network.html b/htroot/Network.html index cc745c8d2..8103772e4 100644 --- a/htroot/Network.html +++ b/htroot/Network.html @@ -132,7 +132,7 @@ document.getElementById("apilink").setAttribute("href", "Network.xml?" + window. #[hash]# #[shortname]##(ssl)#::https supported#(/ssl)# - #(type)##(direct)#Junior passive::Junior direct::Junior offline#(/direct)#::#(direct)#senior passive::Senior direct::Senior offline#(/direct)#::#(direct)#Principal passive::Principal active::Principal offline#(/direct)##(/type)##(acceptcrawl)#no crawl::crawl possible::crawl possible#(/acceptcrawl)##(dhtreceive)#no DHT receive::DHT receive enabled::DHT receive enabled#(/dhtreceive)##{ips}##{/ips}# + #(type)##(direct)#Junior passive::Junior direct::Junior offline#(/direct)#::#(direct)#senior passive::Senior direct::Senior offline#(/direct)#::#(direct)#Principal passive::Principal active::Principal offline#(/direct)##(/type)##(acceptcrawl)#no crawl::crawl possible::crawl possible#(/acceptcrawl)##(dhtreceive)#no DHT receive::DHT receive enabled::DHT receive enabled#(/dhtreceive)##{ips}##{/ips}# #[version]# #[age]# #[seeds]# diff --git a/htroot/Network.java b/htroot/Network.java index 0c4a87865..ab40fa801 100644 --- a/htroot/Network.java +++ b/htroot/Network.java @@ -206,26 +206,30 @@ public class Network { } final ConcurrentMap map = new ConcurrentHashMap(); - map.put(Seed.IP, post.get("peerIP")); - map.put(Seed.PORT, post.get("peerPort")); + String challengeIP = post.get("peerIP"); + String challengePort = post.get("peerPort"); + map.put(Seed.IP, challengeIP); + map.put(Seed.PORT, challengePort); Seed peer = post.get("peerHash") == null ? null : new Seed(post.get("peerHash"), map); - + String challengeAddress = peer.getPublicAddress(challengeIP); sb.updateMySeed(); - final Map response = Protocol.hello(sb.peers.mySeed(), sb.peers.peerActions, peer.getPublicAddress(post.get("peerIP")), peer.hash); + Seed mySeed = sb.peers.mySeed(); + final Map response = Protocol.hello(mySeed, sb.peers.peerActions, challengeAddress, peer.hash); if (response == null) { + prop.put("table_comment",1); - prop.putHTML("table_comment_status","publish: no response from peer '" + peer.getName() + "/" + post.get("peerHash") + "' from " + peer.getIPs()); + prop.put("table_comment_status", "publish: no response from peer '" + peer.getName() + "/" + post.get("peerHash") + "' from " + challengeAddress + ""); } else { String yourtype = response.get("yourtype"); String yourip = response.get("yourip"); peer = sb.peers.getConnected(peer.hash); if (peer == null) { prop.put("table_comment",1); - prop.putHTML("table_comment_status","publish: disconnected peer 'UNKNOWN/" + post.get("peerHash") + "' from UNKNOWN, yourtype = " + yourtype + ", yourip = " + yourip); + prop.put("table_comment_status","publish: disconnected peer 'UNKNOWN/" + post.get("peerHash") + "' from " + challengeAddress + ", yourtype = " + yourtype + ", yourip = " + yourip); } else { prop.put("table_comment",2); - prop.putHTML("table_comment_status","publish: handshaked " + peer.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR) + " peer '" + peer.getName() + "' at " + peer.getIPs() +", yourtype = " + yourtype + ", yourip = " + yourip); + prop.put("table_comment_status","publish: handshaked " + peer.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR) + " peer '" + peer.getName() + "' at " + challengeAddress + ", yourtype = " + yourtype + ", yourip = " + yourip); prop.putHTML("table_comment_details",peer.toString()); } } diff --git a/htroot/env/templates/submenuComputation.template b/htroot/env/templates/submenuComputation.template index 544271965..b6062bcf8 100644 --- a/htroot/env/templates/submenuComputation.template +++ b/htroot/env/templates/submenuComputation.template @@ -11,8 +11,8 @@