From 66d0b5046a02e4361f9380c0b633ecf59035e4ab Mon Sep 17 00:00:00 2001 From: reger Date: Thu, 26 Mar 2015 00:21:31 +0100 Subject: [PATCH 1/7] fix NPE on viewfile of url not in index --- htroot/ViewFile.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java index 00ae68aaf..f6fceefc5 100644 --- a/htroot/ViewFile.java +++ b/htroot/ViewFile.java @@ -376,8 +376,8 @@ public class ViewFile { prop.put("showSnippet_teasertext", desc); prop.put("showSnippet", 1); } - // update index with parsed resouce if index entry is older - if (urlEntry.loaddate().before(response.lastModified())) { + // update index with parsed resouce if index entry is older or missing + if (urlEntry == null || urlEntry.loaddate().before(response.lastModified())) { Switchboard.getSwitchboard().toIndexer(response); } if (document != null) document.close(); From 839b962c2092c65454bacbdadd0b2b3db2a83d1f Mon Sep 17 00:00:00 2001 From: reger Date: Sat, 28 Mar 2015 03:05:21 +0100 Subject: [PATCH 2/7] correct percent encoding for '%' char --- source/net/yacy/cora/document/id/MultiProtocolURL.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/source/net/yacy/cora/document/id/MultiProtocolURL.java b/source/net/yacy/cora/document/id/MultiProtocolURL.java index 782ce93ce..dd9a252f2 100644 --- a/source/net/yacy/cora/document/id/MultiProtocolURL.java +++ b/source/net/yacy/cora/document/id/MultiProtocolURL.java @@ -599,11 +599,12 @@ public class MultiProtocolURL implements Serializable, Comparable= '0' && s.charAt(i + 1) <= '9' && s.charAt(i + 2) >= '0' && s.charAt(i + 2) <= '9') { + // TODO: actually 0..9 A..F a..f is allowed (or any of hex[] sequence) sbuf.append((char)ch); // lets consider this is used for encoding, leave it that way } else { - sbuf.append("%23"); // RFC 1738 2.2 unsafe char shall be encoded + sbuf.append("%25"); // '%' RFC 1738 2.2 unsafe char shall be encoded } } else if (ch == '&') { if (i < len - 6 && 
"amp;".equals(s.substring(i + 1, i + 5).toLowerCase())) { From 96292cf3eb6473c6b73df410b9ca3ce81ef579ca Mon Sep 17 00:00:00 2001 From: reger Date: Sat, 28 Mar 2015 21:12:00 +0100 Subject: [PATCH 3/7] shorten exception loggin on not available connection in Load_RSS_p servlet --- htroot/Load_RSS_p.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/htroot/Load_RSS_p.java b/htroot/Load_RSS_p.java index 5f04ca734..3cb2a06fc 100644 --- a/htroot/Load_RSS_p.java +++ b/htroot/Load_RSS_p.java @@ -262,7 +262,7 @@ public class Load_RSS_p { try { url = post.containsKey("url") ? new DigestURL(post.get("url", "")) : null; } catch (final MalformedURLException e) { - ConcurrentLog.warn("Load_RSS_p", "url not well-formed: '" + post.get("url", "") + "'"); + ConcurrentLog.warn("Load_RSS", "url not well-formed: '" + post.get("url", "") + "'"); } ClientIdentification.Agent agent = post == null ? ClientIdentification.yacyInternetCrawlerAgent : ClientIdentification.getAgent(post.get("agentName", ClientIdentification.yacyInternetCrawlerAgentName)); @@ -275,7 +275,8 @@ public class Load_RSS_p { final byte[] resource = response == null ? null : response.getContent(); rss = resource == null ? null : RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, resource); } catch (final IOException e) { - ConcurrentLog.logException(e); + ConcurrentLog.warn("Load_RSS", e.getMessage()); + return prop; // if no response nothing to process further } // index all selected items: description only From 2f84b04fa96c70bcc253269a64785fcd641202dc Mon Sep 17 00:00:00 2001 From: reger Date: Sun, 29 Mar 2015 05:48:54 +0200 Subject: [PATCH 4/7] add err msg on failure during Load_rss --- htroot/Load_RSS_p.html | 4 ++-- htroot/Load_RSS_p.java | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/htroot/Load_RSS_p.html b/htroot/Load_RSS_p.html index db78b19bf..79249373b 100644 --- a/htroot/Load_RSS_p.html +++ b/htroot/Load_RSS_p.html @@ -59,11 +59,11 @@ #(/showload)# - + #(showerrmsg)#::
#[msgtxt]#
#(/showerrmsg)# - + #(showscheduledfeeds)#::
List of Scheduled RSS Feed Load Targets diff --git a/htroot/Load_RSS_p.java b/htroot/Load_RSS_p.java index 3cb2a06fc..18a209da5 100644 --- a/htroot/Load_RSS_p.java +++ b/htroot/Load_RSS_p.java @@ -73,7 +73,8 @@ public class Load_RSS_p { prop.put("shownewfeeds", 0); prop.put("showscheduledfeeds", 0); prop.put("url", ""); - + prop.put("showerrmsg", 0); + if (post != null && post.containsKey("removeSelectedFeedsNewList")) { for (final Map.Entry entry: post.entrySet()) { if (entry.getValue().startsWith("mark_")) try { @@ -276,6 +277,8 @@ public class Load_RSS_p { rss = resource == null ? null : RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, resource); } catch (final IOException e) { ConcurrentLog.warn("Load_RSS", e.getMessage()); + prop.put("showerrmsg", 1); + prop.put("showerrmsg_msgtxt", "no valid response from given url"); return prop; // if no response nothing to process further } From c1dcc8c4566c36c7eebc4e14e8d55c832400f53b Mon Sep 17 00:00:00 2001 From: reger Date: Sun, 29 Mar 2015 07:12:23 +0200 Subject: [PATCH 5/7] fix display and limit of max server connections after startup (on restart value returned to default=50) This has no effect on Jetty but the limit is still respected. 
--- source/net/yacy/yacy.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/net/yacy/yacy.java b/source/net/yacy/yacy.java index 5370f0499..b823366c3 100644 --- a/source/net/yacy/yacy.java +++ b/source/net/yacy/yacy.java @@ -63,6 +63,7 @@ import com.google.common.io.Files; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.federate.yacy.CacheStrategy; import net.yacy.cora.order.Digest; +import net.yacy.cora.protocol.ConnectionInfo; import net.yacy.crawler.retrieval.Response; import net.yacy.server.serverSwitch; @@ -299,6 +300,9 @@ public final class yacy { httpServer = new Jetty9HttpServerImpl(port); httpServer.startupServer(); sb.setHttpServer(httpServer); + // TODO: this has no effect on Jetty (but needed to reflect configured value and limit is still used) + ConnectionInfo.setServerMaxcount(sb.getConfigInt("connectionsMax", ConnectionInfo.getMaxcount())); + ConcurrentLog.info("STARTUP",httpServer.getVersion()); // open the browser window From b1ec0644e5bd4bc8d5438c5fb1dab3adb53a95e2 Mon Sep 17 00:00:00 2001 From: reger Date: Tue, 31 Mar 2015 02:20:13 +0200 Subject: [PATCH 6/7] fix NPE in location search on missing/empty PubDate in underlying rss data --- htroot/yacysearch_location.java | 2 +- .../yacy/cora/document/feed/RSSMessage.java | 22 ++++++++++++------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/htroot/yacysearch_location.java b/htroot/yacysearch_location.java index 0f1f7212b..d1250dc9e 100644 --- a/htroot/yacysearch_location.java +++ b/htroot/yacysearch_location.java @@ -113,7 +113,7 @@ public class yacysearch_location { prop.put("kml_placemark_" + placemarkCounter + "_copyright", message.getCopyright()); prop.put("kml_placemark_" + placemarkCounter + "_subject", message.getSubject()); prop.put("kml_placemark_" + placemarkCounter + "_description", message.getDescriptions().size() > 0 ? 
message.getDescriptions().get(0) : ""); - prop.put("kml_placemark_" + placemarkCounter + "_date", message.getPubDate()); + prop.put("kml_placemark_" + placemarkCounter + "_date", (message.getPubDate() != null) ? message.getPubDate() : new Date()); prop.putXML("kml_placemark_" + placemarkCounter + "_url", message.getLink()); prop.put("kml_placemark_" + placemarkCounter + "_pointname", message.getTitle()); prop.put("kml_placemark_" + placemarkCounter + "_lon", lo); diff --git a/source/net/yacy/cora/document/feed/RSSMessage.java b/source/net/yacy/cora/document/feed/RSSMessage.java index fac200167..aea58547e 100644 --- a/source/net/yacy/cora/document/feed/RSSMessage.java +++ b/source/net/yacy/cora/document/feed/RSSMessage.java @@ -212,20 +212,26 @@ public class RSSMessage implements Hit, Comparable, Comparator Date: Wed, 1 Apr 2015 01:57:56 +0200 Subject: [PATCH 7/7] make location facet return results for location nav facet of field coordinate_p does not return results, now using coordinate_p_0_coordinate as alternative to get facet counts. As the actual facet value is not used this should not harm any analysis (even if facet is a incomplete location). If facet value is used in future likely *_geohash field could be introduced (for facet and other ... 
as transport value) --- source/net/yacy/search/query/QueryParams.java | 6 ++++-- source/net/yacy/search/query/SearchEvent.java | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java index 719376a72..2f50b1307 100644 --- a/source/net/yacy/search/query/QueryParams.java +++ b/source/net/yacy/search/query/QueryParams.java @@ -93,7 +93,7 @@ public final class QueryParams { private static final Map defaultfacetfields = new HashMap(); static { // the key shall match with configuration property search.navigation - defaultfacetfields.put("location", CollectionSchema.coordinate_p); + defaultfacetfields.put("location", CollectionSchema.coordinate_p_0_coordinate); // coordinate_p can't be used for facet (subfields), as value isn't used subfield can be used defaultfacetfields.put("hosts", CollectionSchema.host_s); defaultfacetfields.put("protocol", CollectionSchema.url_protocol_s); defaultfacetfields.put("filetype", CollectionSchema.url_file_ext_s); @@ -249,7 +249,9 @@ public final class QueryParams { for (String navkey: search_navigation) { CollectionSchema f = defaultfacetfields.get(navkey); // handle special field, authors_sxt (add to facet w/o contains check, as authors_sxt is not enabled (is copyfield)) - if (f != null && (solrSchema.contains(f) || f.name().equals("author_sxt"))) this.facetfields.add(f.getSolrFieldName()); + // dto. 
for coordinate_p_0_coordinate is not enabled but used for location facet (because coordinate_p not valid for facet field) + if (f != null && (solrSchema.contains(f) || f.name().equals("author_sxt") || f.name().equals("coordinate_p_0_coordinate") )) + this.facetfields.add(f.getSolrFieldName()); } if (LibraryProvider.autotagging != null) for (Tagging v: LibraryProvider.autotagging.getVocabularies()) { if (v.isFacet()) { diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index b560da446..0232c252a 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -795,7 +795,7 @@ public final class SearchEvent { // collect navigation information ReversibleScoreMap fcts; if (this.locationNavigator != null) { - fcts = facets.get(CollectionSchema.coordinate_p.getSolrFieldName()); + fcts = facets.get(CollectionSchema.coordinate_p_0_coordinate.getSolrFieldName()); if (fcts != null) { for (String coordinate: fcts) { int hc = fcts.get(coordinate);