diff --git a/defaults/solr.keys.list b/defaults/solr.keys.list index a58052e13..ae4e97ed2 100644 --- a/defaults/solr.keys.list +++ b/defaults/solr.keys.list @@ -298,11 +298,14 @@ italic_txt #iframesscount_i ## the protocol of the url -#url_protocol_s +url_protocol_s ## all path elements in the url url_paths_sxt +## the file name extension +url_file_ext_s + ## number of key-value pairs in search part of the url #url_parameter_i diff --git a/source/net/yacy/cora/services/federated/solr/JsonResponseWriter.java b/source/net/yacy/cora/services/federated/solr/JsonResponseWriter.java index 9bdbb9d09..d8df477fc 100644 --- a/source/net/yacy/cora/services/federated/solr/JsonResponseWriter.java +++ b/source/net/yacy/cora/services/federated/solr/JsonResponseWriter.java @@ -141,6 +141,10 @@ public class JsonResponseWriter implements QueryResponseWriter { path.append('/').append(value.stringValue()); continue; } + if (YaCySchema.url_file_ext_s.name().equals(fieldName)) { + solitaireTag(writer, "ext", value.stringValue()); + continue; + } if (YaCySchema.last_modified.name().equals(fieldName)) { Date d = new Date(Long.parseLong(value.stringValue())); solitaireTag(writer, "pubDate", HeaderFramework.formatRFC1123(d)); diff --git a/source/net/yacy/search/index/SolrConfiguration.java b/source/net/yacy/search/index/SolrConfiguration.java index 395abc630..2a31c4e17 100644 --- a/source/net/yacy/search/index/SolrConfiguration.java +++ b/source/net/yacy/search/index/SolrConfiguration.java @@ -286,9 +286,8 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable } // path elements of link - if (allAttr || contains(YaCySchema.url_paths_sxt)) { - add(doc, YaCySchema.url_paths_sxt, digestURI.getPaths()); - } + if (allAttr || contains(YaCySchema.url_paths_sxt)) add(doc, YaCySchema.url_paths_sxt, digestURI.getPaths()); + if (allAttr || contains(YaCySchema.url_file_ext_s)) add(doc, YaCySchema.url_file_ext_s, digestURI.getFileExtension()); if (allAttr || contains(YaCySchema.imagescount_i)) add(doc, YaCySchema.imagescount_i, md.limage()); if (allAttr || contains(YaCySchema.inboundlinkscount_i)) add(doc, YaCySchema.inboundlinkscount_i, md.llocal()); @@ -416,9 +415,8 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable } // path elements of link - if (allAttr || contains(YaCySchema.url_paths_sxt)) { - add(doc, YaCySchema.url_paths_sxt, digestURI.getPaths()); - } + if (allAttr || contains(YaCySchema.url_paths_sxt)) add(doc, YaCySchema.url_paths_sxt, digestURI.getPaths()); + if (allAttr || contains(YaCySchema.url_file_ext_s)) add(doc, YaCySchema.url_file_ext_s, digestURI.getFileExtension()); // get list of all links; they will be shrinked by urls that appear in other fields of the solr scheme Set inboundLinks = yacydoc.inboundLinks(); @@ -860,13 +858,16 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable add(solrdoc, YaCySchema.id, ASCII.String(digestURI.hash())); add(solrdoc, YaCySchema.sku, digestURI.toNormalform(true, false)); final InetAddress address = digestURI.getInetAddress(); - if (address != null) add(solrdoc, YaCySchema.ip_s, address.getHostAddress()); - if (digestURI.getHost() != null) add(solrdoc, YaCySchema.host_s, digestURI.getHost()); + if (contains(YaCySchema.ip_s) && address != null) add(solrdoc, YaCySchema.ip_s, address.getHostAddress()); + if (contains(YaCySchema.host_s) && digestURI.getHost() != null) add(solrdoc, YaCySchema.host_s, digestURI.getHost()); // path elements of link - add(solrdoc, YaCySchema.url_paths_sxt, digestURI.getPaths()); - add(solrdoc, YaCySchema.failreason_t, failReason); - add(solrdoc, YaCySchema.httpstatus_i, httpstatus); + if (contains(YaCySchema.url_paths_sxt)) add(solrdoc, YaCySchema.url_paths_sxt, digestURI.getPaths()); + if (contains(YaCySchema.url_file_ext_s)) add(solrdoc, YaCySchema.url_file_ext_s, digestURI.getFileExtension()); + + // fail reason and status + if (contains(YaCySchema.failreason_t)) add(solrdoc, YaCySchema.failreason_t, failReason); + if (contains(YaCySchema.httpstatus_i)) add(solrdoc, YaCySchema.httpstatus_i, httpstatus); return solrdoc; } diff --git a/source/net/yacy/search/index/YaCySchema.java b/source/net/yacy/search/index/YaCySchema.java index 8b76cebc6..7baf8994c 100644 --- a/source/net/yacy/search/index/YaCySchema.java +++ b/source/net/yacy/search/index/YaCySchema.java @@ -141,6 +141,7 @@ public enum YaCySchema implements Schema { url_protocol_s(SolrType.string, true, true, false, "the protocol of the url"), url_paths_sxt(SolrType.string, true, true, true, "all path elements in the url"), + url_file_ext_s(SolrType.string, true, true, false, "the file name extension"), url_parameter_i(SolrType.integer, true, true, false, "number of key-value pairs in search part of the url"), url_parameter_key_sxt(SolrType.string, true, true, true, "the keys from key-value pairs in the search part of the url"), url_parameter_value_sxt(SolrType.string, true, true, true, "the values from key-value pairs in the search part of the url"),