From b1cfbc4a04799aae8061ffe8909076936c4d1667 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Mon, 13 Oct 2014 23:51:19 +0200 Subject: [PATCH] added new solr field url_paths_count_i which can be used to enhance the index browser and maybe also for ranking; possibly also for SEO-with-YaCy applications. --- defaults/solr.collection.schema | 3 +++ source/net/yacy/search/schema/CollectionConfiguration.java | 6 +++++- source/net/yacy/search/schema/CollectionSchema.java | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/defaults/solr.collection.schema b/defaults/solr.collection.schema index 5439ddee2..a78f61542 100644 --- a/defaults/solr.collection.schema +++ b/defaults/solr.collection.schema @@ -377,6 +377,9 @@ url_file_name_s ## the file name extension url_file_ext_s +## number of all path elements in the url hpath (see: http://www.ietf.org/rfc/rfc1738.txt) without the file name +url_paths_count_i + ## all path elements in the url hpath (see: http://www.ietf.org/rfc/rfc1738.txt) without the file name url_paths_sxt diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java index f7f260f2a..7205229f6 100644 --- a/source/net/yacy/search/schema/CollectionConfiguration.java +++ b/source/net/yacy/search/schema/CollectionConfiguration.java @@ -234,7 +234,11 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri if (allAttr || contains(CollectionSchema.url_chars_i)) add(doc, CollectionSchema.url_chars_i, us.length()); if (allAttr || contains(CollectionSchema.url_protocol_s)) add(doc, CollectionSchema.url_protocol_s, digestURL.getProtocol()); - if (allAttr || contains(CollectionSchema.url_paths_sxt)) add(doc, CollectionSchema.url_paths_sxt, digestURL.getPaths()); + if (allAttr || contains(CollectionSchema.url_paths_sxt) || contains(CollectionSchema.url_paths_count_i)) { + String[] paths = digestURL.getPaths(); + if (allAttr || contains(CollectionSchema.url_paths_count_i)) add(doc, CollectionSchema.url_paths_count_i, paths.length); + if (allAttr || contains(CollectionSchema.url_paths_sxt)) add(doc, CollectionSchema.url_paths_sxt, paths); + } if (allAttr || contains(CollectionSchema.url_file_name_s)) add(doc, CollectionSchema.url_file_name_s, filenameStub); if (allAttr || contains(CollectionSchema.url_file_name_tokens_t)) add(doc, CollectionSchema.url_file_name_tokens_t, MultiProtocolURL.toTokens(filenameStub)); if (allAttr || contains(CollectionSchema.url_file_ext_s)) add(doc, CollectionSchema.url_file_ext_s, extension); diff --git a/source/net/yacy/search/schema/CollectionSchema.java b/source/net/yacy/search/schema/CollectionSchema.java index 8ce136d27..baf4331de 100644 --- a/source/net/yacy/search/schema/CollectionSchema.java +++ b/source/net/yacy/search/schema/CollectionSchema.java @@ -171,6 +171,7 @@ public enum CollectionSchema implements SchemaDeclaration { url_file_name_s(SolrType.string, true, true, false, false, true, "the file name (which is the string after the last '/' and before the query part from '?' on) without the file extension"), url_file_name_tokens_t(SolrType.text_general, true, true, false, false, true, "tokens generated from url_file_name_s which can be used for better matching and result boosting"), url_file_ext_s(SolrType.string, true, true, false, false, true, "the file name extension"), + url_paths_count_i(SolrType.num_integer, true, true, false, false, false, "number of all path elements in the url hpath (see: http://www.ietf.org/rfc/rfc1738.txt) without the file name"), url_paths_sxt(SolrType.string, true, true, true, false, true, "all path elements in the url hpath (see: http://www.ietf.org/rfc/rfc1738.txt) without the file name"), url_parameter_i(SolrType.num_integer, true, true, false, false, false, "number of key-value pairs in search part of the url"), url_parameter_key_sxt(SolrType.string, true, true, true, false, false, "the keys from key-value pairs in the search part of the url"),