From db024a4e1987066d451104c04e67e4cc461bc08a Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Mon, 21 Jan 2013 18:02:29 +0100 Subject: [PATCH] added new solr fields (unused yet; implementation will follow) --- defaults/solr.keys.list | 9 +++++++++ source/net/yacy/cora/federate/solr/YaCySchema.java | 5 ++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/defaults/solr.keys.list b/defaults/solr.keys.list index 32888e9c2..af2da6828 100644 --- a/defaults/solr.keys.list +++ b/defaults/solr.keys.list @@ -26,6 +26,9 @@ content_type ## content of title tag, text (mandatory field) title +## flag shows if title is unique in the whole index; if yes and another document appears with same title, the unique-flag is set to false, boolean +#title_unique_b + ## id of the host, a 6-byte hash that is part of the document id (mandatory field) host_id_s @@ -113,6 +116,9 @@ author ## content of description-tag, text description +## flag shows if description is unique in the whole index; if yes and another document appears with same description, the unique-flag is set to false, boolean +#description_unique_b + ## content of keywords tag; words are separated by space keywords @@ -282,6 +288,9 @@ outboundlinks_urlstub_txt ## url inside the canonical link element, string #canonical_t +## flag shows if the url in canonical_t is equal to sku, boolean +#canonical_equal_sku_b + ## link from the url property inside the refresh link element, string #refresh_s diff --git a/source/net/yacy/cora/federate/solr/YaCySchema.java b/source/net/yacy/cora/federate/solr/YaCySchema.java index 69f5b1e99..3a0df9b7e 100644 --- a/source/net/yacy/cora/federate/solr/YaCySchema.java +++ b/source/net/yacy/cora/federate/solr/YaCySchema.java @@ -30,10 +30,11 @@ public enum YaCySchema implements Schema { // mandatory id(SolrType.string, true, true, false, "primary key of document, the URL hash **mandatory field**"), - sku(SolrType.text_en_splitting_tight, true, true, false, true, "url of 
document"), + sku(SolrType.text_en_splitting_tight, true, true, false, true, "url of document"), // a 'sku' is a stock-keeping unit, a unique identifier and a default field in unmodified solr. last_modified(SolrType.date, true, true, false, "last-modified from http header"), content_type(SolrType.string, true, true, true, "mime-type of document"), title(SolrType.text_general, true, true, true, "content of title tag"), + title_unique_b(SolrType.bool, true, true, false, "flag shows if title is unique in the whole index; if yes and another document appears with same title, the unique-flag is set to false"), host_id_s(SolrType.string, true, true, false, "id of the host, a 6-byte hash that is part of the document id"),// String hosthash(); md5_s(SolrType.string, true, true, false, "the md5 of the raw source"),// String md5(); exact_signature_l(SolrType.num_long, true, true, false, "the 64 bit hash of the org.apache.solr.update.processor.Lookup3Signature of text_t"), @@ -66,6 +67,7 @@ public enum YaCySchema implements Schema { author(SolrType.text_general, true, true, false, "content of author-tag"), author_s(SolrType.string, true, true, false, "content of author-tag as copy-field from author. 
This is used for facet generation"), description(SolrType.text_general, true, true, false, "content of description-tag"), + description_unique_b(SolrType.bool, true, true, false, "flag shows if description is unique in the whole index; if yes and another document appears with same description, the unique-flag is set to false"), keywords(SolrType.text_general, true, true, false, "content of keywords tag; words are separated by space"), charset_s(SolrType.string, true, true, false, "character encoding"), wordcount_i(SolrType.num_integer, true, true, false, "number of words in visible area"), @@ -130,6 +132,7 @@ public enum YaCySchema implements Schema { images_withalt_i(SolrType.num_integer, true, true, false, "number of image links with alt tag"), htags_i(SolrType.num_integer, true, true, false, "binary pattern for the existance of h1..h6 headlines"), canonical_t(SolrType.text_general, true, true, false, "url inside the canonical link element"), + canonical_equal_sku_b(SolrType.bool, true, true, false, "flag shows if the url in canonical_t is equal to sku"), refresh_s(SolrType.string, true, true, false, "link from the url property inside the refresh link element"), li_txt(SolrType.text_general, true, true, true, "all texts in
<li> tags"), licount_i(SolrType.num_integer, true, true, false, "number of
<li> tags"),