From ee23fc7a322200559486a939c0c6788f3942353f Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Tue, 4 Sep 2012 14:11:11 +0200 Subject: [PATCH] added h1..h6 counter fields --- defaults/solr.keys.list | 8 ++++++++ source/net/yacy/search/index/SolrConfiguration.java | 12 ++++++------ source/net/yacy/search/index/YaCySchema.java | 7 +++++++ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/defaults/solr.keys.list b/defaults/solr.keys.list index 63ce7be8c..7ede1c94a 100644 --- a/defaults/solr.keys.list +++ b/defaults/solr.keys.list @@ -327,6 +327,14 @@ italic_txt ## number of words in each description #description_words_val +## number of h1..h6 header lines +#h1_i +#h2_i +#h3_i +#h4_i +#h5_i +#h6_i + ## names of cms attributes; if several are recognized then they are listen in decreasing order of number of matching criterias #ext_cms_txt diff --git a/source/net/yacy/search/index/SolrConfiguration.java b/source/net/yacy/search/index/SolrConfiguration.java index c04f2b9ed..0799f7212 100644 --- a/source/net/yacy/search/index/SolrConfiguration.java +++ b/source/net/yacy/search/index/SolrConfiguration.java @@ -431,12 +431,12 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable int f = 1; String[] hs; - hs = html.getHeadlines(1); h = h | (hs.length > 0 ? f : 0); f = f * 2; add(doc, YaCySchema.h1_txt, hs); - hs = html.getHeadlines(2); h = h | (hs.length > 0 ? f : 0); f = f * 2; add(doc, YaCySchema.h2_txt, hs); - hs = html.getHeadlines(3); h = h | (hs.length > 0 ? f : 0); f = f * 2; add(doc, YaCySchema.h3_txt, hs); - hs = html.getHeadlines(4); h = h | (hs.length > 0 ? f : 0); f = f * 2; add(doc, YaCySchema.h4_txt, hs); - hs = html.getHeadlines(5); h = h | (hs.length > 0 ? f : 0); f = f * 2; add(doc, YaCySchema.h5_txt, hs); - hs = html.getHeadlines(6); h = h | (hs.length > 0 ? f : 0); f = f * 2; add(doc, YaCySchema.h6_txt, hs); + hs = html.getHeadlines(1); h = h | (hs.length > 0 ? f : 0); f = f * 2; add(doc, YaCySchema.h1_txt, hs); add(doc, YaCySchema.h1_i, hs.length); + hs = html.getHeadlines(2); h = h | (hs.length > 0 ? f : 0); f = f * 2; add(doc, YaCySchema.h2_txt, hs); add(doc, YaCySchema.h2_i, hs.length); + hs = html.getHeadlines(3); h = h | (hs.length > 0 ? f : 0); f = f * 2; add(doc, YaCySchema.h3_txt, hs); add(doc, YaCySchema.h3_i, hs.length); + hs = html.getHeadlines(4); h = h | (hs.length > 0 ? f : 0); f = f * 2; add(doc, YaCySchema.h4_txt, hs); add(doc, YaCySchema.h4_i, hs.length); + hs = html.getHeadlines(5); h = h | (hs.length > 0 ? f : 0); f = f * 2; add(doc, YaCySchema.h5_txt, hs); add(doc, YaCySchema.h5_i, hs.length); + hs = html.getHeadlines(6); h = h | (hs.length > 0 ? f : 0); f = f * 2; add(doc, YaCySchema.h6_txt, hs); add(doc, YaCySchema.h6_i, hs.length); add(doc, YaCySchema.htags_i, h); diff --git a/source/net/yacy/search/index/YaCySchema.java b/source/net/yacy/search/index/YaCySchema.java index c276fddfb..cfa44e605 100644 --- a/source/net/yacy/search/index/YaCySchema.java +++ b/source/net/yacy/search/index/YaCySchema.java @@ -153,6 +153,13 @@ public enum YaCySchema implements Schema { description_chars_val(SolrType.integer, true, true, true, "number of characters for each description"), description_words_val(SolrType.integer, true, true, true, "number of words in each description"), + h1_i(SolrType.integer, true, true, false, "number of h1 header lines"), + h2_i(SolrType.integer, true, true, false, "number of h2 header lines"), + h3_i(SolrType.integer, true, true, false, "number of h3 header lines"), + h4_i(SolrType.integer, true, true, false, "number of h4 header lines"), + h5_i(SolrType.integer, true, true, false, "number of h5 header lines"), + h6_i(SolrType.integer, true, true, false, "number of h6 header lines"), + // special values; can only be used if '_val' type is defined in schema file; this is not standard bold_val(SolrType.integer, true, true, true, "number of occurrences of texts in bold_txt"), italic_val(SolrType.integer, true, true, true, "number of occurrences of texts in italic_txt"),