- date navigation
The date is taken from the CONTENT of the documents / web pages, NOT
from a date submitted in the context of metadata (i.e. http header or
html head form). This makes it possible to search for documents that
refer to dates in the future, i.e. documents that contain descriptions
of future events.
The date is written to an index field which is now enabled by default.
All documents are scanned for contained date mentions.
To visualize the dates for a specific search result, a histogram
showing the number of documents for each day is displayed. To render
these histograms the morris.js library is used. Morris.js also requires
raphael.js, which is now also integrated in YaCy.
The histogram is now also displayed in the index browser by default.
To select a specific range from a search result, the following modifiers
have been introduced:
from:<date>
to:<date>
These modifiers can be used separately (i.e. only 'from' or only 'to')
to describe an open interval or combined to have a closed interval. Both
dates are inclusive. To select a specific single date only, use the
'on:' modifier.
The histogram shows blue and green lines; the green lines denote weekend
days (Saturday and Sunday).
Clicking on bars in the histogram has the following reaction:
1st click: add a from:<date> modifier for the date of the bar
2nd click: add a to:<date> modifier for the date of the bar
3rd click: remove the from and to modifiers and set an on:<date> modifier for the bar
When the on:<date> modifier is used, the histogram shows an unlimited
time period. This makes it possible to click again (4th click) which is
then interpreted as a 1st click again (sets a from modifier).
The display feature is NOT switched on by default; to switch it on use
the /ConfigSearchPage_p.html servlet.
<input type="checkbox" name="search.navigation.date" value="true" #(search.navigation.date)#::checked="checked"#(/search.navigation.date)#/> Date Navigation
<link rel="stylesheet" href="/env/morris.css">
<script src="/js/raphael-min.js"></script>
<script src="/js/morris.js"></script>
<div id="graph" style="height:200px"></div>
<script>
var solr= $.getJSON("http://localhost:8090/solr/collection1/select?q=*:*&defType=edismax&start=0&rows=0&wt=json&facet=true&facet.field=dates_in_content_dts&facet.sort=index", function(data) {
var solr= $.getJSON("http://localhost:8090/solr/collection1/select?q=*:*&defType=edismax&start=0&rows=0&wt=json&facet=true&facet.field=dates_in_content_dts&facet.sort=index", function(data) {
//if (d == null) try {d = GenericFormatter.FORMAT_SHORT_DAY.parse(text);} catch (ParseException e) {} // did not work well and fired for wrong formats; do not use
params.setParam("f."+CollectionSchema.dates_in_content_dts.getSolrFieldName()+".facet.limit",Integer.toString(FACETS_DATE_MAXCOUNT));// the year constraint should cause that limitation already
}
//for (String k: params.getParameterNames()) {ArrayList<String> al = new ArrayList<>(); for (String s: params.getParams(k)) al.add(s); System.out.println("Parameter: " + k + "=" + al.toString());}
fq.append(" AND ").append(QueryModifier.parseFromToExpression(this.modifier.from,this.modifier.to));
}
}
if(this.modifier.protocol!=null){
if(this.modifier.protocol!=null){
fq.append(" AND {!tag=").append(CollectionSchema.url_protocol_s.getSolrFieldName()).append("}").append(CollectionSchema.url_protocol_s.getSolrFieldName()).append(':').append(this.modifier.protocol);
fq.append(" AND {!tag=").append(CollectionSchema.url_protocol_s.getSolrFieldName()).append("}").append(CollectionSchema.url_protocol_s.getSolrFieldName()).append(':').append(this.modifier.protocol);
@ -35,10 +35,8 @@ public enum CollectionSchema implements SchemaDeclaration {
sku(SolrType.string,true,true,false,true,true,"url of document"),// a 'sku' is a stock-keeping unit, a unique identifier and a default field in unmodified solr.
sku(SolrType.string,true,true,false,true,true,"url of document"),// a 'sku' is a stock-keeping unit, a unique identifier and a default field in unmodified solr.
//sku(SolrType.text_en_splitting_tight, true, true, false, true, true, "url of document"), // a 'sku' is a stock-keeping unit, a unique identifier and a default field in unmodified solr.
//sku(SolrType.text_en_splitting_tight, true, true, false, true, true, "url of document"), // a 'sku' is a stock-keeping unit, a unique identifier and a default field in unmodified solr.
last_modified(SolrType.date,true,true,false,false,false,"last-modified from http header"),
last_modified(SolrType.date,true,true,false,false,false,"last-modified from http header"),
dates_in_content_sxt(SolrType.string,true,true,true,false,true,"if date expressions can be found in the content, these dates are listed here in order of the appearances"),
dates_in_content_dts(SolrType.date,true,true,true,false,true,"if date expressions can be found in the content, these dates are listed here as date objects in order of the appearances"),
dates_in_content_count_i(SolrType.num_integer,true,true,false,false,false,"the number of entries in dates_in_content_sxt"),
dates_in_content_count_i(SolrType.num_integer,true,true,false,false,false,"the number of entries in dates_in_content_sxt"),
date_in_content_min_dt(SolrType.date,true,true,false,false,false,"if dates_in_content_sxt is filled, this contains the oldest date from the list of available dates"),
date_in_content_max_dt(SolrType.date,true,true,false,false,false,"if dates_in_content_sxt is filled, this contains the youngest date from the list of available dates, that may also be possibly in the future"),
content_type(SolrType.string,true,true,true,false,false,"mime-type of document"),
content_type(SolrType.string,true,true,true,false,false,"mime-type of document"),
http_unique_b(SolrType.bool,true,true,false,false,false,"unique-field which is true when an url appears the first time. If the same url which was http then appears as https (or vice versa) then the field is false"),
http_unique_b(SolrType.bool,true,true,false,false,false,"unique-field which is true when an url appears the first time. If the same url which was http then appears as https (or vice versa) then the field is false"),
www_unique_b(SolrType.bool,true,true,false,false,false,"unique-field which is true when an url appears the first time. If the same url within the subdomain www then appears without that subdomain (or vice versa) then the field is false"),
www_unique_b(SolrType.bool,true,true,false,false,false,"unique-field which is true when an url appears the first time. If the same url within the subdomain www then appears without that subdomain (or vice versa) then the field is false"),
@ -362,6 +360,12 @@ public enum CollectionSchema implements SchemaDeclaration {