// Document url. A 'sku' is a stock-keeping unit: a unique identifier and a default field in unmodified solr.
sku(SolrType.string, true, true, false, true, true, "url of document"),
// Disabled alternative declaration of sku using a different field type:
//sku(SolrType.text_en_splitting_tight, true, true, false, true, true, "url of document"),
// First-seen flags for urls that differ only in protocol (http/https) or in the www subdomain.
http_unique_b(SolrType.bool, true, true, false, false, false, "unique-field which is true when an url appears the first time. If the same url which was http then appears as https (or vice versa) then the field is false"),
www_unique_b(SolrType.bool, true, true, false, false, false, "unique-field which is true when an url appears the first time. If the same url within the subdomain www then appears without that subdomain (or vice versa) then the field is false"),
// Title signature and the uniqueness flag derived from it.
title_exact_signature_l(SolrType.num_long, true, true, false, false, false, "the 64 bit hash of the org.apache.solr.update.processor.Lookup3Signature of title, used to compute title_unique_b"),
title_unique_b(SolrType.bool, true, true, false, false, false, "flag shows if title is unique within all indexable documents of the same host with status code 200; if yes and another document appears with same title, the unique-flag is set to false"),
// Host hash and hash of the raw source.
host_id_s(SolrType.string, true, true, false, false, false, "id of the host, a 6-byte hash that is part of the document id"), // String hosthash();
md5_s(SolrType.string, true, true, false, false, false, "the md5 of the raw source"), // String md5();
// Exact content signature with its uniqueness flag and copy counter.
exact_signature_l(SolrType.num_long, true, true, false, false, false, "the 64 bit hash of the org.apache.solr.update.processor.Lookup3Signature of text_t"),
exact_signature_unique_b(SolrType.bool, true, true, false, false, false, "flag shows if exact_signature_l is unique at the time of document creation, used for double-check during search"),
exact_signature_copycount_i(SolrType.num_integer, true, true, false, false, false, "counter for the number of documents which are not unique (== count of not-unique-flagged documents + 1)"),
// Fuzzy content signature: intermediate word-frequency data, uniqueness flag and copy counter.
fuzzy_signature_text_t(SolrType.text_general, true, true, false, false, true, "intermediate data produced in EnhancedTextProfileSignature: a list of word frequencies"),
fuzzy_signature_unique_b(SolrType.bool, true, true, false, false, false, "flag shows if fuzzy_signature_l is unique at the time of document creation, used for double-check during search"),
fuzzy_signature_copycount_i(SolrType.num_integer, true, true, false, false, false, "counter for the number of documents which are not unique (== count of not-unique-flagged documents + 1)"),
// Reference count, crawl depth and the harvest key used for postprocessing.
references_i(SolrType.num_integer, true, true, false, false, false, "number of unique http references, should be equal to references_internal_i + references_external_i"),
crawldepth_i(SolrType.num_integer, true, true, false, false, false, "crawl depth of web page according to the number of steps that the crawler did to get to this document; if the crawl was started at a root document, then this is equal to the clickdepth"),
harvestkey_s(SolrType.string, true, true, false, false, false, "key from a harvest process (i.e. the crawl profile hash key) which is needed for near-realtime postprocessing. This shall be deleted as soon as postprocessing has been terminated."),
// Description signature and the uniqueness flag derived from it.
description_exact_signature_l(SolrType.num_long, true, true, false, false, false, "the 64 bit hash of the org.apache.solr.update.processor.Lookup3Signature of description, used to compute description_unique_b"),
description_unique_b(SolrType.bool, true, true, false, false, false, "flag shows if description is unique within all indexable documents of the same host with status code 200; if yes and another document appears with same description, the unique-flag is set to false"),
// Outgoing link counters: all links, then links to the same domain ("inbound") and to other domains ("outbound").
// Each total includes its nofollow counterpart, as stated in the descriptions.
linkscount_i(SolrType.num_integer, true, true, false, false, false, "number of all outgoing links; including linksnofollowcount_i"),
linksnofollowcount_i(SolrType.num_integer, true, true, false, false, false, "number of all outgoing links with nofollow tag"),
inboundlinkscount_i(SolrType.num_integer, true, true, false, false, false, "number of outgoing inbound (to same domain) links; including inboundlinksnofollowcount_i"),
inboundlinksnofollowcount_i(SolrType.num_integer, true, true, false, false, false, "number of outgoing inbound (to same domain) links with nofollow tag"),
outboundlinkscount_i(SolrType.num_integer, true, true, false, false, false, "number of outgoing outbound (to other domain) links, including outboundlinksnofollowcount_i"),
outboundlinksnofollowcount_i(SolrType.num_integer, true, true, false, false, false, "number of outgoing outbound (to other domain) links with nofollow tag"),
// User-defined collection tags.
collection_sxt(SolrType.string, true, true, true, false, false, "tags that are attached to crawls/index generation to separate the search result into user-defined subsets"),
// Css entry count and values taken from meta tags / HTTP headers.
csscount_i(SolrType.num_integer, true, true, false, false, false, "number of entries in css_tag_txt and css_url_txt"),
robots_i(SolrType.num_integer, true, true, false, false, false, "content of <meta name=\"robots\" content=#content#> tag and the \"X-Robots-Tag\" HTTP property"),
metagenerator_t(SolrType.text_general, true, true, false, false, false, "content of <meta name=\"generator\" content=#content#> tag"),
// Protocol part of internal links.
inboundlinks_protocol_sxt(SolrType.string, true, true, true, false, false, "internal links, only the protocol"),
// Image attributes: alt texts, dimensions and pixel counts, and the number of images carrying an alt tag.
// no need to index images_alt_sxt; don't turn it into a txt field; use images_text_t instead
images_alt_sxt(SolrType.string, true, true, true, false, true, "all image link alt tag"),
images_height_val(SolrType.num_integer, true, true, true, false, false, "size of images:height"),
images_width_val(SolrType.num_integer, true, true, true, false, false, "size of images:width"),
images_pixel_val(SolrType.num_integer, true, true, true, false, false, "size of images as number of pixels (easier for a search restriction than width and height)"),
images_withalt_i(SolrType.num_integer, true, true, false, false, false, "number of image links with alt tag"),
// Headline pattern, canonical link and refresh link.
htags_i(SolrType.num_integer, true, true, false, false, false, "binary pattern for the existence of h1..h6 headlines"),
canonical_s(SolrType.string, true, true, false, false, false, "url inside the canonical link element"),
// The description refers to canonical_s, the canonical field declared directly above.
canonical_equal_sku_b(SolrType.bool, true, true, false, false, false, "flag shows if the url in canonical_s is equal to sku"),
refresh_s(SolrType.string, true, true, false, false, false, "link from the url property inside the refresh link element"),
// Text fragments collected from list items and emphasis tags, each paired with a counter field.
// <li>
li_txt(SolrType.text_general, true, true, true, false, true, "all texts in <li> tags"),
licount_i(SolrType.num_integer, true, true, false, false, false, "number of <li> tags"),
// <b> / <strong>
bold_txt(SolrType.text_general, true, true, true, false, true, "all texts inside of <b> or <strong> tags. no doubles. listed in the order of number of occurrences in decreasing order"),
boldcount_i(SolrType.num_integer, true, true, false, false, false, "total number of occurrences of <b> or <strong>"),
// <i>
italic_txt(SolrType.text_general, true, true, true, false, true, "all texts inside of <i> tags. no doubles. listed in the order of number of occurrences in decreasing order"),
italiccount_i(SolrType.num_integer, true, true, false, false, false, "total number of occurrences of <i>"),
// <u>
underline_txt(SolrType.text_general, true, true, true, false, true, "all texts inside of <u> tags. no doubles. listed in the order of number of occurrences in decreasing order"),
underlinecount_i(SolrType.num_integer, true, true, false, false, false, "total number of occurrences of <u>"),
// Flash marker plus frame and iframe links with their counters.
flash_b(SolrType.bool, true, true, false, false, false, "flag that shows if a swf file is linked"),
frames_sxt(SolrType.string, true, true, true, false, false, "list of all links to frames"),
// NOTE: the doubled 's' in the two counter names below is part of the public field name and must stay as it is.
framesscount_i(SolrType.num_integer, true, true, false, false, false, "number of frames_sxt"),
iframes_sxt(SolrType.string, true, true, true, false, false, "list of all links to iframes"),
iframesscount_i(SolrType.num_integer, true, true, false, false, false, "number of iframes_sxt"),
// hreflang link tags: url and country code.
hreflang_url_sxt(SolrType.string, true, true, true, false, false, "url of the hreflang link tag, see http://support.google.com/webmasters/bin/answer.py?hl=de&answer=189077"),
hreflang_cc_sxt(SolrType.string, true, true, true, false, false, "country code of the hreflang link tag, see http://support.google.com/webmasters/bin/answer.py?hl=de&answer=189077"),
// Page navigation links: url and rel property value.
navigation_url_sxt(SolrType.string, true, true, true, false, false, "page navigation url, see http://googlewebmastercentral.blogspot.de/2011/09/pagination-with-relnext-and-relprev.html"),
navigation_type_sxt(SolrType.string, true, true, true, false, false, "page navigation rel property value, can contain one of {top,up,next,prev,first,last}"),
// Publisher link.
publisher_url_s(SolrType.string, true, true, false, false, false, "publisher url as defined in http://support.google.com/plus/answer/1713826?hl=de"),
// Url decomposition: file name, tokens derived from the file name, and the path elements with their count.
url_file_name_s(SolrType.string, true, true, false, false, true, "the file name (which is the string after the last '/' and before the query part from '?' on) without the file extension"),
url_file_name_tokens_t(SolrType.text_general, true, true, false, false, true, "tokens generated from url_file_name_s which can be used for better matching and result boosting"),
url_paths_count_i(SolrType.num_integer, true, true, false, false, false, "number of all path elements in the url hpath (see: http://www.ietf.org/rfc/rfc1738.txt) without the file name"),
url_paths_sxt(SolrType.string, true, true, true, false, true, "all path elements in the url hpath (see: http://www.ietf.org/rfc/rfc1738.txt) without the file name"),
// Number of documents sharing the host.
host_extent_i(SolrType.num_integer, true, true, false, false, false, "number of documents from the same host; can be used to measure references_internal_i for likelihood computation"),
// Description statistics: how many descriptions the document has, and the size of each in characters and words.
description_count_i(SolrType.num_integer, true, true, false, false, false, "number of descriptions in the document. Its not counting the 'description' field since there is only one. But it counts the number of descriptions that appear in the document (if any)"),
description_chars_val(SolrType.num_integer, true, true, true, false, false, "number of characters for each description"),
description_words_val(SolrType.num_integer, true, true, true, false, false, "number of words in each description"),
// Per-host click-chance figures: document count, click chance, and a normalized chance with a maximum of 10.
cr_host_count_i(SolrType.num_integer, true, true, false, false, false, "the number of documents within a single host"),
cr_host_chance_d(SolrType.num_double, true, true, false, false, false, "the chance to click on this page when randomly clicking on links within one host"),
cr_host_norm_i(SolrType.num_integer, true, true, false, false, false, "normalization of chance: 0 for lower half of cr_host_count_i urls, 1 for 1/2 of the remaining and so on. the maximum number is 10"),
// Occurrence counters for the entries of bold_txt, italic_txt and underline_txt.
bold_val(SolrType.num_integer, true, true, true, false, false, "number of occurrences of texts in bold_txt"),
italic_val(SolrType.num_integer, true, true, true, false, false, "number of occurrences of texts in italic_txt"),
underline_val(SolrType.num_integer, true, true, true, false, false, "number of occurrences of texts in underline_txt"),
// Recognized external features. Each ext_*_txt field lists names and is followed by an ext_*_val field with counts.
// cms
ext_cms_txt(SolrType.text_general, true, true, true, false, false, "names of cms attributes; if several are recognized then they are listed in decreasing order of number of matching criteria"),
ext_cms_val(SolrType.num_integer, true, true, true, false, false, "number of attributes that count for a specific cms in ext_cms_txt"),
// ad servers / ad services
ext_ads_txt(SolrType.text_general, true, true, true, false, false, "names of ad-servers/ad-services"),
ext_ads_val(SolrType.num_integer, true, true, true, false, false, "number of attribute counts in ext_ads_txt"),
// community functions
ext_community_txt(SolrType.text_general, true, true, true, false, false, "names of recognized community functions"),
ext_community_val(SolrType.num_integer, true, true, true, false, false, "number of attribute counts in ext_community_txt"),
// map services
ext_maps_txt(SolrType.text_general, true, true, true, false, false, "names of map services"),
ext_maps_val(SolrType.num_integer, true, true, true, false, false, "number of attribute counts in ext_maps_txt"),
// trackers
ext_tracker_txt(SolrType.text_general, true, true, true, false, false, "names of tracker server"),
ext_tracker_val(SolrType.num_integer, true, true, true, false, false, "number of attribute counts in ext_tracker_txt"),
// title expressions
ext_title_txt(SolrType.text_general, true, true, true, false, false, "names matching title expressions"),
ext_title_val(SolrType.num_integer, true, true, true, false, false, "number of matching title expressions"),
// Names of all vocabularies with a matcher in the document. The trailing ';' ends the enum constant list.
vocabularies_sxt(SolrType.string, true, true, true, false, false, "collection of all vocabulary names that have a matcher in the document - use this to boost with vocabularies");
publicfinalstaticStringCORE_NAME="collection1";// this was the default core name up to Solr 4.4.0. This default name was stored in CoreContainer.DEFAULT_DEFAULT_CORE_NAME but was removed in Solr 4.5.0
publicfinalstaticStringVOCABULARY_PREFIX="vocabulary_";// collects all terms that appear for each vocabulary
publicfinalstaticStringVOCABULARY_TERMS_SUFFIX="_sxt";// suffix for the term collector that start with VOCABULARY_PREFIX - middle part is vocabulary name
publicfinalstaticStringVOCABULARY_COUNT_SUFFIX="_i";// suffix for the term counter (>=1) that start with VOCABULARY_PREFIX - middle part is vocabulary name
publicfinalstaticStringVOCABULARY_LOGCOUNT_SUFFIX="_log_i";// log2(VOCABULARY_COUNT)] -- can be used for ranking boosts based on the number of occurrences
publicfinalstaticStringVOCABULARY_LOGCOUNTS_SUFFIX="_log_val";// all integers from [0 to log2(VOCABULARY_COUNT)] -- can be used for ranking boosts based on the number of occurrences