// ---- core document fields ----
// NOTE(review): the five boolean arguments are flags consumed by the enum constructor,
// which is not visible in this part of the file; confirm their order before changing any.
id(SolrType.string, true, true, false, false, false, "primary key of document, the URL hash **mandatory field**"),
// a 'sku' is a stock-keeping unit, a unique identifier and a default field in unmodified solr.
sku(SolrType.text_en_splitting_tight, true, true, false, true, true, "url of document"),
last_modified(SolrType.date, true, true, false, false, false, "last-modified from http header"),
content_type(SolrType.string, true, true, true, false, false, "mime-type of document"),
title(SolrType.text_general, true, true, true, false, true, "content of title tag"),
// ---- title uniqueness, host/md5 identity and content signatures ----
title_exact_signature_l(SolrType.num_long, true, true, false, false, false, "the 64 bit hash of the org.apache.solr.update.processor.Lookup3Signature of title, used to compute title_unique_b"),
title_unique_b(SolrType.bool, true, true, false, false, false, "flag shows if title is unique in the whole index; if yes and another document appears with same title, the unique-flag is set to false"),
// String hosthash();
host_id_s(SolrType.string, true, true, false, false, false, "id of the host, a 6-byte hash that is part of the document id"),
// String md5();
md5_s(SolrType.string, true, true, false, false, false, "the md5 of the raw source"),
exact_signature_l(SolrType.num_long, true, true, false, false, false, "the 64 bit hash of the org.apache.solr.update.processor.Lookup3Signature of text_t"),
exact_signature_unique_b(SolrType.bool, true, true, false, false, false, "flag shows if exact_signature_l is unique at the time of document creation, used for double-check during search"),
fuzzy_signature_l(SolrType.num_long, true, true, false, false, false, "64 bit of the Lookup3Signature from EnhancedTextProfileSignature of text_t"),
fuzzy_signature_text_t(SolrType.text_general, true, true, false, false, false, "intermediate data produced in EnhancedTextProfileSignature: a list of word frequencies"),
fuzzy_signature_unique_b(SolrType.bool, true, true, false, false, false, "flag shows if fuzzy_signature_l is unique at the time of document creation, used for double-check during search"),
// ---- size, load failure and http status ----
// int size();
size_i(SolrType.num_integer, true, true, false, false, false, "the size of the raw source"),
failreason_t(SolrType.text_general, true, true, false, false, false, "fail reason if a page was not loaded. if the page was loaded then this field is empty"),
failtype_s(SolrType.string, true, true, false, false, false, "fail type if a page was not loaded. This field is either empty, 'excl' or 'fail'"),
httpstatus_i(SolrType.num_integer, true, true, false, false, false, "html status return code (i.e. \"200\" for ok), -1 if not loaded"),
// Fixed: this field carries the '_s' suffix, which every other constant in this enum pairs
// with SolrType.string; it was declared as num_integer with a description copied verbatim
// from httpstatus_i.
// NOTE(review): the new description assumes the field stores the redirect target url;
// confirm against the code that fills this field.
httpstatus_redirect_s(SolrType.string, true, true, false, false, false, "redirect url if the error code is 299 < httpstatus_i < 310"),
// ---- reference counters, click depth, processing state, description/keywords, collections ----
references_i(SolrType.num_integer, true, true, false, false, false, "number of unique http references, should be equal to references_internal_i + references_external_i"),
references_internal_i(SolrType.num_integer, true, true, false, false, false, "number of unique http references from same host as referenced url"),
references_external_i(SolrType.num_integer, true, true, false, false, false, "number of unique http references from external hosts"),
references_exthosts_i(SolrType.num_integer, true, true, false, false, false, "number of external hosts which provide http references"),
clickdepth_i(SolrType.num_integer, true, true, false, false, false, "depth of web page according to number of clicks from the 'main' page, which is the page that appears if only the host is entered as url"),
process_sxt(SolrType.string, true, true, true, false, false, "needed (post-)processing steps on this metadata set"),
description_exact_signature_l(SolrType.num_long, true, true, false, false, false, "the 64 bit hash of the org.apache.solr.update.processor.Lookup3Signature of description, used to compute description_unique_b"),
description_unique_b(SolrType.bool, true, true, false, false, false, "flag shows if description is unique in the whole index; if yes and another document appears with same description, the unique-flag is set to false"),
keywords(SolrType.text_general, true, true, false, false, true, "content of keywords tag; words are separated by space"),
collection_sxt(SolrType.string, true, true, true, false, false, "tags that are attached to crawls/index generation to separate the search result into user-defined subsets"),
// ---- css, scripts, robots directives and generator meta tag ----
csscount_i(SolrType.num_integer, true, true, false, false, false, "number of entries in css_tag_txt and css_url_txt"),
css_tag_txt(SolrType.text_general, true, true, true, false, false, "full css tag with normalized url"),
css_url_txt(SolrType.text_general, true, true, true, false, false, "normalized urls within a css tag"),
scripts_txt(SolrType.text_general, true, true, true, false, false, "normalized urls within a scripts tag"),
scriptscount_i(SolrType.num_integer, true, true, false, false, false, "number of entries in scripts_txt"),
robots_i(SolrType.num_integer, true, true, false, false, false, "content of <meta name=\"robots\" content=#content#> tag and the \"X-Robots-Tag\" HTTP property"),
metagenerator_t(SolrType.text_general, true, true, false, false, false, "content of <meta name=\"generator\" content=#content#> tag"),
// ---- internal/external links and images ----
// Links and images are stored split: a '*_protocol_sxt' field with the protocol and a
// '*_urlstub_txt' field with the remainder of the url.
inboundlinks_protocol_sxt(SolrType.string, true, true, true, false, false, "internal links, only the protocol"),
inboundlinks_urlstub_txt(SolrType.text_general, true, true, true, false, false, "internal links, the url only without the protocol"),
outboundlinks_protocol_sxt(SolrType.string, true, true, true, false, false, "external links, only the protocol"),
outboundlinks_urlstub_txt(SolrType.text_general, true, true, true, false, false, "external links, the url only without the protocol"),
images_tag_txt(SolrType.text_general, true, true, true, false, true, " all image tags, encoded as <img> tag inclusive alt- and title property"),
images_urlstub_txt(SolrType.text_general, true, true, true, false, true, "all image links without the protocol and '://'"),
// Fixed: was SolrType.text_general. Every other '_sxt' constant in this enum, including the
// sibling inboundlinks_protocol_sxt / outboundlinks_protocol_sxt fields, uses SolrType.string.
images_protocol_sxt(SolrType.string, true, true, true, false, false, "all image link protocols"),
images_alt_txt(SolrType.text_general, true, true, true, false, true, "all image link alt tag"),
images_withalt_i(SolrType.num_integer, true, true, false, false, false, "number of image links with alt tag"),
// ---- headline pattern, canonical and refresh links ----
htags_i(SolrType.num_integer, true, true, false, false, false, "binary pattern for the existance of h1..h6 headlines"),
canonical_s(SolrType.string, true, true, false, false, false, "url inside the canonical link element"),
// Fixed: the description referred to a non-existent 'canonical_t'; the field is canonical_s.
canonical_equal_sku_b(SolrType.bool, true, true, false, false, false, "flag shows if the url in canonical_s is equal to sku"),
refresh_s(SolrType.string, true, true, false, false, false, "link from the url property inside the refresh link element"),
// ---- list items and text emphasis (texts plus total occurrence counters) ----
li_txt(SolrType.text_general, true, true, true, false, true, "all texts in <li> tags"),
licount_i(SolrType.num_integer, true, true, false, false, false, "number of <li> tags"),
bold_txt(SolrType.text_general, true, true, true, false, true, "all texts inside of <b> or <strong> tags. no doubles. listed in the order of number of occurrences in decreasing order"),
boldcount_i(SolrType.num_integer, true, true, false, false, false, "total number of occurrences of <b> or <strong>"),
italic_txt(SolrType.text_general, true, true, true, false, true, "all texts inside of <i> tags. no doubles. listed in the order of number of occurrences in decreasing order"),
italiccount_i(SolrType.num_integer, true, true, false, false, false, "total number of occurrences of <i>"),
underline_txt(SolrType.text_general, true, true, true, false, true, "all texts inside of <u> tags. no doubles. listed in the order of number of occurrences in decreasing order"),
underlinecount_i(SolrType.num_integer, true, true, false, false, false, "total number of occurrences of <u>"),
// ---- flash, frames and iframes ----
flash_b(SolrType.bool, true, true, false, false, false, "flag that shows if a swf file is linked"),
frames_sxt(SolrType.string, true, true, true, false, false, "list of all links to frames"),
// Fixed: the description referred to a non-existent 'frames_txt'; the field is frames_sxt.
// The constant name (with its double 's') is kept because other code may reference it.
framesscount_i(SolrType.num_integer, true, true, false, false, false, "number of frames_sxt"),
iframes_sxt(SolrType.string, true, true, true, false, false, "list of all links to iframes"),
// Fixed: the description referred to a non-existent 'iframes_txt'; the field is iframes_sxt.
iframesscount_i(SolrType.num_integer, true, true, false, false, false, "number of iframes_sxt"),
// ---- hreflang, page navigation, publisher and host extent ----
hreflang_url_sxt(SolrType.string, true, true, true, false, false, "url of the hreflang link tag, see http://support.google.com/webmasters/bin/answer.py?hl=de&answer=189077"),
hreflang_cc_sxt(SolrType.string, true, true, true, false, false, "country code of the hreflang link tag, see http://support.google.com/webmasters/bin/answer.py?hl=de&answer=189077"),
navigation_url_sxt(SolrType.string, true, true, true, false, false, "page navigation url, see http://googlewebmastercentral.blogspot.de/2011/09/pagination-with-relnext-and-relprev.html"),
navigation_type_sxt(SolrType.string, true, true, true, false, false, "page navigation rel property value, can contain one of {top,up,next,prev,first,last}"),
publisher_url_s(SolrType.string, true, true, false, false, false, "publisher url as defined in http://support.google.com/plus/answer/1713826?hl=de"),
host_extent_i(SolrType.num_integer, true, true, false, false, false, "number of documents from the same host; can be used to measure references_internal_i for likelihood computation"),
// ---- description statistics and per-entry occurrence counters for the emphasis fields ----
description_count_i(SolrType.num_integer, true, true, false, false, false, "number of descriptions in the document. Its not counting the 'description' field since there is only one. But it counts the number of descriptions that appear in the document (if any)"),
description_chars_val(SolrType.num_integer, true, true, true, false, false, "number of characters for each description"),
description_words_val(SolrType.num_integer, true, true, true, false, false, "number of words in each description"),
bold_val(SolrType.num_integer, true, true, true, false, false, "number of occurrences of texts in bold_txt"),
italic_val(SolrType.num_integer, true, true, true, false, false, "number of occurrences of texts in italic_txt"),
underline_val(SolrType.num_integer, true, true, true, false, false, "number of occurrences of texts in underline_txt"),
// ---- 'ext_*' fields: recognized names ('*_txt') paired with their counts ('*_val') ----
ext_cms_txt(SolrType.text_general, true, true, true, false, false, "names of cms attributes; if several are recognized then they are listen in decreasing order of number of matching criterias"),
ext_cms_val(SolrType.num_integer, true, true, true, false, false, "number of attributes that count for a specific cms in ext_cms_txt"),
ext_ads_txt(SolrType.text_general, true, true, true, false, false, "names of ad-servers/ad-services"),
ext_ads_val(SolrType.num_integer, true, true, true, false, false, "number of attributes counts in ext_ads_txt"),
ext_community_txt(SolrType.text_general, true, true, true, false, false, "names of recognized community functions"),
// Fixed: the description referred to a non-existent 'attr_community'; the paired field is
// ext_community_txt, matching the wording of the other ext_*_val descriptions.
ext_community_val(SolrType.num_integer, true, true, true, false, false, "number of attribute counts in ext_community_txt"),
ext_maps_txt(SolrType.text_general, true, true, true, false, false, "names of map services"),
ext_maps_val(SolrType.num_integer, true, true, true, false, false, "number of attribute counts in ext_maps_txt"),
ext_tracker_txt(SolrType.text_general, true, true, true, false, false, "names of tracker server"),
ext_tracker_val(SolrType.num_integer, true, true, true, false, false, "number of attribute counts in ext_tracker_txt"),
ext_title_txt(SolrType.text_general, true, true, true, false, false, "names matching title expressions"),
// last constant of the enum: the ';' ends the constant list
ext_title_val(SolrType.num_integer, true, true, true, false, false, "number of matching title expressions");