/** * SolrField * Copyright 2011 by Michael Peter Christen * First released 14.04.2011 at http://yacy.net * * $LastChangedDate: 2011-04-14 22:05:04 +0200 (Do, 14 Apr 2011) $ * $LastChangedRevision: 7654 $ * $LastChangedBy: orbiter $ * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program in the file lgpl21.txt * If not, see . */ package net.yacy.search.index; import net.yacy.cora.services.federated.solr.Schema; import net.yacy.cora.services.federated.solr.SolrType; public enum YaCySchema implements Schema { // mandatory id(SolrType.string, true, true, "primary key of document, the URL hash **mandatory field**"), sku(SolrType.text_en_splitting_tight, true, true, false, true, "url of document"), last_modified(SolrType.date, true, true, "last-modified from http header"), content_type(SolrType.string, true, true, true, "mime-type of document"), title(SolrType.text_general, true, true, true, "content of title tag"), host_id_s(SolrType.string, true, true, "id of the host, a 6-byte hash that is part of the document id"),// String hosthash(); md5_s(SolrType.string, true, true, "the md5 of the raw source"),// String md5(); size_i(SolrType.integer, true, true, "the size of the raw source"),// int size(); process_s(SolrType.string, true, true, "index creation comment"), failreason_t(SolrType.text_general, true, true, "fail reason if a page was not loaded. if the page was loaded then this field is empty"), httpstatus_i(SolrType.integer, true, true, "html status return code (i.e. \"200\" for ok), -1 if not loaded"), // optional but recommended, part of index distribution load_date_dt(SolrType.date, true, true, "time when resource was loaded"), fresh_date_dt(SolrType.date, true, true, "date until resource shall be considered as fresh"), referrer_id_txt(SolrType.string, true, true, true, "ids of referrer to this document"),// byte[] referrerHash(); publisher_t(SolrType.text_general, true, true, "the name of the publisher of the document"),// String dc_publisher(); language_txt(SolrType.string, true, true, "the language used in the document; starts with primary language"),// byte[] language(); audiolinkscount_i(SolrType.integer, true, true, "number of links to audio resources"),// int laudio(); videolinkscount_i(SolrType.integer, true, true, "number of links to video resources"),// int lvideo(); applinkscount_i(SolrType.integer, true, true, "number of links to application resources"),// int lapp(); // optional but recommended lon_coordinate(SolrType.tdouble, true, true, "longitude of location as declared in WSG84"), lat_coordinate(SolrType.tdouble, true, true, "latitude of location as declared in WSG84"), ip_s(SolrType.string, true, true, "ip of host of url (after DNS lookup)"), author(SolrType.text_general, true, true, "content of author-tag"), description(SolrType.text_general, true, true, "content of description-tag"), keywords(SolrType.text_general, true, true, "content of keywords tag; words are separated by space"), charset_s(SolrType.string, true, true, "character encoding"), wordcount_i(SolrType.integer, true, true, "number of words in visible area"), inboundlinkscount_i(SolrType.integer, true, true, "total number of inbound links"), inboundlinksnofollowcount_i(SolrType.integer, true, true, "number of inbound links with nofollow tag"), outboundlinkscount_i(SolrType.integer, true, true, "external number of inbound links"), outboundlinksnofollowcount_i(SolrType.integer, true, true, "number of external links with nofollow tag"), imagescount_i(SolrType.integer, true, true, "number of images"), responsetime_i(SolrType.integer, true, true, "response time of target server in milliseconds"), text_t(SolrType.text_general, true, true, "all visible text"), // optional values csscount_i(SolrType.integer, true, true, "number of entries in css_tag_txt and css_url_txt"), css_tag_txt(SolrType.text_general, true, true, true, "full css tag with normalized url"), css_url_txt(SolrType.text_general, true, true, true, "normalized urls within a css tag"), scripts_txt(SolrType.text_general, true, true, true, "normalized urls within a scripts tag"), scriptscount_i(SolrType.integer, true, true, "number of entries in scripts_txt"), // encoded as binary value into an integer: // bit 0: "all" contained in html header meta // bit 1: "index" contained in html header meta // bit 2: "noindex" contained in html header meta // bit 3: "nofollow" contained in html header meta // bit 8: "noarchive" contained in http header properties // bit 9: "nosnippet" contained in http header properties // bit 10: "noindex" contained in http header properties // bit 11: "nofollow" contained in http header properties // bit 12: "unavailable_after" contained in http header properties robots_i(SolrType.integer, true, true, "content of tag and the \"X-Robots-Tag\" HTTP property"), metagenerator_t(SolrType.text_general, true, true, "content of tag"), inboundlinks_tag_txt(SolrType.text_general, true, true, true, "internal links, normalized (absolute URLs), as - tag with anchor text and nofollow"), inboundlinks_protocol_txt(SolrType.text_general, true, true, true, "internal links, only the protocol"), inboundlinks_urlstub_txt(SolrType.text_general, true, true, true, "internal links, the url only without the protocol"), inboundlinks_name_txt(SolrType.text_general, true, true, true, "internal links, the name property of the a-tag"), inboundlinks_rel_txt(SolrType.text_general, true, true, true, "internal links, the rel property of the a-tag"), inboundlinks_relflags_txt(SolrType.text_general, true, true, true, "internal links, the rel property of the a-tag, coded binary"), inboundlinks_text_txt(SolrType.text_general, true, true, true, "internal links, the text content of the a-tag"), outboundlinks_tag_txt(SolrType.text_general, true, true, true, "external links, normalized (absolute URLs), as - tag with anchor text and nofollow"), outboundlinks_protocol_txt(SolrType.text_general, true, true, true, "external links, only the protocol"), outboundlinks_urlstub_txt(SolrType.text_general, true, true, true, "external links, the url only without the protocol"), outboundlinks_name_txt(SolrType.text_general, true, true, true, "external links, the name property of the a-tag"), outboundlinks_rel_txt(SolrType.text_general, true, true, true, "external links, the rel property of the a-tag"), outboundlinks_relflags_txt(SolrType.text_general, true, true, true, "external links, the rel property of the a-tag, coded binary"), outboundlinks_text_txt(SolrType.text_general, true, true, true, "external links, the text content of the a-tag"), images_tag_txt(SolrType.text_general, true, true, true, " all image tags, encoded as tag inclusive alt- and title property"), images_urlstub_txt(SolrType.text_general, true, true, true, "all image links without the protocol and '://'"), images_protocol_txt(SolrType.text_general, true, true, true, "all image link protocols"), images_alt_txt(SolrType.text_general, true, true, true, "all image link alt tag"), h1_txt(SolrType.text_general, true, true, true, "h1 header"), h2_txt(SolrType.text_general, true, true, true, "h2 header"), h3_txt(SolrType.text_general, true, true, true, "h3 header"), h4_txt(SolrType.text_general, true, true, true, "h4 header"), h5_txt(SolrType.text_general, true, true, true, "h5 header"), h6_txt(SolrType.text_general, true, true, true, "h6 header"), htags_i(SolrType.integer, true, true, "binary pattern for the existance of h1..h6 headlines"), paths_txt(SolrType.text_general, true, true, true, "all path elements in the url"), host_s(SolrType.string, true, true, "host of the url"), canonical_s(SolrType.string, true, true, "url inside the canonical link element"), refresh_s(SolrType.string, true, true, "link from the url property inside the refresh link element"), li_txt(SolrType.text_general, true, true, true, "all texts in
  • tags"), licount_i(SolrType.integer, true, true, "number of
  • tags"), bold_txt(SolrType.text_general, true, true, true, "all texts inside of or tags. no doubles. listed in the order of number of occurrences in decreasing order"), bold_val(SolrType.integer, true, true, true, "number of occurrences of texts in bold_txt"), boldcount_i(SolrType.integer, true, true, "total number of occurrences of or "), italic_txt(SolrType.text_general, true, true, true, "all texts inside of tags. no doubles. listed in the order of number of occurrences in decreasing order"), italic_val(SolrType.integer, true, true, true, "number of occurrences of texts in italic_txt"), italiccount_i(SolrType.integer, true, true, "total number of occurrences of "), flash_b(SolrType.bool, true, true, "flag that shows if a swf file is linked"), frames_txt(SolrType.text_general, true, true, true, "list of all links to frames"), framesscount_i(SolrType.integer, true, true, "number of frames_txt"), iframes_txt(SolrType.text_general, true, true, true, "list of all links to iframes"), iframesscount_i(SolrType.integer, true, true, "number of iframes_txt"), ext_cms_txt(SolrType.text_general, true, true, true, "names of cms attributes; if several are recognized then they are listen in decreasing order of number of matching criterias"), ext_cms_val(SolrType.integer, true, true, true, "number of attributes that count for a specific cms in ext_cms_txt"), ext_ads_txt(SolrType.text_general, true, true, true, "names of ad-servers/ad-services"), ext_ads_val(SolrType.integer, true, true, true, "number of attributes counts in ext_ads_txt"), ext_community_txt(SolrType.text_general, true, true, true, "names of recognized community functions"), ext_community_val(SolrType.integer, true, true, true, "number of attribute counts in attr_community"), ext_maps_txt(SolrType.text_general, true, true, true, "names of map services"), ext_maps_val(SolrType.integer, true, true, true, "number of attribute counts in ext_maps_txt"), ext_tracker_txt(SolrType.text_general, true, true, true, "names of tracker server"), ext_tracker_val(SolrType.integer, true, true, true, "number of attribute counts in ext_tracker_txt"), ext_title_txt(SolrType.text_general, true, true, true, "names matching title expressions"), ext_title_val(SolrType.integer, true, true, true, "number of matching title expressions"); private String solrFieldName = null; // solr field name in custom solr schema, defaults to solcell schema field name (= same as this.name() ) private final SolrType type; private final boolean indexed, stored; private boolean multiValued, omitNorms; private String comment; private YaCySchema(final SolrType type, final boolean indexed, final boolean stored, final String comment) { this.type = type; this.indexed = indexed; this.stored = stored; this.multiValued = false; this.omitNorms = false; this.comment = comment; } private YaCySchema(final SolrType type, final boolean indexed, final boolean stored, final boolean multiValued, final String comment) { this(type, indexed, stored, comment); this.multiValued = multiValued; } private YaCySchema(final SolrType type, final boolean indexed, final boolean stored, final boolean multiValued, final boolean omitNorms, final String comment) { this(type, indexed, stored, multiValued, comment); this.omitNorms = omitNorms; } /** * Returns the YaCy default or (if available) custom field name for Solr * @return SolrFieldname String */ @Override public final String getSolrFieldName() { return (this.solrFieldName == null ? this.name() : this.solrFieldName); } /** * Set a custom Solr field name (and converts it to lower case) * @param theValue = the field name */ public final void setSolrFieldName(String theValue) { // make sure no empty string is assigned if ( (theValue != null) && (!theValue.isEmpty()) ) { this.solrFieldName = theValue.toLowerCase(); } else { this.solrFieldName = null; } } @Override public final SolrType getType() { return this.type; } @Override public final boolean isIndexed() { return this.indexed; } @Override public final boolean isStored() { return this.stored; } @Override public final boolean isMultiValued() { return this.multiValued; } @Override public final boolean isOmitNorms() { return this.omitNorms; } @Override public final String getComment() { return this.comment; } }