parent
ea2bd43b28
commit
c00efc2717
@ -0,0 +1,32 @@
|
||||
/**
|
||||
* SolrField
|
||||
* Copyright 2011 by Michael Peter Christen
|
||||
* First released 14.04.2011 at http://yacy.net
|
||||
*
|
||||
* $LastChangedDate: 2011-04-14 22:05:04 +0200 (Do, 14 Apr 2011) $
|
||||
* $LastChangedRevision: 7654 $
|
||||
* $LastChangedBy: orbiter $
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file lgpl21.txt
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package net.yacy.cora.services.federated.solr;
|
||||
|
||||
|
||||
/**
 * Minimal contract for a field description: an object that can report its name.
 * Enum types that implement this interface satisfy it through the
 * {@code name()} method every enum constant already has.
 */
public interface SolrField {

    /**
     * @return the name of this field
     */
    String name();

}
|
@ -0,0 +1,47 @@
|
||||
/**
|
||||
* SolrType
|
||||
* Copyright 2011 by Michael Peter Christen
|
||||
* First released 14.04.2011 at http://yacy.net
|
||||
*
|
||||
* $LastChangedDate: 2011-04-14 22:05:04 +0200 (Do, 14 Apr 2011) $
|
||||
* $LastChangedRevision: 7654 $
|
||||
* $LastChangedBy: orbiter $
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file lgpl21.txt
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
package net.yacy.cora.services.federated.solr;
|
||||
|
||||
/**
 * Enumeration of field type identifiers.
 * Every constant carries a print name; it equals the constant's own name
 * unless a different one is given explicitly (e.g. {@code integer} prints
 * as "int" and {@code bool} prints as "boolean").
 */
public enum SolrType {

    string,
    text_general,
    text_en_splitting_tight,
    date,
    integer("int"),
    tdouble,
    bool("boolean");

    /** the name printed for this type; enum constants are shared, so this is assigned exactly once */
    private final String printName;

    /**
     * Creates a type whose print name is identical to the constant name.
     */
    private SolrType() {
        this(null);
    }

    /**
     * Creates a type with an explicit print name.
     * @param printName the name to print for this type, or null to use the constant name
     */
    private SolrType(final String printName) {
        // null is only passed by the no-arg constructor above; all declared constants pass a literal
        this.printName = printName == null ? this.name() : printName;
    }

    /**
     * @return the print name of this type
     */
    public String printName() {
        return this.printName;
    }

}
|
@ -0,0 +1,173 @@
|
||||
/**
|
||||
* SolrField
|
||||
* Copyright 2011 by Michael Peter Christen
|
||||
* First released 14.04.2011 at http://yacy.net
|
||||
*
|
||||
* $LastChangedDate: 2011-04-14 22:05:04 +0200 (Do, 14 Apr 2011) $
|
||||
* $LastChangedRevision: 7654 $
|
||||
* $LastChangedBy: orbiter $
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file lgpl21.txt
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package net.yacy.search.index;
|
||||
|
||||
import net.yacy.cora.services.federated.solr.SolrType;
|
||||
|
||||
public enum SolrField implements net.yacy.cora.services.federated.solr.SolrField {
|
||||
|
||||
id(SolrType.string, true, true, "primary key of document, the URL hash"),
|
||||
sku(SolrType.text_en_splitting_tight, true, true, false, true, "url of document"),
|
||||
ip_s(SolrType.string, true, true, "ip of host of url (after DNS lookup)"),
|
||||
host_s(SolrType.string, true, true, "host of the url"),
|
||||
title(SolrType.text_general, true, true, true, "content of title tag"),
|
||||
author(SolrType.text_general, true, true, "content of author-tag"),
|
||||
description(SolrType.text_general, true, true, "content of description-tag"),
|
||||
content_type(SolrType.string, true, true, true, "mime-type of document"),
|
||||
last_modified(SolrType.date, true, true, "last-modified from http header"),
|
||||
keywords(SolrType.text_general, true, true, "content of keywords tag; words are separated by space"),
|
||||
text_t(SolrType.text_general, true, true, "all visible text"),
|
||||
wordcount_i(SolrType.integer, true, true, "number of words in visible area"),
|
||||
paths_txt(SolrType.text_general, true, true, true, "all path elements in the url"),
|
||||
// encoded as binary value into an integer:
|
||||
// bit 0: "all" contained in html header meta
|
||||
// bit 1: "index" contained in html header meta
|
||||
// bit 2: "noindex" contained in html header meta
|
||||
// bit 3: "nofollow" contained in html header meta
|
||||
// bit 8: "noarchive" contained in http header properties
|
||||
// bit 9: "nosnippet" contained in http header properties
|
||||
// bit 10: "noindex" contained in http header properties
|
||||
// bit 11: "nofollow" contained in http header properties
|
||||
// bit 12: "unavailable_after" contained in http header properties
|
||||
robots_i(SolrType.integer, true, true, "content of <meta name=\"robots\" content=#content#> tag and the \"X-Robots-Tag\" HTTP property"),
|
||||
inboundlinkscount_i(SolrType.integer, true, true, "total number of inbound links"),
|
||||
inboundlinksnofollowcount_i(SolrType.integer, true, true, "number of inbound links with nofollow tag"),
|
||||
inboundlinks_tag_txt(SolrType.text_general, true, true, true, "internal links, normalized (absolute URLs), as <a> - tag with anchor text and nofollow"),
|
||||
inboundlinks_protocol_txt(SolrType.text_general, true, true, true, "internal links, only the protocol"),
|
||||
inboundlinks_urlstub_txt(SolrType.text_general, true, true, true, "internal links, the url only without the protocol"),
|
||||
inboundlinks_name_txt(SolrType.text_general, true, true, true, "internal links, the name property of the a-tag"),
|
||||
inboundlinks_rel_txt(SolrType.text_general, true, true, true, "internal links, the rel property of the a-tag"),
|
||||
inboundlinks_relflags_txt(SolrType.text_general, true, true, true, "internal links, the rel property of the a-tag, coded binary"),
|
||||
inboundlinks_text_txt(SolrType.text_general, true, true, true, "internal links, the text content of the a-tag"),
|
||||
outboundlinkscount_i(SolrType.integer, true, true, "external number of inbound links"),
|
||||
outboundlinksnofollowcount_i(SolrType.integer, true, true, "number of external links with nofollow tag"),
|
||||
outboundlinks_tag_txt(SolrType.text_general, true, true, true, "external links, normalized (absolute URLs), as <a> - tag with anchor text and nofollow"),
|
||||
outboundlinks_protocol_txt(SolrType.text_general, true, true, true, "external links, only the protocol"),
|
||||
outboundlinks_urlstub_txt(SolrType.text_general, true, true, true, "external links, the url only without the protocol"),
|
||||
outboundlinks_name_txt(SolrType.text_general, true, true, true, "external links, the name property of the a-tag"),
|
||||
outboundlinks_rel_txt(SolrType.text_general, true, true, true, "external links, the rel property of the a-tag"),
|
||||
outboundlinks_relflags_txt(SolrType.text_general, true, true, true, "external links, the rel property of the a-tag, coded binary"),
|
||||
outboundlinks_text_txt(SolrType.text_general, true, true, true, "external links, the text content of the a-tag"),
|
||||
charset_s(SolrType.string, true, true, "character encoding"),
|
||||
lon_coordinate(SolrType.tdouble, true, false, "longitude of location as declared in WSG84"),
|
||||
lat_coordinate(SolrType.tdouble, true, false, "latitude of location as declared in WSG84"),
|
||||
httpstatus_i(SolrType.integer, true, true, "html status return code (i.e. \"200\" for ok), -1 if not loaded"),
|
||||
h1_txt(SolrType.text_general, true, true, true, "h1 header"),
|
||||
h2_txt(SolrType.text_general, true, true, true, "h2 header"),
|
||||
h3_txt(SolrType.text_general, true, true, true, "h3 header"),
|
||||
h4_txt(SolrType.text_general, true, true, true, "h4 header"),
|
||||
h5_txt(SolrType.text_general, true, true, true, "h5 header"),
|
||||
h6_txt(SolrType.text_general, true, true, true, "h6 header"),
|
||||
htags_i(SolrType.integer, true, true, "binary pattern for the existance of h1..h6 headlines"),
|
||||
canonical_s(SolrType.string, true, true, "url inside the canonical link element"),
|
||||
metagenerator_t(SolrType.text_general, true, true, "content of <meta name=\"generator\" content=#content#> tag"),
|
||||
boldcount_i(SolrType.integer, true, true, "total number of occurrences of <b> or <strong>"),
|
||||
bold_txt(SolrType.text_general, true, true, true, "all texts inside of <b> or <strong> tags. no doubles. listed in the order of number of occurrences in decreasing order"),
|
||||
bold_val(SolrType.integer, true, true, true, "number of occurrences of texts in bold_txt"),
|
||||
italiccount_i(SolrType.integer, true, true, "total number of occurrences of <i>"),
|
||||
italic_txt(SolrType.text_general, true, true, true, "all texts inside of <i> tags. no doubles. listed in the order of number of occurrences in decreasing order"),
|
||||
italic_val(SolrType.integer, true, true, true, "number of occurrences of texts in italic_txt"),
|
||||
licount_i(SolrType.integer, true, true, "number of <li> tags"),
|
||||
li_txt(SolrType.text_general, true, true, true, "all texts in <li> tags"),
|
||||
imagescount_i(SolrType.integer, true, true, "number of images"),
|
||||
images_tag_txt(SolrType.text_general, true, true, true, " all image tags, encoded as <img> tag inclusive alt- and title property"),
|
||||
images_protocol_txt(SolrType.text_general, true, true, true, "all image link protocols"),
|
||||
images_urlstub_txt(SolrType.text_general, true, true, true, "all image links without the protocol and '://'"),
|
||||
images_alt_txt(SolrType.text_general, true, true, true, "all image link alt tag"),
|
||||
csscount_i(SolrType.integer, true, true, "number of entries in css_tag_txt and css_url_txt"),
|
||||
css_tag_txt(SolrType.text_general, true, true, true, "full css tag with normalized url"),
|
||||
css_url_txt(SolrType.text_general, true, true, true, "normalized urls within a css tag"),
|
||||
scripts_txt(SolrType.text_general, true, true, true, "normaluzed urls within a scripts tag"),
|
||||
scriptscount_i(SolrType.integer, true, true, "number of entries in scripts_txt"),
|
||||
frames_txt(SolrType.text_general, true, true, true, "list of all links to frames"),
|
||||
framesscount_i(SolrType.integer, true, true, "number of frames_txt"),
|
||||
iframes_txt(SolrType.text_general, true, true, true, "list of all links to iframes"),
|
||||
iframesscount_i(SolrType.integer, true, true, "number of iframes_txt"),
|
||||
flash_b(SolrType.bool, true, true, "flag that shows if a swf file is linked"),
|
||||
responsetime_i(SolrType.integer, true, true, "response time of target server in milliseconds"),
|
||||
ext_cms_txt(SolrType.text_general, true, true, true, "names of cms attributes; if several are recognized then they are listen in decreasing order of number of matching criterias"),
|
||||
ext_cms_val(SolrType.integer, true, true, true, "number of attributes that count for a specific cms in ext_cms_txt"),
|
||||
ext_ads_txt(SolrType.text_general, true, true, true, "names of ad-servers/ad-services"),
|
||||
ext_ads_val(SolrType.integer, true, true, true, "number of attributes counts in ext_ads_txt"),
|
||||
ext_community_txt(SolrType.text_general, true, true, true, "names of recognized community functions"),
|
||||
ext_community_val(SolrType.integer, true, true, true, "number of attribute counts in attr_community"),
|
||||
ext_maps_txt(SolrType.text_general, true, true, true, "names of map services"),
|
||||
ext_maps_val(SolrType.integer, true, true, true, "number of attribute counts in ext_maps_txt"),
|
||||
ext_tracker_txt(SolrType.text_general, true, true, true, "names of tracker server"),
|
||||
ext_tracker_val(SolrType.integer, true, true, true, "number of attribute counts in ext_tracker_txt"),
|
||||
ext_title_txt(SolrType.text_general, true, true, true, "names matching title expressions"),
|
||||
ext_title_val(SolrType.integer, true, true, true, "number of matching title expressions"),
|
||||
failreason_t(SolrType.text_general, true, true, "fail reason if a page was not loaded. if the page was loaded then this field is empty");
|
||||
|
||||
final SolrType type;
|
||||
final boolean indexed, stored;
|
||||
boolean multiValued, omitNorms;
|
||||
final String comment;
|
||||
|
||||
private SolrField(final SolrType type, final boolean indexed, final boolean stored, final String comment) {
|
||||
this.type = type;
|
||||
this.indexed = indexed;
|
||||
this.stored = stored;
|
||||
this.multiValued = false;
|
||||
this.omitNorms = false;
|
||||
this.comment = comment;
|
||||
}
|
||||
|
||||
private SolrField(final SolrType type, final boolean indexed, final boolean stored, final boolean multiValued, final String comment) {
|
||||
this(type, indexed, stored, comment);
|
||||
this.multiValued = multiValued;
|
||||
}
|
||||
|
||||
private SolrField(final SolrType type, final boolean indexed, final boolean stored, final boolean multiValued, final boolean omitNorms, final String comment) {
|
||||
this(type, indexed, stored, multiValued, comment);
|
||||
this.omitNorms = omitNorms;
|
||||
}
|
||||
|
||||
public final SolrType getType() {
|
||||
return this.type;
|
||||
}
|
||||
|
||||
public final boolean isIndexed() {
|
||||
return this.indexed;
|
||||
}
|
||||
|
||||
public final boolean isStored() {
|
||||
return this.stored;
|
||||
}
|
||||
|
||||
public final boolean isMultiValued() {
|
||||
return this.multiValued;
|
||||
}
|
||||
|
||||
public final boolean isOmitNorms() {
|
||||
return this.omitNorms;
|
||||
}
|
||||
|
||||
public final String getComment() {
|
||||
return this.comment;
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in new issue