added a feature to find similarities in documents.

This uses an enhanced version of the Nutch/Solr TextProfileSignature.
As a result, a signature of the document is written to the solr search
index. Additionally, each time a signature is written, it is
checked whether the signature already exists in the index. If the signature
does not exist, the document is marked as unique. The unique attribute
can now be used to sort document lists and bring duplicates to the end
of a result list.
To enable this, a large portion of the search api to Solr had to be
changed. This affected mainly caching of 'exists' searches to enhance
the check for existing signatures and do this without actually doing a
solr query.
Because this is the first time a long number is used as a value in the Solr
store, the value naming in the YaCySchema also had to be adapted and
normalized. As a result, many files had to be changed.
pull/1/head
Michael Peter Christen 12 years ago
parent f5ca5cea44
commit d6b82840f8

2
debian/changelog vendored

@ -1,4 +1,4 @@
yacy (*auto-git-version*) unstable; urgency=low
yacy (1.2.9018) unstable; urgency=low
* SVN Update

@ -32,6 +32,21 @@ host_id_s
## the md5 of the raw source (mandatory field)
md5_s
## the 64 bit hash of the org.apache.solr.update.processor.Lookup3Signature of text_t
exact_signature_l
## flag shows if exact_signature_l is unique at the time of document creation, used for double-check during search
exact_signature_unique_b
## 64 bit of the Lookup3Signature from EnhancedTextProfileSignature of text_t
fuzzy_signature_l
## intermediate data produced in EnhancedTextProfileSignature: a list of word frequencies
#fuzzy_signature_text_t
## flag shows if fuzzy_signature_l is unique at the time of document creation, used for double-check during search
fuzzy_signature_unique_b
## the size of the raw source (mandatory field)
size_i

@ -30,6 +30,7 @@ import java.util.ConcurrentModificationException;
import java.util.Iterator;
import java.util.Map;
import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.cora.federate.solr.connector.MirrorSolrConnector;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.RequestHeader;
@ -204,15 +205,16 @@ public class PerformanceMemory_p {
// other caching structures
final MirrorSolrConnector solr = (MirrorSolrConnector) Switchboard.getSwitchboard().index.fulltext().getSolr();
final MirrorSolrConnector.HitMissCache hitMissCache = solr.getCache(YaCySchema.id.getSolrFieldName());
prop.putNum("solrcacheHit.size", solr.nameCacheHitSize());
prop.putNum("solrcacheHit.Hit", solr.hitCache_Hit);
prop.putNum("solrcacheHit.Miss", solr.hitCache_Miss);
prop.putNum("solrcacheHit.Insert", solr.hitCache_Insert);
prop.putNum("solrcacheHit.Hit", hitMissCache.hitCache_Hit);
prop.putNum("solrcacheHit.Miss", hitMissCache.hitCache_Miss);
prop.putNum("solrcacheHit.Insert", hitMissCache.hitCache_Insert);
prop.putNum("solrcacheMiss.size", solr.nameCacheMissSize());
prop.putNum("solrcacheMiss.Hit", solr.missCache_Hit);
prop.putNum("solrcacheMiss.Miss", solr.missCache_Miss);
prop.putNum("solrcacheMiss.Insert", solr.missCache_Insert);
prop.putNum("solrcacheMiss.Hit", hitMissCache.missCache_Hit);
prop.putNum("solrcacheMiss.Miss", hitMissCache.missCache_Miss);
prop.putNum("solrcacheMiss.Insert", hitMissCache.missCache_Insert);
prop.putNum("solrcacheDocument.size", solr.nameCacheDocumentSize());
prop.putNum("solrcacheDocument.Hit", solr.documentCache_Hit);

@ -29,7 +29,7 @@ import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.search.Switchboard;
import net.yacy.search.query.SearchEventCache;

@ -29,8 +29,8 @@
// if the shell's current path is HTROOT
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.Classification.ContentDomain;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;

@ -37,8 +37,8 @@ import java.util.TreeMap;
import java.util.TreeSet;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.Classification.ContentDomain;
import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.cora.protocol.Domains;

@ -41,8 +41,8 @@ import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.Classification.ContentDomain;
import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.federate.yacy.CacheStrategy;

@ -29,8 +29,8 @@ import java.util.List;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.Classification.ContentDomain;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;

@ -46,8 +46,9 @@ import java.util.regex.Pattern;
import jcifs.smb.SmbException;
import jcifs.smb.SmbFile;
import jcifs.smb.SmbFileInputStream;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.Punycode.PunycodeException;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.Classification.ContentDomain;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.TimeoutRequest;
import net.yacy.cora.protocol.ftp.FTPClient;

@ -18,7 +18,7 @@
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.document;
package net.yacy.cora.document.analysis;
import java.io.BufferedInputStream;
import java.io.File;
@ -28,6 +28,8 @@ import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import net.yacy.cora.document.MultiProtocolURI;
public class Classification {
private static final Set<String> textExtSet = new HashSet<String>();
@ -140,7 +142,7 @@ public class Classification {
return ctrlExtSet.contains(ctrlExt.trim().toLowerCase());
}
protected static ContentDomain getContentDomain(final String ext) {
public static ContentDomain getContentDomain(final String ext) {
if (isTextExtension(ext)) return ContentDomain.TEXT;
if (isImageExtension(ext)) return ContentDomain.IMAGE;
if (isAudioExtension(ext)) return ContentDomain.AUDIO;

@ -0,0 +1,173 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.yacy.cora.document.analysis;
/*
* THIS CODE WAS COPIED FROM org.apache.solr.update.processor.TextProfileSignature
* - to get access to the 'newText' variable content which is otherwise lost in the process, used for debugging
* - to use the much faster Lookup3Signature instead of MD5Signature
*/
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.update.processor.Lookup3Signature;
/**
 * <p>This implementation is copied from Apache Nutch (by way of
 * org.apache.solr.update.processor.TextProfileSignature).</p>
 * <p>An implementation of a page signature. It calculates a Lookup3 hash
 * (inherited from {@link Lookup3Signature}) of a plain text "profile" of a page.</p>
 * <p>The algorithm to calculate a page "profile" takes the plain text version of
 * a page and performs the following steps:
 * <ul>
 * <li>treat every character that is not a letter or digit as a token separator,
 * and bring all remaining characters to lower case,</li>
 * <li>split the text into tokens (all consecutive letter/digit characters),</li>
 * <li>discard tokens equal or shorter than MIN_TOKEN_LEN (default 2 characters),</li>
 * <li>round down the counts of tokens to the nearest multiple of QUANT
 * (<code>QUANT = QUANT_RATE * maxFreq</code>, where <code>QUANT_RATE</code> is 0.01f
 * by default, and <code>maxFreq</code> is the maximum token frequency). If
 * <code>maxFreq</code> is higher than 1, then QUANT is always at least 2 (which
 * means that tokens with frequency 1 are always discarded).</li>
 * <li>tokens, which frequency after quantization falls below QUANT, are discarded.</li>
 * <li>sort the list of remaining tokens by decreasing (unquantized) frequency,</li>
 * <li>create a list of tokens and their quantized frequency, separated by spaces,
 * in the order of decreasing frequency.</li>
 * </ul>
 * This list is then submitted to the Lookup3 hash calculation of the superclass.
 * <p>In addition to the hashed profile, a second, more verbose profile (including the
 * unquantized counts) is collected and can be read with {@link #getSignatureText()}.</p>
 */
public class EnhancedTextProfileSignature extends Lookup3Signature {

    /** Fraction of the maximum token frequency that is used as quantization step. */
    private float quantRate = 0.01f;

    /** Tokens must be strictly longer than this to be counted. */
    private int minTokenLen = 2;

    /** Verbose profile text collected over all add() calls; start with some capacity, makes it much faster. */
    private final StringBuilder evalText = new StringBuilder(120);

    /**
     * Reads the optional parameters "quantRate" (float, default 0.01) and
     * "minTokenLen" (int, default 2).
     *
     * @param params the configuration parameters
     */
    @Override
    public void init(SolrParams params) {
        quantRate = params.getFloat("quantRate", 0.01f);
        minTokenLen = params.getInt("minTokenLen", 2);
    }

    /**
     * @return the signature as computed by the superclass from all profiles
     *         that were submitted through {@link #add(String)}
     */
    @Override
    public byte[] getSignature() {
        return super.getSignature();
    }

    /**
     * @return the verbose profile text, a list of entries of the form
     *         <code>(token-count-quantizedCount)</code>. This is the internal
     *         buffer itself, not a copy.
     */
    public StringBuilder getSignatureText() {
        return evalText;
    }

    /**
     * Computes the token profile of the given text and feeds the profile
     * (not the text itself) into the hash calculation of the superclass.
     *
     * @param content the plain text to be profiled
     */
    @Override
    public void add(String content) {
        HashMap<String, Token> tokens = new HashMap<String, Token>();
        StringBuilder curToken = new StringBuilder();
        int maxFreq = 0;
        for (int i = 0; i < content.length(); i++) {
            char c = content.charAt(i);
            if (Character.isLetterOrDigit(c)) {
                curToken.append(Character.toLowerCase(c));
            } else if (curToken.length() > 0) {
                // a separator terminates the current token
                if (curToken.length() > minTokenLen) {
                    maxFreq = count(tokens, curToken.toString(), maxFreq);
                }
                curToken.setLength(0);
            }
        }
        // check the last token, which is not terminated by a separator
        if (curToken.length() > minTokenLen) {
            maxFreq = count(tokens, curToken.toString(), maxFreq);
        }

        // calculate the QUANT value
        int quant = Math.round(maxFreq * quantRate);
        if (quant < 2) {
            quant = (maxFreq > 1) ? 2 : 1;
        }

        ArrayList<Token> profile = new ArrayList<Token>();
        Iterator<Token> it = tokens.values().iterator();
        while (it.hasNext()) {
            Token t = it.next();
            // round down to the nearest QUANT
            t.qcnt = (t.cnt / quant) * quant;
            // discard the frequencies below the QUANT
            if (t.qcnt >= quant) {
                profile.add(t);
            }
        }

        // NOTE(review): the sort is stable and only compares counts, so tokens with
        // equal counts stay in HashMap iteration order, which is not specified.
        // A secondary ordering by token value would make the profile independent
        // of that order, but it would also change signatures that are already
        // stored - confirm before changing.
        Collections.sort(profile, new TokenComparator());

        StringBuilder newText = new StringBuilder(120);
        it = profile.iterator();
        while (it.hasNext()) {
            Token t = it.next();
            if (newText.length() > 0) {
                newText.append(' ');
            }
            // the verbose text gets its own separator check so that profiles of
            // successive add() calls do not run into each other
            if (evalText.length() > 0) {
                evalText.append(' ');
            }
            newText.append('(').append(t.val).append('-').append(t.qcnt).append(')');
            evalText.append('(').append(t.val).append('-').append(t.cnt).append('-').append(t.qcnt).append(')');
        }
        super.add(newText.toString());
    }

    /**
     * Increments the counter of the given word, creating the entry if necessary.
     *
     * @param tokens the counters collected so far
     * @param word the word to be counted
     * @param maxFreq the highest count seen so far
     * @return the highest count seen after the word was counted
     */
    private static int count(HashMap<String, Token> tokens, String word, int maxFreq) {
        Token tok = tokens.get(word);
        if (tok == null) {
            tok = new Token(0, word);
            tokens.put(word, tok);
        }
        tok.cnt++;
        return (tok.cnt > maxFreq) ? tok.cnt : maxFreq;
    }

    /** A word together with its raw count (cnt) and its quantized count (qcnt). */
    private static class Token {
        public int cnt, qcnt;
        public String val;

        public Token(int cnt, String val) {
            this.cnt = cnt;
            this.val = val;
        }

        @Override
        public String toString() {
            return val + " " + cnt;
        }
    }

    /** Orders tokens by decreasing raw count. */
    private static class TokenComparator implements Comparator<Token> {
        @Override
        public int compare(Token t1, Token t2) {
            // counts are never negative, so the subtraction cannot overflow
            return t2.cnt - t1.cnt;
        }
    }
}

@ -27,11 +27,11 @@ public enum SolrType {
text_en_splitting_tight(null, null), // can insert dashes in the wrong place and still match
location("p", null), // lat,lon - format: specialized field for geospatial search. If indexed, this fieldType must not be multivalued.
date("dt", null), // date format as in http://www.w3.org/TR/xmlschema-2/#dateTime with trailing 'Z'
integer("i", "val", "int"),
bool("b", null, "boolean"),
tlong(null, null, "long"), // not used in schema yet
tfloat(null, null, "float"), // not used in schema yet
tdouble(null, null, "double"); // not used in schema yet
bool("b", "bs", "boolean"),
num_integer("i", "val", "int"),
num_long("l", "ls", "long"),
num_float("f", "fs", "float"),
num_double("d", "ds", "double");
private String printName, singlevalExt, multivalExt;
private SolrType(final String singlevalExt, final String multivalExt) {

@ -36,11 +36,16 @@ public enum YaCySchema implements Schema {
title(SolrType.text_general, true, true, true, "content of title tag"),
host_id_s(SolrType.string, true, true, false, "id of the host, a 6-byte hash that is part of the document id"),// String hosthash();
md5_s(SolrType.string, true, true, false, "the md5 of the raw source"),// String md5();
size_i(SolrType.integer, true, true, false, "the size of the raw source"),// int size();
exact_signature_l(SolrType.num_long, true, true, false, "the 64 bit hash of the org.apache.solr.update.processor.Lookup3Signature of text_t"),
exact_signature_unique_b(SolrType.bool, true, true, false, "flag shows if exact_signature_l is unique at the time of document creation, used for double-check during search"),
fuzzy_signature_l(SolrType.num_long, true, true, false, "64 bit of the Lookup3Signature from EnhancedTextProfileSignature of text_t"),
fuzzy_signature_text_t(SolrType.text_general, true, true, false, "intermediate data produced in EnhancedTextProfileSignature: a list of word frequencies"),
fuzzy_signature_unique_b(SolrType.bool, true, true, false, "flag shows if fuzzy_signature_l is unique at the time of document creation, used for double-check during search"),
size_i(SolrType.num_integer, true, true, false, "the size of the raw source"),// int size();
process_s(SolrType.string, true, true, false, "index creation comment"),
failreason_t(SolrType.text_general, true, true, false, "fail reason if a page was not loaded. if the page was loaded then this field is empty"),
httpstatus_i(SolrType.integer, true, true, false, "html status return code (i.e. \"200\" for ok), -1 if not loaded"),
httpstatus_redirect_s(SolrType.integer, true, true, false, "html status return code (i.e. \"200\" for ok), -1 if not loaded"),
httpstatus_i(SolrType.num_integer, true, true, false, "html status return code (i.e. \"200\" for ok), -1 if not loaded"),
httpstatus_redirect_s(SolrType.num_integer, true, true, false, "html status return code (i.e. \"200\" for ok), -1 if not loaded"),
// optional but recommended, part of index distribution
load_date_dt(SolrType.date, true, true, false, "time when resource was loaded"),
@ -48,9 +53,9 @@ public enum YaCySchema implements Schema {
referrer_id_txt(SolrType.string, true, true, true, "ids of referrer to this document"),// byte[] referrerHash();
publisher_t(SolrType.text_general, true, true, false, "the name of the publisher of the document"),// String dc_publisher();
language_s(SolrType.string, true, true, false, "the language used in the document"),// byte[] language();
audiolinkscount_i(SolrType.integer, true, true, false, "number of links to audio resources"),// int laudio();
videolinkscount_i(SolrType.integer, true, true, false, "number of links to video resources"),// int lvideo();
applinkscount_i(SolrType.integer, true, true, false, "number of links to application resources"),// int lapp();
audiolinkscount_i(SolrType.num_integer, true, true, false, "number of links to audio resources"),// int laudio();
videolinkscount_i(SolrType.num_integer, true, true, false, "number of links to video resources"),// int lvideo();
applinkscount_i(SolrType.num_integer, true, true, false, "number of links to application resources"),// int lapp();
// optional but recommended
coordinate_p(SolrType.location, true, true, false, "point in degrees of latitude,longitude as declared in WSG84"),
@ -59,13 +64,13 @@ public enum YaCySchema implements Schema {
description(SolrType.text_general, true, true, false, "content of description-tag"),
keywords(SolrType.text_general, true, true, false, "content of keywords tag; words are separated by space"),
charset_s(SolrType.string, true, true, false, "character encoding"),
wordcount_i(SolrType.integer, true, true, false, "number of words in visible area"),
inboundlinkscount_i(SolrType.integer, true, true, false, "total number of inbound links"),
inboundlinksnofollowcount_i(SolrType.integer, true, true, false, "number of inbound links with nofollow tag"),
outboundlinkscount_i(SolrType.integer, true, true, false, "external number of inbound links"),
outboundlinksnofollowcount_i(SolrType.integer, true, true, false, "number of external links with nofollow tag"),
imagescount_i(SolrType.integer, true, true, false, "number of images"),
responsetime_i(SolrType.integer, true, true, false, "response time of target server in milliseconds"),
wordcount_i(SolrType.num_integer, true, true, false, "number of words in visible area"),
inboundlinkscount_i(SolrType.num_integer, true, true, false, "total number of inbound links"),
inboundlinksnofollowcount_i(SolrType.num_integer, true, true, false, "number of inbound links with nofollow tag"),
outboundlinkscount_i(SolrType.num_integer, true, true, false, "external number of inbound links"),
outboundlinksnofollowcount_i(SolrType.num_integer, true, true, false, "number of external links with nofollow tag"),
imagescount_i(SolrType.num_integer, true, true, false, "number of images"),
responsetime_i(SolrType.num_integer, true, true, false, "response time of target server in milliseconds"),
text_t(SolrType.text_general, true, true, false, "all visible text"),
synonyms_sxt(SolrType.string, true, true, true, "additional synonyms to the words in the text"),
h1_txt(SolrType.text_general, true, true, true, "h1 header"),
@ -77,11 +82,11 @@ public enum YaCySchema implements Schema {
// optional values, not part of standard YaCy handling (but useful for external applications)
collection_sxt(SolrType.string, true, true, true, "tags that are attached to crawls/index generation to separate the search result into user-defined subsets"),
csscount_i(SolrType.integer, true, true, false, "number of entries in css_tag_txt and css_url_txt"),
csscount_i(SolrType.num_integer, true, true, false, "number of entries in css_tag_txt and css_url_txt"),
css_tag_txt(SolrType.text_general, true, true, true, "full css tag with normalized url"),
css_url_txt(SolrType.text_general, true, true, true, "normalized urls within a css tag"),
scripts_txt(SolrType.text_general, true, true, true, "normalized urls within a scripts tag"),
scriptscount_i(SolrType.integer, true, true, false, "number of entries in scripts_txt"),
scriptscount_i(SolrType.num_integer, true, true, false, "number of entries in scripts_txt"),
// encoded as binary value into an integer:
// bit 0: "all" contained in html header meta
// bit 1: "index" contained in html header meta
@ -92,57 +97,57 @@ public enum YaCySchema implements Schema {
// bit 10: "noindex" contained in http header properties
// bit 11: "nofollow" contained in http header properties
// bit 12: "unavailable_after" contained in http header properties
robots_i(SolrType.integer, true, true, false, "content of <meta name=\"robots\" content=#content#> tag and the \"X-Robots-Tag\" HTTP property"),
robots_i(SolrType.num_integer, true, true, false, "content of <meta name=\"robots\" content=#content#> tag and the \"X-Robots-Tag\" HTTP property"),
metagenerator_t(SolrType.text_general, true, true, false, "content of <meta name=\"generator\" content=#content#> tag"),
inboundlinks_tag_txt(SolrType.text_general, true, true, true, "internal links, normalized (absolute URLs), as <a> - tag with anchor text and nofollow"),
inboundlinks_protocol_sxt(SolrType.string, true, true, true, "internal links, only the protocol"),
inboundlinks_urlstub_txt(SolrType.text_general, true, true, true, "internal links, the url only without the protocol"),
inboundlinks_name_txt(SolrType.text_general, true, true, true, "internal links, the name property of the a-tag"),
inboundlinks_rel_sxt(SolrType.string, true, true, true, "internal links, the rel property of the a-tag"),
inboundlinks_relflags_val(SolrType.integer, true, true, true, "internal links, the rel property of the a-tag, coded binary"),
inboundlinks_relflags_val(SolrType.num_integer, true, true, true, "internal links, the rel property of the a-tag, coded binary"),
inboundlinks_text_txt(SolrType.text_general, true, true, true, "internal links, the text content of the a-tag"),
inboundlinks_text_chars_val(SolrType.integer, true, true, true, "internal links, the length of the a-tag as number of characters"),
inboundlinks_text_words_val(SolrType.integer, true, true, true, "internal links, the length of the a-tag as number of words"),
inboundlinks_text_chars_val(SolrType.num_integer, true, true, true, "internal links, the length of the a-tag as number of characters"),
inboundlinks_text_words_val(SolrType.num_integer, true, true, true, "internal links, the length of the a-tag as number of words"),
inboundlinks_alttag_txt(SolrType.text_general, true, true, true, "if the link is an image link, this contains the alt tag if the image is also liked as img link"),
outboundlinks_tag_txt(SolrType.text_general, true, true, true, "external links, normalized (absolute URLs), as <a> - tag with anchor text and nofollow"),
outboundlinks_protocol_sxt(SolrType.string, true, true, true, "external links, only the protocol"),
outboundlinks_urlstub_txt(SolrType.text_general, true, true, true, "external links, the url only without the protocol"),
outboundlinks_name_txt(SolrType.text_general, true, true, true, "external links, the name property of the a-tag"),
outboundlinks_rel_sxt(SolrType.string, true, true, true, "external links, the rel property of the a-tag"),
outboundlinks_relflags_val(SolrType.integer, true, true, true, "external links, the rel property of the a-tag, coded binary"),
outboundlinks_relflags_val(SolrType.num_integer, true, true, true, "external links, the rel property of the a-tag, coded binary"),
outboundlinks_text_txt(SolrType.text_general, true, true, true, "external links, the text content of the a-tag"),
outboundlinks_text_chars_val(SolrType.integer, true, true, true, "external links, the length of the a-tag as number of characters"),
outboundlinks_text_words_val(SolrType.integer, true, true, true, "external links, the length of the a-tag as number of words"),
outboundlinks_text_chars_val(SolrType.num_integer, true, true, true, "external links, the length of the a-tag as number of characters"),
outboundlinks_text_words_val(SolrType.num_integer, true, true, true, "external links, the length of the a-tag as number of words"),
outboundlinks_alttag_txt(SolrType.text_general, true, true, true, "if the link is an image link, this contains the alt tag if the image is also liked as img link"),
images_tag_txt(SolrType.text_general, true, true, true, " all image tags, encoded as <img> tag inclusive alt- and title property"),
images_urlstub_txt(SolrType.text_general, true, true, true, "all image links without the protocol and '://'"),
images_protocol_sxt(SolrType.text_general, true, true, true, "all image link protocols"),
images_alt_txt(SolrType.text_general, true, true, true, "all image link alt tag"),
images_withalt_i(SolrType.integer, true, true, false, "number of image links with alt tag"),
htags_i(SolrType.integer, true, true, false, "binary pattern for the existance of h1..h6 headlines"),
images_withalt_i(SolrType.num_integer, true, true, false, "number of image links with alt tag"),
htags_i(SolrType.num_integer, true, true, false, "binary pattern for the existance of h1..h6 headlines"),
canonical_t(SolrType.text_general, true, true, false, "url inside the canonical link element"),
refresh_s(SolrType.string, true, true, false, "link from the url property inside the refresh link element"),
li_txt(SolrType.text_general, true, true, true, "all texts in <li> tags"),
licount_i(SolrType.integer, true, true, false, "number of <li> tags"),
licount_i(SolrType.num_integer, true, true, false, "number of <li> tags"),
bold_txt(SolrType.text_general, true, true, true, "all texts inside of <b> or <strong> tags. no doubles. listed in the order of number of occurrences in decreasing order"),
boldcount_i(SolrType.integer, true, true, false, "total number of occurrences of <b> or <strong>"),
boldcount_i(SolrType.num_integer, true, true, false, "total number of occurrences of <b> or <strong>"),
italic_txt(SolrType.text_general, true, true, true, "all texts inside of <i> tags. no doubles. listed in the order of number of occurrences in decreasing order"),
italiccount_i(SolrType.integer, true, true, false, "total number of occurrences of <i>"),
italiccount_i(SolrType.num_integer, true, true, false, "total number of occurrences of <i>"),
underline_txt(SolrType.text_general, true, true, true, "all texts inside of <u> tags. no doubles. listed in the order of number of occurrences in decreasing order"),
underlinecount_i(SolrType.integer, true, true, false, "total number of occurrences of <u>"),
underlinecount_i(SolrType.num_integer, true, true, false, "total number of occurrences of <u>"),
flash_b(SolrType.bool, true, true, false, "flag that shows if a swf file is linked"),
frames_txt(SolrType.text_general, true, true, true, "list of all links to frames"),
framesscount_i(SolrType.integer, true, true, false, "number of frames_txt"),
framesscount_i(SolrType.num_integer, true, true, false, "number of frames_txt"),
iframes_txt(SolrType.text_general, true, true, true, "list of all links to iframes"),
iframesscount_i(SolrType.integer, true, true, false, "number of iframes_txt"),
iframesscount_i(SolrType.num_integer, true, true, false, "number of iframes_txt"),
url_protocol_s(SolrType.string, true, true, false, "the protocol of the url"),
url_paths_sxt(SolrType.string, true, true, true, "all path elements in the url"),
url_file_ext_s(SolrType.string, true, true, false, "the file name extension"),
url_parameter_i(SolrType.integer, true, true, false, "number of key-value pairs in search part of the url"),
url_parameter_i(SolrType.num_integer, true, true, false, "number of key-value pairs in search part of the url"),
url_parameter_key_sxt(SolrType.string, true, true, true, "the keys from key-value pairs in the search part of the url"),
url_parameter_value_sxt(SolrType.string, true, true, true, "the values from key-value pairs in the search part of the url"),
url_chars_i(SolrType.integer, true, true, false, "number of all characters in the url == length of sku field"),
url_chars_i(SolrType.num_integer, true, true, false, "number of all characters in the url == length of sku field"),
host_s(SolrType.string, true, true, false, "host of the url"),
host_dnc_s(SolrType.string, true, true, false, "the Domain Class Name, either the TLD or a combination of ccSLD+TLD if a ccSLD is used."),
@ -150,43 +155,43 @@ public enum YaCySchema implements Schema {
host_organizationdnc_s(SolrType.string, true, true, false, "the organization and dnc concatenated with '.'"),
host_subdomain_s(SolrType.string, true, true, false, "the remaining part of the host without organizationdnc"),
title_count_i(SolrType.integer, true, true, false, "number of titles (counting the 'title' field) in the document"),
title_chars_val(SolrType.integer, true, true, true, "number of characters for each title"),
title_words_val(SolrType.integer, true, true, true, "number of words in each title"),
title_count_i(SolrType.num_integer, true, true, false, "number of titles (counting the 'title' field) in the document"),
title_chars_val(SolrType.num_integer, true, true, true, "number of characters for each title"),
title_words_val(SolrType.num_integer, true, true, true, "number of words in each title"),
description_count_i(SolrType.integer, true, true, false, "number of descriptions in the document. Its not counting the 'description' field since there is only one. But it counts the number of descriptions that appear in the document (if any)"),
description_chars_val(SolrType.integer, true, true, true, "number of characters for each description"),
description_words_val(SolrType.integer, true, true, true, "number of words in each description"),
description_count_i(SolrType.num_integer, true, true, false, "number of descriptions in the document. Its not counting the 'description' field since there is only one. But it counts the number of descriptions that appear in the document (if any)"),
description_chars_val(SolrType.num_integer, true, true, true, "number of characters for each description"),
description_words_val(SolrType.num_integer, true, true, true, "number of words in each description"),
h1_i(SolrType.integer, true, true, false, "number of h1 header lines"),
h2_i(SolrType.integer, true, true, false, "number of h2 header lines"),
h3_i(SolrType.integer, true, true, false, "number of h3 header lines"),
h4_i(SolrType.integer, true, true, false, "number of h4 header lines"),
h5_i(SolrType.integer, true, true, false, "number of h5 header lines"),
h6_i(SolrType.integer, true, true, false, "number of h6 header lines"),
h1_i(SolrType.num_integer, true, true, false, "number of h1 header lines"),
h2_i(SolrType.num_integer, true, true, false, "number of h2 header lines"),
h3_i(SolrType.num_integer, true, true, false, "number of h3 header lines"),
h4_i(SolrType.num_integer, true, true, false, "number of h4 header lines"),
h5_i(SolrType.num_integer, true, true, false, "number of h5 header lines"),
h6_i(SolrType.num_integer, true, true, false, "number of h6 header lines"),
schema_org_breadcrumb_i(SolrType.integer, true, true, false, "number of itemprop=\"breadcrumb\" appearances in div tags"),
schema_org_breadcrumb_i(SolrType.num_integer, true, true, false, "number of itemprop=\"breadcrumb\" appearances in div tags"),
opengraph_title_t(SolrType.text_general, true, true, false, "Open Graph Metadata from og:title metadata field, see http://ogp.me/ns#"),
opengraph_type_s(SolrType.text_general, true, true, false, "Open Graph Metadata from og:type metadata field, see http://ogp.me/ns#"),
opengraph_url_s(SolrType.text_general, true, true, false, "Open Graph Metadata from og:url metadata field, see http://ogp.me/ns#"),
opengraph_image_s(SolrType.text_general, true, true, false, "Open Graph Metadata from og:image metadata field, see http://ogp.me/ns#"),
// special values; can only be used if '_val' type is defined in schema file; this is not standard
bold_val(SolrType.integer, true, true, true, "number of occurrences of texts in bold_txt"),
italic_val(SolrType.integer, true, true, true, "number of occurrences of texts in italic_txt"),
underline_val(SolrType.integer, true, true, true, "number of occurrences of texts in underline_txt"),
bold_val(SolrType.num_integer, true, true, true, "number of occurrences of texts in bold_txt"),
italic_val(SolrType.num_integer, true, true, true, "number of occurrences of texts in italic_txt"),
underline_val(SolrType.num_integer, true, true, true, "number of occurrences of texts in underline_txt"),
ext_cms_txt(SolrType.text_general, true, true, true, "names of cms attributes; if several are recognized then they are listen in decreasing order of number of matching criterias"),
ext_cms_val(SolrType.integer, true, true, true, "number of attributes that count for a specific cms in ext_cms_txt"),
ext_cms_val(SolrType.num_integer, true, true, true, "number of attributes that count for a specific cms in ext_cms_txt"),
ext_ads_txt(SolrType.text_general, true, true, true, "names of ad-servers/ad-services"),
ext_ads_val(SolrType.integer, true, true, true, "number of attributes counts in ext_ads_txt"),
ext_ads_val(SolrType.num_integer, true, true, true, "number of attributes counts in ext_ads_txt"),
ext_community_txt(SolrType.text_general, true, true, true, "names of recognized community functions"),
ext_community_val(SolrType.integer, true, true, true, "number of attribute counts in attr_community"),
ext_community_val(SolrType.num_integer, true, true, true, "number of attribute counts in attr_community"),
ext_maps_txt(SolrType.text_general, true, true, true, "names of map services"),
ext_maps_val(SolrType.integer, true, true, true, "number of attribute counts in ext_maps_txt"),
ext_maps_val(SolrType.num_integer, true, true, true, "number of attribute counts in ext_maps_txt"),
ext_tracker_txt(SolrType.text_general, true, true, true, "names of tracker server"),
ext_tracker_val(SolrType.integer, true, true, true, "number of attribute counts in ext_tracker_txt"),
ext_tracker_val(SolrType.num_integer, true, true, true, "number of attribute counts in ext_tracker_txt"),
ext_title_txt(SolrType.text_general, true, true, true, "names matching title expressions"),
ext_title_val(SolrType.integer, true, true, true, "number of matching title expressions");
ext_title_val(SolrType.num_integer, true, true, true, "number of matching title expressions");
private String solrFieldName = null; // solr field name in custom solr schema, defaults to solcell schema field name (= same as this.name() )
private final SolrType type;
@ -269,16 +274,19 @@ public enum YaCySchema implements Schema {
/**
 * Writes a single date value into the given input document under this field's Solr name.
 * With assertions enabled, the field must be single-valued and of type {@code SolrType.date}.
 *
 * @param doc   the input document that receives the value
 * @param value the date to store
 */
public final void add(final SolrInputDocument doc, final Date value) {
    assert !this.isMultiValued();
    assert SolrType.date == this.type;
    final String solrName = this.getSolrFieldName();
    doc.setField(solrName, value);
}
/**
 * Writes a single int value into the given input document under this field's Solr name.
 * With assertions enabled, the field must be single-valued and of type {@code SolrType.num_integer}.
 *
 * @param doc   the input document that receives the value
 * @param value the number to store; it is boxed to an {@code Integer}
 */
public final void add(final SolrInputDocument doc, final int value) {
    assert !this.isMultiValued();
    assert SolrType.num_integer == this.type;
    final String solrName = this.getSolrFieldName();
    doc.setField(solrName, Integer.valueOf(value));
}
/**
 * Writes a single long value into the given input document under this field's Solr name.
 * With assertions enabled, the field must be single-valued and of type {@code SolrType.num_long}.
 *
 * @param doc   the input document that receives the value
 * @param value the number to store; it is boxed to a {@code Long}
 */
public final void add(final SolrInputDocument doc, final long value) {
    assert !this.isMultiValued();
    assert SolrType.num_long == this.type;
    final String solrName = this.getSolrFieldName();
    doc.setField(solrName, Long.valueOf(value));
}
@ -295,7 +303,7 @@ public enum YaCySchema implements Schema {
public final void add(final SolrInputDocument doc, final List<?> value) {
assert this.isMultiValued();
if (value == null || value.size() == 0) {
if (this.type == SolrType.integer) {
if (this.type == SolrType.num_integer) {
doc.setField(this.getSolrFieldName(), new Integer[0]);
} else if (this.type == SolrType.string) {
doc.setField(this.getSolrFieldName(), new String[0]);
@ -305,7 +313,7 @@ public enum YaCySchema implements Schema {
}
return;
}
if (this.type == SolrType.integer) {
if (this.type == SolrType.num_integer) {
assert (value.iterator().next() instanceof Integer);
doc.setField(this.getSolrFieldName(), value.toArray(new Integer[value.size()]));
} else if (this.type == SolrType.string || this.type == SolrType.text_general) {

@ -51,7 +51,7 @@ public abstract class AbstractSolrConnector implements SolrConnector {
}
public final static SolrQuery catchSuccessQuery = new SolrQuery();
static {
catchSuccessQuery.setQuery("-" + YaCySchema.failreason_t.name() + ":[* TO *]");
catchSuccessQuery.setQuery("-" + YaCySchema.failreason_t.getSolrFieldName() + ":[* TO *]");
catchSuccessQuery.setFields(YaCySchema.id.getSolrFieldName());
catchSuccessQuery.setRows(1);
catchSuccessQuery.setStart(0);
@ -59,10 +59,15 @@ public abstract class AbstractSolrConnector implements SolrConnector {
private final static int pagesize = 100;
@Override
public boolean exists(final String id) throws IOException {
public boolean exists(final String fieldName, final String key) throws IOException {
if (fieldName == null) return false;
try {
final SolrDocument doc = get(id, YaCySchema.id.getSolrFieldName());
if (fieldName.equals(YaCySchema.id.getSolrFieldName())) {
final SolrDocument doc = getById(key, fieldName);
return doc != null;
}
long count = getQueryCount(fieldName + ":\"" + key + "\"");
return count > 0;
} catch (final Throwable e) {
log.warn(e);
return false;

@ -57,20 +57,47 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
private SolrConnector solr0;
private SolrConnector solr1;
private final ARC<String, Object> hitCache, missCache;
private int hitCacheMax, missCacheMax, partitions;
private final Map<String, HitMissCache> hitMissCache;
private final ARC<String, SolrDocument> documentCache;
public long documentCache_Hit = 0, documentCache_Miss = 0, documentCache_Insert = 0; // for statistics only; do not write
/**
 * Pair of caches for one Solr field: hitCache remembers keys that are known to exist,
 * missCache remembers keys that are known to be absent. The public counters are
 * incremented by the enclosing connector for statistics.
 */
public static class HitMissCache {
// keys known to exist (hitCache) and keys known to be missing (missCache)
public final ARC<String, Object> hitCache, missCache;
public long hitCache_Hit = 0, hitCache_Miss = 0, hitCache_Insert = 0; // for statistics only; do not write
public long missCache_Hit = 0, missCache_Miss = 0, missCache_Insert = 0; // for statistics only; do not write
// NOTE(review): counters with the same names are also declared on the enclosing connector and
// the connector code increments those, not these - confirm whether this copy is needed
public long documentCache_Hit = 0, documentCache_Miss = 0, documentCache_Insert = 0; // for statistics only; do not write
/**
 * Creates both caches.
 * @param hitCacheMax size argument passed to the hit cache
 * @param missCacheMax size argument passed to the miss cache
 * @param partitions partition count passed to both caches
 */
public HitMissCache(int hitCacheMax, int missCacheMax, int partitions) {
this.hitCache = new ConcurrentARC<String, Object>(hitCacheMax, partitions);
this.missCache = new ConcurrentARC<String, Object>(missCacheMax, partitions);
}
/**
 * Empties the hit cache and the miss cache; the counters are left unchanged.
 */
public void clearCache() {
this.hitCache.clear();
this.missCache.clear();
}
}
public MirrorSolrConnector(int hitCacheMax, int missCacheMax, int docCacheMax) {
this.solr0 = null;
this.solr1 = null;
int partitions = Runtime.getRuntime().availableProcessors() * 2;
this.hitCache = new ConcurrentARC<String, Object>(hitCacheMax, partitions);
this.missCache = new ConcurrentARC<String, Object>(missCacheMax, partitions);
this.documentCache = new ConcurrentARC<String, SolrDocument>(docCacheMax, partitions);
this.hitCacheMax = hitCacheMax;
this.missCacheMax = missCacheMax;
this.partitions = Runtime.getRuntime().availableProcessors() * 2;
this.hitMissCache = new HashMap<String, HitMissCache>();
this.documentCache = new ConcurrentARC<String, SolrDocument>(docCacheMax, this.partitions);
}
/**
 * Returns the hit/miss cache that belongs to the given field, creating it on first use.
 * <p>
 * The registry is a plain {@code HashMap}, so the lookup-and-insert runs under the map's
 * monitor: concurrent callers can neither create two caches for the same field (which would
 * lose cached entries) nor corrupt the map by inserting at the same time.
 * Code that iterates over the registry is not covered by this lock.
 *
 * @param field the name of the field whose cache is wanted
 * @return the cache for that field, never null
 */
public HitMissCache getCache(String field) {
    synchronized (this.hitMissCache) {
        HitMissCache cache = this.hitMissCache.get(field);
        if (cache == null) {
            cache = new HitMissCache(this.hitCacheMax, this.missCacheMax, this.partitions);
            this.hitMissCache.put(field, cache);
        }
        return cache;
    }
}
public boolean isConnected0() {
@ -110,8 +137,7 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
}
public void clearCache() {
this.hitCache.clear();
this.missCache.clear();
for (HitMissCache c: hitMissCache.values()) c.clearCache();
this.documentCache.clear();
}
@ -163,9 +189,10 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
// Removes the document with the given id from the caches and from both mirrored connectors.
// The lines working on this.hitCache / this.missCache are the previous version of this method,
// the lines working on the HitMissCache 'c' are the current version.
@Override
public void delete(final String id) throws IOException {
this.documentCache.remove(id);
this.hitCache.remove(id);
this.missCache.put(id, EXIST);
this.missCache_Insert++;
// NOTE(review): the cache is looked up with the literal "id" here, while getById() uses
// YaCySchema.id.getSolrFieldName(); if the id field is mapped to a different solr field name,
// the deletion is recorded in a different cache than the one the lookups consult - confirm
HitMissCache c = getCache("id");
c.hitCache.remove(id);
c.missCache.put(id, EXIST);
c.missCache_Insert++;
if (this.solr0 != null) this.solr0.delete(id);
if (this.solr1 != null) this.solr1.delete(id);
}
@ -179,9 +206,10 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
public void delete(final List<String> ids) throws IOException {
for (String id: ids) {
this.documentCache.remove(id);
this.hitCache.remove(id);
this.missCache.put(id, EXIST);
this.missCache_Insert++;
HitMissCache c = getCache("id");
c.hitCache.remove(id);
c.missCache.put(id, EXIST);
c.missCache_Insert++;
}
if (this.solr0 != null) this.solr0.delete(ids);
if (this.solr1 != null) this.solr1.delete(ids);
@ -196,74 +224,63 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
return count;
}
/**
* check if a given id exists in solr
* @param id
* @return true if any entry in solr exists
* @throws IOException
*/
// Checks whether a key exists in the given field: hit cache first, then document cache,
// then miss cache, and finally the two mirrored connectors; the result is remembered in
// the hit or miss cache of that field.
// The lines using 'id' and this.hitCache / this.missCache are the previous version of this
// method, the lines using 'fieldName', 'key' and the HitMissCache 'c' are the current version.
@Override
public boolean exists(final String id) throws IOException {
if (this.hitCache.containsKey(id)) {
this.hitCache_Hit++;
public boolean exists(final String fieldName, final String key) throws IOException {
HitMissCache c = getCache(fieldName);
if (c.hitCache.containsKey(key)) {
c.hitCache_Hit++;
return true;
}
this.hitCache_Miss++;
if (this.documentCache.containsKey(id)) {
c.hitCache_Miss++;
// NOTE(review): documentCache is filled with document ids as keys, but this lookup runs for
// every fieldName; a key of another field that equals a cached document id would be reported
// as existing - presumably this check should apply only when fieldName is the id field; confirm
if (this.documentCache.containsKey(key)) {
this.documentCache_Hit++;
return true;
}
this.documentCache_Miss++;
if (this.missCache.containsKey(id)) {
this.missCache_Hit++;
if (c.missCache.containsKey(key)) {
c.missCache_Hit++;
return false;
}
this.missCache_Miss++;
if ((solr0 != null && solr0.exists(id)) || (solr1 != null && solr1.exists(id))) {
this.missCache.remove(id);
this.hitCache.put(id, EXIST);
this.hitCache_Insert++;
c.missCache_Miss++;
if ((solr0 != null && solr0.exists(fieldName, key)) || (solr1 != null && solr1.exists(fieldName, key))) {
c.missCache.remove(key);
c.hitCache.put(key, EXIST);
c.hitCache_Insert++;
return true;
}
this.missCache.put(id, EXIST);
this.missCache_Insert++;
c.missCache.put(key, EXIST);
c.missCache_Insert++;
return false;
}
// Fetches a document by its id: document cache first (only when no field list is given),
// then the miss cache of the id field, then the two mirrored connectors. If the hit cache
// claims the document exists although no connector returned it, a commit is forced and the
// connectors are asked once more.
// The lines named get(...) and using this.hitCache / this.missCache are the previous version
// of this method, the lines named getById(...) and using the HitMissCache 'c' are the current one.
@Override
public SolrDocument get(final String id, final String ... fields) throws IOException {
SolrDocument doc = fields.length == 0 ? this.documentCache.get(id) : null;
public SolrDocument getById(final String key, final String ... fields) throws IOException {
SolrDocument doc = fields.length == 0 ? this.documentCache.get(key) : null;
if (doc != null) {
this.documentCache_Hit++;
return doc;
}
documentCache_Miss++;
if (this.missCache.containsKey(id)) {
this.missCache_Hit++;
HitMissCache c = this.getCache(YaCySchema.id.getSolrFieldName());
if (c.missCache.containsKey(key)) {
c.missCache_Hit++;
return null;
}
missCache_Miss++;
if ((solr0 != null && ((doc = solr0.get(id, fields)) != null)) || (solr1 != null && ((doc = solr1.get(id, fields)) != null))) {
this.missCache.remove(id);
this.hitCache.put(id, EXIST);
this.hitCache_Insert++;
if (fields.length == 0) {this.documentCache.put(id, doc); this.documentCache_Insert++;}
c.missCache_Miss++;
if ((solr0 != null && ((doc = solr0.getById(key, fields)) != null)) || (solr1 != null && ((doc = solr1.getById(key, fields)) != null))) {
addToCache(doc, fields.length == 0);
return doc;
}
// check if there is a autocommit problem
if (this.hitCache.containsKey(id)) {
if (c.hitCache.containsKey(key)) {
// the document should be there, therefore make a commit and check again
this.commit();
if ((solr0 != null && ((doc = solr0.get(id, fields)) != null)) || (solr1 != null && ((doc = solr1.get(id, fields)) != null))) {
this.missCache.remove(id);
this.hitCache.put(id, EXIST);
this.hitCache_Insert++;
if (fields.length == 0) {this.documentCache.put(id, doc); this.documentCache_Insert++;}
return doc;
if ((solr0 != null && ((doc = solr0.getById(key, fields)) != null)) || (solr1 != null && ((doc = solr1.getById(key, fields)) != null))) {
addToCache(doc, fields.length == 0);
// NOTE(review): the previous version returned the document at this point ("return doc;" above);
// the getById version only caches it, then falls through to the missCache.put below and
// returns null although the document was just found - looks like a missing "return doc;", confirm
}
}
this.missCache.put(id, EXIST);
this.missCache_Insert++;
c.missCache.put(key, EXIST);
c.missCache_Insert++;
return null;
}
@ -277,11 +294,10 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
String id = (String) solrdoc.getFieldValue(YaCySchema.id.getSolrFieldName());
assert id != null;
if (id == null) return;
this.missCache.remove(id);
this.documentCache.put(id, ClientUtils.toSolrDocument(solrdoc));
SolrDocument doc = ClientUtils.toSolrDocument(solrdoc);
addToCache(doc, true);
this.documentCache.put(id, doc);
this.documentCache_Insert++;
this.hitCache.put(id, EXIST);
this.hitCache_Insert++;
if (this.solr0 != null) this.solr0.add(solrdoc);
if (this.solr1 != null) this.solr1.add(solrdoc);
}
@ -302,19 +318,19 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
if (this.solr0 == null && this.solr1 == null) return new SolrDocumentList();
if (offset == 0 && count == 1 && querystring.startsWith("id:")) {
final SolrDocumentList list = new SolrDocumentList();
SolrDocument doc = get(querystring.charAt(3) == '"' ? querystring.substring(4, querystring.length() - 1) : querystring.substring(3), fields);
SolrDocument doc = getById(querystring.charAt(3) == '"' ? querystring.substring(4, querystring.length() - 1) : querystring.substring(3), fields);
list.add(doc);
// no addToCache(list) here because that was already handlet in get();
return list;
}
if (this.solr0 != null && this.solr1 == null) {
SolrDocumentList list = this.solr0.query(querystring, offset, count, fields);
if (fields.length == 0) addToCache(list);
addToCache(list, fields.length == 0);
return list;
}
if (this.solr1 != null && this.solr0 == null) {
SolrDocumentList list = this.solr1.query(querystring, offset, count, fields);
if (fields.length == 0) addToCache(list);
addToCache(list, fields.length == 0);
return list;
}
@ -338,7 +354,7 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
for (final SolrDocument d: l) list.add(d);
// add caching
if (fields.length == 0) addToCache(list);
addToCache(list, fields.length == 0);
return list;
}
@ -441,33 +457,51 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
return facets0;
}
private void addToCache(SolrDocumentList list) {
private void addToCache(SolrDocumentList list, boolean doccache) {
if (MemoryControl.shortStatus()) clearCache();
for (final SolrDocument solrdoc: list) {
String id = (String) solrdoc.getFieldValue(YaCySchema.id.getSolrFieldName());
if (id != null) {
this.hitCache.put(id, EXIST);
hitCache_Insert++;
this.documentCache.put(id, solrdoc);
documentCache_Insert++;
addToCache(solrdoc, doccache);
}
}
/**
 * Records an existing document in the caches of this connector.
 * <p>
 * For every field that currently has a hit/miss cache, the document's value of that field
 * (if it is a String, Integer or Long) is removed from the miss cache and entered into the
 * hit cache. Optionally the whole document is stored in the document cache under its id.
 *
 * @param doc     the document that is known to exist
 * @param doccach if true, the document itself is also put into the document cache;
 *                this is skipped when the document carries no String id, because a
 *                missing id cannot serve as a cache key
 */
private void addToCache(SolrDocument doc, boolean doccach) {
    for (Map.Entry<String, HitMissCache> e: this.hitMissCache.entrySet()) {
        final Object keyo = doc.getFieldValue(e.getKey());
        final String key;
        if (keyo instanceof String) {
            key = (String) keyo;
        } else if (keyo instanceof Integer || keyo instanceof Long) {
            key = keyo.toString();
        } else {
            // field absent in this document or of a type that is not used as a cache key
            continue;
        }
        final HitMissCache c = e.getValue();
        c.missCache.remove(key);
        c.hitCache.put(key, EXIST);
        c.hitCache_Insert++;
    }
    if (!doccach) return;
    // documents fetched with a restricted field list may lack the id; never cache under a null key
    final Object id = doc.getFieldValue(YaCySchema.id.getSolrFieldName());
    if (id instanceof String) {
        this.documentCache.put((String) id, doc);
        this.documentCache_Insert++;
    }
}
@Override
public long getSize() {
long s = 0;
if (this.solr0 != null) s += this.solr0.getSize();
if (this.solr1 != null) s += this.solr1.getSize();
return Math.max(this.documentCache.size(), Math.max(this.hitCache.size(), s));
HitMissCache c = getCache("id");
return Math.max(this.documentCache.size(), Math.max(c.hitCache.size(), s));
}
public int nameCacheHitSize() {
return this.hitCache.size();
HitMissCache c = getCache("id");
return c.hitCache.size();
}
public int nameCacheMissSize() {
return this.missCache.size();
HitMissCache c = getCache("id");
return c.missCache.size();
}
public int nameCacheDocumentSize() {

@ -144,8 +144,8 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr
}
@Override
public SolrDocument get(final String id, final String ... fields) throws IOException {
return this.solr.get(id, fields);
public SolrDocument getById(final String key, final String ... fields) throws IOException {
return this.solr.getById(key, fields);
}
@Override

@ -130,11 +130,11 @@ public class RetrySolrConnector extends AbstractSolrConnector implements SolrCon
}
@Override
public boolean exists(final String id) throws IOException {
public boolean exists(final String fieldName, final String key) throws IOException {
final long t = System.currentTimeMillis() + this.retryMaxTime;
Throwable ee = null;
while (System.currentTimeMillis() < t) try {
return this.solrConnector.exists(id);
return this.solrConnector.exists(fieldName, key);
} catch (final Throwable e) {
ee = e;
try {Thread.sleep(10);} catch (final InterruptedException e1) {}
@ -145,11 +145,11 @@ public class RetrySolrConnector extends AbstractSolrConnector implements SolrCon
}
@Override
public SolrDocument get(final String id, final String ... fields) throws IOException {
public SolrDocument getById(final String key, final String ... fields) throws IOException {
final long t = System.currentTimeMillis() + this.retryMaxTime;
Throwable ee = null;
while (System.currentTimeMillis() < t) try {
return this.solrConnector.get(id, fields);
return this.solrConnector.getById(key, fields);
} catch (final Throwable e) {
ee = e;
try {Thread.sleep(10);} catch (final InterruptedException e1) {}

@ -125,17 +125,17 @@ public class ShardSolrConnector extends AbstractSolrConnector implements SolrCon
* @throws IOException
*/
@Override
public boolean exists(final String id) throws IOException {
public boolean exists(final String fieldName, final String key) throws IOException {
for (final SolrConnector connector: this.connectors) {
if (connector.exists(id)) return true;
if (connector.exists(fieldName, key)) return true;
}
return false;
}
@Override
public SolrDocument get(String id, final String ... fields) throws IOException {
public SolrDocument getById(final String key, final String ... fields) throws IOException {
for (final SolrConnector connector: this.connectors) {
SolrDocument doc = connector.get(id, fields);
SolrDocument doc = connector.getById(key, fields);
if (doc != null) return doc;
}
return null;

@ -88,12 +88,13 @@ public interface SolrConnector extends Iterable<String> /* Iterable of document
public int deleteByQuery(final String querystring) throws IOException;
/**
* check if a given id exists in solr
* @param id
* check if a given key exists in solr at the field fieldName
* @param fieldName
* @param key
* @return true if any entry in solr exists
* @throws IOException
*/
public boolean exists(final String id) throws IOException;
public boolean exists(final String fieldName, final String key) throws IOException;
/**
* add a solr input document
@ -105,13 +106,13 @@ public interface SolrConnector extends Iterable<String> /* Iterable of document
public void add(final Collection<SolrInputDocument> solrdocs) throws IOException, SolrException;
/**
* get a document from solr by given id
* @param id
* get a document from solr by given key for the id-field
* @param key
* @param fields list of fields
* @return one result or null if no result exists
* @throws IOException
*/
public SolrDocument get(final String id, final String ... fields) throws IOException;
public SolrDocument getById(final String key, final String ... fields) throws IOException;
/**
* get a query result from solr

@ -316,20 +316,14 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
private final char[] queryIDTemplate = "id:\" \"".toCharArray();
/**
* get a document from solr by given id
* @param id
* @return one result or null if no result exists
* @throws IOException
*/
@Override
public SolrDocument get(final String id, final String ... fields) throws IOException {
assert id.length() == 12;
public SolrDocument getById(final String key, final String ... fields) throws IOException {
final SolrQuery query = new SolrQuery();
assert key.length() == 12;
// construct query
char[] q = new char[17];
System.arraycopy(this.queryIDTemplate, 0, q, 0, 17);
System.arraycopy(id.toCharArray(), 0, q, 4, 12);
final SolrQuery query = new SolrQuery();
System.arraycopy(key.toCharArray(), 0, q, 4, 12);
query.setQuery(new String(q));
query.setRows(1);
query.setStart(0);

@ -185,15 +185,15 @@ public class EnhancedXMLResponseWriter implements QueryResponseWriter {
writeTag(writer, "str", name, value, true);
} else if (typeName.equals(SolrType.bool.printName())) {
writeTag(writer, "bool", name, "F".equals(value) ? "false" : "true", true);
} else if (typeName.equals(SolrType.integer.printName())) {
} else if (typeName.equals(SolrType.num_integer.printName())) {
writeTag(writer, "int", name, value, true);
} else if (typeName.equals(SolrType.tlong.printName())) {
} else if (typeName.equals(SolrType.num_long.printName())) {
writeTag(writer, "long", name, value, true);
} else if (typeName.equals(SolrType.date.printName())) {
writeTag(writer, "date", name, DateField.formatExternal(new Date(Long.parseLong(value))), true);
} else if (typeName.equals(SolrType.tfloat.printName())) {
} else if (typeName.equals(SolrType.num_float.printName())) {
writeTag(writer, "float", name, value, true);
} else if (typeName.equals(SolrType.tdouble.printName())) {
} else if (typeName.equals(SolrType.num_double.printName())) {
writeTag(writer, "double", name, value, true);
}
}

@ -41,7 +41,7 @@ import java.util.concurrent.atomic.AtomicInteger;
import net.yacy.contentcontrol.ContentControlFilterUpdateThread;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.analysis.Classification.ContentDomain;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.order.Base64Order;

@ -31,8 +31,8 @@ import java.util.Date;
import java.util.List;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.ResponseHeader;

@ -30,9 +30,9 @@ import java.util.Date;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.ResponseHeader;

@ -39,9 +39,9 @@ import jcifs.smb.SmbException;
import jcifs.smb.SmbFile;
import jcifs.smb.SmbFileInputStream;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.ResponseHeader;

@ -37,9 +37,13 @@ import java.util.Set;
import java.util.SortedSet;
import java.util.TreeMap;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.update.processor.Lookup3Signature;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.WordCache;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.analysis.Classification.ContentDomain;
import net.yacy.cora.document.analysis.EnhancedTextProfileSignature;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.language.synonyms.SynonymLibrary;
import net.yacy.cora.lod.vocabulary.Tagging;
@ -71,6 +75,8 @@ public final class Condenser {
private final Map<String, Word> words; // a string (the words) to (indexWord) - relation
private final Map<String, Set<Tagging.Metatag>> tags = new HashMap<String, Set<Tagging.Metatag>>(); // a set of tags, discovered from Autotagging
private final Set<String> synonyms; // a set of synonyms to the words
private long fuzzy_signature = 0, exact_signature = 0; // signatures for double-check detection
private String fuzzy_signature_text = null; // signatures for double-check detection
public int RESULT_NUMB_WORDS = -1;
public int RESULT_DIFF_WORDS = -1;
@ -84,7 +90,7 @@ public final class Condenser {
final boolean indexText,
final boolean indexMedia,
final WordCache meaningLib,
final SynonymLibrary synonyms,
final SynonymLibrary synlib,
final boolean doAutotagging
) {
Thread.currentThread().setName("condenser-" + document.dc_identifier()); // for debugging
@ -210,13 +216,47 @@ public final class Condenser {
document.addMetatags(this.tags);
}
if (synlib != null) {
for (String word: this.words.keySet()) {
Set<String> syms = synlib.getSynonyms(word);
if (syms != null) this.synonyms.addAll(syms);
}
}
String text = document.getTextString();
// create the synonyms set
if (synonyms != null) {
for (String word: this.words.keySet()) {
Set<String> syms = synonyms.getSynonyms(word);
Set<String> syms = synlib.getSynonyms(word);
if (syms != null) this.synonyms.addAll(syms);
}
}
// create hashes for duplicate detection
// check dups with http://localhost:8090/solr/select?q=*:*&start=0&rows=3&fl=sku,fuzzy_signature_text_t,fuzzy_signature_l,fuzzy_signature_unique_b
EnhancedTextProfileSignature fuzzySignatureFactory = new EnhancedTextProfileSignature();
Map<String,String> sp = new HashMap<String,String>();
sp.put("quantRate", "0.5"); // for minTokenLen = 2 the value should not be below 0.24; for minTokenLen = 3 the value must be not below 0.5!
sp.put("minTokenLen", "3");
fuzzySignatureFactory.init(new MapSolrParams(sp));
fuzzySignatureFactory.add(text);
byte[] fuzzy_signature_hash = fuzzySignatureFactory.getSignature();
long l = 0; for (int i = 0; i < 8; i++) l = (l << 8) + (fuzzy_signature_hash[i] & 0xff);
this.fuzzy_signature = l;
this.fuzzy_signature_text = fuzzySignatureFactory.getSignatureText().toString();
Lookup3Signature exactSignatureFactory = new Lookup3Signature();
exactSignatureFactory.add(text);
byte[] exact_signature_hash = exactSignatureFactory.getSignature();
l = 0; for (int i = 0; i < 8; i++) l = (l << 8) + (exact_signature_hash[i] & 0xff);
this.exact_signature = l;
}
/**
 * Creates a condenser from a plain text instead of a parsed document.
 * The word map and the synonym set start empty and the text is then handed to
 * {@code createCondensement}; no language identificator is attached to such an instance.
 *
 * @param text          the text to condense
 * @param meaningLib    word library passed through to the condensement
 * @param doAutotagging flag passed through to the condensement
 */
public Condenser(final String text, final WordCache meaningLib, boolean doAutotagging) {
    this.words = new TreeMap<String, Word>();
    this.synonyms = new HashSet<String>();
    // language detection is not needed for plain text input
    this.languageIdentificator = null;
    createCondensement(text, meaningLib, doAutotagging);
}
private void insertTextToWords(
@ -250,14 +290,6 @@ public final class Condenser {
}
}
public Condenser(final String text, final WordCache meaningLib, boolean doAutotagging) {
this.languageIdentificator = null; // we don't need that here
// analysis = new Properties();
this.words = new TreeMap<String, Word>();
this.synonyms = new HashSet<String>();
createCondensement(text, meaningLib, doAutotagging);
}
public int excludeWords(final SortedSet<String> stopwords) {
// subtracts the given stopwords from the word list
// the word list shrinkes. This returns the number of shrinked words
@ -277,6 +309,18 @@ public final class Condenser {
return l;
}
/**
 * Returns the fuzzy signature used for duplicate detection.
 * The document constructor builds it from the first eight bytes of the
 * EnhancedTextProfileSignature hash of the document text.
 * @return the fuzzy signature; 0 (the initial value of the field) if none was computed
 */
public long fuzzySignature() {
return this.fuzzy_signature;
}
/**
 * Returns the signature text that the EnhancedTextProfileSignature produced while
 * computing the fuzzy signature.
 * @return the signature text; null (the initial value of the field) if none was computed
 */
public String fuzzySignatureText() {
return this.fuzzy_signature_text;
}
/**
 * Returns the exact signature used for duplicate detection.
 * The document constructor builds it from the first eight bytes of the
 * Lookup3Signature hash of the document text.
 * @return the exact signature; 0 (the initial value of the field) if none was computed
 */
public long exactSignature() {
return this.exact_signature;
}
/**
 * Returns the language reported by the language identificator of this condenser.
 * Condensers created from a plain text have no language identificator; for those
 * this method returns null instead of failing with a NullPointerException.
 *
 * @return the identified language, or null if no language identificator is attached
 */
public String language() {
    if (this.languageIdentificator == null) return null;
    return this.languageIdentificator.getLanguage();
}

@ -53,9 +53,9 @@ import java.util.Set;
import java.util.TreeSet;
import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.lod.JenaTripleStore;
import net.yacy.cora.lod.vocabulary.DCTerms;
import net.yacy.cora.lod.vocabulary.Owl;

@ -433,7 +433,7 @@ public class URIMetadataNode {
private int getInt(YaCySchema field) {
assert !field.isMultiValued();
assert field.getType() == SolrType.integer;
assert field.getType() == SolrType.num_integer;
Object x = this.doc.getFieldValue(field.getSolrFieldName());
if (x == null) return 0;
if (x instanceof Integer) return ((Integer) x).intValue();
@ -480,7 +480,7 @@ public class URIMetadataNode {
@SuppressWarnings("unchecked")
private ArrayList<Integer> getIntList(YaCySchema field) {
assert field.isMultiValued();
assert field.getType() == SolrType.integer;
assert field.getType() == SolrType.num_integer;
Object r = this.doc.getFieldValue(field.getSolrFieldName());
if (r == null) return new ArrayList<Integer>(0);
if (r instanceof ArrayList) {

@ -83,13 +83,13 @@ import net.yacy.contentcontrol.ContentControlFilterUpdateThread;
import net.yacy.contentcontrol.SMWListSyncThread;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.RSSFeed;
import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.document.RSSReader;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.document.WordCache;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.cora.federate.solr.connector.ShardSelection;
import net.yacy.cora.federate.solr.connector.ShardSolrConnector;
@ -392,7 +392,7 @@ public final class Switchboard extends serverSwitch {
solrScheme.fill(backupScheme, true);
// switch on some fields which are necessary for ranking and faceting
for (YaCySchema field: new YaCySchema[]{
YaCySchema.host_s,
YaCySchema.host_s, YaCySchema.load_date_dt,
YaCySchema.url_file_ext_s, YaCySchema.last_modified, // needed for media search and /date operator
YaCySchema.url_paths_sxt, YaCySchema.host_organization_s, // needed to search in the url
YaCySchema.inboundlinks_protocol_sxt, YaCySchema.inboundlinks_urlstub_txt, // needed for HostBrowser

@ -42,7 +42,6 @@ import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.federate.solr.SolrType;
import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector;
@ -208,7 +207,7 @@ public final class Fulltext implements Iterable<byte[]> {
}
public void commit() {
if (this.forcedCommitTime + forcedCommitTimeout < System.currentTimeMillis()) return;
if (this.forcedCommitTime + forcedCommitTimeout > System.currentTimeMillis()) return;
this.forcedCommitTime = Long.MAX_VALUE - forcedCommitTimeout; // set the time high to prevent that other processes get to this point meanwhile
this.solr.commit();
this.forcedCommitTime = System.currentTimeMillis(); // set the exact time
@ -218,7 +217,7 @@ public final class Fulltext implements Iterable<byte[]> {
if (urlHash == null) return null;
SolrDocument doc;
try {
doc = this.solr.get(urlHash, YaCySchema.load_date_dt.getSolrFieldName());
doc = this.solr.getById(urlHash, YaCySchema.load_date_dt.getSolrFieldName());
} catch (IOException e) {
return null;
}
@ -249,7 +248,7 @@ public final class Fulltext implements Iterable<byte[]> {
// get the metadata from Solr
try {
SolrDocument doc = this.solr.get(ASCII.String(urlHash));
SolrDocument doc = this.solr.getById(ASCII.String(urlHash));
if (doc != null) {
if (this.urlIndexFile != null) this.urlIndexFile.remove(urlHash);
return new URIMetadataNode(doc, wre, weight);
@ -279,7 +278,7 @@ public final class Fulltext implements Iterable<byte[]> {
byte[] idb = ASCII.getBytes(id);
try {
if (this.urlIndexFile != null) this.urlIndexFile.remove(idb);
SolrDocument sd = this.solr.get(id);
SolrDocument sd = this.solr.getById(id, YaCySchema.last_modified.getSolrFieldName());
Date now = new Date();
Date sdDate = sd == null ? null : URIMetadataNode.getDate(sd, YaCySchema.last_modified);
if (sdDate == null || sdDate.after(now)) sdDate = now;
@ -307,7 +306,7 @@ public final class Fulltext implements Iterable<byte[]> {
String id = ASCII.String(idb);
try {
if (this.urlIndexFile != null) this.urlIndexFile.remove(idb);
SolrDocument sd = this.solr.get(id);
SolrDocument sd = this.solr.getById(id);
if (sd == null || (new URIMetadataNode(sd)).isOlder(row)) {
if (this.solrScheme.contains(YaCySchema.ip_s)) {
// ip_s needs a dns lookup which causes blockings during search here
@ -471,7 +470,7 @@ public final class Fulltext implements Iterable<byte[]> {
if (urlHash == null) return false;
if (this.urlIndexFile != null && this.urlIndexFile.has(urlHash)) return true;
try {
if (this.solr.exists(ASCII.String(urlHash))) return true;
if (this.solr.exists(YaCySchema.id.getSolrFieldName(), ASCII.String(urlHash))) return true;
} catch (final Throwable e) {
Log.logException(e);
}
@ -480,7 +479,7 @@ public final class Fulltext implements Iterable<byte[]> {
public String failReason(final String urlHash) throws IOException {
if (urlHash == null) return null;
SolrDocument doc = this.solr.get(urlHash, YaCySchema.failreason_t.getSolrFieldName());
SolrDocument doc = this.solr.getById(urlHash, YaCySchema.failreason_t.getSolrFieldName());
if (doc == null) return null;
String reason = (String) doc.getFieldValue(YaCySchema.failreason_t.getSolrFieldName());
return reason == null ? null : reason.length() == 0 ? null : reason;

@ -202,7 +202,7 @@ public class Segment {
/**
 * Counts the index entries that match a single search word.
 *
 * The result is the sum of the term index count (when a term index is present)
 * and the Solr query count for the word in the text field.
 *
 * @param word the search word; must not contain ':', ' ' or '/'
 * @return the combined count, or 0 if the word is null or contains one of the rejected characters
 */
public int getQueryCount(String word) {
    // reject words containing characters that are not plain word content;
    // presumably they would alter the meaning of the concatenated Solr query below - TODO confirm
    if (word == null || word.indexOf(':') >= 0 || word.indexOf(' ') >= 0 || word.indexOf('/') >= 0) return 0;
    int count = this.termIndex == null ? 0 : this.termIndex.count(Word.word2hash(word));
    try {
        // query Solr exactly once, addressing the field by its Solr field name
        count += this.fulltext.getSolr().getQueryCount(YaCySchema.text_t.getSolrFieldName() + ':' + word);
    } catch (IOException ignored) {
        // best effort: if Solr cannot be queried, only the term index count is returned
    }
    return count;
}
@ -363,8 +363,28 @@ public class Segment {
if (modDate.getTime() > loadDate.getTime()) modDate = loadDate;
char docType = Response.docType(document.dc_format());
// STORE TO SOLR
// CREATE SOLR DOCUMENT
final SolrInputDocument solrInputDoc = this.fulltext.getSolrScheme().yacy2solr(id, profile, responseHeader, document, condenser, referrerURL, language);
// FIND OUT IF THIS IS A DOUBLE DOCUMENT
for (YaCySchema[] checkfields: new YaCySchema[][]{
{YaCySchema.exact_signature_l, YaCySchema.exact_signature_unique_b},
{YaCySchema.fuzzy_signature_l, YaCySchema.fuzzy_signature_unique_b}}) {
YaCySchema hashfield = checkfields[0];
YaCySchema uniquefield = checkfields[1];
if (this.fulltext.getSolrScheme().contains(hashfield) && this.fulltext.getSolrScheme().contains(uniquefield)) {
// lookup the document with the same signature
long signature = ((Long) solrInputDoc.getField(hashfield.getSolrFieldName()).getValue()).longValue();
try {
if (this.fulltext.getSolr().exists(hashfield.getSolrFieldName(), Long.toString(signature))) {
// change unique attribute in content
solrInputDoc.setField(uniquefield.getSolrFieldName(), false);
}
} catch (IOException e) {}
}
}
// STORE TO SOLR
String error = null;
tryloop: for (int i = 0; i < 20; i++) {
try {

@ -139,13 +139,18 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
if ((isEmpty() || contains(key)) && (!this.lazy || value != 0)) key.add(doc, value);
}
/**
 * Writes a single-valued long field into the given document.
 *
 * The field is written only if this configuration is empty or contains the key.
 * In lazy mode a value of 0 is not written.
 *
 * @param doc   the document that receives the field
 * @param key   the schema field; asserted not to be multi-valued
 * @param value the value to store
 */
private void add(final SolrInputDocument doc, final YaCySchema key, final long value) {
    assert !key.isMultiValued();
    // field is not part of a non-empty configuration: nothing to do
    if (!isEmpty() && !contains(key)) {
        return;
    }
    // lazy mode omits zero values
    if (this.lazy && value == 0) {
        return;
    }
    key.add(doc, value);
}
/**
 * Writes a single-valued boolean field into the given document.
 *
 * The field is written only if this configuration is empty or contains the key.
 * Unlike the numeric variant, the value itself is never used to skip the write.
 *
 * @param doc   the document that receives the field
 * @param key   the schema field; asserted not to be multi-valued
 * @param value the flag to store
 */
private void add(final SolrInputDocument doc, final YaCySchema key, final boolean value) {
    assert !key.isMultiValued();
    // field is not part of a non-empty configuration: nothing to do
    if (!isEmpty() && !contains(key)) {
        return;
    }
    key.add(doc, value);
}
/**
 * Reads a date field from a Solr input document and clamps it to a usable value.
 *
 * The field is looked up by the Solr field name of the key.
 *
 * @param doc the document to read from
 * @param key the schema field holding a date
 * @return {@code new Date(0)} if the field has no value, the current time if the
 *         stored date lies in the future, otherwise the stored date
 */
protected static Date getDate(SolrInputDocument doc, final YaCySchema key) {
    final Date x = (Date) doc.getFieldValue(key.getSolrFieldName());
    final Date now = new Date();
    return (x == null) ? new Date(0) : x.after(now) ? now : x;
}
@ -384,6 +389,11 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
List<String> synonyms = condenser.synonyms();
add(doc, YaCySchema.synonyms_sxt, synonyms);
}
add(doc, YaCySchema.exact_signature_l, condenser.exactSignature());
add(doc, YaCySchema.exact_signature_unique_b, true); // this must be corrected afterwards!
add(doc, YaCySchema.fuzzy_signature_l, condenser.fuzzySignature());
add(doc, YaCySchema.fuzzy_signature_text_t, condenser.fuzzySignatureText());
add(doc, YaCySchema.fuzzy_signature_unique_b, true); // this must be corrected afterwards!
// path elements of link
if (allAttr || contains(YaCySchema.url_paths_sxt)) add(doc, YaCySchema.url_paths_sxt, digestURI.getPaths());

@ -28,12 +28,10 @@ import java.util.LinkedHashMap;
import java.util.Map;
import java.util.SortedSet;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.document.Condenser;
import net.yacy.document.parser.html.AbstractScraper;
import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.data.word.Word;

@ -43,8 +43,8 @@ import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrQuery.ORDER;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.Classification.ContentDomain;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.cora.federate.yacy.CacheStrategy;

@ -36,7 +36,7 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.analysis.Classification.ContentDomain;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.federate.yacy.CacheStrategy;

@ -41,8 +41,8 @@ import com.hp.hpl.jena.rdf.model.Resource;
import net.yacy.contentcontrol.ContentControlFilterUpdateThread;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.Classification.ContentDomain;
import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.federate.yacy.Distribution;

@ -23,8 +23,8 @@ package net.yacy.search.query;
import java.util.Iterator;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.sorting.ConcurrentScoreMap;
import net.yacy.cora.sorting.ScoreMap;

@ -30,8 +30,8 @@ import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.Classification.ContentDomain;
import net.yacy.cora.util.NumberTools;
import net.yacy.kelondro.logging.Log;

@ -36,8 +36,8 @@ import java.util.SortedSet;
import java.util.TreeSet;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.Classification.ContentDomain;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.order.Base64Order;

@ -93,9 +93,9 @@ import java.util.zip.GZIPOutputStream;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.order.Digest;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;

@ -455,7 +455,7 @@ public class serverObjects extends HashMap<String, String> implements Cloneable
public SolrParams toSolrParams(YaCySchema[] facets) {
// check if all required post fields are there
if (!this.containsKey(CommonParams.DF)) this.put(CommonParams.DF, YaCySchema.text_t.name()); // set default field to the text field
if (!this.containsKey(CommonParams.DF)) this.put(CommonParams.DF, YaCySchema.text_t.getSolrFieldName()); // set default field to the text field
if (!this.containsKey(CommonParams.START)) this.put(CommonParams.START, "0"); // set default start item
if (!this.containsKey(CommonParams.ROWS)) this.put(CommonParams.ROWS, "10"); // set default number of search results
@ -466,7 +466,7 @@ public class serverObjects extends HashMap<String, String> implements Cloneable
if (facets != null && facets.length > 0) {
m.put("facet", new String[]{"true"});
String[] fs = new String[facets.length];
for (int i = 0; i < facets.length; i++) fs[i] = facets[i].name();
for (int i = 0; i < facets.length; i++) fs[i] = facets[i].getSolrFieldName();
m.put("facet.field", fs);
}
final SolrParams solrParams = new MultiMapSolrParams(m);

Loading…
Cancel
Save