more logic on Solr queries; usage of the query terms in postprocessing,

saving one query for double document detection now per document
pull/1/head
Michael Peter Christen 11 years ago
parent 38864ae004
commit 0ceeceb35e

@ -0,0 +1,97 @@
package net.yacy.cora.federate.solr.logic;
import java.util.ArrayList;
import java.util.List;
/**
 * Base class for terms which combine a list of operand terms with one and the same
 * operator. The operator keyword is handed over by the subclass constructor and is
 * written between the operands when the query string is produced.
 */
public abstract class AbstractOperations extends AbstractTerm implements Operations {

    /** the operator keyword which is written between the operands in the query string */
    protected final String operandName;

    /** the operands of this operation, in the order in which they were added */
    protected final List<Term> terms;

    /**
     * Create an operation without any operands.
     * @param operandName the operator keyword that is written between the operands by toString()
     */
    public AbstractOperations(final String operandName) {
        this.operandName = operandName;
        this.terms = new ArrayList<>();
    }

    /**
     * The hash is the sum of the operator keyword hash and all operand hashes;
     * as a sum it does not depend on the order of the operands.
     */
    @Override
    public int hashCode() {
        int h = this.operandName.hashCode();
        for (final Term t : this.terms) {
            h += t.hashCode();
        }
        return h;
    }

    /**
     * Append another operand to this operation.
     * @param term the operand to add
     */
    @Override
    public void addOperand(Term term) {
        this.terms.add(term);
    }

    /**
     * As a Operations object is a collection of Terms, we must be able to show them
     * @return the list of terms; this is the internal list, not a copy
     */
    @Override
    public List<Term> getOperands() {
        return this.terms;
    }

    /**
     * the weight attribute of a term shows if rewritten terms
     * (using rules of replacement as allowed for propositional logic)
     * are shorter and therefore more efficient.
     * For n operands this is n operands + (n - 1) operators + 1 = 2 * n;
     * the weights of the operands themselves are not added.
     * @return the number of operators plus the number of operands plus one
     */
    @Override
    public int weight() {
        return this.terms.size() * 2;
    }

    /**
     * @return true if this operation has exactly two operands
     */
    @Override
    public boolean isBinary() {
        return this.terms.size() == 2;
    }

    /**
     * can we set brackets anywhere (means: can we change calculation order)?
     * @return always true
     */
    @Override
    public boolean isAssociative() {
        return true;
    }

    /**
     * can we switch operands (must be binary)
     * @return true if this operation is binary
     */
    @Override
    public boolean isCommutative() {
        return isBinary();
    }

    /**
     * can we 'multiply inside' (must be binary)
     * @return true if this operation is binary
     */
    @Override
    public boolean isDistributive() {
        return isBinary();
    }

    /**
     * No rewrite rules are applied here.
     * @return this object
     */
    @Override
    public Term lightestRewrite() {
        return this;
    }

    /**
     * create a Solr query string from this operation: every operand is put into
     * brackets and the operator keyword is written between the bracketed operands.
     * @return a string which is a Solr query string; the empty string if there are no operands
     */
    @Override
    public String toString() {
        // without this guard an operation without operands would produce a single,
        // unbalanced ')' which is not a valid query fragment
        if (this.terms.isEmpty()) {
            return "";
        }
        final String separator = ") " + this.operandName + " (";
        final StringBuilder sb = new StringBuilder();
        sb.append('(');
        boolean first = true;
        for (final Term term : this.terms) {
            if (!first) {
                sb.append(separator);
            }
            sb.append(term.toString());
            first = false;
        }
        sb.append(')');
        return sb.toString();
    }
}

@ -0,0 +1,44 @@
/**
* AbstractTerm
* Copyright 2014 by Michael Peter Christen
* First released 03.08.2014 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.federate.solr.logic;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
/**
 * Common base of all terms: provides the filtering of a document list
 * with the matches() method of the concrete term.
 */
public abstract class AbstractTerm implements Term {

    /**
     * Filter the given SolrDocumentList with this term.
     * @param sdl the SolrDocumentList to be filtered
     * @return a new SolrDocumentList which holds those documents of sdl that match this term;
     *         its numFound value is set to the number of documents that were taken over
     */
    @Override
    public SolrDocumentList apply(SolrDocumentList sdl) {
        final SolrDocumentList hits = new SolrDocumentList();
        int hitCount = 0;
        for (final SolrDocument candidate : sdl) {
            if (!matches(candidate)) {
                continue;
            }
            hits.add(candidate);
            hitCount++;
        }
        hits.setNumFound(hitCount);
        return hits;
    }
}

@ -20,72 +20,47 @@
package net.yacy.cora.federate.solr.logic;
import java.util.ArrayList;
import java.util.List;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
/**
* A Concunction is a conjunction of atomic key/value pairs to Solr. The purpose of this class is,
* A Conjunction is a conjunction of terms to Solr. The purpose of this class is,
* to provide a mechanism to reduce the calls to Solr when calling Solr several times with sets of
* key/value pairs which are all conjunctive. A combined query for a set of disjunctive conjunctions
* is provided by the DNF class. The result of a DNF class query to solr must be separated again using
* the original conjunctive terms which is represented by this class. The SolrDocumentList which are
* results from individual calls is then the same as a SolrDocument list which can be computed with the
* method apply() in this class on the DNF of the Solr result.
* terms which are all conjunctive.
*/
public class Conjunction {
private final List<Literal> literals;
public class Conjunction extends AbstractOperations implements Operations {
public Conjunction() {
this.literals = new ArrayList<>();
super("AND");
}
public void addLiteral(Literal literal) {
this.literals.add(literal);
@Override
public Object clone() {
Conjunction c = new Conjunction();
for (Term t: this.terms) c.addOperand(t);
return c;
}
/**
* create a Solr query string from this conjunction
* @return a string which is a Solr query string
*/
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
for (Literal l: this.literals) {
if (sb.length() > 0) sb.append(" AND ");
sb.append(l.toString());
public boolean equals(Object otherTerm) {
if (!(otherTerm instanceof Conjunction)) return false;
Conjunction o = (Conjunction) otherTerm;
for (Term t: this.terms) {
if (!TermTools.isIn(t, o.getOperands())) return false;
}
return sb.toString();
return true;
}
/**
* check if this conjunction matches with a given SolrDocument
* @param doc the SolrDocument to match to
* @return true, if all literals of this conjunction match with the key/value pairs of the document
* @return true, if all literals of this conjunction match with the terms of the document
*/
@Override
public boolean matches(SolrDocument doc) {
for (Literal literal: this.literals) {
if (!literal.matches(doc)) return false;
for (Term term: this.terms) {
if (!term.matches(doc)) return false;
}
return true;
}
/**
* create a hit subset of the given SolrDocumentList according to the conjunction defined
* in this object
* @param sdl the SolrDocumentList
* @return a manufactured subset-clone of the given SolrDocumentList where document match with the Conjunction as given in this object
*/
public SolrDocumentList apply(SolrDocumentList sdl) {
SolrDocumentList r = new SolrDocumentList();
int numFound = 0;
for (SolrDocument d: r) {
if (matches(d)) {r.add(d); numFound++;}
}
r.setNumFound(numFound);
return r;
}
}

@ -1,59 +0,0 @@
/**
* DNF
* Copyright 2014 by Michael Peter Christen
* First released 03.08.2014 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.federate.solr.logic;
import java.util.ArrayList;
import java.util.List;
/**
 * This is the implementation of a disjunctive normal form, which is the disjunction of conjunctions.
 * See: http://en.wikipedia.org/wiki/Disjunctive_normal_form
 * We use a DNF to combine several solr queries into one if that is applicable.
 * When calling Solr with a DNF, we need only one http request (if this is done with a remote Solr)
 * and thus save the network overhead for each single (conjunctive) query. To filter out the conjunctions
 * from the bundled query result, you must apply the apply() method from the Conjunction class.
 */
public class DNF {

    /** the conjunctions of this normal form, in the order in which they were added */
    private final List<Conjunction> dnf;

    /**
     * Create a DNF without any conjunction.
     */
    public DNF() {
        this.dnf = new ArrayList<>();
    }

    /**
     * Append a conjunction to this DNF.
     * @param conjunction the conjunction to add
     */
    public void addConjunction(Conjunction conjunction) {
        this.dnf.add(conjunction);
    }

    /**
     * create a Solr query string from this DNF: every conjunction is put into
     * brackets and the bracketed conjunctions are connected with " OR ".
     * @return the query string; the empty string if no conjunction was added
     */
    @Override
    public String toString() {
        final StringBuilder query = new StringBuilder();
        boolean needsSeparator = false;
        for (final Conjunction conjunction : this.dnf) {
            if (needsSeparator) {
                query.append(" OR ");
            }
            query.append('(');
            query.append(conjunction.toString());
            query.append(')');
            needsSeparator = true;
        }
        return query.toString();
    }
}

@ -0,0 +1,65 @@
/**
* Disjunction
* Copyright 2014 by Michael Peter Christen
* First released 03.08.2014 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.federate.solr.logic;
import org.apache.solr.common.SolrDocument;
/**
 * A Disjunction is a disjunction of terms to Solr. The purpose of this class is
 * to provide a mechanism to reduce the calls to Solr when calling Solr several times with sets of
 * terms which are all disjunctive.
 */
public class Disjunction extends AbstractOperations implements Operations {

    /**
     * Create a disjunction without operands; the operands are connected with "OR".
     */
    public Disjunction() {
        super("OR");
    }

    /**
     * @return a new Disjunction which holds the same operand objects in the same order;
     *         the operands themselves are not cloned
     */
    @Override
    public Object clone() {
        Disjunction c = new Disjunction();
        for (Term t: this.terms) c.addOperand(t);
        return c;
    }

    /**
     * Two disjunctions are equal if they hold equal operands with equal multiplicity;
     * the order of the operands is not relevant. No rewrite rules are applied.
     * A one-directional containment check is not sufficient here: it would make
     * (A).equals(A OR B) true while (A OR B).equals(A) is false, and it would call
     * objects equal which have different hash codes.
     * @param otherTerm the object to compare with
     * @return true if otherTerm is a Disjunction with the same operands
     */
    @Override
    public boolean equals(Object otherTerm) {
        if (this == otherTerm) return true;
        if (!(otherTerm instanceof Disjunction)) return false;
        Disjunction o = (Disjunction) otherTerm;
        if (o.getOperands().size() != this.terms.size()) return false;
        // work on a copy and cross off every operand of the other side once it was matched
        final java.util.List<Term> unmatched = new java.util.ArrayList<>(o.getOperands());
        for (Term t: this.terms) {
            if (!unmatched.remove(t)) return false;
        }
        return true;
    }

    /**
     * check if this disjunction matches with a given SolrDocument
     * @param doc the SolrDocument to match to
     * @return true, if at least one operand of this disjunction matches the document;
     *         false if no operand matches or if there are no operands
     */
    @Override
    public boolean matches(SolrDocument doc) {
        for (Term term: this.terms) {
            if (term.matches(doc)) return true;
        }
        return false;
    }
}

@ -23,19 +23,46 @@ package net.yacy.cora.federate.solr.logic;
import org.apache.solr.common.SolrDocument;
import net.yacy.cora.federate.solr.SchemaDeclaration;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
public class Literal {
public class Literal extends AbstractTerm implements Term {
private SchemaDeclaration key;
private String value;
private boolean sign;
public Literal(final SchemaDeclaration key, final String value, final boolean sign) {
public Literal(final SchemaDeclaration key, final String value) {
this.key = key;
this.value = value;
this.sign = sign;
}
@Override
public Object clone() {
return new Literal(this.key, this.value);
}
@Override
public boolean equals(Object otherTerm) {
if (!(otherTerm instanceof Literal)) return false;
Literal o = (Literal) otherTerm;
return this.key.equals(o.key) && this.value.equals(o.value);
}
@Override
public int hashCode() {
return key.hashCode() + value.hashCode();
}
/**
* the length attribute of a term shows if rewritten terms
* (using rules of replacement as allowed for propositional logic)
* are shorter and therefore more efficient.
* @return the number of operators plus the number of operands plus one
*/
@Override
public int weight() {
return 1;
}
/**
* create a Solr query string from this literal
* @return a string which is a Solr query string
@ -43,7 +70,6 @@ public class Literal {
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
if (!this.sign) sb.append('-');
sb.append(this.key.getSolrFieldName());
sb.append(':').append('"').append(this.value).append('"');
return sb.toString();
@ -55,9 +81,15 @@ public class Literal {
* @return true, if the key of this literal is contained in the document and the
* value equals (does not equal) with the value if this literal (if the signature is false)
*/
@Override
public boolean matches(SolrDocument doc) {
Object v = doc.getFieldValue(this.key.getSolrFieldName());
if (v == null) return false;
return v.toString().matches(this.value) == this.sign;
return this.value.equals(AbstractSolrConnector.CATCHALL_TERM) || v.toString().matches(this.value);
}
@Override
public Term lightestRewrite() {
return this;
}
}

@ -0,0 +1,89 @@
/**
* Negation
* Copyright 2014 by Michael Peter Christen
* First released 03.08.2014 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.federate.solr.logic;
import org.apache.solr.common.SolrDocument;
/**
 * A Negation wraps a single term and inverts its match result.
 */
public class Negation extends AbstractTerm implements Term {

    /** the term which is negated */
    private final Term term;

    /**
     * @param term the term to be negated
     */
    public Negation(final Term term) {
        this.term = term;
    }

    /**
     * @return a new Negation of the same wrapped term object; the wrapped term is not cloned
     */
    @Override
    public Object clone() {
        return new Negation(this.term);
    }

    /**
     * @param otherTerm the object to compare with
     * @return true if otherTerm is a Negation and the wrapped terms are equal
     */
    @Override
    public boolean equals(Object otherTerm) {
        if (!(otherTerm instanceof Negation)) return false;
        Negation o = (Negation) otherTerm;
        return this.term.equals(o.term);
    }

    /**
     * @return the negated hash code of the wrapped term
     */
    @Override
    public int hashCode() {
        return -this.term.hashCode();
    }

    /**
     * the weight attribute of a term shows if rewritten terms
     * (using rules of replacement as allowed for propositional logic)
     * are shorter and therefore more efficient.
     * @return the weight of the negated term plus one for the negation operator
     */
    @Override
    public int weight() {
        return this.term.weight() + 1;
    }

    /**
     * create a Solr query string from this negation: a '-' followed by the query
     * string of the negated term.
     * @return a string which is a Solr query string
     */
    @Override
    public String toString() {
        final String inner = this.term.toString();
        if (this.term instanceof Operations) {
            // an operation is rendered as "(a) OP (b)"; a bare '-' in front of that would
            // negate the first operand only, therefore the whole operation gets its own brackets
            return "-(" + inner + ")";
        }
        return "-" + inner;
    }

    /**
     * check if the negated term does not match the SolrDocument
     * @param doc the document to match to this negation
     * @return true if the wrapped term does not match the document, false if it does
     */
    @Override
    public boolean matches(SolrDocument doc) {
        return !this.term.matches(doc);
    }

    /**
     * @return a new Negation of the lightest rewrite of the wrapped term
     */
    @Override
    public Term lightestRewrite() {
        // TODO: this can be enhanced if negations are not attached to atoms
        Term t = this.term.lightestRewrite();
        return new Negation(t);
    }
}

@ -0,0 +1,58 @@
package net.yacy.cora.federate.solr.logic;
import java.util.List;
/**
* The Operations class describes a set of operands which form a term using the same operation.
*/
public interface Operations extends Term {
/**
* As an Operations object is a collection of Terms, we must be able to show them.
* @return the list of operand terms
*/
public List<Term> getOperands();
/**
* Add another operand to the operations term.
* @param operand the term to be added as operand
*/
public void addOperand(Term operand);
/**
* The operation is binary if it contains two operands.
* @return true if this is a binary operation
*/
public boolean isBinary();
/**
* A binary operation * on a set S is called associative if it satisfies the associative law:
* (x * y) * z = x * (y * z) for all x,y,z in S.
* @return true if this is associative
*/
public boolean isAssociative();
/**
* In standard truth-functional propositional logic, commutativity refers to two valid rules of replacement.
* The rules allow one to transpose propositional variables within logical expressions in logical proofs. The rules are:
* (P OR Q) <=> (Q OR P)
* (P AND Q) <=> (Q AND P)
* @return true if this is commutative
*/
public boolean isCommutative();
/**
* In propositional logic, distribution refers to two valid rules of replacement.
* The rules allow one to reformulate conjunctions and disjunctions within logical proofs.
* Given a set S and two binary operators * and + on S, we say that the operation *
* is left-distributive over + if, given any elements x, y, and z of S,
* x * (y + z) = (x * y) + (x * z)
* is right-distributive over + if, given any elements x, y, and z of S:
* (y + z) * x = (y * x) + (z * x)
* is distributive over + if it is left- and right-distributive.
* @return true if this is distributive
*/
public boolean isDistributive();
}

@ -0,0 +1,76 @@
/**
* Term
* Copyright 2014 by Michael Peter Christen
* First released 03.08.2014 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.federate.solr.logic;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
public interface Term {
/**
* Equal method which returns true if the terms are logically equal.
* It is advised to create minimum-weight variants of the terms using lightestRewrite() before comparing because
* the equals method should not apply rewrite rules. If two terms are equal, then also their minimum weight rewrite is equal.
* @param otherTerm the object to compare this term with
* @return true if the interpretation (apply method) of the term is equal to the interpretation (apply method) of otherTerm on any document
*/
@Override
public boolean equals(Object otherTerm);
/**
* The weight attribute of a term shows if rewritten terms
* (using rules of replacement as allowed for propositional logic)
* are shorter and therefore more efficient.
* @return the number of operators plus the number of operands plus one
*/
public int weight();
/**
* toString produces the Solr Query representation of the term.
* @return the Solr Query String
*/
@Override
public String toString();
/**
* Check if this term matches the SolrDocument.
* @param doc the document to match to this term
* @return true, if this term matches with the document
*/
public boolean matches(SolrDocument doc);
/**
* Create a hit subset of the given SolrDocumentList according to the term defined
* in this object. This is the interpretation of the term on a 'world object' (the Solr document).
* @param sdl the SolrDocumentList
* @return a manufactured subset-clone of the given SolrDocumentList where the documents match with the term
*/
public SolrDocumentList apply(SolrDocumentList sdl);
/**
* Applying a rewrite rule to the term should not change the logical expression of the term.
* The possible set of rewrites of the term is computed and the lightest rewrites of the underlying terms
* are used to compare all rewrites to each other. Then the lightest term is returned.
* @return the lightest term that is logically equivalent to the given term
*/
public Term lightestRewrite();
}

@ -0,0 +1,58 @@
/**
* TermTools
* Copyright 2014 by Michael Peter Christen
* First released 04.08.2014 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.federate.solr.logic;
import java.util.List;
/**
* static methods for term comparison, term order, term weights, permutations etc.
*/
public class TermTools {

    /**
     * Look up a term in a list of terms; the comparison is done with the
     * equals method of the term that is searched.
     * @param a the term to search for
     * @param termlist the terms to search in
     * @return true if a equals at least one element of termlist
     */
    public static boolean isIn(final Term a, final List<Term> termlist) {
        boolean found = false;
        for (final Term candidate : termlist) {
            if (a.equals(candidate)) {
                found = true;
                break;
            }
        }
        return found;
    }

    // unfinished sketch of a permutation generator, kept disabled
    /*
    public static ArrayList<Operations> permutations(final Operations operations) {
    List<Term> ops = operations.getOperands();
    int os = ops.size();
    ArrayList<Operations> permutation = new ArrayList<Operations>();
    if (ops.size() < 2) {
    permutation.add(operations);
    return permutation;
    }
    Term head = ops.get(0);
    ops.remove(0);
    ArrayList<Operations> p1 = permutations(operations);
    for (Operations pt: p1) {
    // insert head into each position from pt
    for (int i = 0; i < os; i++) {
    }
    }
    return
    }
    */
}

@ -92,7 +92,8 @@ public class MemoryControl {
* @return bytes
*/
public static final long available() {
return getStrategy().available();
long available = getStrategy().available();
return available;
}
/**

@ -61,8 +61,9 @@ import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.federate.solr.connector.SolrConnector;
import net.yacy.cora.federate.solr.connector.SolrConnector.LoadTimeURL;
import net.yacy.cora.federate.solr.logic.Conjunction;
import net.yacy.cora.federate.solr.logic.DNF;
import net.yacy.cora.federate.solr.logic.Disjunction;
import net.yacy.cora.federate.solr.logic.Literal;
import net.yacy.cora.federate.solr.logic.Negation;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
@ -1374,7 +1375,10 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
// FIND OUT IF THIS IS A DOUBLE DOCUMENT
String urlhash = ASCII.String(url.hash());
String hostid = url.hosthash();
DNF dnf = new DNF();
Conjunction con = new Conjunction();
con.addOperand(new Negation(new Literal(CollectionSchema.id, urlhash)));
con.addOperand(new Literal(CollectionSchema.host_id_s, hostid));
Disjunction dnf = new Disjunction();
uniquecheck: for (CollectionSchema[] checkfields: new CollectionSchema[][]{
{CollectionSchema.exact_signature_l, CollectionSchema.exact_signature_unique_b, CollectionSchema.exact_signature_copycount_i},
{CollectionSchema.fuzzy_signature_l, CollectionSchema.fuzzy_signature_unique_b, CollectionSchema.fuzzy_signature_copycount_i}}) {
@ -1386,25 +1390,42 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
// lookup the document with the same signature
Long signature = (Long) sid.getField(signaturefield.getSolrFieldName()).getValue();
if (signature == null) continue uniquecheck;
Conjunction con = new Conjunction();
con.addLiteral(new Literal(CollectionSchema.id, urlhash, false));
con.addLiteral(new Literal(CollectionSchema.host_id_s, hostid, true));
con.addLiteral(new Literal(signaturefield, signature.toString(), true));
dnf.addConjunction(con);
String query = con.toString();
try {
//SolrDocumentList docsOld = segment.fulltext().getDefaultConnector().getDocumentListByQuery("-" + CollectionSchema.id.getSolrFieldName() + ":\"" + urlhash + "\" AND " + CollectionSchema.host_id_s.getSolrFieldName() + ":\"" + hostid + "\" AND " + signaturefield.getSolrFieldName() + ":\"" + signature.toString() + "\"", null, 0, 2000, CollectionSchema.id.getSolrFieldName());
SolrDocumentList docs = segment.fulltext().getDefaultConnector().getDocumentListByQuery(query, null, 0, 2000, CollectionSchema.id.getSolrFieldName());
if (docs.getNumFound() == 0) {
sid.setField(uniquefield.getSolrFieldName(), true);
sid.setField(countfield.getSolrFieldName(), 1);
} else {
boolean firstappearance = true;
for (SolrDocument d: docs) {if (uniqueURLs.contains(d.getFieldValue(CollectionSchema.id.getSolrFieldName()))) firstappearance = false; break;}
sid.setField(uniquefield.getSolrFieldName(), firstappearance);
sid.setField(countfield.getSolrFieldName(), docs.getNumFound() + 1); // the current url was excluded from search but is included in count
}
} catch (final IOException e) {}
//con.addOperand(new Negation(new Literal(CollectionSchema.id, urlhash)));
//con.addOperand(new Literal(CollectionSchema.host_id_s, hostid));
dnf.addOperand(new Literal(signaturefield, signature.toString()));
}
}
con.addOperand(dnf);
String query = con.toString();
SolrDocumentList docsAkk;
try {
docsAkk = segment.fulltext().getDefaultConnector().getDocumentListByQuery(query, null, 0, 2000,
CollectionSchema.id.getSolrFieldName(), CollectionSchema.exact_signature_l.getSolrFieldName(), CollectionSchema.fuzzy_signature_l.getSolrFieldName());
} catch (final IOException e) {
ConcurrentLog.logException(e);
docsAkk = new SolrDocumentList();
}
uniquecheck: for (CollectionSchema[] checkfields: new CollectionSchema[][]{
{CollectionSchema.exact_signature_l, CollectionSchema.exact_signature_unique_b, CollectionSchema.exact_signature_copycount_i},
{CollectionSchema.fuzzy_signature_l, CollectionSchema.fuzzy_signature_unique_b, CollectionSchema.fuzzy_signature_copycount_i}}) {
CollectionSchema signaturefield = checkfields[0];
CollectionSchema uniquefield = checkfields[1];
CollectionSchema countfield = checkfields[2];
if (this.contains(signaturefield) && this.contains(uniquefield) && this.contains(countfield)) {
// lookup the document with the same signature
Long signature = (Long) sid.getField(signaturefield.getSolrFieldName()).getValue();
if (signature == null) continue uniquecheck;
SolrDocumentList docs = new Literal(signaturefield, signature.toString()).apply(docsAkk);
if (docs.getNumFound() == 0) {
sid.setField(uniquefield.getSolrFieldName(), true);
sid.setField(countfield.getSolrFieldName(), 1);
} else {
boolean firstappearance = true;
for (SolrDocument d: docs) {if (uniqueURLs.contains(d.getFieldValue(CollectionSchema.id.getSolrFieldName()))) firstappearance = false; break;}
sid.setField(uniquefield.getSolrFieldName(), firstappearance);
sid.setField(countfield.getSolrFieldName(), docs.getNumFound() + 1); // the current url was excluded from search but is included in count
}
}
}
@ -1440,7 +1461,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
"-" + CollectionSchema.robots_i.getSolrFieldName() + ":24 AND " + // bit 3 + 4
"-" + CollectionSchema.robots_i.getSolrFieldName() + ":512 AND " + // bit 9
"-" + CollectionSchema.robots_i.getSolrFieldName() + ":1536 AND " + // bit 9 + 10
"((-" + CollectionSchema.canonical_equal_sku_b.getSolrFieldName() + ":[* TO *]) OR (" + CollectionSchema.canonical_equal_sku_b.getSolrFieldName() + ":true)) AND " +
"((-" + CollectionSchema.canonical_equal_sku_b.getSolrFieldName() + ":" + AbstractSolrConnector.CATCHALL_TERM + ") OR (" + CollectionSchema.canonical_equal_sku_b.getSolrFieldName() + ":true)) AND " +
CollectionSchema.httpstatus_i.getSolrFieldName() + ":200 AND " +
"-" + CollectionSchema.id.getSolrFieldName() + ":\"" + urlhash + "\" AND " +
signaturefield.getSolrFieldName() + ":\"" + signature.toString() + "\"";

Loading…
Cancel
Save