saving one query for double document detection now per documentpull/1/head
parent
38864ae004
commit
0ceeceb35e
@ -0,0 +1,97 @@
|
||||
package net.yacy.cora.federate.solr.logic;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public abstract class AbstractOperations extends AbstractTerm implements Operations {
|
||||
|
||||
protected final String operandName;
|
||||
protected final List<Term> terms;
|
||||
|
||||
public AbstractOperations(final String operandName) {
|
||||
this.operandName = operandName;
|
||||
this.terms = new ArrayList<>();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int h = operandName.hashCode();
|
||||
for (Term t: this.terms) h += t.hashCode();
|
||||
return h;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addOperand(Term term) {
|
||||
this.terms.add(term);
|
||||
}
|
||||
|
||||
/**
|
||||
* As a Operations object is a collection of Terms, we must be able to show them
|
||||
* @return the list of terms
|
||||
*/
|
||||
@Override
|
||||
public List<Term> getOperands() {
|
||||
return this.terms;
|
||||
}
|
||||
|
||||
/**
|
||||
* the weight attribute of a term shows if rewritten terms
|
||||
* (using rules of replacement as allowed for propositional logic)
|
||||
* are shorter and therefore more efficient.
|
||||
* @return the number of operators plus the number of operands plus one
|
||||
*/
|
||||
@Override
|
||||
public int weight() {
|
||||
return terms.size() * 2;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isBinary() {
|
||||
return this.terms.size() == 2;
|
||||
}
|
||||
|
||||
/**
|
||||
* can we set brackets anywhere (means: can we change calculation order)?
|
||||
*/
|
||||
@Override
|
||||
public boolean isAssociative() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* can we switch operands (must be binary)
|
||||
*/
|
||||
@Override
|
||||
public boolean isCommutative() {
|
||||
return isBinary();
|
||||
}
|
||||
|
||||
/**
|
||||
* can we 'multiply inside' (must be binary)
|
||||
*/
|
||||
@Override
|
||||
public boolean isDistributive() {
|
||||
return isBinary();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Term lightestRewrite() {
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* create a Solr query string from this conjunction
|
||||
* @return a string which is a Solr query string
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (Term term: this.terms) {
|
||||
if (sb.length() == 0) sb.append('('); else sb.append(") ").append(this.operandName).append(" (");
|
||||
sb.append(term.toString());
|
||||
}
|
||||
sb.append(')');
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
@ -0,0 +1,44 @@
|
||||
/**
|
||||
* AbstractTerm
|
||||
* Copyright 2014 by Michael Peter Christen
|
||||
* First released 03.08.2014 at http://yacy.net
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file lgpl21.txt
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package net.yacy.cora.federate.solr.logic;
|
||||
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
|
||||
public abstract class AbstractTerm implements Term {
|
||||
|
||||
/**
|
||||
* create a hit subset of the given SolrDocumentList according to the conjunction defined
|
||||
* in this object
|
||||
* @param sdl the SolrDocumentList
|
||||
* @return a manufactured subset-clone of the given SolrDocumentList where document match with the term as given in this object
|
||||
*/
|
||||
@Override
|
||||
public SolrDocumentList apply(SolrDocumentList sdl) {
|
||||
SolrDocumentList r = new SolrDocumentList();
|
||||
int numFound = 0;
|
||||
for (SolrDocument d: sdl) {
|
||||
if (matches(d)) {r.add(d); numFound++;}
|
||||
}
|
||||
r.setNumFound(numFound);
|
||||
return r;
|
||||
}
|
||||
}
|
@ -1,59 +0,0 @@
|
||||
/**
|
||||
* DNF
|
||||
* Copyright 2014 by Michael Peter Christen
|
||||
* First released 03.08.2014 at http://yacy.net
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file lgpl21.txt
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package net.yacy.cora.federate.solr.logic;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* This is the implementation of a disjunctive normal form, which is the disjunction of conjunctions.
|
||||
* See: http://en.wikipedia.org/wiki/Disjunctive_normal_form
|
||||
* We use a DNF to combine several solr queries into one if that is applicable.
|
||||
* When caling Solr with a DNF, we need only one http request (if this is done with a remote Solr)
|
||||
* and thus saving the network overhead for each single (conjunctive) query. To filter out the conjunctions
|
||||
* from the bundled query result, you must apply the apply() method from the Conjunction class.
|
||||
*/
|
||||
public class DNF {
|
||||
|
||||
private final List<Conjunction> dnf;
|
||||
|
||||
public DNF() {
|
||||
this.dnf = new ArrayList<>();
|
||||
}
|
||||
|
||||
public void addConjunction(Conjunction conjunction) {
|
||||
this.dnf.add(conjunction);
|
||||
}
|
||||
|
||||
/**
|
||||
* create a Solr query string from this DNF
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (Conjunction c: this.dnf) {
|
||||
if (sb.length() > 0) sb.append(" OR ");
|
||||
sb.append('(').append(c.toString()).append(')');
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,65 @@
|
||||
/**
|
||||
* Disjunction
|
||||
* Copyright 2014 by Michael Peter Christen
|
||||
* First released 03.08.2014 at http://yacy.net
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file lgpl21.txt
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package net.yacy.cora.federate.solr.logic;
|
||||
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
|
||||
/**
|
||||
* A Disjunction is a desjunction of terms to Solr. The purpose of this class is,
|
||||
* to provide a mechanism to reduce the calls to Solr when calling Solr several times with sets of
|
||||
* terms which are all disjunctive.
|
||||
*/
|
||||
public class Disjunction extends AbstractOperations implements Operations {
|
||||
|
||||
public Disjunction() {
|
||||
super("OR");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object clone() {
|
||||
Disjunction c = new Disjunction();
|
||||
for (Term t: this.terms) c.addOperand(t);
|
||||
return c;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object otherTerm) {
|
||||
if (!(otherTerm instanceof Disjunction)) return false;
|
||||
Disjunction o = (Disjunction) otherTerm;
|
||||
for (Term t: this.terms) {
|
||||
if (!TermTools.isIn(t, o.getOperands())) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* check if this disjunction matches with a given SolrDocument
|
||||
* @param doc the SolrDocument to match to
|
||||
* @return true, if all literals of this disjunction match with the terms of the document
|
||||
*/
|
||||
@Override
|
||||
public boolean matches(SolrDocument doc) {
|
||||
for (Term term: this.terms) {
|
||||
if (term.matches(doc)) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
@ -0,0 +1,89 @@
|
||||
/**
|
||||
* Negation
|
||||
* Copyright 2014 by Michael Peter Christen
|
||||
* First released 03.08.2014 at http://yacy.net
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file lgpl21.txt
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package net.yacy.cora.federate.solr.logic;
|
||||
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
|
||||
public class Negation extends AbstractTerm implements Term {
|
||||
|
||||
private Term term;
|
||||
|
||||
public Negation(final Term term) {
|
||||
this.term = term;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object clone() {
|
||||
return new Negation(this.term);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object otherTerm) {
|
||||
if (!(otherTerm instanceof Negation)) return false;
|
||||
Negation o = (Negation) otherTerm;
|
||||
return this.term.equals(o.term);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return -this.term.hashCode();
|
||||
}
|
||||
|
||||
/**
|
||||
* the length attribute of a term shows if rewritten terms
|
||||
* (using rules of replacement as allowed for propositional logic)
|
||||
* are shorter and therefore more efficient.
|
||||
* @return the number of operators plus the number of operands plus one
|
||||
*/
|
||||
@Override
|
||||
public int weight() {
|
||||
return term.weight() + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* create a Solr query string from this literal
|
||||
* @return a string which is a Solr query string
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append('-').append(this.term.toString());
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* check if the key/value pair of this literal occurs in the SolrDocument
|
||||
* @param doc the document to match to this literal
|
||||
* @return true, if the key of this literal is contained in the document and the
|
||||
* value equals (does not equal) with the value if this literal (if the signature is false)
|
||||
*/
|
||||
@Override
|
||||
public boolean matches(SolrDocument doc) {
|
||||
return !this.term.matches(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Term lightestRewrite() {
|
||||
// TODO: this can be enhanced if negations are not attached to atoms
|
||||
Term t = this.term.lightestRewrite();
|
||||
return new Negation(t);
|
||||
}
|
||||
}
|
@ -0,0 +1,58 @@
|
||||
package net.yacy.cora.federate.solr.logic;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* The Operations class describes a set of operands which form a term using the same operation.
|
||||
*/
|
||||
public interface Operations extends Term {
|
||||
|
||||
/**
|
||||
* As a Operations object is a collection of Terms, we must be able to show them
|
||||
* @return the list of terms
|
||||
*/
|
||||
public List<Term> getOperands();
|
||||
|
||||
/**
|
||||
* add another operand to the operations term
|
||||
* @param operand
|
||||
*/
|
||||
public void addOperand(Term operand);
|
||||
|
||||
/**
|
||||
* the operation is binary, if it contains two operands
|
||||
* @return if this is a binary operation
|
||||
*/
|
||||
public boolean isBinary();
|
||||
|
||||
/**
|
||||
* a binary operation * on a set S is called associative if it satisfies the associative law:
|
||||
* (x * y) * z = x * (y * z) for all x,y,z in S.
|
||||
* @return true if this is associative
|
||||
*/
|
||||
public boolean isAssociative();
|
||||
|
||||
/**
|
||||
* In standard truth-functional propositional logic, commutativity refer to two valid rules of replacement.
|
||||
* The rules allow one to transpose propositional variables within logical expressions in logical proofs. The rules are:
|
||||
* (P OR Q) <=> (Q OR P)
|
||||
* (P AND Q) <=> (Q AND P)
|
||||
* @return true if this is distributive
|
||||
*/
|
||||
public boolean isCommutative();
|
||||
|
||||
/**
|
||||
* In propositional logic, distribution refers to two valid rules of replacement.
|
||||
* The rules allow one to reformulate conjunctions and disjunctions within logical proofs.
|
||||
* Given a set S and two binary operators * and + on S, we say that the operation *
|
||||
* is left-distributive over + if, given any elements x, y, and z of S,
|
||||
* x * (y + z) = (x * y) + (x * z)
|
||||
* is right-distributive over + if, given any elements x, y, and z of S:
|
||||
* (y + z) * x = (y * x) + (z * x)
|
||||
* is distributive over + if it is left- and right-distributive.
|
||||
* @return true if this is distributive;
|
||||
*/
|
||||
public boolean isDistributive();
|
||||
|
||||
|
||||
}
|
@ -0,0 +1,76 @@
|
||||
/**
|
||||
* Term
|
||||
* Copyright 2014 by Michael Peter Christen
|
||||
* First released 03.08.2014 at http://yacy.net
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file lgpl21.txt
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
package net.yacy.cora.federate.solr.logic;
|
||||
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
|
||||
public interface Term {
|
||||
|
||||
/**
|
||||
* Equal method which returns true if the terms are logically equal.
|
||||
* It is advised to create minimum-weight variants of the terms using lightestRewrite() before comparing because
|
||||
* the equals method should not apply rewrite rules. If two terms are equal, then also their minimum weight rewrite is equal.
|
||||
* @param otherTerm
|
||||
* @return true if the interpretation (apply method) of the term is equal to the interpretation (apply method) of otherTerm on any document
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(Object otherTerm);
|
||||
|
||||
/**
|
||||
* the weight attribute of a term shows if rewritten terms
|
||||
* (using rules of replacement as allowed for propositional logic)
|
||||
* are shorter and therefore more efficient.
|
||||
* @return the number of operators plus the number of operands plus one
|
||||
*/
|
||||
public int weight();
|
||||
|
||||
/**
|
||||
* toString produces the Solr Query representation of the term
|
||||
* @return the Solr Query String
|
||||
*/
|
||||
@Override
|
||||
public String toString();
|
||||
|
||||
/**
|
||||
* check if this term matches the SolrDocument
|
||||
* @param doc the document to match to this term
|
||||
* @return true, if this term matches with the document
|
||||
*/
|
||||
public boolean matches(SolrDocument doc);
|
||||
|
||||
/**
|
||||
* Create a hit subset of the given SolrDocumentList according to the conjunction defined
|
||||
* in this object. This is the interpretation of the term on a 'world object' (the Solr document).
|
||||
* @param sdl the SolrDocumentList
|
||||
* @return a manufactured subset-clone of the given SolrDocumentList where document match with the term
|
||||
*/
|
||||
public SolrDocumentList apply(SolrDocumentList sdl);
|
||||
|
||||
/**
|
||||
* Applying a rewrite rule to the term should not change the logical expression of the term.
|
||||
* The possible set of rewrites of the term is computed and the ligtest rewrite of the underlying terms
|
||||
* are used to compare all rewrites to each other. Then the lightest term is returned.
|
||||
* @return the lightest term that is logically equivalent to the given term
|
||||
*/
|
||||
public Term lightestRewrite();
|
||||
}
|
@ -0,0 +1,58 @@
|
||||
/**
|
||||
* TermTools
|
||||
* Copyright 2014 by Michael Peter Christen
|
||||
* First released 04.08.2014 at http://yacy.net
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file lgpl21.txt
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
package net.yacy.cora.federate.solr.logic;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* static methods for term comparison, term order, term weights, permutations etc.
|
||||
*/
|
||||
public class TermTools {
|
||||
|
||||
public static boolean isIn(final Term a, final List<Term> termlist) {
|
||||
for (Term t: termlist) {
|
||||
if (a.equals(t)) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
/*
|
||||
public static ArrayList<Operations> permutations(final Operations operations) {
|
||||
List<Term> ops = operations.getOperands();
|
||||
int os = ops.size();
|
||||
ArrayList<Operations> permutation = new ArrayList<Operations>();
|
||||
if (ops.size() < 2) {
|
||||
permutation.add(operations);
|
||||
return permutation;
|
||||
}
|
||||
Term head = ops.get(0);
|
||||
ops.remove(0);
|
||||
ArrayList<Operations> p1 = permutations(operations);
|
||||
for (Operations pt: p1) {
|
||||
// insert head into each position from pt
|
||||
for (int i = 0; i < os; i++) {
|
||||
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
*/
|
||||
}
|
Loading…
Reference in new issue