diff --git a/source/net/yacy/cora/federate/solr/logic/AbstractOperations.java b/source/net/yacy/cora/federate/solr/logic/AbstractOperations.java
index 7cdaa5ce7..a1c4a8270 100644
--- a/source/net/yacy/cora/federate/solr/logic/AbstractOperations.java
+++ b/source/net/yacy/cora/federate/solr/logic/AbstractOperations.java
@@ -78,7 +78,6 @@ public abstract class AbstractOperations extends AbstractTerm implements Operati
public Term lightestRewrite() {
return this;
}
-
/**
* create a Solr query string from this conjunction
@@ -86,12 +85,18 @@ public abstract class AbstractOperations extends AbstractTerm implements Operati
*/
@Override
public String toString() {
+        // Operands that render as an empty string (e.g. a nested operation without terms) are
+        // skipped, otherwise the query would contain a dangling operator like "a AND  AND b".
StringBuilder sb = new StringBuilder();
- for (Term term: this.terms) {
- if (sb.length() == 0) sb.append('('); else sb.append(") ").append(this.operandName).append(" (");
- sb.append(term.toString());
+        int rendered = 0; // number of operands written to sb so far
+        for (Term term : this.terms) {
+            final String operand = term.toString();
+            if (operand.isEmpty()) continue;
+            if (rendered++ > 0) sb.append(' ').append(this.operandName).append(' ');
+            sb.append(operand);
}
- sb.append(')');
+        // nothing rendered gives "", one operand stands alone, several are grouped in parentheses
+        if (rendered > 1) sb.insert(0, '(').append(')');
return sb.toString();
}
}
diff --git a/source/net/yacy/cora/federate/solr/logic/BooleanLiteral.java b/source/net/yacy/cora/federate/solr/logic/BooleanLiteral.java
new file mode 100644
index 000000000..47b5f8b37
--- /dev/null
+++ b/source/net/yacy/cora/federate/solr/logic/BooleanLiteral.java
@@ -0,0 +1,89 @@
+/**
+ * BooleanLiteral
+ * Copyright 2014 by Michael Peter Christen
+ * First released 24.10.2014 at http://yacy.net
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file lgpl21.txt
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.cora.federate.solr.logic;
+
+import org.apache.solr.common.SolrDocument;
+
+import net.yacy.cora.federate.solr.SchemaDeclaration;
+
+/**
+ * A literal which constrains a Solr field to a fixed boolean value.
+ */
+public class BooleanLiteral extends Literal implements Term {
+
+    private final SchemaDeclaration key;
+    private final boolean value;
+
+    /**
+     * @param key the schema field this literal refers to
+     * @param value the boolean value the field is compared with
+     */
+    public BooleanLiteral(final SchemaDeclaration key, final boolean value) {
+        super();
+        this.key = key;
+        this.value = value;
+    }
+
+    /**
+     * @return a new BooleanLiteral with the same key and value
+     */
+    @Override
+    public Object clone() {
+        return new BooleanLiteral(this.key, this.value);
+    }
+
+    @Override
+    public boolean equals(Object otherTerm) {
+        if (!(otherTerm instanceof BooleanLiteral)) return false;
+        BooleanLiteral o = (BooleanLiteral) otherTerm;
+        return this.key.equals(o.key) && this.value == o.value;
+    }
+
+    @Override
+    public int hashCode() {
+        return this.key.hashCode() + (this.value ? 1 : 0);
+    }
+
+    /**
+     * create a Solr query string from this literal
+     * @return the query string in the form <code>fieldname:true</code> or <code>fieldname:false</code>
+     */
+    @Override
+    public String toString() {
+        return this.key.getSolrFieldName() + ':' + this.value;
+    }
+
+    /**
+     * check if the field of this literal has the expected value in the SolrDocument
+     * @param doc the document to match to this literal
+     * @return true, if the document contains the field of this literal and the string
+     *         representation of the field value is equal to the value of this literal
+     */
+    @Override
+    public boolean matches(SolrDocument doc) {
+        Object v = doc.getFieldValue(this.key.getSolrFieldName());
+        if (v == null) return false;
+        // plain comparison: the expected text has no regex metacharacters, so this gives the
+        // same result as String.matches without compiling a pattern on every call
+        return Boolean.toString(this.value).equals(v.toString());
+    }
+
+}
\ No newline at end of file
diff --git a/source/net/yacy/cora/federate/solr/logic/CatchallLiteral.java b/source/net/yacy/cora/federate/solr/logic/CatchallLiteral.java
new file mode 100644
index 000000000..dcd414fb2
--- /dev/null
+++ b/source/net/yacy/cora/federate/solr/logic/CatchallLiteral.java
@@ -0,0 +1,84 @@
+/**
+ * CatchallLiteral
+ * Copyright 2014 by Michael Peter Christen
+ * First released 24.10.2014 at http://yacy.net
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file lgpl21.txt
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.cora.federate.solr.logic;
+
+import org.apache.solr.common.SolrDocument;
+
+import net.yacy.cora.federate.solr.SchemaDeclaration;
+import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
+
+/**
+ * A literal which queries a Solr field with the catchall term: it matches every document
+ * which has a value in that field, whatever the value is.
+ */
+public class CatchallLiteral extends Literal implements Term {
+
+    private final SchemaDeclaration key;
+
+    /**
+     * @param key the schema field which must be present
+     */
+    public CatchallLiteral(final SchemaDeclaration key) {
+        super();
+        this.key = key;
+    }
+
+    /**
+     * @return a new CatchallLiteral with the same key
+     */
+    @Override
+    public Object clone() {
+        return new CatchallLiteral(this.key);
+    }
+
+    @Override
+    public boolean equals(Object otherTerm) {
+        if (!(otherTerm instanceof CatchallLiteral)) return false;
+        CatchallLiteral o = (CatchallLiteral) otherTerm;
+        return this.key.equals(o.key);
+    }
+
+    @Override
+    public int hashCode() {
+        return this.key.hashCode();
+    }
+
+    /**
+     * create a Solr query string from this literal
+     * @return the field name followed by a colon and the catchall term
+     */
+    @Override
+    public String toString() {
+        return this.key.getSolrFieldName() + ':' + AbstractSolrConnector.CATCHALL_TERM;
+    }
+
+    /**
+     * check if the field of this literal occurs in the SolrDocument
+     * @param doc the document to match to this literal
+     * @return true, if the document has a value for the field of this literal;
+     *         false if the field is missing
+     */
+    @Override
+    public boolean matches(SolrDocument doc) {
+        return doc.getFieldValue(this.key.getSolrFieldName()) != null;
+    }
+
+}
\ No newline at end of file
diff --git a/source/net/yacy/cora/federate/solr/logic/Conjunction.java b/source/net/yacy/cora/federate/solr/logic/Conjunction.java
index df5286631..06f09204e 100644
--- a/source/net/yacy/cora/federate/solr/logic/Conjunction.java
+++ b/source/net/yacy/cora/federate/solr/logic/Conjunction.java
@@ -33,6 +33,17 @@ public class Conjunction extends AbstractOperations implements Operations {
super("AND");
}
+ /**
+ * create a conjunction of two terms; the operands are added in the given order
+ * @param t1 the first operand
+ * @param t2 the second operand
+ */
+ public Conjunction(final Term t1, final Term t2) {
+ super("AND");
+ this.addOperand(t1);
+ this.addOperand(t2);
+ }
+
@Override
public Object clone() {
Conjunction c = new Conjunction();
diff --git a/source/net/yacy/cora/federate/solr/logic/Disjunction.java b/source/net/yacy/cora/federate/solr/logic/Disjunction.java
index b74830ec1..bd2c74073 100644
--- a/source/net/yacy/cora/federate/solr/logic/Disjunction.java
+++ b/source/net/yacy/cora/federate/solr/logic/Disjunction.java
@@ -32,6 +32,17 @@ public class Disjunction extends AbstractOperations implements Operations {
public Disjunction() {
super("OR");
}
+
+ /**
+ * create a disjunction of two terms; the operands are added in the given order
+ * @param t1 the first operand
+ * @param t2 the second operand
+ */
+ public Disjunction(final Term t1, final Term t2) {
+ super("OR");
+ this.addOperand(t1);
+ this.addOperand(t2);
+ }
@Override
public Object clone() {
diff --git a/source/net/yacy/cora/federate/solr/logic/Literal.java b/source/net/yacy/cora/federate/solr/logic/Literal.java
index 7cdeef944..edea5a1d7 100644
--- a/source/net/yacy/cora/federate/solr/logic/Literal.java
+++ b/source/net/yacy/cora/federate/solr/logic/Literal.java
@@ -20,36 +20,9 @@
package net.yacy.cora.federate.solr.logic;
-import org.apache.solr.common.SolrDocument;
+public abstract class Literal extends AbstractTerm implements Term {
-import net.yacy.cora.federate.solr.SchemaDeclaration;
-import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
-
-public class Literal extends AbstractTerm implements Term {
-
- private SchemaDeclaration key;
- private String value;
-
- public Literal(final SchemaDeclaration key, final String value) {
- this.key = key;
- this.value = value;
- }
-
- @Override
- public Object clone() {
- return new Literal(this.key, this.value);
- }
-
- @Override
- public boolean equals(Object otherTerm) {
- if (!(otherTerm instanceof Literal)) return false;
- Literal o = (Literal) otherTerm;
- return this.key.equals(o.key) && this.value.equals(o.value);
- }
-
- @Override
- public int hashCode() {
- return key.hashCode() + value.hashCode();
+ public Literal() {
}
/**
@@ -63,31 +36,6 @@ public class Literal extends AbstractTerm implements Term {
return 1;
}
- /**
- * create a Solr query string from this literal
- * @return a string which is a Solr query string
- */
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder();
- sb.append(this.key.getSolrFieldName());
- sb.append(':').append('"').append(this.value).append('"');
- return sb.toString();
- }
-
- /**
- * check if the key/value pair of this literal occurs in the SolrDocument
- * @param doc the document to match to this literal
- * @return true, if the key of this literal is contained in the document and the
- * value equals (does not equal) with the value if this literal (if the signature is false)
- */
- @Override
- public boolean matches(SolrDocument doc) {
- Object v = doc.getFieldValue(this.key.getSolrFieldName());
- if (v == null) return false;
- return this.value.equals(AbstractSolrConnector.CATCHALL_TERM) || v.toString().matches(this.value);
- }
-
@Override
public Term lightestRewrite() {
return this;
diff --git a/source/net/yacy/cora/federate/solr/logic/LongLiteral.java b/source/net/yacy/cora/federate/solr/logic/LongLiteral.java
new file mode 100644
index 000000000..39255248a
--- /dev/null
+++ b/source/net/yacy/cora/federate/solr/logic/LongLiteral.java
@@ -0,0 +1,91 @@
+/**
+ * LongLiteral
+ * Copyright 2014 by Michael Peter Christen
+ * First released 24.10.2014 at http://yacy.net
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file lgpl21.txt
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.cora.federate.solr.logic;
+
+import org.apache.solr.common.SolrDocument;
+
+import net.yacy.cora.federate.solr.SchemaDeclaration;
+
+/**
+ * A literal which constrains a Solr field to a fixed long value.
+ */
+public class LongLiteral extends Literal implements Term {
+
+    private final SchemaDeclaration key;
+    private final long value;
+
+    /**
+     * @param key the schema field this literal refers to
+     * @param value the number the field is compared with
+     */
+    public LongLiteral(final SchemaDeclaration key, final long value) {
+        super();
+        this.key = key;
+        this.value = value;
+    }
+
+    /**
+     * @return a new LongLiteral with the same key and value
+     */
+    @Override
+    public Object clone() {
+        return new LongLiteral(this.key, this.value);
+    }
+
+    @Override
+    public boolean equals(Object otherTerm) {
+        if (!(otherTerm instanceof LongLiteral)) return false;
+        LongLiteral o = (LongLiteral) otherTerm;
+        return this.key.equals(o.key) && this.value == o.value;
+    }
+
+    @Override
+    public int hashCode() {
+        // fold the upper half into the lower half so that values which differ only in
+        // their upper 32 bits still get different hash codes
+        return this.key.hashCode() + (int) (this.value ^ (this.value >>> 32));
+    }
+
+    /**
+     * create a Solr query string from this literal
+     * @return the query string in the form <code>fieldname:number</code>
+     */
+    @Override
+    public String toString() {
+        return this.key.getSolrFieldName() + ':' + this.value;
+    }
+
+    /**
+     * check if the field of this literal has the expected value in the SolrDocument
+     * @param doc the document to match to this literal
+     * @return true, if the document contains the field of this literal and the string
+     *         representation of the field value is equal to the decimal form of the value
+     */
+    @Override
+    public boolean matches(SolrDocument doc) {
+        Object v = doc.getFieldValue(this.key.getSolrFieldName());
+        if (v == null) return false;
+        // plain comparison: digits and a leading minus are no regex metacharacters, so this
+        // gives the same result as String.matches without compiling a pattern on every call
+        return Long.toString(this.value).equals(v.toString());
+    }
+
+}
\ No newline at end of file
diff --git a/source/net/yacy/cora/federate/solr/logic/StringLiteral.java b/source/net/yacy/cora/federate/solr/logic/StringLiteral.java
new file mode 100644
index 000000000..fc00137df
--- /dev/null
+++ b/source/net/yacy/cora/federate/solr/logic/StringLiteral.java
@@ -0,0 +1,91 @@
+/**
+ * StringLiteral
+ * Copyright 2014 by Michael Peter Christen
+ * First released 03.08.2014 at http://yacy.net
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file lgpl21.txt
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.cora.federate.solr.logic;
+
+import org.apache.solr.common.SolrDocument;
+
+import net.yacy.cora.federate.solr.SchemaDeclaration;
+import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
+
+/**
+ * A literal which constrains a Solr field to a string value.
+ */
+public class StringLiteral extends Literal implements Term {
+
+    private final SchemaDeclaration key;
+    private final String value;
+
+    /**
+     * @param key the schema field this literal refers to
+     * @param value the string the field is compared with; must not be null
+     */
+    public StringLiteral(final SchemaDeclaration key, final String value) {
+        super();
+        this.key = key;
+        this.value = value;
+    }
+
+    /**
+     * @return a new StringLiteral with the same key and value
+     */
+    @Override
+    public Object clone() {
+        return new StringLiteral(this.key, this.value);
+    }
+
+    @Override
+    public boolean equals(Object otherTerm) {
+        if (!(otherTerm instanceof StringLiteral)) return false;
+        StringLiteral o = (StringLiteral) otherTerm;
+        return this.key.equals(o.key) && this.value.equals(o.value);
+    }
+
+    @Override
+    public int hashCode() {
+        return this.key.hashCode() + this.value.hashCode();
+    }
+
+    /**
+     * create a Solr query string from this literal
+     * @return the query string in the form <code>fieldname:"value"</code>
+     */
+    @Override
+    public String toString() {
+        // NOTE(review): the value is not escaped; a double quote inside the value would end the
+        // quoted part early - confirm that callers only pass values without quotes
+        return this.key.getSolrFieldName() + ":\"" + this.value + '"';
+    }
+
+    /**
+     * check if the field of this literal has a matching value in the SolrDocument
+     * @param doc the document to match to this literal
+     * @return true, if the document contains the field of this literal and either the value
+     *         of this literal is the catchall term or the string representation of the field
+     *         value matches the value of this literal, which is used as a regular expression
+     */
+    @Override
+    public boolean matches(SolrDocument doc) {
+        Object v = doc.getFieldValue(this.key.getSolrFieldName());
+        if (v == null) return false;
+        return this.value.equals(AbstractSolrConnector.CATCHALL_TERM) || v.toString().matches(this.value);
+    }
+
+}
diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java
index 2c2521310..50803695a 100644
--- a/source/net/yacy/search/schema/CollectionConfiguration.java
+++ b/source/net/yacy/search/schema/CollectionConfiguration.java
@@ -61,10 +61,13 @@ import net.yacy.cora.federate.solr.SchemaDeclaration;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.federate.solr.connector.SolrConnector;
import net.yacy.cora.federate.solr.connector.SolrConnector.LoadTimeURL;
+import net.yacy.cora.federate.solr.logic.BooleanLiteral;
+import net.yacy.cora.federate.solr.logic.CatchallLiteral;
import net.yacy.cora.federate.solr.logic.Conjunction;
import net.yacy.cora.federate.solr.logic.Disjunction;
-import net.yacy.cora.federate.solr.logic.Literal;
+import net.yacy.cora.federate.solr.logic.LongLiteral;
import net.yacy.cora.federate.solr.logic.Negation;
+import net.yacy.cora.federate.solr.logic.StringLiteral;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
@@ -1442,9 +1445,19 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
public void postprocessing_doublecontent(Segment segment, Set uniqueURLs, SolrDocument doc, final SolrInputDocument sid, final DigestURL url) {
// FIND OUT IF THIS IS A DOUBLE DOCUMENT
+ // term to describe documents which are indexable:
+ // - no noindex in meta or x-robots
+ // - no canonical-tag
+ Conjunction ValidDocTermTemplate = new Conjunction();
+ ValidDocTermTemplate.addOperand(new LongLiteral(CollectionSchema.httpstatus_i, 200));
+ ValidDocTermTemplate.addOperand(new Disjunction(new Negation(new CatchallLiteral(CollectionSchema.canonical_equal_sku_b)), new BooleanLiteral(CollectionSchema.canonical_equal_sku_b, true)));
+ ValidDocTermTemplate.addOperand(new Negation(new LongLiteral(CollectionSchema.robots_i, 8))); // bit 3 (noindex)
+ ValidDocTermTemplate.addOperand(new Negation(new LongLiteral(CollectionSchema.robots_i, 24))); // bit 3 + 4 (noindex + nofollow)
+ ValidDocTermTemplate.addOperand(new Negation(new LongLiteral(CollectionSchema.robots_i, 512))); // bit 9 (noindex)
+ ValidDocTermTemplate.addOperand(new Negation(new LongLiteral(CollectionSchema.robots_i, 1536))); // bit 9 + 10 (noindex + nofollow)
+
String urlhash = ASCII.String(url.hash());
String hostid = url.hosthash();
- Conjunction con = new Conjunction();
Disjunction dnf = new Disjunction();
CollectionSchema[][] doccheckschema = new CollectionSchema[][]{
{CollectionSchema.exact_signature_l, CollectionSchema.exact_signature_unique_b, CollectionSchema.exact_signature_copycount_i},
@@ -1460,12 +1473,13 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
if (signature == null) continue uniquecheck;
//con.addOperand(new Negation(new Literal(CollectionSchema.id, urlhash)));
//con.addOperand(new Literal(CollectionSchema.host_id_s, hostid));
- dnf.addOperand(new Literal(signaturefield, signature.toString()));
+ dnf.addOperand(new LongLiteral(signaturefield, signature));
}
}
+ Conjunction con = (Conjunction) ValidDocTermTemplate.clone();
con.addOperand(dnf);
- con.addOperand(new Negation(new Literal(CollectionSchema.id, urlhash)));
- con.addOperand(new Literal(CollectionSchema.host_id_s, hostid));
+ con.addOperand(new Negation(new StringLiteral(CollectionSchema.id, urlhash)));
+ con.addOperand(new StringLiteral(CollectionSchema.host_id_s, hostid));
String query = con.toString();
SolrDocumentList docsAkk;
try {
@@ -1484,7 +1498,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
// lookup the document with the same signature
Long signature = (Long) doc.getFieldValue(signaturefield.getSolrFieldName());
if (signature == null) continue uniquecheck;
- SolrDocumentList docs = new Literal(signaturefield, signature.toString()).apply(docsAkk);
+ SolrDocumentList docs = new LongLiteral(signaturefield, signature).apply(docsAkk); // same literal type as used when the query for docsAkk was built
if (docs.getNumFound() == 0) {
sid.setField(uniquefield.getSolrFieldName(), true);
sid.setField(countfield.getSolrFieldName(), 1);
@@ -1525,17 +1539,11 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
continue uniquecheck;
}
try {
- String doccountquery =
- CollectionSchema.host_id_s.getSolrFieldName() + ":\"" + hostid + "\" AND " +
- "-" + CollectionSchema.robots_i.getSolrFieldName() + ":8 AND " + // bit 3 (noindex)
- "-" + CollectionSchema.robots_i.getSolrFieldName() + ":24 AND " + // bit 3 + 4 (noindex + nofollow)
- "-" + CollectionSchema.robots_i.getSolrFieldName() + ":512 AND " + // bit 9 (noindex)
- "-" + CollectionSchema.robots_i.getSolrFieldName() + ":1536 AND " + // bit 9 + 10 (noindex + nofollow)
- "((-" + CollectionSchema.canonical_equal_sku_b.getSolrFieldName() + ":" + AbstractSolrConnector.CATCHALL_TERM + ") OR (" + CollectionSchema.canonical_equal_sku_b.getSolrFieldName() + ":true)) AND " +
- CollectionSchema.httpstatus_i.getSolrFieldName() + ":200 AND " +
- "-" + CollectionSchema.id.getSolrFieldName() + ":\"" + urlhash + "\" AND " +
- signaturefield.getSolrFieldName() + ":\"" + signature.toString() + "\"";
- long doccount = segment.fulltext().getDefaultConnector().getCountByQuery(doccountquery);
+ Conjunction doccountterm = (Conjunction) ValidDocTermTemplate.clone();
+ doccountterm.addOperand(new Negation(new StringLiteral(CollectionSchema.id, urlhash)));
+ doccountterm.addOperand(new StringLiteral(CollectionSchema.host_id_s, hostid));
+ doccountterm.addOperand(new LongLiteral(signaturefield, signature));
+ long doccount = segment.fulltext().getDefaultConnector().getCountByQuery(doccountterm.toString());
sid.setField(uniquefield.getSolrFieldName(), doccount == 0);
} catch (final IOException e) {}
}