Extended the Solr connector with a method to retrieve a single facet.

pull/1/head
Michael Peter Christen 13 years ago
parent d0015df61c
commit 8556a3d521

@ -26,6 +26,7 @@ package net.yacy.cora.document;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.Comparator;
import org.apache.http.entity.mime.content.StringBody;
@ -38,13 +39,65 @@ import org.apache.http.entity.mime.content.StringBody;
* @author admin
*
*/
public class UTF8 {
public class UTF8 implements Comparator<String> {
/** The UTF-8 charset, looked up once when this class is initialized. */
public final static Charset charset = Charset.forName("UTF-8");

/** Shared comparator instance that lower-cases characters before comparing them. */
public static final UTF8 insensitiveUTF8Comparator = new UTF8(true);

/** Shared comparator instance that compares characters exactly as they are. */
public static final UTF8 identityUTF8Comparator = new UTF8(false);

/** When true, {@code compare} and {@code equals} lower-case each character before comparing. */
public boolean insensitive;

/**
 * Create a comparator.
 * @param insensitive true if characters shall be lower-cased before they are compared
 */
public UTF8(boolean insensitive) {
    this.insensitive = insensitive;
}
/**
 * Compare two strings character by character.
 * If {@code insensitive} is set, both characters are lower-cased before the comparison.
 * @param o0 the first string
 * @param o1 the second string
 * @return the difference of the first pair of characters that differ; if one string is a
 *         prefix of the other (or both are equal), the difference of the string lengths
 */
@Override
public int compare(String o0, String o1) {
    final int len0 = o0.length();
    final int len1 = o1.length();
    final int common = Math.min(len0, len1);
    for (int pos = 0; pos < common; pos++) {
        char left = o0.charAt(pos);
        char right = o1.charAt(pos);
        if (this.insensitive) {
            left = Character.toLowerCase(left);
            right = Character.toLowerCase(right);
        }
        if (left != right) return left - right;
    }
    // no difference within the common prefix: the shorter string sorts first
    return len0 - len1;
}
/**
 * Test two strings for equality using the same character rule as {@code compare}.
 * Strings of different length are never equal.
 * @param o0 the first string
 * @param o1 the second string
 * @return true if both strings have the same length and all characters match
 */
public boolean equals(final String o0, final String o1) {
    final int length = o0.length();
    // only walk over the characters when the lengths agree
    return length == o1.length() && equals(o0, o1, length);
}
/**
 * Compare the first {@code l} characters of two strings.
 * If {@code insensitive} is set, both characters are lower-cased before the comparison.
 * @param o0 the first string
 * @param o1 the second string
 * @param l the number of leading characters to compare
 * @return true if none of the first {@code l} character pairs differ
 */
private boolean equals(final String o0, final String o1, final int l) {
    int pos = 0;
    while (pos < l) {
        char left = o0.charAt(pos);
        char right = o1.charAt(pos);
        if (this.insensitive) {
            left = Character.toLowerCase(left);
            right = Character.toLowerCase(right);
        }
        if (left != right) return false;
        pos++;
    }
    return true;
}
/**
 * Create a {@link StringBody} from a byte array.
 * The bytes are converted with {@code UTF8.String(b)} and the result is passed on to
 * another {@code StringBody(...)} overload of this class.
 * NOTE(review): presumably the bytes are decoded as UTF-8 text - confirm against UTF8.String.
 * @param b the content bytes
 * @return the body produced by the delegated {@code StringBody(...)} call
 */
public final static StringBody StringBody(final byte[] b) {
return StringBody(UTF8.String(b));
}

@ -39,7 +39,7 @@ public abstract class AbstractSolrConnector implements SolrConnector {
private final static Logger log = Logger.getLogger(AbstractSolrConnector.class);
public final SolrDocument POISON_DOCUMENT = new SolrDocument();
public final static SolrDocument POISON_DOCUMENT = new SolrDocument();
public final static String POISON_ID = "POISON_ID";
public final static SolrQuery catchallQuery = new SolrQuery();
static {
@ -90,7 +90,7 @@ public abstract class AbstractSolrConnector implements SolrConnector {
break;
}
}
try {queue.put(AbstractSolrConnector.this.POISON_DOCUMENT);} catch (InterruptedException e1) {}
try {queue.put(AbstractSolrConnector.POISON_DOCUMENT);} catch (InterruptedException e1) {}
}
};
t.start();

@ -27,6 +27,7 @@ import java.io.IOException;
import javax.xml.parsers.ParserConfigurationException;
import net.yacy.cora.federate.solr.SolrServlet;
import net.yacy.cora.federate.solr.YaCySchema;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
@ -182,16 +183,26 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
storage.mkdirs();
try {
EmbeddedSolrConnector solr = new EmbeddedSolrConnector(storage, solr_config);
solr.setCommitWithinMs(100);
SolrInputDocument doc = new SolrInputDocument();
doc.addField("id", "ABCD0000abcd");
doc.addField("title", "Lorem ipsum");
doc.addField("text_t", "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.");
doc.addField(YaCySchema.id.name(), "ABCD0000abcd");
doc.addField(YaCySchema.title.name(), "Lorem ipsum");
doc.addField(YaCySchema.host_s.name(), "yacy.net");
doc.addField(YaCySchema.text_t.name(), "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.");
solr.add(doc);
SolrServlet.startServer("/solr", 8091, solr);
SolrDocumentList searchresult = solr.query("text_t:tempor", 0, 10);
for (SolrDocument d : searchresult) {
System.out.println(d.toString());
}
// start a server
SolrServlet.startServer("/solr", 8091, solr); // try http://localhost:8091/solr/select?q=*:*
// do a normal query
SolrDocumentList select = solr.query(YaCySchema.text_t.name() + ":tempor", 0, 10);
for (SolrDocument d : select) System.out.println("***TEST SELECT*** " + d.toString());
// do a facet query
select = solr.query(YaCySchema.text_t.name() + ":tempor", 0, 10);
for (SolrDocument d : select) System.out.println("***TEST SELECT*** " + d.toString());
// try http://127.0.0.1:8091/solr/select?q=ping
try {Thread.sleep(1000 * 1000);} catch (InterruptedException e) {}
solr.close();

@ -25,7 +25,10 @@ import java.util.Collection;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.cora.sorting.ClusteredScoreMap;
import net.yacy.cora.sorting.ReversibleScoreMap;
import net.yacy.cora.storage.ARC;
import net.yacy.cora.storage.ConcurrentARC;
import net.yacy.kelondro.util.MemoryControl;
@ -358,6 +361,27 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
return count.get();
}
/**
 * get a facet of the index: a list of values that are most common in a specific field.
 * If only one of the two mirrored connectors is present, its facet is returned directly.
 * If both are present, each one is asked for up to maxresults values and the scores of
 * the second are added to the first; the merged map may therefore hold more than
 * maxresults entries.
 * @param field the field which is selected for the facet
 * @param maxresults the maximum number of values requested from each connector
 * @return a score map from facet value to its (summed) count; empty if no connector is set
 * @throws IOException if one of the connectors fails to compute its facet
 */
@Override
public ReversibleScoreMap<String> getFacet(String field, int maxresults) throws IOException {
    if (this.solr0 == null && this.solr1 == null) return new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
    // from here on at least one connector exists
    if (this.solr1 == null) return this.solr0.getFacet(field, maxresults);
    if (this.solr0 == null) return this.solr1.getFacet(field, maxresults);

    // both connectors exist: accumulate the counts of the second facet into the first
    final ReversibleScoreMap<String> facet0 = this.solr0.getFacet(field, maxresults);
    final ReversibleScoreMap<String> facet1 = this.solr1.getFacet(field, maxresults);
    for (final String key: facet1) facet0.inc(key, facet1.get(key));
    return facet0;
}
private void addToCache(SolrDocumentList list) {
if (MemoryControl.shortStatus()) clearCache();
for (final SolrDocument solrdoc: list) {

@ -25,6 +25,8 @@ import java.util.Collection;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import net.yacy.cora.sorting.ReversibleScoreMap;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
@ -163,12 +165,16 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr
return this.solr.query(querystring, offset, count);
}
/**
 * Ask the wrapped connector for the number of results of a query.
 * @param querystring the query
 * @return the count reported by the wrapped connector
 * @throws IOException if the wrapped connector fails
 */
@Override
public long getQueryCount(final String querystring) throws IOException {
    final long hits = this.solr.getQueryCount(querystring);
    return hits;
}
/**
 * Ask the wrapped connector for a facet of the given field.
 * @param field the field which is selected for the facet
 * @param maxresults the maximum size of the resulting map
 * @return the facet computed by the wrapped connector
 * @throws IOException if the wrapped connector fails
 */
@Override
public ReversibleScoreMap<String> getFacet(final String field, final int maxresults) throws IOException {
    final ReversibleScoreMap<String> facet = this.solr.getFacet(field, maxresults);
    return facet;
}
@Override
public long getSize() {
return this.solr.getSize();

@ -24,6 +24,10 @@ import java.io.IOException;
import java.util.Collection;
import java.util.List;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.sorting.ClusteredScoreMap;
import net.yacy.cora.sorting.ReversibleScoreMap;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
@ -198,6 +202,21 @@ public class RetrySolrConnector extends AbstractSolrConnector implements SolrCon
return 0;
}
/**
 * Get a facet from the wrapped connector, retrying on failure.
 * The call is repeated (with a 10 ms pause between attempts) until it succeeds or
 * retryMaxTime milliseconds have passed. If the waiting thread is interrupted, its
 * interrupt status is restored and retrying stops.
 * @param field the field which is selected for the facet
 * @param maxresults the maximum size of the resulting map
 * @return the facet of the wrapped connector; an empty map if no attempt was made at all
 * @throws IOException the last failure, if no attempt succeeded; a failure which is not
 *         an IOException is wrapped and kept as the cause
 */
@Override
public ReversibleScoreMap<String> getFacet(final String field, final int maxresults) throws IOException {
    final long deadline = System.currentTimeMillis() + this.retryMaxTime;
    Throwable lastFailure = null;
    while (System.currentTimeMillis() < deadline) {
        try {
            return this.solrConnector.getFacet(field, maxresults);
        } catch (final Throwable e) {
            lastFailure = e;
        }
        try {
            Thread.sleep(10);
        } catch (final InterruptedException interrupted) {
            // do not lose the interruption: re-set the flag for the caller and give up retrying
            Thread.currentThread().interrupt();
            break;
        }
    }
    if (lastFailure != null) {
        if (lastFailure instanceof IOException) throw (IOException) lastFailure;
        // keep the original throwable as cause so its stack trace is not lost
        throw new IOException(lastFailure.getMessage(), lastFailure);
    }
    return new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
}
@Override
public long getSize() {
final long t = System.currentTimeMillis() + this.retryMaxTime;

@ -27,6 +27,9 @@ import java.util.Collection;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.sorting.ClusteredScoreMap;
import net.yacy.cora.sorting.ReversibleScoreMap;
import net.yacy.cora.protocol.Domains;
import org.apache.solr.common.SolrDocument;
@ -206,6 +209,22 @@ public class ShardSolrConnector extends AbstractSolrConnector implements SolrCon
return count.get();
}
/**
 * get a facet of the index: a list of values that are most common in a specific field.
 * Every connector of this shard set is asked for up to maxresults values and the counts
 * are summed per value; the accumulated map may therefore hold more than maxresults entries.
 * @param field the field which is selected for the facet
 * @param maxresults the maximum number of values requested from each connector
 * @return a score map from facet value to its count, summed over all connectors
 * @throws IOException if one of the connectors fails to compute its facet
 */
@Override
public ReversibleScoreMap<String> getFacet(final String field, final int maxresults) throws IOException {
    final ReversibleScoreMap<String> acc = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
    for (final SolrConnector connector: this.connectors) {
        final ReversibleScoreMap<String> peer = connector.getFacet(field, maxresults);
        // add the per-shard count of every value to the running total
        for (final String key: peer) acc.inc(key, peer.get(key));
    }
    return acc;
}
public long[] getSizeList() {
final long[] size = new long[this.connectors.size()];
int i = 0;

@ -25,6 +25,8 @@ import java.util.Collection;
import java.util.List;
import java.util.concurrent.BlockingQueue;
import net.yacy.cora.sorting.ReversibleScoreMap;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
@ -117,9 +119,18 @@ public interface SolrConnector extends Iterable<String> /* Iterable of document
*/
public long getQueryCount(final String querystring) throws IOException;
/**
 * Get a facet of the index: the values that are most common in a specific field,
 * each one together with a count.
 * @param field the field which is selected for the facet
 * @param maxresults the maximum size of the resulting map
 * @return a score map which assigns a count to each returned field value
 * @throws IOException if the implementation fails to compute the facet
 */
public ReversibleScoreMap<String> getFacet(String field, int maxresults) throws IOException;
/**
* Get a query result from solr as a stream of documents.
* The result queue is considered as terminated if AbstractSolrConnectro.POISON_DOCUMENT is returned.
* The result queue is considered as terminated if AbstractSolrConnector.POISON_DOCUMENT is returned.
* The method returns immediately and feeds the search results into the queue
* @param querystring
* @param offset

@ -26,21 +26,29 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import net.yacy.kelondro.logging.Log;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.sorting.ClusteredScoreMap;
import net.yacy.cora.sorting.ReversibleScoreMap;
import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.FacetField.Count;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.SolrParams;
public abstract class SolrServerConnector extends AbstractSolrConnector implements SolrConnector {
private final static Logger log = Logger.getLogger(SolrServerConnector.class);
protected SolrServer server;
protected int commitWithinMs; // max time (in ms) before a commit will happen
@ -81,9 +89,9 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
if (this.server != null) synchronized (this.server) {this.server.commit();}
this.server = null;
} catch (SolrServerException e) {
Log.logException(e);
log.warn(e);
} catch (IOException e) {
Log.logException(e);
log.warn(e);
}
}
@ -96,7 +104,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
if (docs == null) return 0;
return docs.getNumFound();
} catch (final Throwable e) {
Log.logException(e);
log.warn(e);
return 0;
}
}
@ -181,7 +189,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
//this.server.commit();
}
} catch (SolrServerException e) {
Log.logWarning("SolrConnector", e.getMessage() + " DOC=" + solrdoc.toString());
log.warn(e.getMessage() + " DOC=" + solrdoc.toString());
throw new IOException(e);
}
}
@ -196,7 +204,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
//this.server.commit();
}
} catch (SolrServerException e) {
Log.logWarning("SolrConnector", e.getMessage() + " DOC=" + solrdocs.toString());
log.warn(e.getMessage() + " DOC=" + solrdocs.toString());
throw new IOException(e);
}
}
@ -214,6 +222,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
params.setQuery(querystring);
params.setRows(count);
params.setStart(offset);
params.setFacet(false);
//params.addSortField( "price", SolrQuery.ORDER.asc );
// query the server
@ -222,6 +231,12 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
return docs;
}
/**
* get the number of results when this query is done.
* This should only be called if the actual result is never used, and only the count is interesting
* @param querystring
* @return the number of results for this query
*/
@Override
public long getQueryCount(String querystring) throws IOException {
// construct query
@ -229,12 +244,41 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
params.setQuery(querystring);
params.setRows(0);
params.setStart(0);
params.setFacet(false);
// query the server
QueryResponse rsp = query(params);
final SolrDocumentList docs = rsp.getResults();
return docs.getNumFound();
}
/**
 * get a facet of the index: a list of values that are most common in a specific field.
 * A match-all query ("*:*") without result rows is sent with faceting switched on for
 * the given field, sorted by count and limited to maxresults values.
 * @param field the field which is selected for the facet
 * @param maxresults the maximum size of the resulting map
 * @return a score map from facet value to its count; empty if the response carries no
 *         facet or no values for the field
 * @throws IOException if the query fails
 */
@Override
public ReversibleScoreMap<String> getFacet(String field, int maxresults) throws IOException {
    // construct query
    final SolrQuery params = new SolrQuery();
    params.setQuery("*:*");
    params.setRows(0);
    params.setStart(0);
    params.setFacet(true);
    params.setFacetLimit(maxresults);
    params.setFacetSort(FacetParams.FACET_SORT_COUNT);
    params.addFacetField(field);

    final ReversibleScoreMap<String> result = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);

    // query the server
    final QueryResponse rsp = query(params);
    final FacetField facet = rsp.getFacetField(field);
    if (facet == null) return result; // the response has no facet for this field
    final List<Count> values = facet.getValues();
    if (values == null) return result; // the facet has no value list
    for (final Count ff: values) {
        // counts are long; clamp instead of letting the int cast wrap around
        result.set(ff.getName(), (int) Math.min(ff.getCount(), Integer.MAX_VALUE));
    }
    return result;
}
abstract public QueryResponse query(SolrParams params) throws IOException;

@ -393,7 +393,10 @@ public final class Switchboard extends serverSwitch
// new features are always activated by default (if activated in input-backupScheme)
solrScheme.fill(backupScheme, true);
// switch on some fields which are necessary for ranking and faceting
for (YaCySchema field: new YaCySchema[]{YaCySchema.url_file_ext_s, YaCySchema.last_modified}) {
for (YaCySchema field: new YaCySchema[]{
YaCySchema.url_file_ext_s, YaCySchema.last_modified,
YaCySchema.url_paths_sxt, YaCySchema.host_organization_s
}) {
ConfigurationSet.Entry entry = solrScheme.get(field.name()); entry.setEnable(true); solrScheme.put(field.name(), entry);
}
solrScheme.commit();

Loading…
Cancel
Save