- added solr core and the libraries that solr needs (lucene is still missing and will follow later)
- added embedded solr connector which can connect to solr
programmatically (without using a server in between)
pull/1/head
Michael Peter Christen 13 years ago
parent 15f4551d88
commit 3f55dc7c1e

@ -45,8 +45,9 @@
<classpathentry kind="lib" path="lib/httpclient-4.2.jar"/> <classpathentry kind="lib" path="lib/httpclient-4.2.jar"/>
<classpathentry kind="lib" path="lib/httpmime-4.2.jar"/> <classpathentry kind="lib" path="lib/httpmime-4.2.jar"/>
<classpathentry kind="lib" path="lib/commons-io-2.1.jar"/> <classpathentry kind="lib" path="lib/commons-io-2.1.jar"/>
<classpathentry kind="lib" path="lib/apache-solr-solrj-3.6.0.jar" sourcepath="/solrj/src"/> <classpathentry kind="lib" path="lib/apache-solr-solrj-3.6.0.jar" sourcepath="/Users/admin/git/lucene-solr/solr/solrj/src/java"/>
<classpathentry kind="lib" path="lib/commons-compress-1.4.1.jar"/> <classpathentry kind="lib" path="lib/commons-compress-1.4.1.jar"/>
<classpathentry kind="lib" path="lib/apache-solr-core-3.6.0.jar" sourcepath="/Users/admin/git/lucene-solr/solr/core/src/java/"/>
<classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/> <classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
<classpathentry kind="lib" path="lib/icu4j-core.jar"/> <classpathentry kind="lib" path="lib/icu4j-core.jar"/>
<classpathentry kind="lib" path="lib/htmllexer.jar"/> <classpathentry kind="lib" path="lib/htmllexer.jar"/>

@ -157,6 +157,7 @@
<pathelement location="${htroot}" /> <pathelement location="${htroot}" />
<pathelement location="${lib}/activation.jar" /> <pathelement location="${lib}/activation.jar" />
<pathelement location="${lib}/apache-mime4j-0.6.jar" /> <pathelement location="${lib}/apache-mime4j-0.6.jar" />
<pathelement location="${lib}/apache-solr-core-3.6.0.jar" />
<pathelement location="${lib}/apache-solr-solrj-3.6.0.jar" /> <pathelement location="${lib}/apache-solr-solrj-3.6.0.jar" />
<pathelement location="${lib}/arq-2.8.7.jar" /> <pathelement location="${lib}/arq-2.8.7.jar" />
<pathelement location="${lib}/bcmail-jdk15-145.jar" /> <pathelement location="${lib}/bcmail-jdk15-145.jar" />
@ -166,9 +167,11 @@
<pathelement location="${lib}/commons-fileupload-1.2.2.jar" /> <pathelement location="${lib}/commons-fileupload-1.2.2.jar" />
<pathelement location="${lib}/commons-io-2.1.jar" /> <pathelement location="${lib}/commons-io-2.1.jar" />
<pathelement location="${lib}/commons-jxpath-1.3.jar" /> <pathelement location="${lib}/commons-jxpath-1.3.jar" />
<pathelement location="${lib}/commons-lang-2.6.jar" />
<pathelement location="${lib}/commons-logging-1.1.1.jar" /> <pathelement location="${lib}/commons-logging-1.1.1.jar" />
<pathelement location="${lib}/fontbox-1.6.0.jar" /> <pathelement location="${lib}/fontbox-1.6.0.jar" />
<pathelement location="${lib}/geronimo-stax-api_1.0_spec-1.0.1.jar" /> <pathelement location="${lib}/geronimo-stax-api_1.0_spec-1.0.1.jar" />
<pathelement location="${lib}/guava-r05.jar" />
<pathelement location="${lib}/htmllexer.jar" /> <pathelement location="${lib}/htmllexer.jar" />
<pathelement location="${lib}/htmlparser.jar" /> <pathelement location="${lib}/htmlparser.jar" />
<pathelement location="${lib}/httpclient-4.2.jar" /> <pathelement location="${lib}/httpclient-4.2.jar" />
@ -185,6 +188,7 @@
<pathelement location="${lib}/jsch-0.1.42.jar" /> <pathelement location="${lib}/jsch-0.1.42.jar" />
<pathelement location="${lib}/json-simple-1.1.jar" /> <pathelement location="${lib}/json-simple-1.1.jar" />
<pathelement location="${lib}/log4j-1.2.16.jar" /> <pathelement location="${lib}/log4j-1.2.16.jar" />
<pathelement location="${lib}/log4j-over-slf4j-1.6.1.jar" />
<pathelement location="${lib}/metadata-extractor-2.4.0-beta-1.jar" /> <pathelement location="${lib}/metadata-extractor-2.4.0-beta-1.jar" />
<pathelement location="${lib}/mysql-connector-java-5.1.12-bin.jar" /> <pathelement location="${lib}/mysql-connector-java-5.1.12-bin.jar" />
<pathelement location="${lib}/pdfbox-1.6.0.jar" /> <pathelement location="${lib}/pdfbox-1.6.0.jar" />

Binary file not shown.

Binary file not shown.

@ -1,6 +1,6 @@
list of library-dependencies: list of library-dependencies:
* apache-solr-solrj-3.4.0.jar depens on: * apache-solr-solrj-3.4.0.jar depends on:
commons-codec-1.4.jar commons-codec-1.4.jar
commons-httpclient-3.1.jar commons-httpclient-3.1.jar
commons-io-1.4.jar commons-io-1.4.jar
@ -9,6 +9,22 @@ jcl-over-slf4j-1.6.1.jar
slf4j-api-1.6.1.jar slf4j-api-1.6.1.jar
wstx-asl-3.2.7.jar wstx-asl-3.2.7.jar
* apache-solr-core-3.6.0.jar depends on:
commons-codec-1.6.jar
commons-fileupload-1.2.1.jar
commons-httpclient-3.1.jar
commons-io-2.1.jar
commons-lang-2.6.jar
geronimo-stax-api_1.0_spec-1.0.1.jar
guava-r05.jar
httpclient-4.2.jar
httpcore-4.2.jar
jcl-over-slf4j-1.6.1.jar
log4j-over-slf4j-1.6.1.jar
slf4j-api-1.6.1.jar
slf4j-jdk14-1.6.1.jar
wstx-asl-3.2.7.jar
* pdfbox-1.6.0.jar depends on: * pdfbox-1.6.0.jar depends on:
fontbox-1.6.0.jar fontbox-1.6.0.jar
jempbox-1.6.0.jar jempbox-1.6.0.jar

Binary file not shown.

Binary file not shown.

@ -0,0 +1,191 @@
/**
* AbstractSolrConnector
* Copyright 2012 by Michael Peter Christen
* First released 21.06.2012 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.services.federated.solr;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.index.SolrField;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
/**
 * Base implementation of {@link SolrConnector} that forwards every operation
 * to a SolrJ {@link SolrServer}.
 *
 * Subclasses create the concrete server instance and hand it over with
 * {@link #init(SolrServer)}; until then {@link #server} is {@code null}.
 */
public class AbstractSolrConnector implements SolrConnector {

    /** Value passed as the commit-within argument (milliseconds) when adding documents: 180 seconds. */
    private static final int COMMIT_WITHIN_MS = 180000;

    /** The server all requests are sent to; {@code null} until {@link #init(SolrServer)} was called. */
    protected SolrServer server;

    /**
     * Create a connector without a server. The subclass must call
     * {@link #init(SolrServer)} before the connector is used.
     */
    protected AbstractSolrConnector() {
        this.server = null;
    }

    /**
     * Set the server that this connector talks to.
     * @param server the server instance used for all subsequent requests
     */
    protected void init(SolrServer server) {
        this.server = server;
    }

    /**
     * Commit pending changes. Checked failures of the commit are logged and
     * not propagated. If no server was ever set there is nothing to commit
     * and the call returns silently.
     */
    @Override
    public synchronized void close() {
        if (this.server == null) {
            // init() was never called, so no request can be pending
            return;
        }
        try {
            this.server.commit();
        } catch (SolrServerException e) {
            Log.logException(e);
        } catch (IOException e) {
            Log.logException(e);
        }
    }

    /**
     * Count the documents in the index by running a match-all query and
     * reading the reported number of hits.
     * @return the number of documents found, or 0 if the query failed (the failure is logged)
     */
    @Override
    public long getSize() {
        try {
            return get("*:*", 0, 1).getNumFound();
        } catch (final Throwable e) {
            Log.logException(e);
            return 0;
        }
    }

    /**
     * delete everything in the solr index and commit the deletion
     * @throws IOException if the deletion or the commit failed; the original failure is the cause
     */
    @Override
    public void clear() throws IOException {
        try {
            this.server.deleteByQuery("*:*");
            this.server.commit();
        } catch (final Throwable e) {
            throw new IOException(e);
        }
    }

    /**
     * Delete a single document. No commit is issued by this method.
     * @param id the id of the document to delete
     * @throws IOException if the request failed; the original failure is the cause
     */
    @Override
    public void delete(final String id) throws IOException {
        try {
            this.server.deleteById(id);
        } catch (final Throwable e) {
            throw new IOException(e);
        }
    }

    /**
     * Delete a list of documents. No commit is issued by this method.
     * @param ids the ids of the documents to delete
     * @throws IOException if the request failed; the original failure is the cause
     */
    @Override
    public void delete(final List<String> ids) throws IOException {
        try {
            this.server.deleteById(ids);
        } catch (final Throwable e) {
            throw new IOException(e);
        }
    }

    /**
     * Check whether a document with the given id is in the index.
     * A failing lookup is logged and reported as "does not exist"; this
     * implementation never actually throws the declared IOException.
     * @param id the document id to look up
     * @return true if the query for the id found at least one document, false otherwise or on failure
     * @throws IOException declared by the interface, not thrown here
     */
    @Override
    public boolean exists(final String id) throws IOException {
        // NOTE(review): the id is concatenated into the query string without escaping;
        // this presumably relies on ids never containing query syntax characters - confirm.
        final String idQuery = SolrField.id.getSolrFieldName() + ":" + id;
        try {
            return get(idQuery, 0, 1).getNumFound() > 0;
        } catch (final Throwable e) {
            Log.logException(e);
            return false;
        }
    }

    /**
     * Send a file to the "/update/extract" handler of the server and commit.
     * The document id is supplied as a literal field; unknown fields are
     * prefixed with "attr_" and the extracted content is mapped to "attr_content".
     * @param file the file to be sent
     * @param solrId the id that the resulting document gets
     * @throws IOException if the request or the commit failed; the original failure is the cause
     */
    public void add(final File file, final String solrId) throws IOException {
        final ContentStreamUpdateRequest up = new ContentStreamUpdateRequest("/update/extract");
        up.addFile(file);
        up.setParam("literal.id", solrId);
        up.setParam("uprefix", "attr_");
        up.setParam("fmap.content", "attr_content");
        try {
            this.server.request(up);
            // the request itself carries no commit action, so commit explicitly
            this.server.commit();
        } catch (final Throwable e) {
            throw new IOException(e);
        }
    }

    /**
     * Add a single document. No explicit commit is issued; the document is
     * sent with a commit-within value of {@value #COMMIT_WITHIN_MS} ms.
     * @param solrdoc the document to add
     * @throws IOException if the server reported a SolrServerException (logged as warning) or an I/O problem occurred
     * @throws SolrException passed through unchanged if raised by the server call
     */
    @Override
    public void add(final SolrDoc solrdoc) throws IOException, SolrException {
        try {
            this.server.add(solrdoc, COMMIT_WITHIN_MS);
        } catch (SolrServerException e) {
            Log.logWarning("SolrConnector", e.getMessage() + " DOC=" + solrdoc.toString());
            throw new IOException(e);
        }
    }

    /**
     * Add several documents in one request. No explicit commit is issued; the
     * documents are sent with a commit-within value of {@value #COMMIT_WITHIN_MS} ms.
     * An empty collection is ignored.
     * @param solrdocs the documents to add
     * @throws IOException if the server reported a SolrServerException (logged as warning) or an I/O problem occurred
     * @throws SolrException passed through unchanged if raised by the server call
     */
    @Override
    public void add(final Collection<SolrDoc> solrdocs) throws IOException, SolrException {
        if (solrdocs.isEmpty()) {
            // nothing to send; avoid an update request without any document
            return;
        }
        // the server API expects a collection of SolrInputDocument, so copy into a list of that type
        final List<SolrInputDocument> docs = new ArrayList<SolrInputDocument>(solrdocs);
        try {
            this.server.add(docs, COMMIT_WITHIN_MS);
        } catch (SolrServerException e) {
            Log.logWarning("SolrConnector", e.getMessage() + " DOC=" + solrdocs.toString());
            throw new IOException(e);
        }
    }

    /**
     * get a query result from solr
     * to get all results set the query String to "*:*"
     * @param querystring the query in solr query syntax
     * @param offset the index of the first result to return
     * @param count the maximum number of results to return
     * @return the list of documents delivered by the server for this query
     * @throws IOException if the query failed for any reason; the original failure is the cause
     */
    @Override
    public SolrDocumentList get(final String querystring, final int offset, final int count) throws IOException {
        // construct query
        final SolrQuery query = new SolrQuery();
        query.setQuery(querystring);
        query.setRows(count);
        query.setStart(offset);

        // query the server
        try {
            final QueryResponse rsp = this.server.query(query);
            return rsp.getResults();
        } catch (final Throwable e) {
            throw new IOException(e);
        }
    }
}

@ -27,13 +27,9 @@ package net.yacy.cora.services.federated.solr;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.net.InetAddress; import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.Domains;
import net.yacy.kelondro.logging.Log;
import org.apache.http.HttpHost; import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope; import org.apache.http.auth.AuthScope;
@ -45,22 +41,13 @@ import org.apache.http.impl.client.BasicAuthCache;
import org.apache.http.impl.client.BasicCredentialsProvider; import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.protocol.HttpContext; import org.apache.http.protocol.HttpContext;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer; import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import net.yacy.search.index.SolrField;
public class SolrSingleConnector implements SolrConnector { public class SolrSingleConnector extends AbstractSolrConnector implements SolrConnector {
private final String solrurl, host, solrpath, solraccount, solrpw; private final String solrurl, host, solrpath, solraccount, solrpw;
private final int port; private final int port;
private HttpSolrServer server;
/** /**
* create a new solr connector * create a new solr connector
@ -69,6 +56,7 @@ public class SolrSingleConnector implements SolrConnector {
* @throws IOException * @throws IOException
*/ */
public SolrSingleConnector(final String url) throws IOException { public SolrSingleConnector(final String url) throws IOException {
super();
this.solrurl = url; this.solrurl = url;
// connect using authentication // connect using authentication
@ -87,6 +75,7 @@ public class SolrSingleConnector implements SolrConnector {
this.solraccount = userinfo.substring(0, p); this.solrpw = userinfo.substring(p + 1); this.solraccount = userinfo.substring(0, p); this.solrpw = userinfo.substring(p + 1);
} }
} }
HttpSolrServer s;
if (this.solraccount.length() > 0) { if (this.solraccount.length() > 0) {
final DefaultHttpClient client = new DefaultHttpClient() { final DefaultHttpClient client = new DefaultHttpClient() {
@Override @Override
@ -103,155 +92,17 @@ public class SolrSingleConnector implements SolrConnector {
BasicCredentialsProvider credsProvider = new BasicCredentialsProvider(); BasicCredentialsProvider credsProvider = new BasicCredentialsProvider();
credsProvider.setCredentials(new AuthScope(this.host, AuthScope.ANY_PORT), new UsernamePasswordCredentials(this.solraccount, this.solrpw)); credsProvider.setCredentials(new AuthScope(this.host, AuthScope.ANY_PORT), new UsernamePasswordCredentials(this.solraccount, this.solrpw));
client.setCredentialsProvider(credsProvider); client.setCredentialsProvider(credsProvider);
this.server = new HttpSolrServer("http://" + this.host + ":" + this.port + this.solrpath, client); s = new HttpSolrServer("http://" + this.host + ":" + this.port + this.solrpath, client);
} else { } else {
this.server = new HttpSolrServer(this.solrurl); s = new HttpSolrServer(this.solrurl);
} }
this.server.setAllowCompression(true); s.setAllowCompression(true);
this.server.setConnectionTimeout(60000); s.setConnectionTimeout(60000);
this.server.setMaxRetries(1); // Solr-Doc: No more than 1 recommended (depreciated) s.setMaxRetries(1); // Solr-Doc: No more than 1 recommended (depreciated)
this.server.setSoTimeout(60000); s.setSoTimeout(60000);
super.init(s);
} }
@Override
public synchronized void close() {
try {
this.server.commit();
} catch (SolrServerException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
@Override
public long getSize() {
try {
final SolrDocumentList list = get("*:*", 0, 1);
return list.getNumFound();
} catch (final Throwable e) {
Log.logException(e);
return 0;
}
}
/**
* delete everything in the solr index
* @throws IOException
*/
@Override
public void clear() throws IOException {
try {
this.server.deleteByQuery("*:*");
this.server.commit();
} catch (final Throwable e) {
throw new IOException(e);
}
}
@Override
public void delete(final String id) throws IOException {
try {
this.server.deleteById(id);
} catch (final Throwable e) {
throw new IOException(e);
}
}
@Override
public void delete(final List<String> ids) throws IOException {
try {
this.server.deleteById(ids);
} catch (final Throwable e) {
throw new IOException(e);
}
}
@Override
public boolean exists(final String id) throws IOException {
try {
final SolrDocumentList list = get(SolrField.id.getSolrFieldName() + ":" + id, 0, 1);
return list.getNumFound() > 0;
} catch (final Throwable e) {
Log.logException(e);
return false;
}
}
public void add(final File file, final String solrId) throws IOException {
final ContentStreamUpdateRequest up = new ContentStreamUpdateRequest("/update/extract");
up.addFile(file);
up.setParam("literal.id", solrId);
up.setParam("uprefix", "attr_");
up.setParam("fmap.content", "attr_content");
//up.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
try {
this.server.request(up);
this.server.commit();
} catch (final Throwable e) {
throw new IOException(e);
}
}
@Override
public void add(final SolrDoc solrdoc) throws IOException, SolrException {
try {
this.server.add(solrdoc,180000); // commitWithIn 180s
//this.server.commit();
} catch (SolrServerException e) {
Log.logWarning("SolrConnector", e.getMessage() + " DOC=" + solrdoc.toString());
throw new IOException(e);
}
}
@Override
public void add(final Collection<SolrDoc> solrdocs) throws IOException, SolrException {
ArrayList<SolrInputDocument> l = new ArrayList<SolrInputDocument>();
for (SolrDoc d: solrdocs) l.add(d);
try {
this.server.add(l,180000); // commitWithIn 120s
//this.server.commit();
} catch (SolrServerException e) {
Log.logWarning("SolrConnector", e.getMessage() + " DOC=" + solrdocs.toString());
throw new IOException(e);
}
}
/**
* get a query result from solr
* to get all results set the query String to "*:*"
* @param querystring
* @throws IOException
*/
@Override
public SolrDocumentList get(final String querystring, final int offset, final int count) throws IOException {
// construct query
final SolrQuery query = new SolrQuery();
query.setQuery(querystring);
query.setRows(count);
query.setStart(offset);
//query.addSortField( "price", SolrQuery.ORDER.asc );
// query the server
//SearchResult result = new SearchResult(count);
try {
final QueryResponse rsp = this.server.query( query );
final SolrDocumentList docs = rsp.getResults();
return docs;
// add the docs into the YaCy search result container
/*
for (SolrDocument doc: docs) {
result.put(element)
}
*/
} catch (final Throwable e) {
throw new IOException(e);
}
//return result;
}
public String getAdminInterface() { public String getAdminInterface() {
final InetAddress localhostExternAddress = Domains.myPublicLocalIP(); final InetAddress localhostExternAddress = Domains.myPublicLocalIP();
final String localhostExtern = localhostExternAddress == null ? "127.0.0.1" : localhostExternAddress.getHostAddress(); final String localhostExtern = localhostExternAddress == null ? "127.0.0.1" : localhostExternAddress.getHostAddress();

@ -351,6 +351,7 @@ public class Network
} }
} }
} catch ( final Exception e ) { } catch ( final Exception e ) {
Log.logException(e);
log.logSevere( log.logSevere(
"publishThread: error with target seed " + this.seed.toString() + ": " + e.getMessage(), "publishThread: error with target seed " + this.seed.toString() + ": " + e.getMessage(),
e); e);

@ -0,0 +1,58 @@
/**
* EmbeddedSolrConnector
* Copyright 2012 by Michael Peter Christen
* First released 21.06.2012 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.search.solr;
import java.io.File;
import java.io.IOException;
import javax.xml.parsers.ParserConfigurationException;
import net.yacy.cora.services.federated.solr.AbstractSolrConnector;
import net.yacy.cora.services.federated.solr.SolrConnector;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.core.CoreContainer;
import org.xml.sax.SAXException;
/**
 * A {@link SolrConnector} that talks to a solr core running inside this
 * process through an {@link EmbeddedSolrServer}, without a server in between.
 */
public class EmbeddedSolrConnector extends AbstractSolrConnector implements SolrConnector {

    /** Name of the core that is requested from the container. */
    private static final String CORE_NAME = "metadata";

    /** The container created by this connector; shut down in {@link #close()}. */
    private final CoreContainer core;

    /**
     * Create the core container and connect an embedded server to it.
     * @param storagePath passed to the container as its directory (absolute path is used)
     * @param configFile the configuration file handed to the container
     * @throws IOException if the container could not be created because the
     *         configuration could not be parsed; the parser exception is the cause
     */
    public EmbeddedSolrConnector(File storagePath, File configFile) throws IOException {
        super();
        try {
            this.core = new CoreContainer(storagePath.getAbsolutePath(), configFile);
        } catch (ParserConfigurationException e) {
            throw new IOException(e.getMessage(), e);
        } catch (SAXException e) {
            throw new IOException(e.getMessage(), e);
        }
        super.init(new EmbeddedSolrServer(this.core, CORE_NAME));
    }

    /**
     * Commit pending changes and shut down the core container.
     * The container is shut down even if the commit fails with a runtime
     * exception, so that it is not left open.
     */
    @Override
    public void close() {
        try {
            super.close();
        } finally {
            this.core.shutdown();
        }
    }
}
Loading…
Cancel
Save