- fixed IndexFederated Servlet / an embedded Solr can now be selected

- added code stub for an embedded Solr but generation of Solr store is
still commented out (it works but is not yet ready for usage)
pull/1/head
Michael Peter Christen 13 years ago
parent cc1b6762bb
commit b9dfca4b0a

@ -1047,12 +1047,17 @@ color_searchurlhover = #008000
# - extract the solr (3.1) package, 'cd example' and start solr with 'java -jar start.jar'
# - start yacy and then start a crawler. The crawler will fill both, YaCy and solr indexes.
# - to check what's in solr after indexing, open http://localhost:8983/solr/admin/
federated.service.yacy.indexing.enabled = true
federated.service.solr.indexing.enabled = false
federated.service.solr.indexing.url = http://127.0.0.1:8983/solr
federated.service.solr.indexing.sharding = MODULO_HOST_MD5
federated.service.solr.indexing.schemefile = solr.keys.default.list
# the indexing engine in YaCy can be switched off or on
# (off may make sense if federated.service.solr.indexing.enabled = true)
# for experiments the value federated.service.yacy.indexing.engine = solr may be used
# allowed values are: classic, solr, off
federated.service.yacy.indexing.engine = classic
# RDF triplestore settings
triplestore.persistent = true

@ -92,7 +92,7 @@ public class IndexControlRWIs_p
prop.put("keyhash", "");
prop.put("result", "");
prop.put("cleanup", post == null || post.containsKey("maxReferencesLimit") ? 1 : 0);
prop.put("cleanup_solr", sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr() == null
prop.put("cleanup_solr", sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr() == null
|| !sb.getConfigBool("federated.service.solr.indexing.enabled", false) ? 0 : 1);
String segmentName = sb.getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default");
@ -180,7 +180,7 @@ public class IndexControlRWIs_p
if ( post.get("deleteSolr", "").equals("on")
&& sb.getConfigBool("federated.service.solr.indexing.enabled", false) ) {
try {
sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr().clear();
sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr().clear();
} catch ( final Exception e ) {
Log.logException(e);
}

@ -21,22 +21,16 @@
<form action="IndexFederated_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
<fieldset>
<legend>
<input type="checkbox" name="yacy.indexing.classic" id="yacy.indexing.classic" #(yacy.indexing.classic.checked)#:: checked="checked"#(/yacy.indexing.classic.checked)# />
<label for="p2p">Embedded 'Classic YaCy' Search Index</label>
The built-in search index can be 'classic' (as before YaCy 1.03), 'solr' (experimental since 1.03) or 'off' (useful only if a remote solr index is used)
</legend>
You can just switch on or off this index. If you switch it off, you will not be able to search with YaCy any more.
</fieldset>
<input type="submit" name="set" value="Set" />
<fieldset>
<legend>
<input type="checkbox" name="yacy.indexing.solrembedded" id=""yacy.indexing.solrembedded" #("yacy.indexing.solrembedded.checked)#:: checked="checked"#(/"yacy.indexing.solrembedded.checked)# />
<label for="p2p">Embedded Solr Search Index</label>
</legend>
Experimental embedded solr index.
<dl>
<dt><input type="radio" name="yacy.indexing" value="classic" id="yacy.indexing.engine.classic" #(yacy.indexing.engine.classic.checked)#:: checked="checked"#(/yacy.indexing.engine.classic.checked)# /></dt><dd>embedded 'classic' search index</dd>
<dt><input type="radio" name="yacy.indexing" value="solr" id="yacy.indexing.engine.solr" #(yacy.indexing.engine.solr.checked)#:: checked="checked"#(/yacy.indexing.engine.solr.checked)# /></dt><dd>embedded solr search index</dd>
<dt><input type="radio" name="yacy.indexing" value="off" id="yacy.indexing.engine.off" #(yacy.indexing.engine.off.checked)#:: checked="checked"#(/yacy.indexing.engine.off.checked)# /></dt><dd>no local index</dd>
<dt></dt><dd><input type="submit" name="set" value="Set" /></dd>
</dl>
</fieldset>
<input type="submit" name="set" value="Set" />
<fieldset>
<legend>
<input type="checkbox" name="solr.indexing.solrremote" id="solr.indexing.solrremote" #(solr.indexing.solrremote.checked)#:: checked="checked"#(/solr.indexing.solrremote.checked)# />

@ -30,17 +30,17 @@ import java.util.Iterator;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.services.federated.solr.SolrConnector;
import net.yacy.cora.services.federated.solr.ShardSolrConnector;
import net.yacy.cora.services.federated.solr.ShardSelection;
import net.yacy.cora.services.federated.solr.ShardSolrConnector;
import net.yacy.cora.services.federated.solr.SingleSolrConnector;
import net.yacy.cora.services.federated.solr.SolrConnector;
import net.yacy.cora.storage.ConfigurationSet;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import net.yacy.search.index.SolrField;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import net.yacy.search.index.SolrField;
public class IndexFederated_p {
@ -51,11 +51,12 @@ public class IndexFederated_p {
if (post != null && post.containsKey("set")) {
// yacy
env.setConfig("federated.service.yacy.indexing.enabled", post.getBoolean("yacy.indexing.enabled", false));
String localindex = post.get("yacy.indexing", "off");
env.setConfig("federated.service.yacy.indexing.engine", localindex);
// solr
final boolean solrWasOn = env.getConfigBool("federated.service.solr.indexing.enabled", true);
final boolean solrIsOnAfterwards = post.getBoolean("solr.indexing.enabled", false);
final boolean solrIsOnAfterwards = post.getBoolean("solr.indexing.solrremote", false);
env.setConfig("federated.service.solr.indexing.enabled", solrIsOnAfterwards);
String solrurls = post.get("solr.indexing.url", env.getConfig("federated.service.solr.indexing.url", "http://127.0.0.1:8983/solr"));
final BufferedReader r = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(UTF8.getBytes(solrurls))));
@ -81,18 +82,18 @@ public class IndexFederated_p {
if (solrWasOn) {
// switch off
sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr().close();
sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectSolr(null);
sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr().close();
sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectRemoteSolr(null);
}
if (solrIsOnAfterwards) {
// switch on
final boolean usesolr = sb.getConfigBool("federated.service.solr.indexing.enabled", false) & solrurls.length() > 0;
try {
sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectSolr((usesolr) ? new ShardSolrConnector(solrurls, ShardSelection.Method.MODULO_HOST_MD5, 10000, true) : null);
sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectRemoteSolr((usesolr) ? new ShardSolrConnector(solrurls, ShardSelection.Method.MODULO_HOST_MD5, 10000, true) : null);
} catch (final IOException e) {
Log.logException(e);
sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectSolr(null);
sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectRemoteSolr(null);
}
}
@ -127,11 +128,11 @@ public class IndexFederated_p {
}
// show solr host table
if (sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr() == null) {
if (sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr() == null) {
prop.put("table", 0);
} else {
prop.put("table", 1);
final SolrConnector solr = sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr();
final SolrConnector solr = sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr();
final long[] size = (solr instanceof ShardSolrConnector) ? ((ShardSolrConnector) solr).getSizeList() : new long[]{((SingleSolrConnector) solr).getSize()};
final String[] urls = (solr instanceof ShardSolrConnector) ? ((ShardSolrConnector) solr).getAdminInterfaceList() : new String[]{((SingleSolrConnector) solr).getAdminInterface()};
boolean dark = false;
@ -171,8 +172,12 @@ public class IndexFederated_p {
prop.put("scheme", c);
// fill attribute fields
prop.put("yacy.indexing.enabled.checked", env.getConfigBool("federated.service.yacy.indexing.enabled", true) ? 1 : 0);
prop.put("solr.indexing.enabled.checked", env.getConfigBool("federated.service.solr.indexing.enabled", false) ? 1 : 0);
// allowed values are: classic, solr, off
// federated.service.yacy.indexing.engine = classic
prop.put("yacy.indexing.engine.classic.checked", env.getConfig("federated.service.yacy.indexing.engine", "classic").equals("classic") ? 1 : 0);
prop.put("yacy.indexing.engine.solr.checked", env.getConfig("federated.service.yacy.indexing.engine", "classic").equals("solr") ? 1 : 0);
prop.put("yacy.indexing.engine.off.checked", env.getConfig("federated.service.yacy.indexing.engine", "classic").equals("off") ? 1 : 0);
prop.put("solr.indexing.solrremote.checked", env.getConfigBool("federated.service.solr.indexing.enabled", false) ? 1 : 0);
prop.put("solr.indexing.url", env.getConfig("federated.service.solr.indexing.url", "http://127.0.0.1:8983/solr").replace(",", "\n"));
prop.put("solr.indexing.sharding", env.getConfig("federated.service.solr.indexing.sharding", "modulo-host-md5"));
prop.put("solr.indexing.schemefile", schemename);

@ -82,8 +82,8 @@ public class CrawlQueues {
this.log.logConfig("Starting Crawling Management");
this.noticeURL = new NoticedURL(queuePath, sb.peers.myBotIDs(), sb.useTailCache, sb.exceed134217727);
FileUtils.deletedelete(new File(queuePath, ERROR_DB_FILENAME));
this.errorURL = new ZURL(sb.indexSegments.segment(PROCESS).getSolr(), sb.solrScheme, queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
this.delegatedURL = new ZURL(sb.indexSegments.segment(PROCESS).getSolr(), sb.solrScheme, queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
this.errorURL = new ZURL(sb.indexSegments.segment(PROCESS).getRemoteSolr(), sb.solrScheme, queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
this.delegatedURL = new ZURL(sb.indexSegments.segment(PROCESS).getRemoteSolr(), sb.solrScheme, queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
}
public void relocate(final File newQueuePath) {
@ -94,8 +94,8 @@ public class CrawlQueues {
this.noticeURL = new NoticedURL(newQueuePath, this.sb.peers.myBotIDs(), this.sb.useTailCache, this.sb.exceed134217727);
FileUtils.deletedelete(new File(newQueuePath, ERROR_DB_FILENAME));
this.errorURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getSolr(), this.sb.solrScheme, newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
this.delegatedURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getSolr(), this.sb.solrScheme, newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
this.errorURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getRemoteSolr(), this.sb.solrScheme, newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
this.delegatedURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getRemoteSolr(), this.sb.solrScheme, newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
}
public synchronized void close() {

@ -408,15 +408,16 @@ public final class Switchboard extends serverSwitch
final String solrurls = getConfig("federated.service.solr.indexing.url", "http://127.0.0.1:8983/solr");
final boolean usesolr = getConfigBool("federated.service.solr.indexing.enabled", false) & solrurls.length() > 0;
try {
this.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectSolr(
(usesolr) ? new ShardSolrConnector(
solrurls,
ShardSelection.Method.MODULO_HOST_MD5,
10000, true) : null);
} catch ( final IOException e ) {
Log.logException(e);
this.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectSolr(null);
if (usesolr && solrurls != null && solrurls.length() > 0) {
try {
this.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectRemoteSolr(
new ShardSolrConnector(
solrurls,
ShardSelection.Method.MODULO_HOST_MD5,
10000, true));
} catch ( final IOException e ) {
Log.logException(e);
}
}
// initialize network database
@ -2435,8 +2436,18 @@ public final class Switchboard extends serverSwitch
public indexingQueueEntry condenseDocument(final indexingQueueEntry in) {
in.queueEntry.updateStatus(Response.QUEUE_STATE_CONDENSING);
if ( this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr() != null
&& getConfigBool("federated.service.solr.indexing.enabled", false)/*in.queueEntry.profile().pushSolr()*/) {
if ( !in.queueEntry.profile().indexText() && !in.queueEntry.profile().indexMedia() ) {
if ( this.log.isInfo() ) {
this.log.logInfo("Not Condensed Resource '"
+ in.queueEntry.url().toNormalform(false, true)
+ "': indexing not wanted by crawl profile");
}
return new indexingQueueEntry(in.process, in.queueEntry, in.documents, null);
}
boolean localSolr = this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getLocalSolr() != null && getConfig("federated.service.yacy.indexing.engine", "classic").equals("solr");
boolean remoteSolr = this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr() != null && getConfigBool("federated.service.solr.indexing.enabled", false);
if (localSolr || remoteSolr) {
// send the documents to solr
for ( final Document doc : in.documents ) {
try {
@ -2455,7 +2466,8 @@ public final class Switchboard extends serverSwitch
}
try {
SolrDoc solrDoc = this.solrScheme.yacy2solr(id, in.queueEntry.getResponseHeader(), doc);
this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr().add(solrDoc);
if (localSolr) this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getLocalSolr().add(solrDoc);
if (remoteSolr) this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr().add(solrDoc);
} catch ( final IOException e ) {
Log.logWarning(
"SOLR",
@ -2472,7 +2484,7 @@ public final class Switchboard extends serverSwitch
}
// check if we should accept the document for our index
if ( !getConfigBool("federated.service.yacy.indexing.enabled", false) ) {
if (!getConfig("federated.service.yacy.indexing.engine", "classic").equals("classic")) {
if ( this.log.isInfo() ) {
this.log.logInfo("Not Condensed Resource '"
+ in.queueEntry.url().toNormalform(false, true)
@ -2480,14 +2492,6 @@ public final class Switchboard extends serverSwitch
}
return new indexingQueueEntry(in.process, in.queueEntry, in.documents, null);
}
if ( !in.queueEntry.profile().indexText() && !in.queueEntry.profile().indexMedia() ) {
if ( this.log.isInfo() ) {
this.log.logInfo("Not Condensed Resource '"
+ in.queueEntry.url().toNormalform(false, true)
+ "': indexing not wanted by crawl profile");
}
return new indexingQueueEntry(in.process, in.queueEntry, in.documents, null);
}
final List<Document> doclist = new ArrayList<Document>();
// check which files may take part in the indexing process

@ -61,6 +61,8 @@ import net.yacy.kelondro.table.SplitTable;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.solr.EmbeddedSolrConnector;
import de.anomic.crawler.CrawlStacker;
public final class MetadataRepository implements /*Metadata,*/ Iterable<byte[]> {
@ -71,7 +73,7 @@ public final class MetadataRepository implements /*Metadata,*/ Iterable<byte[]>
private final File location;
private final String tablename;
private ArrayList<HostStat> statsDump;
private SolrConnector solr;
private SolrConnector localSolr, remoteSolr;
public MetadataRepository(
final File path,
@ -85,15 +87,27 @@ public final class MetadataRepository implements /*Metadata,*/ Iterable<byte[]>
this.urlIndexFile = backupIndex; //new Cache(backupIndex, 20000000, 20000000);
this.exportthread = null; // will have a export thread assigned if exporter is running
this.statsDump = null;
this.solr = null;
this.remoteSolr = null;
this.localSolr = null;
}
public void connectSolr(final SolrConnector solr) {
this.solr = solr;
public void connectRemoteSolr(final SolrConnector solr) {
this.remoteSolr = solr;
}
public SolrConnector getSolr() {
return this.solr;
/**
 * Creates an embedded Solr connector and stores it as this repository's local Solr.
 * <p>
 * The index directory is a "solr" folder below this repository's location; when the
 * location is a directory named "default", the parent of that directory is used as
 * the base instead. The second constructor argument is the "defaults/solr" directory
 * below the application path of the running Switchboard.
 *
 * @throws IOException declared by this method; presumably raised when the embedded
 *         connector cannot be created (confirm against EmbeddedSolrConnector)
 */
public void connectLocalSolr() throws IOException {
    final boolean isDefaultSegment = this.location.getName().equals("default");
    final File baseDir = isDefaultSegment ? this.location.getParentFile() : this.location;
    final File indexDir = new File(baseDir, "solr");
    final File defaultsDir = new File(Switchboard.getSwitchboard().appPath, "defaults");
    final File solrDefaults = new File(defaultsDir, "solr");
    this.localSolr = new EmbeddedSolrConnector(indexDir, solrDefaults);
}
/**
 * Returns the remote Solr connector of this repository.
 *
 * @return the connector last passed to connectRemoteSolr, or null if none is set
 */
public SolrConnector getRemoteSolr() {
return this.remoteSolr;
}
/**
 * Returns the local (embedded) Solr connector of this repository.
 *
 * @return the connector created by connectLocalSolr, or null if it has not been created
 */
public SolrConnector getLocalSolr() {
return this.localSolr;
}
public void clearCache() {
@ -123,7 +137,8 @@ public final class MetadataRepository implements /*Metadata,*/ Iterable<byte[]>
this.urlIndexFile.close();
this.urlIndexFile = null;
}
if (this.solr != null) this.solr.close();
if (this.remoteSolr != null) this.remoteSolr.close();
if (this.localSolr != null) this.localSolr.close();
}
public int writeCacheSize() {
@ -207,7 +222,7 @@ public final class MetadataRepository implements /*Metadata,*/ Iterable<byte[]>
public boolean exists(final byte[] urlHash) {
if (urlHash == null) return false;
try {
if (this.solr != null && this.solr.exists(ASCII.String(urlHash))) {
if (this.remoteSolr != null && this.remoteSolr.exists(ASCII.String(urlHash))) {
return true;
}
} catch (final Throwable e) {

@ -152,14 +152,23 @@ public class Segment {
// create LURL-db
this.urlMetadata = new MetadataRepository(segmentPath, "text.urlmd", useTailCache, exceed134217727);
//this.connectLocalSolr();
}
public void connectSolr(final SolrConnector solr) {
this.urlMetadata.connectSolr(solr);
public void connectRemoteSolr(final SolrConnector solr) {
this.urlMetadata.connectRemoteSolr(solr);
}
public SolrConnector getSolr() {
return this.urlMetadata.getSolr();
/**
 * Connects the local Solr of this segment by delegating to the URL metadata repository.
 *
 * @throws IOException propagated from the metadata repository's connectLocalSolr
 */
public void connectLocalSolr() throws IOException {
this.urlMetadata.connectLocalSolr();
}
/**
 * Returns the remote Solr connector held by this segment's URL metadata repository.
 *
 * @return whatever the metadata repository's getRemoteSolr returns
 */
public SolrConnector getRemoteSolr() {
return this.urlMetadata.getRemoteSolr();
}
/**
 * Returns the local Solr connector held by this segment's URL metadata repository.
 *
 * @return whatever the metadata repository's getLocalSolr returns
 */
public SolrConnector getLocalSolr() {
return this.urlMetadata.getLocalSolr();
}
public MetadataRepository urlMetadata() {

@ -448,7 +448,7 @@ public class SnippetProcess {
this.timeout = System.currentTimeMillis() + Math.max(1000, maxlifetime);
this.neededResults = neededResults;
this.shallrun = true;
this.solr = SnippetProcess.this.rankingProcess.getQuery().getSegment().getSolr();
this.solr = SnippetProcess.this.rankingProcess.getQuery().getSegment().getRemoteSolr();
}
@Override

@ -32,10 +32,7 @@ import net.yacy.cora.services.federated.solr.SolrDoc;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.index.SolrField;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.core.CoreContainer;

Loading…
Cancel
Save