From b9dfca4b0a55a6070e7fac89594389e2d3ca5277 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Mon, 25 Jun 2012 11:34:38 +0200 Subject: [PATCH] - fixed IndexFederated Servlet / an embedded Solr can now be selected - added code stub for an embedded Solr but generation of Solr store is still commented out (it works but is not yet ready for usage) --- defaults/yacy.init | 7 ++- htroot/IndexControlRWIs_p.java | 4 +- htroot/IndexFederated_p.html | 22 ++++----- htroot/IndexFederated_p.java | 31 +++++++------ source/de/anomic/crawler/CrawlQueues.java | 8 ++-- source/net/yacy/search/Switchboard.java | 46 ++++++++++--------- .../yacy/search/index/MetadataRepository.java | 31 +++++++++---- source/net/yacy/search/index/Segment.java | 17 +++++-- .../net/yacy/search/query/SnippetProcess.java | 2 +- .../search/solr/EmbeddedSolrConnector.java | 3 -- 10 files changed, 100 insertions(+), 71 deletions(-) diff --git a/defaults/yacy.init b/defaults/yacy.init index b09bed54f..38de012bb 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -1047,12 +1047,17 @@ color_searchurlhover = #008000 # - extract the solr (3.1) package, 'cd example' and start solr with 'java -jar start.jar' # - start yacy and then start a crawler. The crawler will fill both, YaCy and solr indexes. 
# - to check whats in solr after indexing, open http://localhost:8983/solr/admin/ -federated.service.yacy.indexing.enabled = true federated.service.solr.indexing.enabled = false federated.service.solr.indexing.url = http://127.0.0.1:8983/solr federated.service.solr.indexing.sharding = MODULO_HOST_MD5 federated.service.solr.indexing.schemefile = solr.keys.default.list +# the indexing engine in YaCy can be switched off or on +# (off may make sense if federated.service.solr.indexing.enabled = true) +# for experiments the value federated.service.yacy.indexing.engine = solr may be used +# allowed values are: classic, solr, off +federated.service.yacy.indexing.engine = classic + # RDF triplestore settings triplestore.persistent = true diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java index 4a8733f12..aa1997815 100644 --- a/htroot/IndexControlRWIs_p.java +++ b/htroot/IndexControlRWIs_p.java @@ -92,7 +92,7 @@ public class IndexControlRWIs_p prop.put("keyhash", ""); prop.put("result", ""); prop.put("cleanup", post == null || post.containsKey("maxReferencesLimit") ? 1 : 0); - prop.put("cleanup_solr", sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr() == null + prop.put("cleanup_solr", sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr() == null || !sb.getConfigBool("federated.service.solr.indexing.enabled", false) ? 
0 : 1); String segmentName = sb.getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default"); @@ -180,7 +180,7 @@ public class IndexControlRWIs_p if ( post.get("deleteSolr", "").equals("on") && sb.getConfigBool("federated.service.solr.indexing.enabled", false) ) { try { - sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr().clear(); + sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr().clear(); } catch ( final Exception e ) { Log.logException(e); } diff --git a/htroot/IndexFederated_p.html b/htroot/IndexFederated_p.html index 9089f0029..dbf4cad51 100644 --- a/htroot/IndexFederated_p.html +++ b/htroot/IndexFederated_p.html @@ -21,22 +21,16 @@
- - + The built-in search index can be 'classic' (as before YaCy 1.03), 'solr' (experimental since 1.03), or 'off' (useful only if a remote solr index is used) - You can just switch on or off this index. If you switch it off, you will not be able to search with YaCy any more. 
- - -
- - - - - Experimental embedded solr index. +
+
embedded 'classic' search index
+
embedded solr search index
+
no local index
+
+
- - +
diff --git a/htroot/IndexFederated_p.java b/htroot/IndexFederated_p.java index 0d1094a4d..79ce3dbc4 100644 --- a/htroot/IndexFederated_p.java +++ b/htroot/IndexFederated_p.java @@ -30,17 +30,17 @@ import java.util.Iterator; import net.yacy.cora.document.UTF8; import net.yacy.cora.protocol.RequestHeader; -import net.yacy.cora.services.federated.solr.SolrConnector; -import net.yacy.cora.services.federated.solr.ShardSolrConnector; import net.yacy.cora.services.federated.solr.ShardSelection; +import net.yacy.cora.services.federated.solr.ShardSolrConnector; import net.yacy.cora.services.federated.solr.SingleSolrConnector; +import net.yacy.cora.services.federated.solr.SolrConnector; import net.yacy.cora.storage.ConfigurationSet; import net.yacy.kelondro.logging.Log; import net.yacy.search.Switchboard; import net.yacy.search.index.Segments; +import net.yacy.search.index.SolrField; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; -import net.yacy.search.index.SolrField; public class IndexFederated_p { @@ -51,11 +51,12 @@ public class IndexFederated_p { if (post != null && post.containsKey("set")) { // yacy - env.setConfig("federated.service.yacy.indexing.enabled", post.getBoolean("yacy.indexing.enabled", false)); + String localindex = post.get("yacy.indexing", "off"); + env.setConfig("federated.service.yacy.indexing.engine", localindex); // solr final boolean solrWasOn = env.getConfigBool("federated.service.solr.indexing.enabled", true); - final boolean solrIsOnAfterwards = post.getBoolean("solr.indexing.enabled", false); + final boolean solrIsOnAfterwards = post.getBoolean("solr.indexing.solrremote", false); env.setConfig("federated.service.solr.indexing.enabled", solrIsOnAfterwards); String solrurls = post.get("solr.indexing.url", env.getConfig("federated.service.solr.indexing.url", "http://127.0.0.1:8983/solr")); final BufferedReader r = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(UTF8.getBytes(solrurls)))); @@ -81,18 
+82,18 @@ public class IndexFederated_p { if (solrWasOn) { // switch off - sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr().close(); - sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectSolr(null); + sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr().close(); + sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectRemoteSolr(null); } if (solrIsOnAfterwards) { // switch on final boolean usesolr = sb.getConfigBool("federated.service.solr.indexing.enabled", false) & solrurls.length() > 0; try { - sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectSolr((usesolr) ? new ShardSolrConnector(solrurls, ShardSelection.Method.MODULO_HOST_MD5, 10000, true) : null); + sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectRemoteSolr((usesolr) ? new ShardSolrConnector(solrurls, ShardSelection.Method.MODULO_HOST_MD5, 10000, true) : null); } catch (final IOException e) { Log.logException(e); - sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectSolr(null); + sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectRemoteSolr(null); } } @@ -127,11 +128,11 @@ public class IndexFederated_p { } // show solr host table - if (sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr() == null) { + if (sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr() == null) { prop.put("table", 0); } else { prop.put("table", 1); - final SolrConnector solr = sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr(); + final SolrConnector solr = sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr(); final long[] size = (solr instanceof ShardSolrConnector) ? ((ShardSolrConnector) solr).getSizeList() : new long[]{((SingleSolrConnector) solr).getSize()}; final String[] urls = (solr instanceof ShardSolrConnector) ? 
((ShardSolrConnector) solr).getAdminInterfaceList() : new String[]{((SingleSolrConnector) solr).getAdminInterface()}; boolean dark = false; @@ -171,8 +172,12 @@ public class IndexFederated_p { prop.put("scheme", c); // fill attribute fields - prop.put("yacy.indexing.enabled.checked", env.getConfigBool("federated.service.yacy.indexing.enabled", true) ? 1 : 0); - prop.put("solr.indexing.enabled.checked", env.getConfigBool("federated.service.solr.indexing.enabled", false) ? 1 : 0); + // allowed values are: classic, solr, off + // federated.service.yacy.indexing.engine = classic + prop.put("yacy.indexing.engine.classic.checked", env.getConfig("federated.service.yacy.indexing.engine", "classic").equals("classic") ? 1 : 0); + prop.put("yacy.indexing.engine.solr.checked", env.getConfig("federated.service.yacy.indexing.engine", "classic").equals("solr") ? 1 : 0); + prop.put("yacy.indexing.engine.off.checked", env.getConfig("federated.service.yacy.indexing.engine", "classic").equals("off") ? 1 : 0); + prop.put("solr.indexing.solrremote.checked", env.getConfigBool("federated.service.solr.indexing.enabled", false) ? 
1 : 0); prop.put("solr.indexing.url", env.getConfig("federated.service.solr.indexing.url", "http://127.0.0.1:8983/solr").replace(",", "\n")); prop.put("solr.indexing.sharding", env.getConfig("federated.service.solr.indexing.sharding", "modulo-host-md5")); prop.put("solr.indexing.schemefile", schemename); diff --git a/source/de/anomic/crawler/CrawlQueues.java b/source/de/anomic/crawler/CrawlQueues.java index b11859dbc..319854f13 100644 --- a/source/de/anomic/crawler/CrawlQueues.java +++ b/source/de/anomic/crawler/CrawlQueues.java @@ -82,8 +82,8 @@ public class CrawlQueues { this.log.logConfig("Starting Crawling Management"); this.noticeURL = new NoticedURL(queuePath, sb.peers.myBotIDs(), sb.useTailCache, sb.exceed134217727); FileUtils.deletedelete(new File(queuePath, ERROR_DB_FILENAME)); - this.errorURL = new ZURL(sb.indexSegments.segment(PROCESS).getSolr(), sb.solrScheme, queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727); - this.delegatedURL = new ZURL(sb.indexSegments.segment(PROCESS).getSolr(), sb.solrScheme, queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727); + this.errorURL = new ZURL(sb.indexSegments.segment(PROCESS).getRemoteSolr(), sb.solrScheme, queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727); + this.delegatedURL = new ZURL(sb.indexSegments.segment(PROCESS).getRemoteSolr(), sb.solrScheme, queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727); } public void relocate(final File newQueuePath) { @@ -94,8 +94,8 @@ public class CrawlQueues { this.noticeURL = new NoticedURL(newQueuePath, this.sb.peers.myBotIDs(), this.sb.useTailCache, this.sb.exceed134217727); FileUtils.deletedelete(new File(newQueuePath, ERROR_DB_FILENAME)); - this.errorURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getSolr(), this.sb.solrScheme, newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727); - this.delegatedURL = new 
ZURL(this.sb.indexSegments.segment(PROCESS).getSolr(), this.sb.solrScheme, newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727); + this.errorURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getRemoteSolr(), this.sb.solrScheme, newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727); + this.delegatedURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getRemoteSolr(), this.sb.solrScheme, newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727); } public synchronized void close() { diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 6d1045419..7c9d80ccf 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -408,15 +408,16 @@ public final class Switchboard extends serverSwitch final String solrurls = getConfig("federated.service.solr.indexing.url", "http://127.0.0.1:8983/solr"); final boolean usesolr = getConfigBool("federated.service.solr.indexing.enabled", false) & solrurls.length() > 0; - try { - this.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectSolr( - (usesolr) ? 
new ShardSolrConnector( - solrurls, - ShardSelection.Method.MODULO_HOST_MD5, - 10000, true) : null); - } catch ( final IOException e ) { - Log.logException(e); - this.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectSolr(null); + if (usesolr && solrurls != null && solrurls.length() > 0) { + try { + this.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectRemoteSolr( + new ShardSolrConnector( + solrurls, + ShardSelection.Method.MODULO_HOST_MD5, + 10000, true)); + } catch ( final IOException e ) { + Log.logException(e); + } } // initialize network database @@ -2435,8 +2436,18 @@ public final class Switchboard extends serverSwitch public indexingQueueEntry condenseDocument(final indexingQueueEntry in) { in.queueEntry.updateStatus(Response.QUEUE_STATE_CONDENSING); - if ( this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr() != null - && getConfigBool("federated.service.solr.indexing.enabled", false)/*in.queueEntry.profile().pushSolr()*/) { + if ( !in.queueEntry.profile().indexText() && !in.queueEntry.profile().indexMedia() ) { + if ( this.log.isInfo() ) { + this.log.logInfo("Not Condensed Resource '" + + in.queueEntry.url().toNormalform(false, true) + + "': indexing not wanted by crawl profile"); + } + return new indexingQueueEntry(in.process, in.queueEntry, in.documents, null); + } + + boolean localSolr = this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getLocalSolr() != null && getConfig("federated.service.yacy.indexing.engine", "classic").equals("solr"); + boolean remoteSolr = this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr() != null && getConfigBool("federated.service.solr.indexing.enabled", false); + if (localSolr || remoteSolr) { // send the documents to solr for ( final Document doc : in.documents ) { try { @@ -2455,7 +2466,8 @@ public final class Switchboard extends serverSwitch } try { SolrDoc solrDoc = this.solrScheme.yacy2solr(id, in.queueEntry.getResponseHeader(), doc); - 
this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr().add(solrDoc); + if (localSolr) this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getLocalSolr().add(solrDoc); + if (remoteSolr) this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr().add(solrDoc); } catch ( final IOException e ) { Log.logWarning( "SOLR", @@ -2472,7 +2484,7 @@ public final class Switchboard extends serverSwitch } // check if we should accept the document for our index - if ( !getConfigBool("federated.service.yacy.indexing.enabled", false) ) { + if (!getConfig("federated.service.yacy.indexing.engine", "classic").equals("classic")) { if ( this.log.isInfo() ) { this.log.logInfo("Not Condensed Resource '" + in.queueEntry.url().toNormalform(false, true) @@ -2480,14 +2492,6 @@ public final class Switchboard extends serverSwitch } return new indexingQueueEntry(in.process, in.queueEntry, in.documents, null); } - if ( !in.queueEntry.profile().indexText() && !in.queueEntry.profile().indexMedia() ) { - if ( this.log.isInfo() ) { - this.log.logInfo("Not Condensed Resource '" - + in.queueEntry.url().toNormalform(false, true) - + "': indexing not wanted by crawl profile"); - } - return new indexingQueueEntry(in.process, in.queueEntry, in.documents, null); - } final List doclist = new ArrayList(); // check which files may take part in the indexing process diff --git a/source/net/yacy/search/index/MetadataRepository.java b/source/net/yacy/search/index/MetadataRepository.java index 0ce804a23..6ae2f93eb 100644 --- a/source/net/yacy/search/index/MetadataRepository.java +++ b/source/net/yacy/search/index/MetadataRepository.java @@ -61,6 +61,8 @@ import net.yacy.kelondro.table.SplitTable; import net.yacy.kelondro.util.MemoryControl; import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist.BlacklistType; +import net.yacy.search.Switchboard; +import net.yacy.search.solr.EmbeddedSolrConnector; import de.anomic.crawler.CrawlStacker; public final class 
MetadataRepository implements /*Metadata,*/ Iterable { @@ -71,7 +73,7 @@ public final class MetadataRepository implements /*Metadata,*/ Iterable private final File location; private final String tablename; private ArrayList statsDump; - private SolrConnector solr; + private SolrConnector localSolr, remoteSolr; public MetadataRepository( final File path, @@ -85,15 +87,27 @@ public final class MetadataRepository implements /*Metadata,*/ Iterable this.urlIndexFile = backupIndex; //new Cache(backupIndex, 20000000, 20000000); this.exportthread = null; // will have a export thread assigned if exporter is running this.statsDump = null; - this.solr = null; + this.remoteSolr = null; + this.localSolr = null; } - public void connectSolr(final SolrConnector solr) { - this.solr = solr; + public void connectRemoteSolr(final SolrConnector solr) { + this.remoteSolr = solr; } - public SolrConnector getSolr() { - return this.solr; + public void connectLocalSolr() throws IOException { + File solrLocation = this.location; + if (solrLocation.getName().equals("default")) solrLocation = solrLocation.getParentFile(); + solrLocation = new File(solrLocation, "solr"); + this.localSolr = new EmbeddedSolrConnector(solrLocation, new File(new File(Switchboard.getSwitchboard().appPath,"defaults"), "solr")); + } + + public SolrConnector getRemoteSolr() { + return this.remoteSolr; + } + + public SolrConnector getLocalSolr() { + return this.localSolr; } public void clearCache() { @@ -123,7 +137,8 @@ public final class MetadataRepository implements /*Metadata,*/ Iterable this.urlIndexFile.close(); this.urlIndexFile = null; } - if (this.solr != null) this.solr.close(); + if (this.remoteSolr != null) this.remoteSolr.close(); + if (this.localSolr != null) this.localSolr.close(); } public int writeCacheSize() { @@ -207,7 +222,7 @@ public final class MetadataRepository implements /*Metadata,*/ Iterable public boolean exists(final byte[] urlHash) { if (urlHash == null) return false; try { - if (this.solr 
!= null && this.solr.exists(ASCII.String(urlHash))) { + if (this.remoteSolr != null && this.remoteSolr.exists(ASCII.String(urlHash))) { return true; } } catch (final Throwable e) { diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index 210a3f280..cbf35264d 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -152,14 +152,23 @@ public class Segment { // create LURL-db this.urlMetadata = new MetadataRepository(segmentPath, "text.urlmd", useTailCache, exceed134217727); + //this.connectLocalSolr(); } - public void connectSolr(final SolrConnector solr) { - this.urlMetadata.connectSolr(solr); + public void connectRemoteSolr(final SolrConnector solr) { + this.urlMetadata.connectRemoteSolr(solr); } - public SolrConnector getSolr() { - return this.urlMetadata.getSolr(); + public void connectLocalSolr() throws IOException { + this.urlMetadata.connectLocalSolr(); + } + + public SolrConnector getRemoteSolr() { + return this.urlMetadata.getRemoteSolr(); + } + + public SolrConnector getLocalSolr() { + return this.urlMetadata.getLocalSolr(); } public MetadataRepository urlMetadata() { diff --git a/source/net/yacy/search/query/SnippetProcess.java b/source/net/yacy/search/query/SnippetProcess.java index b5d8f8611..16fa751da 100644 --- a/source/net/yacy/search/query/SnippetProcess.java +++ b/source/net/yacy/search/query/SnippetProcess.java @@ -448,7 +448,7 @@ public class SnippetProcess { this.timeout = System.currentTimeMillis() + Math.max(1000, maxlifetime); this.neededResults = neededResults; this.shallrun = true; - this.solr = SnippetProcess.this.rankingProcess.getQuery().getSegment().getSolr(); + this.solr = SnippetProcess.this.rankingProcess.getQuery().getSegment().getRemoteSolr(); } @Override diff --git a/source/net/yacy/search/solr/EmbeddedSolrConnector.java b/source/net/yacy/search/solr/EmbeddedSolrConnector.java index 496f91574..89d982718 100644 --- 
a/source/net/yacy/search/solr/EmbeddedSolrConnector.java +++ b/source/net/yacy/search/solr/EmbeddedSolrConnector.java @@ -32,10 +32,7 @@ import net.yacy.cora.services.federated.solr.SolrDoc; import net.yacy.kelondro.logging.Log; import net.yacy.search.index.SolrField; -import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; -import org.apache.solr.client.solrj.embedded.JettySolrRunner; -import org.apache.solr.client.solrj.impl.HttpSolrServer; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.core.CoreContainer;