diff --git a/htroot/IndexControlURLs_p.html b/htroot/IndexControlURLs_p.html index 77abc40a3..c9d363ae9 100644 --- a/htroot/IndexControlURLs_p.html +++ b/htroot/IndexControlURLs_p.html @@ -125,6 +125,26 @@ function updatepage(str) { #(/statisticslines)# + #(dumprestore)#:: +
Sequential List of URL-Hashes: #{rows}# #{cols}##[urlHash]# #{/cols}# diff --git a/htroot/IndexControlURLs_p.java b/htroot/IndexControlURLs_p.java index 6b106b7bf..8fdc39b9c 100644 --- a/htroot/IndexControlURLs_p.java +++ b/htroot/IndexControlURLs_p.java @@ -29,6 +29,7 @@ import java.io.File; import java.io.IOException; import java.net.MalformedURLException; import java.util.Iterator; +import java.util.List; import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.document.ASCII; @@ -67,6 +68,12 @@ public class IndexControlURLs_p { prop.put("statistics_lines", 100); prop.put("statisticslines", 0); prop.put("reload", 0); + prop.put("indexdump", 0); + prop.put("lurlexport", 0); + prop.put("reload", 0); + prop.put("dumprestore", 1); + List dumpFiles = segment.fulltext().dumpFiles(); + prop.put("dumprestore_dumpfile", dumpFiles.size() == 0 ? "" : dumpFiles.get(dumpFiles.size() - 1).getAbsolutePath()); // show export messages final Fulltext.Export export = segment.fulltext().export(); @@ -127,8 +134,6 @@ public class IndexControlURLs_p { if (post.containsKey("urlhashdeleteall")) { int i = segment.removeAllUrlReferences(urlhash.getBytes(), sb.loader, CacheStrategy.IFEXIST); prop.put("result", "Deleted URL and " + i + " references from " + i + " word indexes."); - prop.put("lurlexport", 0); - prop.put("reload", 0); } if (post.containsKey("urlhashdelete")) { @@ -141,8 +146,6 @@ public class IndexControlURLs_p { sb.urlRemove(segment, urlhash.getBytes()); prop.putHTML("result", "Removed URL " + urlstring); } - prop.put("lurlexport", 0); - prop.put("reload", 0); } if (post.containsKey("urldelete")) { @@ -157,8 +160,6 @@ public class IndexControlURLs_p { sb.urlRemove(segment, urlhash.getBytes()); prop.putHTML("result", "Removed URL " + urlstring); } - prop.put("lurlexport", 0); - prop.put("reload", 0); } if (post.containsKey("urlstringsearch")) { @@ -179,8 +180,6 @@ public class IndexControlURLs_p { prop.putHTML("result", "bad url: " + urlstring); prop.put("urlhash", 
""); } - prop.put("lurlexport", 0); - prop.put("reload", 0); } if (post.containsKey("urlhashsearch")) { @@ -192,8 +191,6 @@ public class IndexControlURLs_p { prop.putAll(genUrlProfile(segment, entry, urlhash)); prop.put("statistics", 0); } - prop.put("lurlexport", 0); - prop.put("reload", 0); } // generate list @@ -218,8 +215,6 @@ public class IndexControlURLs_p { prop.put("statistics", 0); prop.put("urlhashsimilar_rows", rows); prop.put("result", result.toString()); - prop.put("lurlexport", 0); - prop.put("reload", 0); } if (post.containsKey("lurlexport")) { @@ -232,23 +227,34 @@ public class IndexControlURLs_p { if (fname.endsWith("rss")) format = 2; // extend export file name - String s = post.get("exportfile", ""); - if (s.indexOf('.',0) < 0) { - if (format == 0) s = s + ".txt"; - if (format == 1) s = s + ".html"; - if (format == 2) s = s + ".xml"; - } - final File f = new File(s); - f.getParentFile().mkdirs(); - final String filter = post.get("exportfilter", ".*"); - final Fulltext.Export running = segment.fulltext().export(f, filter, null, format, dom); + String s = post.get("exportfile", ""); + if (s.indexOf('.',0) < 0) { + if (format == 0) s = s + ".txt"; + if (format == 1) s = s + ".html"; + if (format == 2) s = s + ".xml"; + } + final File f = new File(s); + f.getParentFile().mkdirs(); + final String filter = post.get("exportfilter", ".*"); + final Fulltext.Export running = segment.fulltext().export(f, filter, null, format, dom); - prop.put("lurlexport_exportfile", s); - prop.put("lurlexport_urlcount", running.count()); - if ((running != null) && (running.failed() == null)) { - prop.put("lurlexport", 2); - } - prop.put("reload", 1); + prop.put("lurlexport_exportfile", s); + prop.put("lurlexport_urlcount", running.count()); + if ((running != null) && (running.failed() == null)) { + prop.put("lurlexport", 2); + } + prop.put("reload", 1); + } + + if (post.containsKey("indexdump")) { + final File dump = segment.fulltext().dumpSolr(); + prop.put("indexdump", 
1); + prop.put("indexdump_dumpfile", dump.getAbsolutePath()); + } + + if (post.containsKey("indexrestore")) { + final File dump = new File(post.get("dumpfile", "")); + segment.fulltext().restoreSolr(dump); } if (post.containsKey("deletedomain")) { @@ -261,7 +267,6 @@ public class IndexControlURLs_p { } // trigger the loading of the table post.put("statistics", ""); - prop.put("reload", 0); } if (post.containsKey("statistics")) { @@ -289,8 +294,6 @@ public class IndexControlURLs_p { } prop.put("statisticslines_domains", cnt); prop.put("statisticslines", 1); - prop.put("lurlexport", 0); - prop.put("reload", 0); } // insert constants diff --git a/htroot/IndexFederated_p.html b/htroot/IndexFederated_p.html index c0e3741b6..3d5de5b18 100644 --- a/htroot/IndexFederated_p.html +++ b/htroot/IndexFederated_p.html @@ -23,7 +23,7 @@ Local Search Index - This is an experimental switchboard to test an index migration from embedded metadata to embedded solr. The 'classic' configuration is rwi + metadata switched on. The future configuration is rwi + solr switched on. + This is a switchboard for the usage of embedded metadata to embedded solr. The rwi index is necessary for index transmission and shall be switched off in future portalmode configurations. 
url metadata and embedded solr fulltext search index, interface at: /solr/select?q=*:*&start=0&rows=10 diff --git a/source/net/yacy/cora/services/federated/solr/EmbeddedSolrConnector.java b/source/net/yacy/cora/services/federated/solr/EmbeddedSolrConnector.java index ec406cd95..fae35aaa6 100644 --- a/source/net/yacy/cora/services/federated/solr/EmbeddedSolrConnector.java +++ b/source/net/yacy/cora/services/federated/solr/EmbeddedSolrConnector.java @@ -60,10 +60,12 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo private final SolrCore defaultCore; protected SolrRequestParsers adminRequestParser; private final SearchHandler requestHandler; + private final File storagePath; public EmbeddedSolrConnector(File storagePath, File solr_config) throws IOException { super(); // copy the solrconfig.xml to the storage path + this.storagePath = storagePath; File conf = new File(storagePath, "conf"); conf.mkdirs(); File source, target; @@ -106,6 +108,10 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo super.init(new EmbeddedSolrServer(this.cores, this.defaultCoreName)); } + public File getStoragePath() { /* accessor: returns the storagePath argument that the constructor stored in this.storagePath */ + return this.storagePath; + } + public SolrCore getCore() { return this.defaultCore; } diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java index 09b622ba0..dfc8fea67 100644 --- a/source/net/yacy/search/index/Fulltext.java +++ b/source/net/yacy/search/index/Fulltext.java @@ -30,9 +30,11 @@ import java.io.PrintWriter; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; +import java.util.List; import java.util.Map; import java.util.TreeSet; +import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.document.ASCII; import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.order.CloneableIterator; @@ -43,6 +45,8 @@ import net.yacy.cora.services.federated.yacy.YaCySchema; import net.yacy.cora.sorting.ConcurrentScoreMap; 
import net.yacy.cora.sorting.ScoreMap;
 import net.yacy.cora.storage.HandleSet;
+import net.yacy.cora.storage.ZIPReader;
+import net.yacy.cora.storage.ZIPWriter;
 import net.yacy.cora.util.SpaceExceededException;
 import net.yacy.document.parser.html.CharacterCoding;
 import net.yacy.kelondro.data.meta.DigestURI;
@@ -447,7 +451,85 @@ public final class Fulltext implements Iterable {
             }
         };
     }
-
+
+    /**
+     * List the solr dump archives that lie beside the embedded solr storage directory.
+     * The parent directory of the storage path is scanned for file names ending with "zip";
+     * the names are sorted because File.list() promises no particular order.
+     * @return the matching files in ascending name order, an empty list if the directory cannot be listed
+     */
+    public List<File> dumpFiles() {
+        EmbeddedSolrConnector esc = (EmbeddedSolrConnector) this.solr.getSolr0();
+        File dumpDir = esc.getStoragePath().getParentFile();
+        List<File> zips = new ArrayList<File>();
+        // File.list() returns null if the path is no directory or cannot be read
+        String[] names = dumpDir == null ? null : dumpDir.list();
+        if (names == null) return zips;
+        java.util.Arrays.sort(names);
+        for (String name: names) {
+            if (name.endsWith("zip")) zips.add(new File(dumpDir, name));
+        }
+        return zips;
+    }
+
+    /**
+     * Create a dump of the embedded solr index: the storage directory is zipped into a file
+     * named [storage path]_[GenericFormatter.SHORT_DAY_FORMATTER.format()].zip.
+     * The local solr is disconnected before zipping and re-connected afterwards with the previous commit-within value.
+     * An IOException raised while zipping or re-connecting is logged, not thrown.
+     * @return the target file of the dump; it is returned even if writing the archive failed
+     */
+    public File dumpSolr() {
+        EmbeddedSolrConnector esc = (EmbeddedSolrConnector) this.solr.getSolr0();
+        int commitWithin = esc.getCommitWithinMs();
+        File storagePath = esc.getStoragePath();
+        this.disconnectLocalSolr();
+        File zipOut = new File(storagePath.toString() + "_" + GenericFormatter.SHORT_DAY_FORMATTER.format() + ".zip");
+        try {
+            ZIPWriter.zip(storagePath, zipOut);
+        } catch (IOException e) {
+            Log.logException(e);
+        } finally {
+            try {
+                this.connectLocalSolr(commitWithin);
+            } catch (IOException e) {
+                Log.logException(e);
+            }
+        }
+        return zipOut;
+    }
+
+    /**
+     * Restore a solr dump into the storage directory of the embedded solr.
+     * The local solr is disconnected before the archive is unpacked and re-connected
+     * afterwards with the previous commit-within value. An IOException is logged, not thrown.
+     * A file that is null or not an existing regular file is reported to the log and leaves
+     * the solr connection untouched.
+     * @param solrDumpZipFile the zip archive to unpack into the storage path
+     */
+    public void restoreSolr(File solrDumpZipFile) {
+        if (solrDumpZipFile == null || !solrDumpZipFile.isFile()) {
+            // do not take the index offline for an archive that cannot be read
+            Log.logException(new java.io.FileNotFoundException("solr dump file not found: " + solrDumpZipFile));
+            return;
+        }
+        EmbeddedSolrConnector esc = (EmbeddedSolrConnector) this.solr.getSolr0();
+        int commitWithin = esc.getCommitWithinMs();
+        File storagePath = esc.getStoragePath();
+        this.disconnectLocalSolr();
+        try {
+            ZIPReader.unzip(solrDumpZipFile, storagePath);
+        } catch (IOException e) {
+            Log.logException(e);
+        } finally {
+            try {
+                this.connectLocalSolr(commitWithin);
+            } catch (IOException e) {
+                Log.logException(e);
+            }
+        }
+    }
+
     // export methods
     public Export export(final File f, final
String filter, final HandleSet set, final int format, final boolean dom) { if ((this.exportthread != null) && (this.exportthread.isAlive())) {