Used the new zip writer/reader to add a Solr dump process: the whole Solr index can be written to a zip dump and also restored during runtime.
Michael Peter Christen 13 years ago
parent 4a3e684f8c
commit 1b474139dd
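For orientation before the diff: the new Fulltext.dumpSolr() and Fulltext.restoreSolr(File) methods shown below delegate the actual archiving to the new helpers ZIPWriter.zip(storagePath, zipOut) and ZIPReader.unzip(solrDumpZipFile, storagePath). The following is a minimal, self-contained sketch of such a directory zip/unzip round trip using plain java.util.zip; it is an illustration only, not the YaCy ZIPWriter/ZIPReader code, and the names DumpSketch, zipDirectory and unzipTo are invented for this example.

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;

// illustration only: DumpSketch, zipDirectory and unzipTo are hypothetical names,
// not YaCy classes; they only mimic what ZIPWriter.zip / ZIPReader.unzip are used for here
public class DumpSketch {

    /** recursively pack the contents of dir into zipOut, entry names relative to dir */
    public static void zipDirectory(final File dir, final File zipOut) throws IOException {
        final ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(zipOut));
        try {
            addTree(dir, dir, zos);
        } finally {
            zos.close();
        }
    }

    private static void addTree(final File root, final File f, final ZipOutputStream zos) throws IOException {
        if (f.isDirectory()) {
            final File[] children = f.listFiles();
            if (children != null) for (final File c : children) addTree(root, c, zos);
            return;
        }
        // store the path of f relative to root, with '/' separators
        zos.putNextEntry(new ZipEntry(root.toURI().relativize(f.toURI()).getPath()));
        final InputStream in = new FileInputStream(f);
        try {
            copy(in, zos);
        } finally {
            in.close();
        }
        zos.closeEntry();
    }

    /** unpack a zip produced by zipDirectory into targetDir, creating directories as needed */
    public static void unzipTo(final File zipFile, final File targetDir) throws IOException {
        final ZipInputStream zis = new ZipInputStream(new FileInputStream(zipFile));
        try {
            ZipEntry entry;
            while ((entry = zis.getNextEntry()) != null) {
                final File out = new File(targetDir, entry.getName());
                if (entry.isDirectory()) { out.mkdirs(); continue; }
                out.getParentFile().mkdirs();
                final OutputStream os = new FileOutputStream(out);
                try {
                    copy(zis, os);
                } finally {
                    os.close();
                }
                zis.closeEntry();
            }
        } finally {
            zis.close();
        }
    }

    private static void copy(final InputStream in, final OutputStream out) throws IOException {
        final byte[] buffer = new byte[8192];
        int n;
        while ((n = in.read(buffer)) != -1) out.write(buffer, 0, n);
    }
}

Note that, as in the commit below, the embedded Solr core has to be disconnected before its data directory is zipped and reconnected afterwards, otherwise half-written segment files could end up in the archive.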

@@ -125,6 +125,26 @@ function updatepage(str) {
 </table>
 #(/statisticslines)#
+#(dumprestore)#::
+<form action="IndexControlURLs_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
+<fieldset><legend>Dump and Restore of Solr Index</legend>
+<dl>
+<dt class="TableCellLight">&nbsp;</dt>
+<dd><input type="submit" name="indexdump" value="Create Dump" class="submitready" style="width:240px;"/>
+</dd>
+</dl>
+<dl>
+<dt class="TableCellDark">Dump File</dt>
+<dd><input type="text" name="dumpfile" value="#[dumpfile]#" size="80" maxlength="250" />
+</dd>
+<dt class="TableCellLight">&nbsp;</dt>
+<dd><input type="submit" name="indexrestore" value="Restore Dump" class="submitready" style="width:240px;"/>
+</dd>
+</dl>
+</fieldset>
+</form>::
+#(/dumprestore)#
 #(lurlexport)#::
 <form action="IndexControlURLs_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
 <fieldset><legend>Loaded URL Export</legend>
@@ -162,6 +182,10 @@ function updatepage(str) {
 <div class="error">Export to file #[exportfile]# failed: #[exportfailmsg]#</div>::
 #(/lurlexporterror)#
+#(indexdump)#::
+<div class="commit">Stored a solr dump to file #[dumpfile]#</div>::
+#(/indexdump)#
 #(urlhashsimilar)#::<p>Sequential List of URL-Hashes:<br />
 #{rows}#
 #{cols}#<a href="/IndexControlURLs_p.html?urlhash=#[urlHash]#&amp;urlhashsearch=1" class="tt">#[urlHash]#</a> #{/cols}#<br />

@@ -29,6 +29,7 @@ import java.io.File;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.util.Iterator;
+import java.util.List;
 import net.yacy.cora.date.GenericFormatter;
 import net.yacy.cora.document.ASCII;
@@ -67,6 +68,12 @@ public class IndexControlURLs_p {
 prop.put("statistics_lines", 100);
 prop.put("statisticslines", 0);
 prop.put("reload", 0);
+prop.put("indexdump", 0);
+prop.put("lurlexport", 0);
+prop.put("reload", 0);
+prop.put("dumprestore", 1);
+List<File> dumpFiles = segment.fulltext().dumpFiles();
+prop.put("dumprestore_dumpfile", dumpFiles.size() == 0 ? "" : dumpFiles.get(dumpFiles.size() - 1).getAbsolutePath());
 // show export messages
 final Fulltext.Export export = segment.fulltext().export();
@@ -127,8 +134,6 @@ public class IndexControlURLs_p {
 if (post.containsKey("urlhashdeleteall")) {
 int i = segment.removeAllUrlReferences(urlhash.getBytes(), sb.loader, CacheStrategy.IFEXIST);
 prop.put("result", "Deleted URL and " + i + " references from " + i + " word indexes.");
-prop.put("lurlexport", 0);
-prop.put("reload", 0);
 }
 if (post.containsKey("urlhashdelete")) {
@@ -141,8 +146,6 @@ public class IndexControlURLs_p {
 sb.urlRemove(segment, urlhash.getBytes());
 prop.putHTML("result", "Removed URL " + urlstring);
 }
-prop.put("lurlexport", 0);
-prop.put("reload", 0);
 }
 if (post.containsKey("urldelete")) {
@@ -157,8 +160,6 @@ public class IndexControlURLs_p {
 sb.urlRemove(segment, urlhash.getBytes());
 prop.putHTML("result", "Removed URL " + urlstring);
 }
-prop.put("lurlexport", 0);
-prop.put("reload", 0);
 }
 if (post.containsKey("urlstringsearch")) {
@@ -179,8 +180,6 @@ public class IndexControlURLs_p {
 prop.putHTML("result", "bad url: " + urlstring);
 prop.put("urlhash", "");
 }
-prop.put("lurlexport", 0);
-prop.put("reload", 0);
 }
 if (post.containsKey("urlhashsearch")) {
@@ -192,8 +191,6 @@ public class IndexControlURLs_p {
 prop.putAll(genUrlProfile(segment, entry, urlhash));
 prop.put("statistics", 0);
 }
-prop.put("lurlexport", 0);
-prop.put("reload", 0);
 }
 // generate list
@@ -218,8 +215,6 @@ public class IndexControlURLs_p {
 prop.put("statistics", 0);
 prop.put("urlhashsimilar_rows", rows);
 prop.put("result", result.toString());
-prop.put("lurlexport", 0);
-prop.put("reload", 0);
 }
 if (post.containsKey("lurlexport")) {
@@ -251,6 +246,17 @@ public class IndexControlURLs_p {
 prop.put("reload", 1);
 }
+if (post.containsKey("indexdump")) {
+final File dump = segment.fulltext().dumpSolr();
+prop.put("indexdump", 1);
+prop.put("indexdump_dumpfile", dump.getAbsolutePath());
+}
+if (post.containsKey("indexrestore")) {
+final File dump = new File(post.get("dumpfile", ""));
+segment.fulltext().restoreSolr(dump);
+}
 if (post.containsKey("deletedomain")) {
 final String hp = post.get("hashpart");
 try {
@@ -261,7 +267,6 @@ public class IndexControlURLs_p {
 }
 // trigger the loading of the table
 post.put("statistics", "");
-prop.put("reload", 0);
 }
 if (post.containsKey("statistics")) {
@@ -289,8 +294,6 @@ public class IndexControlURLs_p {
 }
 prop.put("statisticslines_domains", cnt);
 prop.put("statisticslines", 1);
-prop.put("lurlexport", 0);
-prop.put("reload", 0);
 }
 // insert constants

@@ -23,7 +23,7 @@
 <legend>
 Local Search Index
 </legend>
-This is an experimental switchboard to test an index migration from embedded metadata to embedded solr. The 'classic' configuration is rwi + metadata switched on. The future configuration is rwi + solr switched on.
+This is a switchboard for the usage of embedded metadata to embedded solr.
 The rwi index is necessary for index transmission and shall be switched off in future portalmode configurations.
 <dl>
 <dt><input type="checkbox" name="core.service.fulltext" id="core.service.fulltext" #(core.service.fulltext.checked)#:: checked="checked"#(/core.service.fulltext.checked)# /></dt><dd>url metadata and embedded solr fulltext search index, interface at: <a href="/solr/select?q=*:*&start=0&rows=10" target="_blank">/solr/select?q=*:*&amp;start=0&amp;rows=10</a></dd>
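The /solr/select interface linked above can be queried directly over HTTP once the embedded Solr fulltext index is enabled. A minimal sketch of such a query, assuming a local peer on the default port 8090 (host and port are assumptions for this example, not part of the commit):

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;

// illustration only: fetches the match-all query shown in the checkbox description above
public class SolrSelectSketch {
    public static void main(final String[] args) throws IOException {
        final URL url = new URL("http://localhost:8090/solr/select?q=*:*&start=0&rows=10");
        final BufferedReader reader = new BufferedReader(new InputStreamReader(url.openStream(), "UTF-8"));
        try {
            String line;
            while ((line = reader.readLine()) != null) System.out.println(line);
        } finally {
            reader.close();
        }
    }
}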

@@ -60,10 +60,12 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
 private final SolrCore defaultCore;
 protected SolrRequestParsers adminRequestParser;
 private final SearchHandler requestHandler;
+private final File storagePath;
 public EmbeddedSolrConnector(File storagePath, File solr_config) throws IOException {
 super();
 // copy the solrconfig.xml to the storage path
+this.storagePath = storagePath;
 File conf = new File(storagePath, "conf");
 conf.mkdirs();
 File source, target;
@@ -106,6 +108,10 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
 super.init(new EmbeddedSolrServer(this.cores, this.defaultCoreName));
 }
+public File getStoragePath() {
+return this.storagePath;
+}
 public SolrCore getCore() {
 return this.defaultCore;
 }

@@ -30,9 +30,11 @@ import java.io.PrintWriter;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
 import java.util.TreeSet;
+import net.yacy.cora.date.GenericFormatter;
 import net.yacy.cora.document.ASCII;
 import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.cora.order.CloneableIterator;
@@ -43,6 +45,8 @@ import net.yacy.cora.services.federated.yacy.YaCySchema;
 import net.yacy.cora.sorting.ConcurrentScoreMap;
 import net.yacy.cora.sorting.ScoreMap;
 import net.yacy.cora.storage.HandleSet;
+import net.yacy.cora.storage.ZIPReader;
+import net.yacy.cora.storage.ZIPWriter;
 import net.yacy.cora.util.SpaceExceededException;
 import net.yacy.document.parser.html.CharacterCoding;
 import net.yacy.kelondro.data.meta.DigestURI;
@@ -448,6 +452,62 @@ public final class Fulltext implements Iterable<byte[]> {
 };
 }
+public List<File> dumpFiles() {
+EmbeddedSolrConnector esc = (EmbeddedSolrConnector) this.solr.getSolr0();
+File storagePath = esc.getStoragePath().getParentFile();
+ArrayList<File> zips = new ArrayList<File>();
+for (String p: storagePath.list()) {
+if (p.endsWith("zip")) zips.add(new File(storagePath, p));
+}
+return zips;
+}
+
+/**
+* create a dump file from the current solr directory
+* @return
+*/
+public File dumpSolr() {
+EmbeddedSolrConnector esc = (EmbeddedSolrConnector) this.solr.getSolr0();
+int commitWithin = esc.getCommitWithinMs();
+File storagePath = esc.getStoragePath();
+this.disconnectLocalSolr();
+File zipOut = new File(storagePath.toString() + "_" + GenericFormatter.SHORT_DAY_FORMATTER.format() + ".zip");
+try {
+ZIPWriter.zip(storagePath, zipOut);
+} catch (IOException e) {
+Log.logException(e);
+} finally {
+try {
+this.connectLocalSolr(commitWithin);
+} catch (IOException e) {
+Log.logException(e);
+}
+}
+return zipOut;
+}
+
+/**
+* restore a solr dump to the current solr directory
+* @param solrDumpZipFile
+*/
+public void restoreSolr(File solrDumpZipFile) {
+EmbeddedSolrConnector esc = (EmbeddedSolrConnector) this.solr.getSolr0();
+int commitWithin = esc.getCommitWithinMs();
+File storagePath = esc.getStoragePath();
+this.disconnectLocalSolr();
+try {
+ZIPReader.unzip(solrDumpZipFile, storagePath);
+} catch (IOException e) {
+Log.logException(e);
+} finally {
+try {
+this.connectLocalSolr(commitWithin);
+} catch (IOException e) {
+Log.logException(e);
+}
+}
+}
+
 // export methods
 public Export export(final File f, final String filter, final HandleSet set, final int format, final boolean dom) {
 if ((this.exportthread != null) && (this.exportthread.isAlive())) {
