Introduced a new storage path ARCHIVE inside of DATA which will be used

as the path for Solr index dumps (instead of the SEGMENTS path). This will
make maintenance of index backups easier. It will also provide a tool
to migrate from a freeworld index to a webportal index.
pull/1/head
orbiter 11 years ago
parent add0e42804
commit 2ead4e44d9

@ -287,6 +287,9 @@ promoteSearchPageGreeting.smallImage = /env/grafics/YaCyLogo_60ppi.png
# when the secondary path should be equal to the primary, it must be declared empty
indexPrimaryPath=DATA/INDEX
# the path to index archive dumps
indexArchivePath=DATA/ARCHIVE
# the path to the LISTS files. Most lists are used to filter web content
listsPath=DATA/LISTS

@ -278,6 +278,8 @@ public class IndexControlURLs_p {
final File dump = segment.fulltext().dumpSolr();
prop.put("indexdump", 1);
prop.put("indexdump_dumpfile", dump.getAbsolutePath());
dumpFiles = segment.fulltext().dumpFiles();
prop.put("dumprestore_dumpfile", dumpFiles.size() == 0 ? "" : dumpFiles.get(dumpFiles.size() - 1).getAbsolutePath());
sb.tables.recordAPICall(post, "IndexControlURLs_p.html", WorkTables.TABLE_API_TYPE_STEERING, "solr dump generation");
}

@ -210,7 +210,6 @@ import net.yacy.utils.crypt;
import com.google.common.io.Files;
import net.yacy.http.YaCyHttpServer;
import net.yacy.http.YaCyLegacyCredential;
public final class Switchboard extends serverSwitch {
@ -339,9 +338,10 @@ public final class Switchboard extends serverSwitch {
}
// load values from configs
final File indexPath =
getDataPath(SwitchboardConstants.INDEX_PRIMARY_PATH, SwitchboardConstants.INDEX_PATH_DEFAULT);
final File indexPath = getDataPath(SwitchboardConstants.INDEX_PRIMARY_PATH, SwitchboardConstants.INDEX_PATH_DEFAULT);
this.log.config("Index Primary Path: " + indexPath.toString());
final File archivePath = getDataPath(SwitchboardConstants.INDEX_ARCHIVE_PATH, SwitchboardConstants.INDEX_ARCHIVE_DEFAULT);
this.log.config("Index Archive Path: " + archivePath.toString());
this.listsPath =
getDataPath(SwitchboardConstants.LISTS_PATH, SwitchboardConstants.LISTS_PATH_DEFAULT);
this.log.config("Lists Path: " + this.listsPath.toString());
@ -498,7 +498,7 @@ public final class Switchboard extends serverSwitch {
// initialize index
ReferenceContainer.maxReferences = getConfigInt("index.maxReferences", 0);
final File segmentsPath = new File(new File(indexPath, networkName), "SEGMENTS");
this.index = new Segment(this.log, segmentsPath, solrCollectionConfigurationWork, solrWebgraphConfigurationWork);
this.index = new Segment(this.log, segmentsPath, archivePath, solrCollectionConfigurationWork, solrWebgraphConfigurationWork);
if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_RWI, true)) try {
this.index.connectRWI(wordCacheMaxCount, fileSizeMax);
} catch (final IOException e) {ConcurrentLog.logException(e);}
@ -1331,7 +1331,9 @@ public final class Switchboard extends serverSwitch {
partitionExponent,
this.useTailCache,
this.exceed134217727);
this.index = new Segment(this.log, new File(new File(indexPrimaryPath, networkName), "SEGMENTS"), collectionConfiguration, webgraphConfiguration);
final File segmentsPath = new File(new File(indexPrimaryPath, networkName), "SEGMENTS");
final File archivePath = getDataPath(SwitchboardConstants.INDEX_ARCHIVE_PATH, SwitchboardConstants.INDEX_ARCHIVE_DEFAULT);
this.index = new Segment(this.log, segmentsPath, archivePath, collectionConfiguration, webgraphConfiguration);
if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_RWI, true)) this.index.connectRWI(wordCacheMaxCount, fileSizeMax);
if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_CITATION, true)) this.index.connectCitation(wordCacheMaxCount, fileSizeMax);
if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_FULLTEXT, true)) {

@ -406,8 +406,10 @@ public final class SwitchboardConstants {
* <p>Name of the setting specifying the folder beginning from the YaCy-installation's top-folder, where the
* whole database of known RWIs and URLs as well as dumps of the DHT-In and DHT-Out caches are stored</p>
*/
public static final String INDEX_PRIMARY_PATH = "indexPrimaryPath"; // this is a relative path to the data root
public static final String INDEX_PRIMARY_PATH = "indexPrimaryPath"; // this is a relative path to the application root or an absolute path
public static final String INDEX_PATH_DEFAULT = "DATA/INDEX";
public static final String INDEX_ARCHIVE_PATH = "indexArchivePath"; // this is a relative path to the application root or an absolute path
public static final String INDEX_ARCHIVE_DEFAULT = "DATA/ARCHIVE";
/**
* <p><code>public static final String <strong>LISTS_PATH</strong> = "listsPath"</code></p>
* <p>Name of the setting specifying the folder beginning from the YaCy-installation's top-folder, where all

@ -66,9 +66,9 @@ public class DocumentIndex extends Segment {
static final ThreadGroup workerThreadGroup = new ThreadGroup("workerThreadGroup");
public DocumentIndex(final File segmentPath, final File collectionConfigurationPath, final File webgraphConfigurationPath, final CallbackListener callback, final int cachesize)
public DocumentIndex(final File segmentPath, final File archivePath, final File collectionConfigurationPath, final File webgraphConfigurationPath, final CallbackListener callback, final int cachesize)
throws IOException {
super(new ConcurrentLog("DocumentIndex"), segmentPath,
super(new ConcurrentLog("DocumentIndex"), segmentPath, archivePath,
collectionConfigurationPath == null ? null : new CollectionConfiguration(collectionConfigurationPath, true),
webgraphConfigurationPath == null ? null : new WebgraphConfiguration(webgraphConfigurationPath, true)
);

@ -88,7 +88,8 @@ public final class Fulltext {
private static final String SOLR_OLD_PATH[] = new String[]{"solr_36", "solr_40", "solr_44", "solr_45"};
// class objects
private final File segmentPath;
private final File segmentPath;
private final File archivePath;
private Index urlIndexFile;
private Export exportthread; // will have a export thread assigned if exporter is running
private String tablename;
@ -98,8 +99,10 @@ public final class Fulltext {
private final WebgraphConfiguration webgraphConfiguration;
private boolean writeWebgraph;
protected Fulltext(final File segmentPath, final CollectionConfiguration collectionConfiguration, final WebgraphConfiguration webgraphConfiguration) {
protected Fulltext(final File segmentPath, final File archivePath,
final CollectionConfiguration collectionConfiguration, final WebgraphConfiguration webgraphConfiguration) {
this.segmentPath = segmentPath;
this.archivePath = archivePath;
this.tablename = null;
this.urlIndexFile = null;
this.exportthread = null; // will have a export thread assigned if exporter is running
@ -661,14 +664,13 @@ public final class Fulltext {
ConcurrentLog.warn("Fulltext", "HOT DUMP selected solr0.getStoragePath() == NULL, no dump list!");
return zips;
}
File storagePath = esc.getContainerPath().getParentFile();
if (storagePath == null) {
if (this.archivePath == null) {
ConcurrentLog.warn("Fulltext", "HOT DUMP selected esc.getStoragePath().getParentFile() == NULL, no dump list!");
return zips;
}
ConcurrentLog.info("Fulltext", "HOT DUMP dump path = " + storagePath.toString());
for (String p: storagePath.list()) {
if (p.endsWith("zip")) zips.add(new File(storagePath, p));
ConcurrentLog.info("Fulltext", "HOT DUMP dump path = " + this.archivePath.toString());
for (String p: this.archivePath.list()) {
if (p.endsWith("zip")) zips.add(new File(this.archivePath, p));
}
return zips;
}
@ -680,7 +682,7 @@ public final class Fulltext {
public File dumpSolr() {
EmbeddedInstance esc = this.solrInstances.getSolr0();
File storagePath = esc.getContainerPath();
File zipOut = new File(storagePath.toString() + "_" + GenericFormatter.SHORT_DAY_FORMATTER.format() + ".zip");
File zipOut = new File(this.archivePath, storagePath.getName() + "_" + GenericFormatter.SHORT_DAY_FORMATTER.format() + ".zip");
synchronized (this.solrInstances) {
this.disconnectLocalSolr();
this.solrInstances.close();

@ -126,14 +126,15 @@ public class Segment {
* @param segmentPath that should be the path pointing to the directory "SEGMENTS"
* @param collectionSchema
*/
public Segment(final ConcurrentLog log, final File segmentPath,
public Segment(final ConcurrentLog log, final File segmentPath, final File archivePath,
final CollectionConfiguration collectionConfiguration, final WebgraphConfiguration webgraphConfiguration) {
log.info("Initializing Segment '" + segmentPath + ".");
this.log = log;
this.segmentPath = segmentPath;
archivePath.mkdirs();
// create LURL-db
this.fulltext = new Fulltext(segmentPath, collectionConfiguration, webgraphConfiguration);
this.fulltext = new Fulltext(segmentPath, archivePath, collectionConfiguration, webgraphConfiguration);
this.termIndex = null;
this.urlCitationIndex = null;

Loading…
Cancel
Save