From 2ead4e44d9899033af4929f13b96d6eed05a50e5 Mon Sep 17 00:00:00 2001 From: orbiter Date: Tue, 7 Jan 2014 17:53:49 +0100 Subject: [PATCH] introduced a new storage path ARCHIVE inside of DATA which will be used as path for solr index dumps (instead of the SEGMENTS path). This will make maintenance of index backups easier. It will also provide a tool to migrate from a freeworld index to a webportal index. --- defaults/yacy.init | 3 +++ htroot/IndexControlURLs_p.java | 2 ++ source/net/yacy/search/Switchboard.java | 12 +++++++----- .../net/yacy/search/SwitchboardConstants.java | 4 +++- .../net/yacy/search/index/DocumentIndex.java | 4 ++-- source/net/yacy/search/index/Fulltext.java | 18 ++++++++++-------- source/net/yacy/search/index/Segment.java | 7 ++++--- 7 files changed, 31 insertions(+), 19 deletions(-) diff --git a/defaults/yacy.init b/defaults/yacy.init index 2b83fa660..6e4d490d1 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -287,6 +287,9 @@ promoteSearchPageGreeting.smallImage = /env/grafics/YaCyLogo_60ppi.png # when the secondary path should be equal to the primary, it must be declared empty indexPrimaryPath=DATA/INDEX +# the path to index archive dumps +indexArchivePath=DATA/ARCHIVE + # the path to the LISTS files. Most lists are used to filter web content listsPath=DATA/LISTS diff --git a/htroot/IndexControlURLs_p.java b/htroot/IndexControlURLs_p.java index c1e4ca30b..6d8e1237c 100644 --- a/htroot/IndexControlURLs_p.java +++ b/htroot/IndexControlURLs_p.java @@ -278,6 +278,8 @@ public class IndexControlURLs_p { final File dump = segment.fulltext().dumpSolr(); prop.put("indexdump", 1); prop.put("indexdump_dumpfile", dump.getAbsolutePath()); + dumpFiles = segment.fulltext().dumpFiles(); + prop.put("dumprestore_dumpfile", dumpFiles.size() == 0 ?
"" : dumpFiles.get(dumpFiles.size() - 1).getAbsolutePath()); sb.tables.recordAPICall(post, "IndexControlURLs_p.html", WorkTables.TABLE_API_TYPE_STEERING, "solr dump generation"); } diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index da879767b..a364165de 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -210,7 +210,6 @@ import net.yacy.utils.crypt; import com.google.common.io.Files; import net.yacy.http.YaCyHttpServer; -import net.yacy.http.YaCyLegacyCredential; public final class Switchboard extends serverSwitch { @@ -339,9 +338,10 @@ public final class Switchboard extends serverSwitch { } // load values from configs - final File indexPath = - getDataPath(SwitchboardConstants.INDEX_PRIMARY_PATH, SwitchboardConstants.INDEX_PATH_DEFAULT); + final File indexPath = getDataPath(SwitchboardConstants.INDEX_PRIMARY_PATH, SwitchboardConstants.INDEX_PATH_DEFAULT); this.log.config("Index Primary Path: " + indexPath.toString()); + final File archivePath = getDataPath(SwitchboardConstants.INDEX_ARCHIVE_PATH, SwitchboardConstants.INDEX_ARCHIVE_DEFAULT); + this.log.config("Index Archive Path: " + archivePath.toString()); this.listsPath = getDataPath(SwitchboardConstants.LISTS_PATH, SwitchboardConstants.LISTS_PATH_DEFAULT); this.log.config("Lists Path: " + this.listsPath.toString()); @@ -498,7 +498,7 @@ public final class Switchboard extends serverSwitch { // initialize index ReferenceContainer.maxReferences = getConfigInt("index.maxReferences", 0); final File segmentsPath = new File(new File(indexPath, networkName), "SEGMENTS"); - this.index = new Segment(this.log, segmentsPath, solrCollectionConfigurationWork, solrWebgraphConfigurationWork); + this.index = new Segment(this.log, segmentsPath, archivePath, solrCollectionConfigurationWork, solrWebgraphConfigurationWork); if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_RWI, true)) try { 
this.index.connectRWI(wordCacheMaxCount, fileSizeMax); } catch (final IOException e) {ConcurrentLog.logException(e);} @@ -1331,7 +1331,9 @@ public final class Switchboard extends serverSwitch { partitionExponent, this.useTailCache, this.exceed134217727); - this.index = new Segment(this.log, new File(new File(indexPrimaryPath, networkName), "SEGMENTS"), collectionConfiguration, webgraphConfiguration); + final File segmentsPath = new File(new File(indexPrimaryPath, networkName), "SEGMENTS"); + final File archivePath = getDataPath(SwitchboardConstants.INDEX_ARCHIVE_PATH, SwitchboardConstants.INDEX_ARCHIVE_DEFAULT); + this.index = new Segment(this.log, segmentsPath, archivePath, collectionConfiguration, webgraphConfiguration); if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_RWI, true)) this.index.connectRWI(wordCacheMaxCount, fileSizeMax); if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_CITATION, true)) this.index.connectCitation(wordCacheMaxCount, fileSizeMax); if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_FULLTEXT, true)) { diff --git a/source/net/yacy/search/SwitchboardConstants.java b/source/net/yacy/search/SwitchboardConstants.java index 9e574a80f..031470014 100644 --- a/source/net/yacy/search/SwitchboardConstants.java +++ b/source/net/yacy/search/SwitchboardConstants.java @@ -406,8 +406,10 @@ public final class SwitchboardConstants { *

Name of the setting specifying the folder beginning from the YaCy-installation's top-folder, where the * whole database of known RWIs and URLs as well as dumps of the DHT-In and DHT-Out caches are stored

*/ - public static final String INDEX_PRIMARY_PATH = "indexPrimaryPath"; // this is a relative path to the data root + public static final String INDEX_PRIMARY_PATH = "indexPrimaryPath"; // this is a relative path to the application root or an absolute path public static final String INDEX_PATH_DEFAULT = "DATA/INDEX"; + public static final String INDEX_ARCHIVE_PATH = "indexArchivePath"; // this is a relative path to the application root or an absolute path + public static final String INDEX_ARCHIVE_DEFAULT = "DATA/ARCHIVE"; /** *

public static final String LISTS_PATH = "listsPath"

*

Name of the setting specifying the folder beginning from the YaCy-installation's top-folder, where all diff --git a/source/net/yacy/search/index/DocumentIndex.java b/source/net/yacy/search/index/DocumentIndex.java index d402b7ca8..5cb53e546 100644 --- a/source/net/yacy/search/index/DocumentIndex.java +++ b/source/net/yacy/search/index/DocumentIndex.java @@ -66,9 +66,9 @@ public class DocumentIndex extends Segment { static final ThreadGroup workerThreadGroup = new ThreadGroup("workerThreadGroup"); - public DocumentIndex(final File segmentPath, final File collectionConfigurationPath, final File webgraphConfigurationPath, final CallbackListener callback, final int cachesize) + public DocumentIndex(final File segmentPath, final File archivePath, final File collectionConfigurationPath, final File webgraphConfigurationPath, final CallbackListener callback, final int cachesize) throws IOException { - super(new ConcurrentLog("DocumentIndex"), segmentPath, + super(new ConcurrentLog("DocumentIndex"), segmentPath, archivePath, collectionConfigurationPath == null ? null : new CollectionConfiguration(collectionConfigurationPath, true), webgraphConfigurationPath == null ? 
null : new WebgraphConfiguration(webgraphConfigurationPath, true) ); diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java index e8c5c5b58..0eb84686a 100644 --- a/source/net/yacy/search/index/Fulltext.java +++ b/source/net/yacy/search/index/Fulltext.java @@ -88,7 +88,8 @@ public final class Fulltext { private static final String SOLR_OLD_PATH[] = new String[]{"solr_36", "solr_40", "solr_44", "solr_45"}; // class objects - private final File segmentPath; + private final File segmentPath; + private final File archivePath; private Index urlIndexFile; private Export exportthread; // will have a export thread assigned if exporter is running private String tablename; @@ -98,8 +99,10 @@ public final class Fulltext { private final WebgraphConfiguration webgraphConfiguration; private boolean writeWebgraph; - protected Fulltext(final File segmentPath, final CollectionConfiguration collectionConfiguration, final WebgraphConfiguration webgraphConfiguration) { + protected Fulltext(final File segmentPath, final File archivePath, + final CollectionConfiguration collectionConfiguration, final WebgraphConfiguration webgraphConfiguration) { this.segmentPath = segmentPath; + this.archivePath = archivePath; this.tablename = null; this.urlIndexFile = null; this.exportthread = null; // will have a export thread assigned if exporter is running @@ -661,14 +664,13 @@ public final class Fulltext { ConcurrentLog.warn("Fulltext", "HOT DUMP selected solr0.getStoragePath() == NULL, no dump list!"); return zips; } - File storagePath = esc.getContainerPath().getParentFile(); - if (storagePath == null) { + if (this.archivePath == null) { ConcurrentLog.warn("Fulltext", "HOT DUMP selected esc.getStoragePath().getParentFile() == NULL, no dump list!"); return zips; } - ConcurrentLog.info("Fulltext", "HOT DUMP dump path = " + storagePath.toString()); - for (String p: storagePath.list()) { - if (p.endsWith("zip")) zips.add(new File(storagePath, p)); + 
ConcurrentLog.info("Fulltext", "HOT DUMP dump path = " + this.archivePath.toString()); + for (String p: this.archivePath.list()) { + if (p.endsWith("zip")) zips.add(new File(this.archivePath, p)); } return zips; } @@ -680,7 +682,7 @@ public final class Fulltext { public File dumpSolr() { EmbeddedInstance esc = this.solrInstances.getSolr0(); File storagePath = esc.getContainerPath(); - File zipOut = new File(storagePath.toString() + "_" + GenericFormatter.SHORT_DAY_FORMATTER.format() + ".zip"); + File zipOut = new File(this.archivePath, storagePath.getName() + "_" + GenericFormatter.SHORT_DAY_FORMATTER.format() + ".zip"); synchronized (this.solrInstances) { this.disconnectLocalSolr(); this.solrInstances.close(); diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index c7e0e0183..e3b53c7ad 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -126,14 +126,15 @@ public class Segment { * @param segmentPath that should be the path ponting to the directory "SEGMENT" * @param collectionSchema */ - public Segment(final ConcurrentLog log, final File segmentPath, + public Segment(final ConcurrentLog log, final File segmentPath, final File archivePath, final CollectionConfiguration collectionConfiguration, final WebgraphConfiguration webgraphConfiguration) { log.info("Initializing Segment '" + segmentPath + "."); this.log = log; this.segmentPath = segmentPath; - + archivePath.mkdirs(); + // create LURL-db - this.fulltext = new Fulltext(segmentPath, collectionConfiguration, webgraphConfiguration); + this.fulltext = new Fulltext(segmentPath, archivePath, collectionConfiguration, webgraphConfiguration); this.termIndex = null; this.urlCitationIndex = null;