increased default maximum file size for database files to 2GB

Other file sizes can now be configured with the attributes
filesize.max.win (used on Windows) and filesize.max.other (used on all other operating systems).
The default maximum file size for non-Windows operating systems is now 32GB.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5974 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent eb36c9a092
commit 26a46b5521

@ -73,6 +73,12 @@ server.maxTrackingCount = 1000
# maximum number of hosts that are tracked # maximum number of hosts that are tracked
server.maxTrackingHostCount = 100 server.maxTrackingHostCount = 100
# maximum file sizes: since some users experience problems with files that are too large,
# the file size of database files can be limited. Larger files give
# better IO performance and use less RAM; however, if the size must be limited
# because of limitations of the file system, the maximum size (in bytes) can be set here.
# filesize.max.win is used on Windows, filesize.max.other on all other operating systems
filesize.max.win = 2147483647
filesize.max.other = 34359738367
# Network Definition # Network Definition
# There can be separate YaCy networks, and managed sub-groups of the general network. # There can be separate YaCy networks, and managed sub-groups of the general network.

@ -483,7 +483,7 @@ public class URLAnalysis {
} else if (args[0].equals("-diffurlcol") && args.length >= 3) { } else if (args[0].equals("-diffurlcol") && args.length >= 3) {
// make a diff-file that contains hashes from the url database that do not occur in the collection reference dump // make a diff-file that contains hashes from the url database that do not occur in the collection reference dump
// example: // example:
// java -Xmx1000m -cp classes de.anomic.data.URLAnalysis -diffurlcol DATA/INDEX/freeworld/TEXT used.dump diffurlcol.dump // java -Xmx1000m -cp classes de.anomic.data.URLAnalysis -diffurlcol DATA/INDEX/freeworld/TEXT/METADATA used.dump diffurlcol.dump
try { try {
diffurlcol(args[1], args[2], args[3]); diffurlcol(args[1], args[2], args[3]);
} catch (IOException e) { } catch (IOException e) {

@ -66,7 +66,6 @@ public class BLOBArray implements BLOB {
*/ */
public static final long oneMonth = 1000L * 60L * 60L * 24L * 365L / 12L; public static final long oneMonth = 1000L * 60L * 60L * 24L * 365L / 12L;
public static final long oneGigabyte = 1024L * 1024L * 1024L;
private int keylength; private int keylength;
private ByteOrder ordering; private ByteOrder ordering;
@ -91,7 +90,7 @@ public class BLOBArray implements BLOB {
this.buffersize = buffersize; this.buffersize = buffersize;
this.heapLocation = heapLocation; this.heapLocation = heapLocation;
this.fileAgeLimit = oneMonth; this.fileAgeLimit = oneMonth;
this.fileSizeLimit = oneGigabyte; this.fileSizeLimit = (long) Integer.MAX_VALUE;
this.repositoryAgeMax = Long.MAX_VALUE; this.repositoryAgeMax = Long.MAX_VALUE;
this.repositorySizeMax = Long.MAX_VALUE; this.repositorySizeMax = Long.MAX_VALUE;
@ -327,7 +326,7 @@ public class BLOBArray implements BLOB {
public void setMaxSize(long maxSize) { public void setMaxSize(long maxSize) {
this.repositorySizeMax = maxSize; this.repositorySizeMax = maxSize;
this.fileSizeLimit = Math.min(oneGigabyte, maxSize / 10); this.fileSizeLimit = Math.min((long) Integer.MAX_VALUE, maxSize / 10L);
} }
private void executeLimits() { private void executeLimits() {

@ -90,7 +90,7 @@ public class SplitTable implements ObjectIndex {
final String tablename, final String tablename,
final Row rowdef, final Row rowdef,
final boolean resetOnFail) { final boolean resetOnFail) {
this(path, tablename, rowdef, BLOBArray.oneMonth, BLOBArray.oneGigabyte, resetOnFail); this(path, tablename, rowdef, BLOBArray.oneMonth, (long) Integer.MAX_VALUE, resetOnFail);
} }
public SplitTable( public SplitTable(

@ -68,7 +68,6 @@ public final class MetadataRepository implements Iterable<byte[]> {
this.urlIndexFile = new Cache(new SplitTable(this.location, "urls", URLMetadataRow.rowdef, false)); this.urlIndexFile = new Cache(new SplitTable(this.location, "urls", URLMetadataRow.rowdef, false));
this.exportthread = null; // will have a export thread assigned if exporter is running this.exportthread = null; // will have a export thread assigned if exporter is running
this.statsDump = null; this.statsDump = null;
} }
public void clearCache() { public void clearCache() {

@ -170,6 +170,7 @@ import de.anomic.server.serverProcessorJob;
import de.anomic.server.serverProfiling; import de.anomic.server.serverProfiling;
import de.anomic.server.serverSemaphore; import de.anomic.server.serverSemaphore;
import de.anomic.server.serverSwitch; import de.anomic.server.serverSwitch;
import de.anomic.server.serverSystem;
import de.anomic.server.serverThread; import de.anomic.server.serverThread;
import de.anomic.tools.crypt; import de.anomic.tools.crypt;
import de.anomic.tools.CryptoLib; import de.anomic.tools.CryptoLib;
@ -319,7 +320,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
// start indexing management // start indexing management
log.logConfig("Starting Indexing Management"); log.logConfig("Starting Indexing Management");
final String networkName = getConfig(plasmaSwitchboardConstants.NETWORK_NAME, ""); final String networkName = getConfig(plasmaSwitchboardConstants.NETWORK_NAME, "");
final boolean useCommons = getConfigBool("index.storeCommons", false); final long fileSizeMax = (serverSystem.isWindows) ? sb.getConfigLong("filesize.max.win", (long) Integer.MAX_VALUE) : sb.getConfigLong("filesize.max.other", (long) Integer.MAX_VALUE);
final int redundancy = (int) sb.getConfigLong("network.unit.dhtredundancy.senior", 1); final int redundancy = (int) sb.getConfigLong("network.unit.dhtredundancy.senior", 1);
final int paritionExponent = (int) sb.getConfigLong("network.unit.dht.partitionExponent", 0); final int paritionExponent = (int) sb.getConfigLong("network.unit.dht.partitionExponent", 0);
try { try {
@ -329,7 +330,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
indexPrimaryPath, indexPrimaryPath,
indexSecondaryPath, indexSecondaryPath,
wordCacheMaxCount, wordCacheMaxCount,
useCommons, fileSizeMax,
redundancy, redundancy,
paritionExponent); paritionExponent);
} catch (IOException e1) { } catch (IOException e1) {
@ -795,7 +796,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
final File indexPrimaryPath = getConfigPath(plasmaSwitchboardConstants.INDEX_PRIMARY_PATH, plasmaSwitchboardConstants.INDEX_PATH_DEFAULT); final File indexPrimaryPath = getConfigPath(plasmaSwitchboardConstants.INDEX_PRIMARY_PATH, plasmaSwitchboardConstants.INDEX_PATH_DEFAULT);
final File indexSecondaryPath = (getConfig(plasmaSwitchboardConstants.INDEX_SECONDARY_PATH, "").length() == 0) ? indexPrimaryPath : new File(getConfig(plasmaSwitchboardConstants.INDEX_SECONDARY_PATH, "")); final File indexSecondaryPath = (getConfig(plasmaSwitchboardConstants.INDEX_SECONDARY_PATH, "").length() == 0) ? indexPrimaryPath : new File(getConfig(plasmaSwitchboardConstants.INDEX_SECONDARY_PATH, ""));
final int wordCacheMaxCount = (int) getConfigLong(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, 20000); final int wordCacheMaxCount = (int) getConfigLong(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, 20000);
final boolean useCommons = getConfigBool("index.storeCommons", false); final long fileSizeMax = (serverSystem.isWindows) ? sb.getConfigLong("filesize.max.win", (long) Integer.MAX_VALUE) : sb.getConfigLong("filesize.max.other", (long) Integer.MAX_VALUE);
final int redundancy = (int) sb.getConfigLong("network.unit.dhtredundancy.senior", 1); final int redundancy = (int) sb.getConfigLong("network.unit.dhtredundancy.senior", 1);
final int paritionExponent = (int) sb.getConfigLong("network.unit.dht.partitionExponent", 0); final int paritionExponent = (int) sb.getConfigLong("network.unit.dht.partitionExponent", 0);
try { try {
@ -805,7 +806,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
indexPrimaryPath, indexPrimaryPath,
indexSecondaryPath, indexSecondaryPath,
wordCacheMaxCount, wordCacheMaxCount,
useCommons, fileSizeMax,
redundancy, redundancy,
paritionExponent); paritionExponent);
} catch (IOException e) { } catch (IOException e) {

@ -41,7 +41,6 @@ import de.anomic.crawler.IndexingStack;
import de.anomic.data.Blacklist; import de.anomic.data.Blacklist;
import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.http.httpdProxyCacheEntry; import de.anomic.http.httpdProxyCacheEntry;
import de.anomic.kelondro.blob.BLOBArray;
import de.anomic.kelondro.order.Base64Order; import de.anomic.kelondro.order.Base64Order;
import de.anomic.kelondro.order.ByteOrder; import de.anomic.kelondro.order.ByteOrder;
import de.anomic.kelondro.text.BufferedIndex; import de.anomic.kelondro.text.BufferedIndex;
@ -71,9 +70,7 @@ public final class plasmaWordIndex {
public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes
public static final int wCacheMaxChunk = 800; // maximum number of references for each urlhash public static final int wCacheMaxChunk = 800; // maximum number of references for each urlhash
public static final int lowcachedivisor = 900; public static final int lowcachedivisor = 900;
public static final int maxCollectionPartition = 7; // should be 7 public static final long targetFileSize = 256 * 1024 * 1024; // 256 MB
public static final long targetFileSize = 100 * 1024 * 1024; // 100 MB
public static final long maxFileSize = BLOBArray.oneGigabyte; // 1GB
public static final int writeBufferSize = 4 * 1024 * 1024; public static final int writeBufferSize = 4 * 1024 * 1024;
// the reference factory // the reference factory
@ -120,7 +117,7 @@ public final class plasmaWordIndex {
final File indexPrimaryRoot, final File indexPrimaryRoot,
final File indexSecondaryRoot, final File indexSecondaryRoot,
final int entityCacheMaxSize, final int entityCacheMaxSize,
final boolean useCommons, final long maxFileSize,
final int redundancy, final int redundancy,
final int partitionExponent) throws IOException { final int partitionExponent) throws IOException {

@ -676,7 +676,15 @@ public final class yacy {
final int cacheMem = (int)(MemoryControl.maxMemory - MemoryControl.total()); final int cacheMem = (int)(MemoryControl.maxMemory - MemoryControl.total());
if (cacheMem < 2048000) throw new OutOfMemoryError("Not enough memory available to start clean up."); if (cacheMem < 2048000) throw new OutOfMemoryError("Not enough memory available to start clean up.");
final plasmaWordIndex wordIndex = new plasmaWordIndex(networkName, log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1, 0); final plasmaWordIndex wordIndex = new plasmaWordIndex(
networkName,
log,
indexPrimaryRoot,
indexSecondaryRoot,
10000,
(long) Integer.MAX_VALUE,
0,
0);
final Iterator<ReferenceContainer<WordReference>> indexContainerIterator = wordIndex.index().references("AAAAAAAAAAAA".getBytes(), false, false); final Iterator<ReferenceContainer<WordReference>> indexContainerIterator = wordIndex.index().references("AAAAAAAAAAAA".getBytes(), false, false);
long urlCounter = 0, wordCounter = 0; long urlCounter = 0, wordCounter = 0;
@ -867,7 +875,15 @@ public final class yacy {
try { try {
Iterator<ReferenceContainer<WordReference>> indexContainerIterator = null; Iterator<ReferenceContainer<WordReference>> indexContainerIterator = null;
if (resource.equals("all")) { if (resource.equals("all")) {
WordIndex = new plasmaWordIndex("freeworld", log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1, 0); WordIndex = new plasmaWordIndex(
"freeworld",
log,
indexPrimaryRoot,
indexSecondaryRoot,
10000,
(long) Integer.MAX_VALUE,
1,
0);
indexContainerIterator = WordIndex.index().references(wordChunkStartHash.getBytes(), false, false); indexContainerIterator = WordIndex.index().references(wordChunkStartHash.getBytes(), false, false);
} }
int counter = 0; int counter = 0;

Loading…
Cancel
Save