From 8a7c68e4c7f6a682e3ef656b423ce1ad76b42caa Mon Sep 17 00:00:00 2001 From: reger Date: Sun, 4 May 2014 09:29:07 +0200 Subject: [PATCH] content of surrogates/out never accessed (remove) After import the content is never accessed but may take up a lot of disk space, also the getLoadedOAIServer (which lists the files in surrogate out) is not used. Making the surrogate.out obsolete. Removed keeping of xmls after import. --- .../document/importer/OAIPMHImporter.java | 38 +------------------ source/net/yacy/search/Switchboard.java | 38 +++---------------- .../net/yacy/search/SwitchboardConstants.java | 4 +- 3 files changed, 8 insertions(+), 72 deletions(-) diff --git a/source/net/yacy/document/importer/OAIPMHImporter.java b/source/net/yacy/document/importer/OAIPMHImporter.java index 74fe2b269..173332b48 100644 --- a/source/net/yacy/document/importer/OAIPMHImporter.java +++ b/source/net/yacy/document/importer/OAIPMHImporter.java @@ -22,13 +22,8 @@ package net.yacy.document.importer; -import java.io.File; import java.io.IOException; import java.net.MalformedURLException; -import java.text.ParseException; -import java.util.Date; -import java.util.HashMap; -import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import net.yacy.cora.date.GenericFormatter; @@ -181,43 +176,12 @@ public class OAIPMHImporter extends Thread implements Importer, Comparable getLoadedOAIServer(File surrogatesIn, File surrogatesOut) { - Map map = getLoadedOAIServer(surrogatesOut); - map.putAll((Map) getLoadedOAIServer(surrogatesIn).entrySet()); - return map; - } - - private static Map getLoadedOAIServer(File surrogates) { - HashMap map = new HashMap(); - //oaipmh_opus.bsz-bw.de_20091102113118728.xml - for (String s: surrogates.list()) { - if (s.startsWith(filenamePrefix) && s.endsWith(".xml") && s.charAt(s.length() - 22) == filenameSeparationChar) { - try { - Date fd = GenericFormatter.SHORT_MILSEC_FORMATTER.parse(s.substring(s.length() - 21, s.length() - 4)); - String hostID = 
s.substring(7, s.length() - 22); - Date md = map.get(hostID); - if (md == null || fd.after(md)) map.put(hostID, fd); - } catch (final ParseException e) { - ConcurrentLog.logException(e); - } - } - } - return map; - } - public static final char hostReplacementChar = '_'; public static final char filenameSeparationChar = '.'; public static final String filenamePrefix = "oaipmh"; /** - * compute a host id that is also used in the getLoadedOAIServer method for the map key + * compute a host id * @param source * @return a string that is a key for the given host */ diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 5fe44bcd3..426c41ddd 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -37,17 +37,14 @@ package net.yacy.search; import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; -import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; -import java.io.OutputStream; import java.io.Reader; import java.net.MalformedURLException; import java.security.NoSuchAlgorithmException; @@ -78,7 +75,6 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import java.util.zip.GZIPInputStream; -import java.util.zip.GZIPOutputStream; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; @@ -243,7 +239,7 @@ public final class Switchboard extends serverSwitch { public File networkRoot; public File queuesRoot; public File surrogatesInPath; - public File surrogatesOutPath; + //public File surrogatesOutPath; public Segment index; public LoaderDispatcher loader; public CrawlSwitchboard crawler; @@ -675,13 +671,13 @@ public final class Switchboard extends 
serverSwitch { SwitchboardConstants.SURROGATES_IN_PATH_DEFAULT); this.log.info("surrogates.in Path = " + this.surrogatesInPath.getAbsolutePath()); this.surrogatesInPath.mkdirs(); - this.surrogatesOutPath = +/* this.surrogatesOutPath = getDataPath( SwitchboardConstants.SURROGATES_OUT_PATH, SwitchboardConstants.SURROGATES_OUT_PATH_DEFAULT); this.log.info("surrogates.out Path = " + this.surrogatesOutPath.getAbsolutePath()); this.surrogatesOutPath.mkdirs(); - +*/ // copy opensearch heuristic config (if not exist) final File osdConfig = new File(getDataPath(), "DATA/SETTINGS/heuristicopensearch.conf"); if (!osdConfig.exists()) { @@ -1833,8 +1829,6 @@ public final class Switchboard extends serverSwitch { if ( !infile.exists() || !infile.canWrite() || !infile.canRead() ) { return false; } - final File outfile = new File(this.surrogatesOutPath, s); - //if (outfile.exists()) return false; boolean moved = false; if ( s.endsWith("xml.zip") ) { // open the zip file with all the xml files in it @@ -1858,7 +1852,7 @@ public final class Switchboard extends serverSwitch { } catch (final IOException e ) { ConcurrentLog.logException(e); } finally { - moved = infile.renameTo(outfile); + moved = infile.delete(); if (zis != null) try {zis.close();} catch (final IOException e) {} } return moved; @@ -1874,29 +1868,7 @@ public final class Switchboard extends serverSwitch { ConcurrentLog.logException(e); } finally { if (!shallTerminate()) { - moved = infile.renameTo(outfile); - if ( moved ) { - // check if this file is already compressed, if not, compress now - if ( !outfile.getName().endsWith(".gz") ) { - final String gzname = outfile.getName() + ".gz"; - final File gzfile = new File(outfile.getParentFile(), gzname); - try { - final OutputStream os = - new BufferedOutputStream(new GZIPOutputStream(new FileOutputStream(gzfile))); - BufferedInputStream bis = new BufferedInputStream(new FileInputStream(outfile)); - FileUtils.copy(bis, os); - os.close(); - bis.close(); - if ( gzfile.exists() 
) { - FileUtils.deletedelete(outfile); - } - } catch (final FileNotFoundException e ) { - ConcurrentLog.logException(e); - } catch (final IOException e ) { - ConcurrentLog.logException(e); - } - } - } + moved = infile.delete(); } if (is != null) try {is.close();} catch (IOException e) {} } diff --git a/source/net/yacy/search/SwitchboardConstants.java b/source/net/yacy/search/SwitchboardConstants.java index 17fa0812a..d6dc47860 100644 --- a/source/net/yacy/search/SwitchboardConstants.java +++ b/source/net/yacy/search/SwitchboardConstants.java @@ -387,8 +387,8 @@ public final class SwitchboardConstants { public static final String SURROGATES_IN_PATH = "surrogates.in"; public static final String SURROGATES_IN_PATH_DEFAULT = "DATA/SURROGATES/in"; - public static final String SURROGATES_OUT_PATH = "surrogates.out"; - public static final String SURROGATES_OUT_PATH_DEFAULT = "DATA/SURROGATES/out"; + //public static final String SURROGATES_OUT_PATH = "surrogates.out"; + //public static final String SURROGATES_OUT_PATH_DEFAULT = "DATA/SURROGATES/out"; public static final String DICTIONARY_SOURCE_PATH = "dictionaries"; public static final String DICTIONARY_SOURCE_PATH_DEFAULT = "DATA/DICTIONARIES";