|
|
|
@ -56,15 +56,14 @@ import java.util.Map;
|
|
|
|
|
import java.util.concurrent.ConcurrentHashMap;
|
|
|
|
|
import java.util.zip.GZIPInputStream;
|
|
|
|
|
|
|
|
|
|
import net.yacy.cora.document.encoding.UTF8;
|
|
|
|
|
import net.yacy.cora.storage.Files;
|
|
|
|
|
import net.yacy.cora.util.ConcurrentLog;
|
|
|
|
|
|
|
|
|
|
import org.apache.commons.lang.StringUtils;
|
|
|
|
|
|
|
|
|
|
import org.mozilla.intl.chardet.nsDetector;
|
|
|
|
|
import org.mozilla.intl.chardet.nsPSMDetector;
|
|
|
|
|
|
|
|
|
|
import net.yacy.cora.document.encoding.UTF8;
|
|
|
|
|
import net.yacy.cora.storage.Files;
|
|
|
|
|
import net.yacy.cora.util.ConcurrentLog;
|
|
|
|
|
|
|
|
|
|
public final class FileUtils {
|
|
|
|
|
|
|
|
|
|
private static final int DEFAULT_BUFFER_SIZE = 1024; // this is also the maximum chunk size
|
|
|
|
@ -106,7 +105,7 @@ public final class FileUtils {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
final byte[] buffer = new byte[DEFAULT_BUFFER_SIZE];
|
|
|
|
|
int chunkSize = (int) ((count > 0) ? Math.min(count, DEFAULT_BUFFER_SIZE) : DEFAULT_BUFFER_SIZE);
|
|
|
|
|
final int chunkSize = (int) ((count > 0) ? Math.min(count, DEFAULT_BUFFER_SIZE) : DEFAULT_BUFFER_SIZE);
|
|
|
|
|
|
|
|
|
|
int c;
|
|
|
|
|
long total = 0;
|
|
|
|
@ -375,7 +374,7 @@ public final class FileUtils {
|
|
|
|
|
/* source input stream must be closed here in all cases */
|
|
|
|
|
try {
|
|
|
|
|
source.close();
|
|
|
|
|
} catch(IOException ignoredException) {
|
|
|
|
|
} catch(final IOException ignoredException) {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return content;
|
|
|
|
@ -480,7 +479,7 @@ public final class FileUtils {
|
|
|
|
|
* @return a set of strings, possibly empty
|
|
|
|
|
*/
|
|
|
|
|
public static HashSet<String> loadList(final File file) {
|
|
|
|
|
final HashSet<String> set = new HashSet<String>();
|
|
|
|
|
final HashSet<String> set = new HashSet<>();
|
|
|
|
|
BufferedReader br = null;
|
|
|
|
|
try {
|
|
|
|
|
br = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
|
|
|
|
@ -516,10 +515,10 @@ public final class FileUtils {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static ConcurrentHashMap<String, byte[]> loadMapB(final File f) {
|
|
|
|
|
ConcurrentHashMap<String, String> m = loadMap(f);
|
|
|
|
|
final ConcurrentHashMap<String, String> m = loadMap(f);
|
|
|
|
|
if (m == null) return null;
|
|
|
|
|
ConcurrentHashMap<String, byte[]> mb = new ConcurrentHashMap<String, byte[]>();
|
|
|
|
|
for (Map.Entry<String, String> e: m.entrySet()) mb.put(e.getKey(), UTF8.getBytes(e.getValue()));
|
|
|
|
|
final ConcurrentHashMap<String, byte[]> mb = new ConcurrentHashMap<>();
|
|
|
|
|
for (final Map.Entry<String, String> e: m.entrySet()) mb.put(e.getKey(), UTF8.getBytes(e.getValue()));
|
|
|
|
|
return mb;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -550,10 +549,7 @@ public final class FileUtils {
|
|
|
|
|
pw.println(key + "=" + value);
|
|
|
|
|
}
|
|
|
|
|
pw.println("# EOF");
|
|
|
|
|
} catch (final FileNotFoundException e ) {
|
|
|
|
|
ConcurrentLog.warn("FileUtils", e.getMessage(), e);
|
|
|
|
|
err = true;
|
|
|
|
|
} catch (final UnsupportedEncodingException e ) {
|
|
|
|
|
} catch (final FileNotFoundException | UnsupportedEncodingException e ) {
|
|
|
|
|
ConcurrentLog.warn("FileUtils", e.getMessage(), e);
|
|
|
|
|
err = true;
|
|
|
|
|
} finally {
|
|
|
|
@ -570,8 +566,8 @@ public final class FileUtils {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static void saveMapB(final File file, final Map<String, byte[]> props, final String comment) {
|
|
|
|
|
HashMap<String, String> m = new HashMap<String, String>();
|
|
|
|
|
for (Map.Entry<String, byte[]> e: props.entrySet()) m.put(e.getKey(), UTF8.String(e.getValue()));
|
|
|
|
|
final HashMap<String, String> m = new HashMap<>();
|
|
|
|
|
for (final Map.Entry<String, byte[]> e: props.entrySet()) m.put(e.getKey(), UTF8.String(e.getValue()));
|
|
|
|
|
saveMap(file, m, comment);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -582,7 +578,7 @@ public final class FileUtils {
|
|
|
|
|
|
|
|
|
|
public static ConcurrentHashMap<String, String> table(final Iterator<String> li) {
|
|
|
|
|
String line;
|
|
|
|
|
final ConcurrentHashMap<String, String> props = new ConcurrentHashMap<String, String>();
|
|
|
|
|
final ConcurrentHashMap<String, String> props = new ConcurrentHashMap<>();
|
|
|
|
|
while ( li.hasNext() ) {
|
|
|
|
|
int pos = 0;
|
|
|
|
|
line = li.next().trim();
|
|
|
|
@ -594,8 +590,8 @@ public final class FileUtils {
|
|
|
|
|
pos = line.indexOf('=', pos + 1);
|
|
|
|
|
} while ( pos > 0 && line.charAt(pos - 1) == '\\' );
|
|
|
|
|
if ( pos > 0 ) try {
|
|
|
|
|
String key = StringUtils.replaceEach(line.substring(0, pos).trim(), escaped_strings_in, unescaped_strings_out);
|
|
|
|
|
String value = StringUtils.replaceEach(line.substring(pos + 1).trim(), escaped_strings_in, unescaped_strings_out);
|
|
|
|
|
final String key = StringUtils.replaceEach(line.substring(0, pos).trim(), escaped_strings_in, unescaped_strings_out);
|
|
|
|
|
final String value = StringUtils.replaceEach(line.substring(pos + 1).trim(), escaped_strings_in, unescaped_strings_out);
|
|
|
|
|
//System.out.println("key = " + key + ", value = " + value);
|
|
|
|
|
props.put(key, value);
|
|
|
|
|
} catch (final IndexOutOfBoundsException e) {
|
|
|
|
@ -606,7 +602,7 @@ public final class FileUtils {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static Map<String, String> table(final byte[] a) {
|
|
|
|
|
if (a == null) return new ConcurrentHashMap<String, String>();
|
|
|
|
|
if (a == null) return new ConcurrentHashMap<>();
|
|
|
|
|
//System.out.println("***TABLE: a.size = " + a.length);
|
|
|
|
|
return table(strings(a));
|
|
|
|
|
}
|
|
|
|
@ -627,7 +623,7 @@ public final class FileUtils {
|
|
|
|
|
*/
|
|
|
|
|
public static ArrayList<String> getListArray(final File listFile) {
|
|
|
|
|
String line;
|
|
|
|
|
final ArrayList<String> list = new ArrayList<String>();
|
|
|
|
|
final ArrayList<String> list = new ArrayList<>();
|
|
|
|
|
BufferedReader br = null;
|
|
|
|
|
try {
|
|
|
|
|
br = new BufferedReader(new InputStreamReader(new FileInputStream(listFile), StandardCharsets.UTF_8));
|
|
|
|
@ -760,7 +756,7 @@ public final class FileUtils {
|
|
|
|
|
* @return list of file names
|
|
|
|
|
*/
|
|
|
|
|
public static List<String> getDirListing(final File dir, final String filter) {
|
|
|
|
|
final List<String> ret = new LinkedList<String>();
|
|
|
|
|
final List<String> ret = new LinkedList<>();
|
|
|
|
|
File[] fileList;
|
|
|
|
|
if ( dir != null ) {
|
|
|
|
|
if ( !dir.exists() ) {
|
|
|
|
@ -789,10 +785,10 @@ public final class FileUtils {
|
|
|
|
|
* @return list of all files passing fileFilter under sourceDir including sub directories
|
|
|
|
|
*/
|
|
|
|
|
public static List<File> getFilesRecursive(final File sourceDir, final String notdir, final FilenameFilter fileNameFilter) {
|
|
|
|
|
List<File> dirList = getDirsRecursive(sourceDir,
|
|
|
|
|
final List<File> dirList = getDirsRecursive(sourceDir,
|
|
|
|
|
notdir);
|
|
|
|
|
dirList.add(sourceDir);
|
|
|
|
|
List<File> files = new ArrayList<>();
|
|
|
|
|
final List<File> files = new ArrayList<>();
|
|
|
|
|
for (final File dir : dirList) {
|
|
|
|
|
Collections.addAll(files, dir.listFiles(fileNameFilter));
|
|
|
|
|
}
|
|
|
|
@ -807,7 +803,7 @@ public final class FileUtils {
|
|
|
|
|
final String notdir,
|
|
|
|
|
final boolean excludeDotfiles) {
|
|
|
|
|
final File[] dirList = dir.listFiles();
|
|
|
|
|
final ArrayList<File> resultList = new ArrayList<File>();
|
|
|
|
|
final ArrayList<File> resultList = new ArrayList<>();
|
|
|
|
|
ArrayList<File> recursive;
|
|
|
|
|
Iterator<File> iter;
|
|
|
|
|
for ( int i = 0; i < dirList.length; i++ ) {
|
|
|
|
@ -986,6 +982,7 @@ public final class FileUtils {
|
|
|
|
|
// deleting files on windows sometimes does not work with java
|
|
|
|
|
try {
|
|
|
|
|
final String command = "cmd /C del /F /Q \"" + p + "\"";
|
|
|
|
|
@SuppressWarnings("deprecation")
|
|
|
|
|
final Process r = Runtime.getRuntime().exec(command);
|
|
|
|
|
if ( r == null ) {
|
|
|
|
|
ConcurrentLog.severe("FileUtils", "cannot execute command: " + command);
|
|
|
|
@ -1040,8 +1037,8 @@ public final class FileUtils {
|
|
|
|
|
public static List<String> detectCharset(final InputStream inStream) throws IOException {
|
|
|
|
|
// auto-detect charset, used code from http://jchardet.sourceforge.net/; see also: http://www-archive.mozilla.org/projects/intl/chardet.html
|
|
|
|
|
List<String> result;
|
|
|
|
|
nsDetector det = new nsDetector(nsPSMDetector.ALL);
|
|
|
|
|
byte[] buf = new byte[1024] ;
|
|
|
|
|
final nsDetector det = new nsDetector(nsPSMDetector.ALL);
|
|
|
|
|
final byte[] buf = new byte[1024] ;
|
|
|
|
|
int len;
|
|
|
|
|
boolean done = false ;
|
|
|
|
|
boolean isAscii = true ;
|
|
|
|
@ -1057,7 +1054,7 @@ public final class FileUtils {
|
|
|
|
|
if (isAscii) {
|
|
|
|
|
result.add(StandardCharsets.US_ASCII.name());
|
|
|
|
|
} else {
|
|
|
|
|
for (String c: det.getProbableCharsets()) result.add(c); // worst case this returns "nomatch"
|
|
|
|
|
for (final String c: det.getProbableCharsets()) result.add(c); // worst case this returns "nomatch"
|
|
|
|
|
}
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
@ -1071,18 +1068,18 @@ public final class FileUtils {
|
|
|
|
|
* @param concurrent if this shall run concurrently
|
|
|
|
|
*/
|
|
|
|
|
public static void checkCharset(final File file, final String givenCharset, final boolean concurrent) {
|
|
|
|
|
Thread t = new Thread("FileUtils.checkCharset") {
|
|
|
|
|
final Thread t = new Thread("FileUtils.checkCharset") {
|
|
|
|
|
@Override
|
|
|
|
|
public void run() {
|
|
|
|
|
try (final FileInputStream fileStream = new FileInputStream(file);
|
|
|
|
|
final BufferedInputStream imp = new BufferedInputStream(fileStream)) { // try-with-resource to close resources
|
|
|
|
|
List<String> charsets = FileUtils.detectCharset(imp);
|
|
|
|
|
final List<String> charsets = FileUtils.detectCharset(imp);
|
|
|
|
|
if (charsets.contains(givenCharset)) {
|
|
|
|
|
ConcurrentLog.info("checkCharset", "appropriate charset '" + givenCharset + "' for import of " + file + ", is part one detected " + charsets);
|
|
|
|
|
} else {
|
|
|
|
|
ConcurrentLog.warn("checkCharset", "possibly wrong charset '" + givenCharset + "' for import of " + file + ", use one of " + charsets);
|
|
|
|
|
}
|
|
|
|
|
} catch (IOException e) {}
|
|
|
|
|
} catch (final IOException e) {}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|