add InputStream close after reading input file (Vocabulary_p servlet)

pull/54/head
reger 9 years ago
parent 2422626975
commit 4cc38e979d

@ -101,45 +101,46 @@ public class Vocabulary_p {
discoverFromCSVCharset = charsets.get(0); discoverFromCSVCharset = charsets.get(0);
ConcurrentLog.info("FileUtils", "detected charset: " + discoverFromCSVCharset + " used to read " + discoverFromCSVFile.toString()); ConcurrentLog.info("FileUtils", "detected charset: " + discoverFromCSVCharset + " used to read " + discoverFromCSVFile.toString());
} }
// read file // read file (try-with-resource to close inputstream automatically)
BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(discoverFromCSVFile), discoverFromCSVCharset)); try (BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(discoverFromCSVFile), discoverFromCSVCharset))) {
String line = null; String line = null;
Pattern semicolon = Pattern.compile(";"); Pattern semicolon = Pattern.compile(";");
Map<String, String> synonym2literal = new HashMap<>(); // helper map to check if there are double synonyms Map<String, String> synonym2literal = new HashMap<>(); // helper map to check if there are double synonyms
while ((line = r.readLine()) != null) { while ((line = r.readLine()) != null) {
if (line.length() == 0) continue; if (line.length() == 0) continue;
String[] l = semicolon.split(line); String[] l = semicolon.split(line);
if (l.length == 0) l = new String[]{line}; if (l.length == 0) l = new String[]{line};
String literal = discovercolumnliteral < 0 || l.length <= discovercolumnliteral ? null : l[discovercolumnliteral].trim(); String literal = discovercolumnliteral < 0 || l.length <= discovercolumnliteral ? null : l[discovercolumnliteral].trim();
if (literal == null) continue; if (literal == null) continue;
literal = normalizeLiteral(literal); literal = normalizeLiteral(literal);
String objectlink = discovercolumnobjectlink < 0 || l.length <= discovercolumnobjectlink ? null : l[discovercolumnobjectlink].trim(); String objectlink = discovercolumnobjectlink < 0 || l.length <= discovercolumnobjectlink ? null : l[discovercolumnobjectlink].trim();
if (literal.length() > 0) { if (literal.length() > 0) {
String synonyms = ""; String synonyms = "";
if (discoverenrichsynonyms) { if (discoverenrichsynonyms) {
Set<String> sy = SynonymLibrary.getSynonyms(literal); Set<String> sy = SynonymLibrary.getSynonyms(literal);
if (sy != null) { if (sy != null) {
for (String s: sy) synonyms += "," + s; for (String s: sy) synonyms += "," + s;
}
} else if (discoverreadcolumn) {
synonyms = discovercolumnsynonyms < 0 || l.length <= discovercolumnsynonyms ? null : l[discovercolumnsynonyms].trim();
synonyms = normalizeLiteral(synonyms);
} else {
synonyms = Tagging.normalizeTerm(literal);
} }
} else if (discoverreadcolumn) { // check double synonyms
synonyms = discovercolumnsynonyms < 0 || l.length <= discovercolumnsynonyms ? null : l[discovercolumnsynonyms].trim(); if (synonyms.length() > 0) {
synonyms = normalizeLiteral(synonyms); String oldliteral = synonym2literal.get(synonyms);
} else { if (oldliteral != null && !literal.equals(oldliteral)) {
synonyms = Tagging.normalizeTerm(literal); // replace old entry with combined new
} table.remove(oldliteral);
// check double synonyms String newliteral = oldliteral + "," + literal;
if (synonyms.length() > 0) { literal = newliteral;
String oldliteral = synonym2literal.get(synonyms); }
if (oldliteral != null && !literal.equals(oldliteral)) { synonym2literal.put(synonyms, literal);
// replace old entry with combined new
table.remove(oldliteral);
String newliteral = oldliteral + "," + literal;
literal = newliteral;
} }
synonym2literal.put(synonyms, literal); // store term
table.put(literal, new Tagging.SOTuple(synonyms, objectlink == null ? "" : objectlink));
} }
// store term
table.put(literal, new Tagging.SOTuple(synonyms, objectlink == null ? "" : objectlink));
} }
} }
} else { } else {

@ -933,26 +933,24 @@ public final class FileUtils {
*/ */
public static List<String> detectCharset(File file) throws IOException { public static List<String> detectCharset(File file) throws IOException {
// auto-detect charset, used code from http://jchardet.sourceforge.net/; see also: http://www-archive.mozilla.org/projects/intl/chardet.html // auto-detect charset, used code from http://jchardet.sourceforge.net/; see also: http://www-archive.mozilla.org/projects/intl/chardet.html
nsDetector det = new nsDetector(nsPSMDetector.ALL); List<String> result;
BufferedInputStream imp = new BufferedInputStream(new FileInputStream(file)); try (BufferedInputStream imp = new BufferedInputStream(new FileInputStream(file))) { // try-with-resource to close inputstream
nsDetector det = new nsDetector(nsPSMDetector.ALL);
byte[] buf = new byte[1024] ; byte[] buf = new byte[1024] ;
int len; int len;
boolean done = false ; boolean done = false ;
boolean isAscii = true ; boolean isAscii = true ;
while ((len = imp.read(buf,0,buf.length)) != -1) {
while ((len = imp.read(buf,0,buf.length)) != -1) { if (isAscii) isAscii = det.isAscii(buf,len);
if (isAscii) isAscii = det.isAscii(buf,len); if (!isAscii && !done) done = det.DoIt(buf,len, false);
if (!isAscii && !done) done = det.DoIt(buf,len, false); } det.DataEnd();
} result = new ArrayList<>();
det.DataEnd(); if (isAscii) {
List<String> result = new ArrayList<>(); result.add(StandardCharsets.US_ASCII.name());
if (isAscii) { } else {
result.add(StandardCharsets.US_ASCII.name()); for (String c: det.getProbableCharsets()) result.add(c); // worst case this returns "nomatch"
} else { }
for (String c: det.getProbableCharsets()) result.add(c); // worst case this returns "nomatch"
} }
return result; return result;
} }

Loading…
Cancel
Save