From 8422ee5ec4ae8c0a200b40b3e89f09ed47fcff65 Mon Sep 17 00:00:00 2001 From: danielr Date: Sat, 9 Aug 2008 12:00:31 +0000 Subject: [PATCH] - fixed UnsupportedEncoding (in proxy) using defaultCharset if no characterEncoding can be determined - serverFileUtils.copy* use now Charset instead of String - added some warnings for ignored exceptions git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5043 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/Blacklist_p.java | 9 +++-- htroot/CacheAdmin_p.java | 3 +- .../htmlFilter/htmlFilterContentScraper.java | 5 +-- .../anomic/htmlFilter/htmlFilterWriter.java | 9 ++--- source/de/anomic/http/httpHeader.java | 21 +++++++---- source/de/anomic/http/httpdProxyHandler.java | 11 +++--- .../anomic/plasma/parser/odt/odtParser.java | 3 +- .../anomic/plasma/parser/rss/rssParser.java | 3 +- source/de/anomic/plasma/plasmaParser.java | 3 +- source/de/anomic/server/serverFileUtils.java | 35 +++++++++++++------ 10 files changed, 67 insertions(+), 35 deletions(-) diff --git a/htroot/Blacklist_p.java b/htroot/Blacklist_p.java index 8248a55b5..1675b38cd 100644 --- a/htroot/Blacklist_p.java +++ b/htroot/Blacklist_p.java @@ -46,6 +46,7 @@ import de.anomic.index.indexReferenceBlacklist; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; +import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacySeed; import de.anomic.yacy.yacyURL; @@ -79,7 +80,7 @@ prop.putHTML("asd", "0"); yacyURL testurl = null; try { testurl = new yacyURL(urlstring, null); - } catch (final MalformedURLException e) { } + } catch (final MalformedURLException e) { testurl = null; } if(testurl != null) { prop.putHTML("testlist_url",testurl.toString()); if(plasmaSwitchboard.urlBlacklist.isListed(indexReferenceBlacklist.BLACKLIST_CRAWLER, testurl)) @@ -139,7 +140,9 @@ prop.putHTML("asd", "0"); } final File BlackListFile = new File(listManager.listsPath, blacklistToUse); - BlackListFile.delete(); + if(!BlackListFile.delete()) { + serverLog.logWarning("Blacklist", "file "+ BlackListFile +" could not be deleted!"); + } for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) { listManager.removeFromListSet(supportedBlacklistTypes[blTypes] + ".BlackLists",blacklistToUse); @@ -407,7 +410,7 @@ prop.putHTML("asd", "0"); } catch (final IOException e) { e.printStackTrace(); } finally { - if (pw != null) try { pw.close(); } catch (final Exception e){ /* */} + if (pw != null) try { pw.close(); } catch (final Exception e){ serverLog.logWarning("Blacklist", "could not close stream to "+ blacklistToUse +"! "+ e.getMessage());} } // add to blacklist diff --git a/htroot/CacheAdmin_p.java b/htroot/CacheAdmin_p.java index b3acc898e..f7826f34d 100644 --- a/htroot/CacheAdmin_p.java +++ b/htroot/CacheAdmin_p.java @@ -35,6 +35,7 @@ import java.io.File; import java.io.FilenameFilter; import java.io.IOException; import java.io.Writer; +import java.nio.charset.Charset; import java.util.HashMap; import java.util.Iterator; import java.util.Map; @@ -139,7 +140,7 @@ public class CacheAdmin_p { String sourceCharset = resInfo.getCharacterEncoding(); if (sourceCharset == null) sourceCharset = "UTF-8"; final String mimeType = resInfo.getMimeType(); - serverFileUtils.copy(file, sourceCharset, writer); + serverFileUtils.copy(file, Charset.forName(sourceCharset), writer); writer.close(); final plasmaParserDocument document = switchboard.parser.transformScraper(url, mimeType, sourceCharset, scraper); diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java index b04b5a5d3..1c2b14a91 100644 --- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java +++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java @@ -32,6 +32,7 @@ import java.io.IOException; import java.io.UnsupportedEncodingException; import java.io.Writer; import java.net.MalformedURLException; +import java.nio.charset.Charset; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -485,7 +486,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen // scrape content final htmlFilterContentScraper scraper = new htmlFilterContentScraper(new yacyURL("http://localhost", null)); final Writer writer = new htmlFilterWriter(null, null, scraper, null, false); - serverFileUtils.copy(new ByteArrayInputStream(page), writer, "UTF-8"); + serverFileUtils.copy(new ByteArrayInputStream(page), writer, Charset.forName("UTF-8")); return scraper; } @@ -500,7 +501,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen // scrape content final htmlFilterContentScraper scraper = new htmlFilterContentScraper(location); final Writer writer = new htmlFilterWriter(null, null, scraper, null, false); - serverFileUtils.copy(new ByteArrayInputStream(page), writer, "UTF-8"); + serverFileUtils.copy(new ByteArrayInputStream(page), writer, Charset.forName("UTF-8")); return scraper; } diff --git a/source/de/anomic/htmlFilter/htmlFilterWriter.java b/source/de/anomic/htmlFilter/htmlFilterWriter.java index b8c058440..049873aad 100644 --- a/source/de/anomic/htmlFilter/htmlFilterWriter.java +++ b/source/de/anomic/htmlFilter/htmlFilterWriter.java @@ -41,6 +41,7 @@ import java.io.Reader; import java.io.UnsupportedEncodingException; import java.io.Writer; import java.net.MalformedURLException; +import java.nio.charset.Charset; import java.util.Enumeration; import java.util.Properties; @@ -73,11 +74,11 @@ public final class htmlFilterWriter extends Writer { public htmlFilterWriter( final OutputStream outStream, - final String outputStreamCharset, + final Charset charSet, final htmlFilterScraper scraper, final htmlFilterTransformer transformer, final boolean passbyIfBinarySuspect - ) throws UnsupportedEncodingException { + ) { this.outStream = outStream; this.scraper = scraper; this.transformer = transformer; @@ -93,7 +94,7 @@ public final class htmlFilterWriter extends Writer { this.passbyIfBinarySuspect = passbyIfBinarySuspect; if (this.outStream != null) { - this.out = new OutputStreamWriter(this.outStream,(outputStreamCharset == null)?"UTF-8":outputStreamCharset); + this.out = new OutputStreamWriter(this.outStream,(charSet == null)?Charset.defaultCharset():charSet); } } @@ -558,7 +559,7 @@ public final class htmlFilterWriter extends Writer { final htmlFilterTransformer transformer = new htmlFilterContentTransformer(); final Reader is = new FileReader(args[0]); final FileOutputStream fos = new FileOutputStream(new File(args[0] + ".out")); - final Writer os = new htmlFilterWriter(fos, "UTF-8",scraper, transformer, false); + final Writer os = new htmlFilterWriter(fos, Charset.forName("UTF-8"),scraper, transformer, false); int i; while ((i = is.read(buffer)) > 0) os.write(buffer, 0, i); os.close(); diff --git a/source/de/anomic/http/httpHeader.java b/source/de/anomic/http/httpHeader.java index da5e50704..e65507441 100644 --- a/source/de/anomic/http/httpHeader.java +++ b/source/de/anomic/http/httpHeader.java @@ -42,6 +42,7 @@ import java.io.FileReader; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.nio.charset.Charset; import java.text.Collator; import java.util.Date; import java.util.HashMap; @@ -54,6 +55,7 @@ import java.util.Vector; import de.anomic.server.serverCore; import de.anomic.server.serverDate; +import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacyURL; @@ -946,13 +948,20 @@ public final class httpHeader extends TreeMap implements Map 0)) { diff --git a/source/de/anomic/plasma/plasmaParser.java b/source/de/anomic/plasma/plasmaParser.java index 9ab5cb6f8..56ad37f46 100644 --- a/source/de/anomic/plasma/plasmaParser.java +++ b/source/de/anomic/plasma/plasmaParser.java @@ -35,6 +35,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; +import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -704,7 +705,7 @@ public final class plasmaParser { // parsing the content final htmlFilterContentScraper scraper = new htmlFilterContentScraper(location); final htmlFilterWriter writer = new htmlFilterWriter(null,null,scraper,null,false); - serverFileUtils.copy(htmlFilter, writer, charset); + serverFileUtils.copy(htmlFilter, writer, Charset.forName(charset)); writer.close(); //OutputStream hfos = new htmlFilterOutputStream(null, scraper, null, false); //serverFileUtils.copy(sourceFile, hfos); diff --git a/source/de/anomic/server/serverFileUtils.java b/source/de/anomic/server/serverFileUtils.java index 634051277..93490eee0 100644 --- a/source/de/anomic/server/serverFileUtils.java +++ b/source/de/anomic/server/serverFileUtils.java @@ -38,6 +38,7 @@ import java.io.PrintWriter; import java.io.Reader; import java.io.Serializable; import java.io.Writer; +import java.nio.charset.Charset; import java.util.Comparator; import java.util.HashSet; import java.util.Iterator; @@ -98,7 +99,7 @@ public final class serverFileUtils { return total; } - public static int copy(final File source, final String inputCharset, final Writer dest) throws IOException { + public static int copy(final File source, final Charset inputCharset, final Writer dest) throws IOException { InputStream fis = null; try { fis = new FileInputStream(source); @@ -108,7 +109,7 @@ public final class serverFileUtils { } } - public static int copy(final InputStream source, final Writer dest, final String inputCharset) throws IOException { + public static int copy(final InputStream source, final Writer dest, final Charset inputCharset) throws IOException { final InputStreamReader reader = new InputStreamReader(source,inputCharset); return copy(reader,dest); } @@ -158,7 +159,7 @@ public final class serverFileUtils { fos = new FileOutputStream(dest); copy(source, fos, count); } finally { - if (fos != null) try {fos.close();} catch (final Exception e) {} + if (fos != null) try {fos.close();} catch (final Exception e) { serverLog.logWarning("FileUtils", "cannot close FileOutputStream for "+ dest +"! "+ e.getMessage()); } } } @@ -393,8 +394,7 @@ public final class serverFileUtils { } pw.println("# EOF"); pw.close(); - file.delete(); - tf.renameTo(file); + forceMove(tf, file); } public static Set loadSet(final File file, final int chunksize, final boolean tree) throws IOException { @@ -437,8 +437,7 @@ public final class serverFileUtils { } os.close(); } - file.delete(); - tf.renameTo(file); + forceMove(tf, file); } public static void saveSet(final File file, final String format, final kelondroRowSet set, final String sep) throws IOException { @@ -469,8 +468,21 @@ public final class serverFileUtils { } os.close(); } - file.delete(); - tf.renameTo(file); + forceMove(tf, file); + } + + /** + * @param from + * @param to + * @throws IOException + */ + private static void forceMove(final File from, final File to) throws IOException { + if(!(to.delete() && from.renameTo(to))) { + // do it manually + copy(from, to); + if(!from.delete()) + from.deleteOnExit(); + } } /** @@ -567,7 +579,7 @@ public final class serverFileUtils { * @return * @throws IOException */ - public static int copyToWriter(final BufferedInputStream data, final BufferedWriter writer, final String charSet) throws IOException { + public static int copyToWriter(final BufferedInputStream data, final BufferedWriter writer, final Charset charSet) throws IOException { // the docs say: "For top efficiency, consider wrapping an InputStreamReader within a BufferedReader." final Reader sourceReader = new InputStreamReader(data, charSet); @@ -581,7 +593,8 @@ public final class serverFileUtils { writer.flush(); return count; } - public static int copyToWriters(final BufferedInputStream data, final BufferedWriter writer0, final BufferedWriter writer1, final String charSet) throws IOException { + + public static int copyToWriters(final BufferedInputStream data, final BufferedWriter writer0, final BufferedWriter writer1, final Charset charSet) throws IOException { // the docs say: "For top efficiency, consider wrapping an InputStreamReader within a BufferedReader." assert writer0 != null; assert writer1 != null;