- fixed UnsupportedEncodingException (in proxy): fall back to a default charset if the character encoding is missing or not supported on this machine

- serverFileUtils.copy* methods now take a Charset instead of a String
- added log warnings for previously ignored exceptions and failed file deletions


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5043 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
danielr 17 years ago
parent 3ac1988059
commit 8422ee5ec4

@ -46,6 +46,7 @@ import de.anomic.index.indexReferenceBlacklist;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyURL;
@ -79,7 +80,7 @@ prop.putHTML("asd", "0");
yacyURL testurl = null;
try {
testurl = new yacyURL(urlstring, null);
} catch (final MalformedURLException e) { }
} catch (final MalformedURLException e) { testurl = null; }
if(testurl != null) {
prop.putHTML("testlist_url",testurl.toString());
if(plasmaSwitchboard.urlBlacklist.isListed(indexReferenceBlacklist.BLACKLIST_CRAWLER, testurl))
@ -139,7 +140,9 @@ prop.putHTML("asd", "0");
}
final File BlackListFile = new File(listManager.listsPath, blacklistToUse);
BlackListFile.delete();
if(!BlackListFile.delete()) {
serverLog.logWarning("Blacklist", "file "+ BlackListFile +" could not be deleted!");
}
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
listManager.removeFromListSet(supportedBlacklistTypes[blTypes] + ".BlackLists",blacklistToUse);
@ -407,7 +410,7 @@ prop.putHTML("asd", "0");
} catch (final IOException e) {
e.printStackTrace();
} finally {
if (pw != null) try { pw.close(); } catch (final Exception e){ /* */}
if (pw != null) try { pw.close(); } catch (final Exception e){ serverLog.logWarning("Blacklist", "could not close stream to "+ blacklistToUse +"! "+ e.getMessage());}
}
// add to blacklist

@ -35,6 +35,7 @@ import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
@ -139,7 +140,7 @@ public class CacheAdmin_p {
String sourceCharset = resInfo.getCharacterEncoding();
if (sourceCharset == null) sourceCharset = "UTF-8";
final String mimeType = resInfo.getMimeType();
serverFileUtils.copy(file, sourceCharset, writer);
serverFileUtils.copy(file, Charset.forName(sourceCharset), writer);
writer.close();
final plasmaParserDocument document = switchboard.parser.transformScraper(url, mimeType, sourceCharset, scraper);

@ -32,6 +32,7 @@ import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.net.MalformedURLException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@ -485,7 +486,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
// scrape content
final htmlFilterContentScraper scraper = new htmlFilterContentScraper(new yacyURL("http://localhost", null));
final Writer writer = new htmlFilterWriter(null, null, scraper, null, false);
serverFileUtils.copy(new ByteArrayInputStream(page), writer, "UTF-8");
serverFileUtils.copy(new ByteArrayInputStream(page), writer, Charset.forName("UTF-8"));
return scraper;
}
@ -500,7 +501,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
// scrape content
final htmlFilterContentScraper scraper = new htmlFilterContentScraper(location);
final Writer writer = new htmlFilterWriter(null, null, scraper, null, false);
serverFileUtils.copy(new ByteArrayInputStream(page), writer, "UTF-8");
serverFileUtils.copy(new ByteArrayInputStream(page), writer, Charset.forName("UTF-8"));
return scraper;
}

@ -41,6 +41,7 @@ import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.net.MalformedURLException;
import java.nio.charset.Charset;
import java.util.Enumeration;
import java.util.Properties;
@ -73,11 +74,11 @@ public final class htmlFilterWriter extends Writer {
public htmlFilterWriter(
final OutputStream outStream,
final String outputStreamCharset,
final Charset charSet,
final htmlFilterScraper scraper,
final htmlFilterTransformer transformer,
final boolean passbyIfBinarySuspect
) throws UnsupportedEncodingException {
) {
this.outStream = outStream;
this.scraper = scraper;
this.transformer = transformer;
@ -93,7 +94,7 @@ public final class htmlFilterWriter extends Writer {
this.passbyIfBinarySuspect = passbyIfBinarySuspect;
if (this.outStream != null) {
this.out = new OutputStreamWriter(this.outStream,(outputStreamCharset == null)?"UTF-8":outputStreamCharset);
this.out = new OutputStreamWriter(this.outStream,(charSet == null)?Charset.defaultCharset():charSet);
}
}
@ -558,7 +559,7 @@ public final class htmlFilterWriter extends Writer {
final htmlFilterTransformer transformer = new htmlFilterContentTransformer();
final Reader is = new FileReader(args[0]);
final FileOutputStream fos = new FileOutputStream(new File(args[0] + ".out"));
final Writer os = new htmlFilterWriter(fos, "UTF-8",scraper, transformer, false);
final Writer os = new htmlFilterWriter(fos, Charset.forName("UTF-8"),scraper, transformer, false);
int i;
while ((i = is.read(buffer)) > 0) os.write(buffer, 0, i);
os.close();

@ -42,6 +42,7 @@ import java.io.FileReader;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import java.text.Collator;
import java.util.Date;
import java.util.HashMap;
@ -54,6 +55,7 @@ import java.util.Vector;
import de.anomic.server.serverCore;
import de.anomic.server.serverDate;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyURL;
@ -946,13 +948,20 @@ public final class httpHeader extends TreeMap<String, String> implements Map<Str
/**
* @param header
* @return
* @return a supported Charset, so data can be encoded (may not be correct)
*/
static String getCharSet(final httpHeader header) {
String charSet = header.getCharacterEncoding();
if (charSet == null) {
charSet = DEFAULT_CHARSET;
static Charset getCharSet(final httpHeader header) {
String charSetName = header.getCharacterEncoding();
if (charSetName == null) {
// no character encoding is sent by the server
charSetName = DEFAULT_CHARSET;
}
return charSet;
// maybe the charset is valid but not installed on this computer
if(!Charset.isSupported(charSetName)) {
serverLog.logWarning("httpHeader", "charset '"+ charSetName +"' is not supported on this machine, using default ("+ Charset.defaultCharset().name() +")");
// use system default
return Charset.defaultCharset();
}
return Charset.forName(charSetName);
}
}

@ -65,6 +65,7 @@ import java.net.NoRouteToHostException;
import java.net.Socket;
import java.net.SocketTimeoutException;
import java.net.UnknownHostException;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Date;
import java.util.HashSet;
@ -549,12 +550,12 @@ public final class httpdProxyHandler {
// make a transformer
theLogger.logFine(reqID +" create transformer for URL " + url);
//hfos = new htmlFilterOutputStream((gzippedOut != null) ? gzippedOut : ((chunkedOut != null)? chunkedOut : respond), null, transformer, (ext.length() == 0));
final String charSet = httpHeader.getCharSet(responseHeader);
final Charset charSet = httpHeader.getCharSet(responseHeader);
hfos = new htmlFilterWriter(outStream,charSet, null, transformer, (ext.length() == 0));
} else {
// simply pass through without parsing
theLogger.logFine(reqID +" create passthrough for URL " + url + ", extension '" + ext + "', mime-type '" + responseHeader.mime() + "'");
hfos = new OutputStreamWriter(outStream, httpHeader.getCharSet(res.getResponseHeader()));
hfos = new OutputStreamWriter(outStream, httpHeader.getCharSet(responseHeader));
}
// handle incoming cookies
@ -757,7 +758,7 @@ public final class httpdProxyHandler {
//respondHeader(respond, "203 OK", cachedResponseHeader); // respond with 'non-authoritative'
// determine the content charset
final String charSet = httpHeader.getCharSet(cachedResponseHeader);
final Charset charSet = httpHeader.getCharSet(cachedResponseHeader);
// make a transformer
final OutputStream outStream = (gzippedOut != null) ? gzippedOut : ((chunkedOut != null)? chunkedOut : respond);
@ -801,7 +802,7 @@ public final class httpdProxyHandler {
try {
final InputStream data = res.getDataAsStream();
if (data == null) return;
final String charSet = httpHeader.getCharSet(res.getResponseHeader());
final Charset charSet = httpHeader.getCharSet(res.getResponseHeader());
serverFileUtils.copyToWriter(new BufferedInputStream(data), hfos, charSet);
} finally {
res.closeStream();
@ -813,7 +814,7 @@ public final class httpdProxyHandler {
try {
final InputStream data = res.getDataAsStream();
if (data == null) return;
final String charSet = httpHeader.getCharSet(res.getResponseHeader());
final Charset charSet = httpHeader.getCharSet(res.getResponseHeader());
serverFileUtils.copyToWriters(new BufferedInputStream(data), hfos, new BufferedWriter(new OutputStreamWriter(byteStream, charSet)) , charSet);
} finally {
res.closeStream();

@ -30,6 +30,7 @@ import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.zip.ZipEntry;
@ -117,7 +118,7 @@ public class odtParser extends AbstractParser implements Parser {
// extract data
final InputStream zipFileEntryStream = zipFile.getInputStream(zipEntry);
final OpenDocumentTextInputStream odStream = new OpenDocumentTextInputStream(zipFileEntryStream);
serverFileUtils.copy(odStream, writer, "UTF-8");
serverFileUtils.copy(odStream, writer, Charset.forName("UTF-8"));
// close readers and writers
odStream.close();

@ -28,6 +28,7 @@ import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.LinkedList;
@ -124,7 +125,7 @@ public class rssParser extends AbstractParser implements Parser {
final htmlFilterContentScraper scraper = new htmlFilterContentScraper(itemURL);
final Writer writer = new htmlFilterWriter(null, null, scraper, null, false);
serverFileUtils.copy(new ByteArrayInputStream(itemContent.getBytes("UTF-8")), writer, "UTF-8");
serverFileUtils.copy(new ByteArrayInputStream(itemContent.getBytes("UTF-8")), writer, Charset.forName("UTF-8"));
final String itemHeadline = scraper.getTitle();
if ((itemHeadline != null) && (itemHeadline.length() > 0)) {

@ -35,6 +35,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@ -704,7 +705,7 @@ public final class plasmaParser {
// parsing the content
final htmlFilterContentScraper scraper = new htmlFilterContentScraper(location);
final htmlFilterWriter writer = new htmlFilterWriter(null,null,scraper,null,false);
serverFileUtils.copy(htmlFilter, writer, charset);
serverFileUtils.copy(htmlFilter, writer, Charset.forName(charset));
writer.close();
//OutputStream hfos = new htmlFilterOutputStream(null, scraper, null, false);
//serverFileUtils.copy(sourceFile, hfos);

@ -38,6 +38,7 @@ import java.io.PrintWriter;
import java.io.Reader;
import java.io.Serializable;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
@ -98,7 +99,7 @@ public final class serverFileUtils {
return total;
}
public static int copy(final File source, final String inputCharset, final Writer dest) throws IOException {
public static int copy(final File source, final Charset inputCharset, final Writer dest) throws IOException {
InputStream fis = null;
try {
fis = new FileInputStream(source);
@ -108,7 +109,7 @@ public final class serverFileUtils {
}
}
public static int copy(final InputStream source, final Writer dest, final String inputCharset) throws IOException {
public static int copy(final InputStream source, final Writer dest, final Charset inputCharset) throws IOException {
final InputStreamReader reader = new InputStreamReader(source,inputCharset);
return copy(reader,dest);
}
@ -158,7 +159,7 @@ public final class serverFileUtils {
fos = new FileOutputStream(dest);
copy(source, fos, count);
} finally {
if (fos != null) try {fos.close();} catch (final Exception e) {}
if (fos != null) try {fos.close();} catch (final Exception e) { serverLog.logWarning("FileUtils", "cannot close FileOutputStream for "+ dest +"! "+ e.getMessage()); }
}
}
@ -393,8 +394,7 @@ public final class serverFileUtils {
}
pw.println("# EOF");
pw.close();
file.delete();
tf.renameTo(file);
forceMove(tf, file);
}
public static Set<String> loadSet(final File file, final int chunksize, final boolean tree) throws IOException {
@ -437,8 +437,7 @@ public final class serverFileUtils {
}
os.close();
}
file.delete();
tf.renameTo(file);
forceMove(tf, file);
}
public static void saveSet(final File file, final String format, final kelondroRowSet set, final String sep) throws IOException {
@ -469,8 +468,21 @@ public final class serverFileUtils {
}
os.close();
}
file.delete();
tf.renameTo(file);
forceMove(tf, file);
}
/**
 * Moves <code>from</code> to <code>to</code>, replacing <code>to</code> if it already exists.
 * A plain rename is tried first; if the old target cannot be removed or the rename fails,
 * the content is copied instead and the source file is deleted afterwards.
 *
 * @param from the file to move
 * @param to the destination; an existing file at this location is replaced
 * @throws IOException if the fallback copy fails
 */
private static void forceMove(final File from, final File to) throws IOException {
    // File.delete() returns false for a file that does not exist, so a missing target must
    // not be treated as a failure - otherwise every move to a new file would skip the cheap
    // rename and fall through to the byte copy below
    final boolean targetCleared = !to.exists() || to.delete();
    if (!(targetCleared && from.renameTo(to))) {
        // do it manually
        copy(from, to);
        if (!from.delete()) {
            // source could not be removed right now; let the JVM remove it on exit
            from.deleteOnExit();
        }
    }
}
/**
@ -567,7 +579,7 @@ public final class serverFileUtils {
* @return
* @throws IOException
*/
public static int copyToWriter(final BufferedInputStream data, final BufferedWriter writer, final String charSet) throws IOException {
public static int copyToWriter(final BufferedInputStream data, final BufferedWriter writer, final Charset charSet) throws IOException {
// the docs say: "For top efficiency, consider wrapping an InputStreamReader within a BufferedReader."
final Reader sourceReader = new InputStreamReader(data, charSet);
@ -581,7 +593,8 @@ public final class serverFileUtils {
writer.flush();
return count;
}
public static int copyToWriters(final BufferedInputStream data, final BufferedWriter writer0, final BufferedWriter writer1, final String charSet) throws IOException {
public static int copyToWriters(final BufferedInputStream data, final BufferedWriter writer0, final BufferedWriter writer1, final Charset charSet) throws IOException {
// the docs say: "For top efficiency, consider wrapping an InputStreamReader within a BufferedReader."
assert writer0 != null;
assert writer1 != null;

Loading…
Cancel
Save