diff --git a/htroot/CacheResource_p.java b/htroot/CacheResource_p.java index e5536e168..ac151bdf1 100644 --- a/htroot/CacheResource_p.java +++ b/htroot/CacheResource_p.java @@ -27,6 +27,7 @@ import java.net.MalformedURLException; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.federate.yacy.CacheStrategy; @@ -87,10 +88,17 @@ public class CacheResource_p { // because for display a servlet html variable is use // which is internally processed using utf-8, we need to convert encoding of cached resource - final String charset = responseHeader.getCharacterEncoding(); - if (charset != null && !charset.equalsIgnoreCase("utf-8")) { - CharBuffer cb = Charset.forName(charset).decode(ByteBuffer.wrap(resource)); - ByteBuffer x = Charset.forName("UTF-16").encode(cb); // encode to a default java string (which uses utf-16 and is handled correct for servlet content) + final String charsetName = responseHeader.getCharacterEncoding(); + if (charsetName != null && !charsetName.equalsIgnoreCase(StandardCharsets.UTF_8.name())) { + Charset decoderCharset; + /* Specified charset might be incorrect or not supported */ + if(Charset.isSupported(charsetName)) { + decoderCharset = Charset.forName(charsetName); + } else { + decoderCharset = StandardCharsets.UTF_8; + } + CharBuffer cb = decoderCharset.decode(ByteBuffer.wrap(resource)); + ByteBuffer x = StandardCharsets.UTF_16.encode(cb); // encode to a default java string (which uses utf-16 and is handled correct for servlet content) prop.put("resource", x.asCharBuffer().toString()); } else { diff --git a/htroot/Vocabulary_p.java b/htroot/Vocabulary_p.java index 3050ebde7..fe1a6c813 100644 --- a/htroot/Vocabulary_p.java +++ b/htroot/Vocabulary_p.java @@ -25,6 +25,7 @@ import java.io.IOException; import java.io.InputStreamReader; import java.net.MalformedURLException; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.Collection; import java.util.HashMap; import java.util.Iterator; @@ -82,7 +83,7 @@ public class Vocabulary_p { final boolean discoverFromAuthor = post.get("discovermethod", "").equals("author"); final boolean discoverFromCSV = post.get("discovermethod", "").equals("csv"); final String discoverFromCSVPath = post.get("discoverpath", "").replaceAll("%20", " "); - final String discoverFromCSVCharset = post.get("charset", "UTF-8"); + final String discoverFromCSVCharset = post.get("charset", StandardCharsets.UTF_8.name()); final int discovercolumnliteral = post.getInt("discovercolumnliteral", 0); final int discovercolumnsynonyms = post.getInt("discovercolumnsynonyms", -1); final int discovercolumnobjectlink = post.getInt("discovercolumnobjectlink", -1); diff --git a/htroot/api/ymarks/import_ymark.java b/htroot/api/ymarks/import_ymark.java index 9dc4a7de9..5fa57b101 100644 --- a/htroot/api/ymarks/import_ymark.java +++ b/htroot/api/ymarks/import_ymark.java @@ -5,13 +5,15 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; -import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; +import java.nio.charset.StandardCharsets; import java.util.Iterator; import java.util.concurrent.ArrayBlockingQueue; import java.util.regex.Pattern; import java.util.zip.GZIPInputStream; +import org.xml.sax.SAXException; + import net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.RequestHeader; @@ -36,8 +38,6 @@ import net.yacy.search.Switchboard; import net.yacy.server.serverObjects; import net.yacy.server.serverSwitch; -import org.xml.sax.SAXException; - public class import_ymark { public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { @@ -106,15 +106,7 @@ public class import_ymark { prop.put("status", "1"); */ } else { - MonitoredReader reader = null; - try { - reader = new MonitoredReader(new InputStreamReader(stream,"UTF-8"), 1024*16, bytes.length); - } catch (final UnsupportedEncodingException e1) { - //TODO: display an error message - ConcurrentLog.logException(e1); - prop.put("status", "0"); - return prop; - } + MonitoredReader reader = new MonitoredReader(new InputStreamReader(stream, StandardCharsets.UTF_8), 1024*16, bytes.length); if(post.get("importer").equals("html") && reader != null) { final YMarkHTMLImporter htmlImporter = new YMarkHTMLImporter(reader, queueSize, root); InstantBusyThread.oneTimeJob(htmlImporter, 0); @@ -206,7 +198,7 @@ public class import_ymark { final File in = new File(sb.workPath, "content.rdf.u8.gz"); final InputStream gzip = new FileInputStream(in); final InputStream content = new GZIPInputStream(gzip); - final InputStreamReader reader = new InputStreamReader(content, "UTF-8"); + final InputStreamReader reader = new InputStreamReader(content, StandardCharsets.UTF_8); final BufferedReader breader = new BufferedReader(reader); final MonitoredReader mreader = new MonitoredReader(breader, 1024*1024, in.length()); diff --git a/source/net/yacy/contentcontrol/SMWListSyncThread.java b/source/net/yacy/contentcontrol/SMWListSyncThread.java index ed4b432e4..805aa6279 100644 --- a/source/net/yacy/contentcontrol/SMWListSyncThread.java +++ b/source/net/yacy/contentcontrol/SMWListSyncThread.java @@ -4,6 +4,7 @@ import java.io.IOException; import java.io.InputStreamReader; import java.net.MalformedURLException; import java.net.URL; +import java.nio.charset.StandardCharsets; import net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.protocol.ClientIdentification; @@ -145,7 +146,7 @@ public class SMWListSyncThread { InputStreamReader reader = null; try { reader = new InputStreamReader( - urlImport.openStream(), "UTF-8"); + urlImport.openStream(), StandardCharsets.UTF_8); } catch (final Exception e) { ConcurrentLog.logException(e); this.runningjob = false; diff --git a/source/net/yacy/cora/document/WordCache.java b/source/net/yacy/cora/document/WordCache.java index e06f55b28..491d30cd2 100644 --- a/source/net/yacy/cora/document/WordCache.java +++ b/source/net/yacy/cora/document/WordCache.java @@ -26,6 +26,7 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.Collection; import java.util.ConcurrentModificationException; import java.util.HashSet; @@ -73,7 +74,7 @@ public class WordCache { if (file.getName().endsWith(".gz")) { is = new GZIPInputStream(is); } - final BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8")); + final BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8)); String l; StringBuilder sb; try { diff --git a/source/net/yacy/cora/document/encoding/UTF8.java b/source/net/yacy/cora/document/encoding/UTF8.java index e221083b9..e7ad4b706 100644 --- a/source/net/yacy/cora/document/encoding/UTF8.java +++ b/source/net/yacy/cora/document/encoding/UTF8.java @@ -25,6 +25,7 @@ package net.yacy.cora.document.encoding; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.Comparator; import org.apache.http.entity.ContentType; @@ -43,7 +44,7 @@ public class UTF8 implements Comparator { public final static Charset charset; static { - charset = Charset.forName("UTF-8"); + charset = StandardCharsets.UTF_8; } private final static ContentType contentType = ContentType.TEXT_PLAIN.withCharset(charset); diff --git a/source/net/yacy/cora/federate/opensearch/OpenSearchConnector.java b/source/net/yacy/cora/federate/opensearch/OpenSearchConnector.java index f4a9f9c12..ccc499d82 100644 --- a/source/net/yacy/cora/federate/opensearch/OpenSearchConnector.java +++ b/source/net/yacy/cora/federate/opensearch/OpenSearchConnector.java @@ -21,6 +21,7 @@ package net.yacy.cora.federate.opensearch; import java.io.IOException; import java.net.MalformedURLException; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -63,8 +64,8 @@ public class OpenSearchConnector extends AbstractFederateSearchConnector impleme tmps = tmps.replace("{startPage}", ""); tmps = tmps.replace("{count}", Integer.toString(rows)); tmps = tmps.replace("{language}", ""); - tmps = tmps.replace("{inputEncoding}", "UTF-8"); - tmps = tmps.replace("{outputEncoding}", "UTF-8"); + tmps = tmps.replace("{inputEncoding}", StandardCharsets.UTF_8.name()); + tmps = tmps.replace("{outputEncoding}", StandardCharsets.UTF_8.name()); return tmps.replace("{searchTerms}", query); } diff --git a/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java index abce9ba6d..fc0ce9b99 100644 --- a/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java @@ -24,6 +24,7 @@ package net.yacy.cora.federate.solr.connector; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URLDecoder; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Map; import java.util.Set; @@ -204,7 +205,7 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo // during the solr query we set the thread name to the query string to get more debugging info in thread dumps String threadname = Thread.currentThread().getName(); - String ql = ""; try {ql = URLDecoder.decode(req.getParams().toString(), "UTF-8");} catch (UnsupportedEncodingException e) {} + String ql = ""; try {ql = URLDecoder.decode(req.getParams().toString(), StandardCharsets.UTF_8.name());} catch (UnsupportedEncodingException e) {} Thread.currentThread().setName("solr query: " + ql); // for debugging in Threaddump ConcurrentLog.info("EmbeddedSolrConnector.query", "QUERY: " + ql); //System.out.println("EmbeddedSolrConnector.query * QUERY: " + ql); System.out.println("STACKTRACE: " + ConcurrentLog.stackTrace()); @@ -330,7 +331,7 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo if (this.server == null) throw new IOException("server disconnected"); // during the solr query we set the thread name to the query string to get more debugging info in thread dumps String threadname = Thread.currentThread().getName(); - String ql = ""; try {ql = URLDecoder.decode(params.toString(), "UTF-8");} catch (UnsupportedEncodingException e) {} + String ql = ""; try {ql = URLDecoder.decode(params.toString(), StandardCharsets.UTF_8.name());} catch (UnsupportedEncodingException e) {} Thread.currentThread().setName("solr query: q=" + ql); ConcurrentLog.info("EmbeddedSolrConnector.getResponseByParams", "QUERY: " + ql); //System.out.println("EmbeddedSolrConnector.getResponseByParams * QUERY: " + ql); System.out.println("STACKTRACE: " + ConcurrentLog.stackTrace()); diff --git a/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java b/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java index bcba91477..755cdde18 100644 --- a/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java +++ b/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java @@ -22,6 +22,7 @@ package net.yacy.cora.federate.solr.responsewriter; import java.io.IOException; import java.io.Writer; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; @@ -178,8 +179,8 @@ public class GSAResponseWriter implements QueryResponseWriter { OpensearchResponseWriter.solitaireTag(writer, "Q", query); paramTag(writer, "sort", sort); paramTag(writer, "output", "xml_no_dtd"); - paramTag(writer, "ie", "UTF-8"); - paramTag(writer, "oe", "UTF-8"); + paramTag(writer, "ie", StandardCharsets.UTF_8.name()); + paramTag(writer, "oe", StandardCharsets.UTF_8.name()); paramTag(writer, "client", client); paramTag(writer, "q", query); paramTag(writer, "site", site); diff --git a/source/net/yacy/cora/geo/GeonamesLocation.java b/source/net/yacy/cora/geo/GeonamesLocation.java index 053fb9bba..b7bb5d02d 100644 --- a/source/net/yacy/cora/geo/GeonamesLocation.java +++ b/source/net/yacy/cora/geo/GeonamesLocation.java @@ -27,6 +27,7 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -92,7 +93,7 @@ public class GeonamesLocation implements Locations { entryName = entryName.substring(0, entryName.length() - 3) + "txt"; final ZipEntry ze = zf.getEntry(entryName); final InputStream is = zf.getInputStream(ze); - reader = new BufferedReader(new InputStreamReader(is, "UTF-8")); + reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8)); } catch (final IOException e ) { log.warn(e); return; diff --git a/source/net/yacy/cora/geo/OpenGeoDBLocation.java b/source/net/yacy/cora/geo/OpenGeoDBLocation.java index 02542d468..95ebf42a2 100644 --- a/source/net/yacy/cora/geo/OpenGeoDBLocation.java +++ b/source/net/yacy/cora/geo/OpenGeoDBLocation.java @@ -28,6 +28,7 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -79,7 +80,7 @@ public class OpenGeoDBLocation implements Locations if ( file.getName().endsWith(".gz") ) { is = new GZIPInputStream(is); } - reader = new BufferedReader(new InputStreamReader(is, "UTF-8")); + reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8)); String line; // read lines diff --git a/source/net/yacy/cora/protocol/ftp/FTPClient.java b/source/net/yacy/cora/protocol/ftp/FTPClient.java index 5978c328b..74bb90b27 100644 --- a/source/net/yacy/cora/protocol/ftp/FTPClient.java +++ b/source/net/yacy/cora/protocol/ftp/FTPClient.java @@ -49,6 +49,7 @@ import java.net.InetSocketAddress; import java.net.ServerSocket; import java.net.Socket; import java.net.SocketException; +import java.nio.charset.StandardCharsets; import java.text.DateFormat; import java.text.ParseException; import java.text.SimpleDateFormat; @@ -1967,7 +1968,7 @@ public class FTPClient { private void send(final String buf) throws IOException { if (this.clientOutput == null) return; - byte[] b = buf.getBytes("UTF-8"); + byte[] b = buf.getBytes(StandardCharsets.UTF_8); this.clientOutput.write(b, 0, b.length); this.clientOutput.write('\r'); this.clientOutput.write('\n'); diff --git a/source/net/yacy/cora/storage/Files.java b/source/net/yacy/cora/storage/Files.java index 8b9cbea65..ad6d096d9 100644 --- a/source/net/yacy/cora/storage/Files.java +++ b/source/net/yacy/cora/storage/Files.java @@ -34,6 +34,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; +import java.nio.charset.StandardCharsets; import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; import java.util.zip.GZIPInputStream; @@ -78,7 +79,7 @@ public class Files { public static BlockingQueue concurentLineReader(final File f) throws IOException { final BlockingQueue q = new LinkedBlockingQueue(); final InputStream is = read(f); - final BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8")); + final BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8)); Thread t = new Thread() { @Override public void run() { diff --git a/source/net/yacy/cora/storage/KeyList.java b/source/net/yacy/cora/storage/KeyList.java index d28944a3f..563056b49 100644 --- a/source/net/yacy/cora/storage/KeyList.java +++ b/source/net/yacy/cora/storage/KeyList.java @@ -31,6 +31,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.RandomAccessFile; +import java.nio.charset.StandardCharsets; import java.util.Iterator; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; @@ -59,7 +60,7 @@ public class KeyList implements Iterable { if (file.getName().endsWith(".gz")) { is = new GZIPInputStream(is); } - final BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8")); + final BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8)); String l; try { while ((l = reader.readLine()) != null) { diff --git a/source/net/yacy/crawler/retrieval/Response.java b/source/net/yacy/crawler/retrieval/Response.java index 306341cc9..cc6f57265 100644 --- a/source/net/yacy/crawler/retrieval/Response.java +++ b/source/net/yacy/crawler/retrieval/Response.java @@ -26,6 +26,7 @@ package net.yacy.crawler.retrieval; +import java.nio.charset.StandardCharsets; import java.util.Date; import net.yacy.cora.document.analysis.Classification; @@ -858,7 +859,7 @@ public class Response { final String supportError = TextParser.supports(url(), this.responseHeader == null ? null : this.responseHeader.getContentType()); if (supportError != null) throw new Parser.Failure("no parser support:" + supportError, url()); try { - return TextParser.parseSource(new AnchorURL(url()), this.responseHeader == null ? null : this.responseHeader.getContentType(), this.responseHeader == null ? "UTF-8" : this.responseHeader.getCharacterEncoding(), new VocabularyScraper(), this.request.timezoneOffset(), this.request.depth(), this.content); + return TextParser.parseSource(new AnchorURL(url()), this.responseHeader == null ? null : this.responseHeader.getContentType(), this.responseHeader == null ? StandardCharsets.UTF_8.name() : this.responseHeader.getCharacterEncoding(), new VocabularyScraper(), this.request.timezoneOffset(), this.request.depth(), this.content); } catch (final Exception e) { return null; } diff --git a/source/net/yacy/data/BookmarkHelper.java b/source/net/yacy/data/BookmarkHelper.java index 86f17ad90..7747135c3 100644 --- a/source/net/yacy/data/BookmarkHelper.java +++ b/source/net/yacy/data/BookmarkHelper.java @@ -28,9 +28,9 @@ import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; -import java.io.UnsupportedEncodingException; import java.io.Writer; import java.net.MalformedURLException; +import java.nio.charset.StandardCharsets; import java.text.ParseException; import java.util.ArrayList; import java.util.Collection; @@ -44,6 +44,12 @@ import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; +import org.w3c.dom.Document; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + import net.yacy.cora.date.ISO8601Formatter; import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.encoding.UTF8; @@ -58,12 +64,6 @@ import net.yacy.document.parser.html.TransformerWriter; import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.util.FileUtils; -import org.w3c.dom.Document; -import org.w3c.dom.NamedNodeMap; -import org.w3c.dom.Node; -import org.w3c.dom.NodeList; -import org.xml.sax.SAXException; - public class BookmarkHelper { @@ -117,16 +117,12 @@ public class BookmarkHelper { // -------------------------------------- public static int importFromBookmarks(final BookmarksDB db, final DigestURL baseURL, final String input, final String tag, final boolean importPublic){ - try { - // convert string to input stream - final ByteArrayInputStream byteIn = new ByteArrayInputStream(UTF8.getBytes(input)); - final InputStreamReader reader = new InputStreamReader(byteIn,"UTF-8"); - - // import stream - return importFromBookmarks(db, baseURL, reader, tag, importPublic); - } catch (final UnsupportedEncodingException e) { - return 0; - } + // convert string to input stream + final ByteArrayInputStream byteIn = new ByteArrayInputStream(UTF8.getBytes(input)); + final InputStreamReader reader = new InputStreamReader(byteIn, StandardCharsets.UTF_8); + + // import stream + return importFromBookmarks(db, baseURL, reader, tag, importPublic); } private static int importFromBookmarks(final BookmarksDB db, final DigestURL baseURL, final InputStreamReader input, final String tag, final boolean importPublic){ diff --git a/source/net/yacy/data/Translator.java b/source/net/yacy/data/Translator.java index dd2a27b56..c659e6274 100644 --- a/source/net/yacy/data/Translator.java +++ b/source/net/yacy/data/Translator.java @@ -39,6 +39,7 @@ import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.PrintWriter; +import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; @@ -153,7 +154,7 @@ public class Translator { StringBuilder content = new StringBuilder(); BufferedReader br = null; try{ - br = new BufferedReader(new InputStreamReader(new FileInputStream(sourceFile),"UTF-8")); + br = new BufferedReader(new InputStreamReader(new FileInputStream(sourceFile), StandardCharsets.UTF_8)); String line = null; while( (line = br.readLine()) != null){ content.append(line).append(net.yacy.server.serverCore.CRLF_STRING); @@ -172,7 +173,7 @@ public class Translator { String processedContent = translate(content.toString(), translationList); BufferedWriter bw = null; try{ - bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(destFile),"UTF-8")); + bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(destFile), StandardCharsets.UTF_8)); bw.write(processedContent); bw.close(); }catch(final IOException e){ diff --git a/source/net/yacy/data/wiki/AbstractWikiParser.java b/source/net/yacy/data/wiki/AbstractWikiParser.java index 7f02a8a36..a8f0b7f14 100644 --- a/source/net/yacy/data/wiki/AbstractWikiParser.java +++ b/source/net/yacy/data/wiki/AbstractWikiParser.java @@ -31,6 +31,7 @@ import java.io.IOException; import java.io.InputStreamReader; import java.io.StringReader; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; abstract class AbstractWikiParser implements WikiParser { @@ -50,7 +51,7 @@ abstract class AbstractWikiParser implements WikiParser { @Override public String transform(String hostport, final byte[] content) throws UnsupportedEncodingException { - return transform(hostport, content, "UTF-8"); + return transform(hostport, content, StandardCharsets.UTF_8.name()); } @Override diff --git a/source/net/yacy/document/Document.java b/source/net/yacy/document/Document.java index 265cbf45a..a594878e2 100644 --- a/source/net/yacy/document/Document.java +++ b/source/net/yacy/document/Document.java @@ -33,6 +33,7 @@ import java.io.UnsupportedEncodingException; import java.io.Writer; import java.net.MalformedURLException; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -805,7 +806,7 @@ dc_rights public String toString() { final ByteArrayOutputStream baos = new ByteArrayOutputStream(); try { - final Writer osw = new OutputStreamWriter(baos, "UTF-8"); + final Writer osw = new OutputStreamWriter(baos, StandardCharsets.UTF_8); writeXML(osw); osw.close(); return UTF8.String(baos.toByteArray()); diff --git a/source/net/yacy/document/LibraryProvider.java b/source/net/yacy/document/LibraryProvider.java index a7538b236..dde93b7f7 100644 --- a/source/net/yacy/document/LibraryProvider.java +++ b/source/net/yacy/document/LibraryProvider.java @@ -31,6 +31,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.PrintWriter; import java.net.MalformedURLException; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -280,7 +281,7 @@ public class LibraryProvider { BufferedReader reader = null; try { - reader = new BufferedReader(new InputStreamReader(derewoTxtEntry, "UTF-8")); + reader = new BufferedReader(new InputStreamReader(derewoTxtEntry, StandardCharsets.UTF_8)); String line; // read until text starts diff --git a/source/net/yacy/document/ProbabilisticClassifier.java b/source/net/yacy/document/ProbabilisticClassifier.java index 09bd4b403..38bb8c2ff 100644 --- a/source/net/yacy/document/ProbabilisticClassifier.java +++ b/source/net/yacy/document/ProbabilisticClassifier.java @@ -24,6 +24,7 @@ package net.yacy.document; import java.io.File; import java.io.IOException; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.util.ArrayList; import java.util.HashMap; @@ -61,7 +62,7 @@ public class ProbabilisticClassifier { public Context(String context_name, Map categoryExampleLinesFiles, File negativeExampleLines) throws IOException { this.context_name = context_name; int requiredSize = 0; - Charset charset = Charset.forName("UTF-8"); + Charset charset = StandardCharsets.UTF_8; Map> categoryBuffer = new HashMap<>(); for (Map.Entry category: categoryExampleLinesFiles.entrySet()) { List list = Files.readAllLines(category.getValue().toPath(), charset); diff --git a/source/net/yacy/document/content/DCEntry.java b/source/net/yacy/document/content/DCEntry.java index cc90196dc..f60af57d3 100644 --- a/source/net/yacy/document/content/DCEntry.java +++ b/source/net/yacy/document/content/DCEntry.java @@ -28,6 +28,7 @@ package net.yacy.document.content; import java.io.IOException; import java.io.OutputStreamWriter; import java.net.MalformedURLException; +import java.nio.charset.StandardCharsets; import java.text.Collator; import java.text.ParseException; import java.util.ArrayList; @@ -332,7 +333,7 @@ public class DCEntry extends MultiMapSolrParams { return new Document( getIdentifier(true), "text/html", - "UTF-8", + StandardCharsets.UTF_8.name(), this, languages, getSubject(), // might be null diff --git a/source/net/yacy/document/content/SurrogateReader.java b/source/net/yacy/document/content/SurrogateReader.java index 22557a97d..8caf267ec 100644 --- a/source/net/yacy/document/content/SurrogateReader.java +++ b/source/net/yacy/document/content/SurrogateReader.java @@ -32,6 +32,8 @@ import java.io.PushbackInputStream; import java.io.Reader; import java.io.StringReader; import java.net.MalformedURLException; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.Map; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BlockingQueue; @@ -83,7 +85,7 @@ public class SurrogateReader extends DefaultHandler implements Runnable { private final CrawlStacker crawlStacker; private final CollectionConfiguration configuration; private final int concurrency; - private String charsetName = "UTF-8"; + private Charset charset = StandardCharsets.UTF_8; private static final ThreadLocal tlSax = new ThreadLocal(); private static SAXParser getParser() throws SAXException { @@ -113,9 +115,9 @@ public class SurrogateReader extends DefaultHandler implements Runnable { this.elementName = null; this.surrogates = new ArrayBlockingQueue<>(queueSize); - Reader reader = new BufferedReader(new InputStreamReader(stream, this.charsetName)); + Reader reader = new BufferedReader(new InputStreamReader(stream, this.charset)); this.inputSource = new InputSource(reader); - this.inputSource.setEncoding(this.charsetName); + this.inputSource.setEncoding(this.charset.name()); this.inputStream = stream; try { @@ -131,7 +133,7 @@ public class SurrogateReader extends DefaultHandler implements Runnable { // test the syntax of the stream by reading parts of the beginning try { if (isSolrDump()) { - BufferedReader br = new BufferedReader(new InputStreamReader(this.inputStream, this.charsetName)); + BufferedReader br = new BufferedReader(new InputStreamReader(this.inputStream, this.charset)); String line; while ((line = br.readLine()) != null) { if (!line.startsWith("")) continue; @@ -191,7 +193,7 @@ public class SurrogateReader extends DefaultHandler implements Runnable { try { nbRead = this.inputStream.read(b); if(nbRead > 0) { - String s = new String(b, 0, nbRead, this.charsetName); + String s = new String(b, 0, nbRead, this.charset); if ((s.contains("") && s.contains("")) || s.startsWith("")) { res = true; } diff --git a/source/net/yacy/document/content/dao/PhpBB3Dao.java b/source/net/yacy/document/content/dao/PhpBB3Dao.java index d443ceac6..bdcdd77bf 100644 --- a/source/net/yacy/document/content/dao/PhpBB3Dao.java +++ b/source/net/yacy/document/content/dao/PhpBB3Dao.java @@ -31,6 +31,7 @@ import java.io.IOException; import java.io.OutputStreamWriter; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; +import java.nio.charset.StandardCharsets; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; @@ -285,7 +286,7 @@ public class PhpBB3Dao implements Dao { outputfile = new File(targetdir, targethost + "." + versioninfo + "." + fc + ".xml"); if (outputfiletmp.exists()) outputfiletmp.delete(); if (outputfile.exists()) outputfile.delete(); - osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(outputfiletmp)), "UTF-8"); + osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(outputfiletmp)), StandardCharsets.UTF_8); osw.write("\n" + SurrogateReader.SURROGATES_MAIN_ELEMENT_OPEN + "\n"); } e.writeXML(osw); diff --git a/source/net/yacy/document/importer/MediawikiImporter.java b/source/net/yacy/document/importer/MediawikiImporter.java index 4e7de8208..8dc14e29a 100644 --- a/source/net/yacy/document/importer/MediawikiImporter.java +++ b/source/net/yacy/document/importer/MediawikiImporter.java @@ -39,6 +39,7 @@ import java.io.RandomAccessFile; import java.io.UnsupportedEncodingException; import java.lang.reflect.Array; import java.net.MalformedURLException; +import java.nio.charset.StandardCharsets; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BlockingQueue; import java.util.concurrent.Callable; @@ -161,7 +162,7 @@ public class MediawikiImporter extends Thread implements Importer { } else if (this.sourcefile.getName().endsWith(".gz")) { is = new GZIPInputStream(is); } - final BufferedReader r = new BufferedReader(new java.io.InputStreamReader(is, "UTF-8"), 4 * 1024 * 1024); + final BufferedReader r = new BufferedReader(new java.io.InputStreamReader(is, StandardCharsets.UTF_8), 4 * 1024 * 1024); String t; StringBuilder sb = new StringBuilder(); boolean page = false, text = false; @@ -520,7 +521,7 @@ public class MediawikiImporter extends Thread implements Importer { public void genDocument() throws Parser.Failure { try { this.url = new AnchorURL(this.urlStub + this.title); - final Document[] parsed = TextParser.parseSource(this.url, "text/html", "UTF-8", new VocabularyScraper(), 0, 1, UTF8.getBytes(this.html)); + final Document[] parsed = TextParser.parseSource(this.url, "text/html", StandardCharsets.UTF_8.name(), new VocabularyScraper(), 0, 1, UTF8.getBytes(this.html)); this.document = Document.mergeDocuments(this.url, "text/html", parsed); // the wiki parser is not able to find the proper title in the source text, so it must be set here this.document.setTitle(this.title); @@ -712,7 +713,7 @@ public class MediawikiImporter extends Thread implements Importer { if (this.osw == null) { // start writing a new file this.outputfilename = this.targetstub + "." + this.fc + ".xml.prt"; - this.osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(new File(this.targetdir, this.outputfilename))), "UTF-8"); + this.osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(new File(this.targetdir, this.outputfilename))), StandardCharsets.UTF_8); this.osw.write("\n" + SurrogateReader.SURROGATES_MAIN_ELEMENT_OPEN + "\n"); } ConcurrentLog.info("WIKITRANSLATION", "[CONSUME] Title: " + record.title); @@ -726,7 +727,7 @@ public class MediawikiImporter extends Thread implements Importer { this.rc = 0; this.fc++; this.outputfilename = this.targetstub + "." + this.fc + ".xml.prt"; - this.osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(new File(this.targetdir, this.outputfilename))), "UTF-8"); + this.osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(new File(this.targetdir, this.outputfilename))), StandardCharsets.UTF_8); this.osw.write("\n" + SurrogateReader.SURROGATES_MAIN_ELEMENT_OPEN + "\n"); } } diff --git a/source/net/yacy/document/parser/apkParser.java b/source/net/yacy/document/parser/apkParser.java index c91a7fad0..af4d1f4e0 100644 --- a/source/net/yacy/document/parser/apkParser.java +++ b/source/net/yacy/document/parser/apkParser.java @@ -26,6 +26,7 @@ import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.Date; @@ -411,7 +412,7 @@ public class apkParser extends AbstractParser implements Parser { final byte[] asa = new byte[arscStream.available()]; arscStream.read(asa); int pos = 0; - final Charset charset = Charset.forName("UTF-8"); + final Charset charset = StandardCharsets.UTF_8; final List s = new ArrayList<>(); parseloop: while (pos < asa.length) { while (pos < asa.length && asa[pos] != 0) pos++; diff --git a/source/net/yacy/document/parser/docParser.java b/source/net/yacy/document/parser/docParser.java index f6a9af827..5dbae9848 100644 --- a/source/net/yacy/document/parser/docParser.java +++ b/source/net/yacy/document/parser/docParser.java @@ -28,6 +28,7 @@ package net.yacy.document.parser; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; @@ -111,7 +112,7 @@ public class docParser extends AbstractParser implements Parser { docs = new Document[]{new Document( location, mimeType, - "UTF-8", + StandardCharsets.UTF_8.name(), this, null, keywlist, diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java index 30310e017..5fe02e088 100644 --- a/source/net/yacy/document/parser/html/ContentScraper.java +++ b/source/net/yacy/document/parser/html/ContentScraper.java @@ -28,6 +28,7 @@ import java.io.Writer; import java.lang.reflect.Array; import java.net.MalformedURLException; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.text.ParseException; import java.util.ArrayList; import java.util.Date; @@ -1154,7 +1155,7 @@ public class ContentScraper extends AbstractScraper implements Scraper { if (page == null) throw new IOException("no content in file " + file.toString()); // scrape document to look up charset - final ScraperInputStream htmlFilter = new ScraperInputStream(new ByteArrayInputStream(page), "UTF-8", new VocabularyScraper(), new DigestURL("http://localhost"), null, false, maxLinks, timezoneOffset); + final ScraperInputStream htmlFilter = new ScraperInputStream(new ByteArrayInputStream(page), StandardCharsets.UTF_8.name(), new VocabularyScraper(), new DigestURL("http://localhost"), null, false, maxLinks, timezoneOffset); String charset = htmlParser.patchCharsetEncoding(htmlFilter.detectCharset()); htmlFilter.close(); if (charset == null) charset = Charset.defaultCharset().toString(); diff --git a/source/net/yacy/document/parser/html/ScraperInputStream.java b/source/net/yacy/document/parser/html/ScraperInputStream.java index ae681f97f..199e16647 100644 --- a/source/net/yacy/document/parser/html/ScraperInputStream.java +++ b/source/net/yacy/document/parser/html/ScraperInputStream.java @@ -33,6 +33,7 @@ import java.io.InputStreamReader; import java.io.Reader; import java.io.UnsupportedEncodingException; import java.io.Writer; +import java.nio.charset.StandardCharsets; import java.util.Properties; import net.yacy.cora.document.id.DigestURL; @@ -77,12 +78,7 @@ public class ScraperInputStream extends InputStream implements ScraperListener { try { this.reader = (inputStreamCharset == null) ? new InputStreamReader(this) : new InputStreamReader(this,inputStreamCharset); } catch (final UnsupportedEncodingException e) { - try { - this.reader = new InputStreamReader(this, "UTF-8"); - } catch (final UnsupportedEncodingException e1) { - // how is that possible? - this.reader = new InputStreamReader(this); - } + this.reader = new InputStreamReader(this, StandardCharsets.UTF_8); } this.writer = new TransformerWriter(null,null,scraper,transformer,passbyIfBinarySuspect); } diff --git a/source/net/yacy/document/parser/htmlParser.java b/source/net/yacy/document/parser/htmlParser.java index 6c122d902..4fd1dc24c 100644 --- a/source/net/yacy/document/parser/htmlParser.java +++ b/source/net/yacy/document/parser/htmlParser.java @@ -32,6 +32,7 @@ import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.nio.charset.Charset; import java.nio.charset.IllegalCharsetNameException; +import java.nio.charset.StandardCharsets; import java.nio.charset.UnsupportedCharsetException; import java.util.HashSet; import java.util.LinkedHashMap; @@ -291,8 +292,8 @@ public class htmlParser extends AbstractParser implements Parser { encoding = CommonPattern.UNDERSCORE.matcher(encoding).replaceAll("-"); if (encoding.matches("GB[_-]?2312([-_]80)?")) return "GB2312"; - if (encoding.matches(".*UTF[-_]?8.*")) return "UTF-8"; - if (encoding.startsWith("US")) return "US-ASCII"; + if (encoding.matches(".*UTF[-_]?8.*")) return StandardCharsets.UTF_8.name(); + if (encoding.startsWith("US")) return StandardCharsets.US_ASCII.name(); if (encoding.startsWith("KOI")) return "KOI8-R"; // patch missing '-' @@ -385,7 +386,7 @@ public class htmlParser extends AbstractParser implements Parser { try { url = new AnchorURL(args[0]); final byte[] content = url.get(ClientIdentification.yacyInternetCrawlerAgent, null, null); - final Document[] document = new htmlParser().parse(url, "text/html", "utf-8", new VocabularyScraper(), 0, new ByteArrayInputStream(content)); + final Document[] document = new htmlParser().parse(url, "text/html", StandardCharsets.UTF_8.name(), new VocabularyScraper(), 0, new ByteArrayInputStream(content)); final String title = document[0].dc_title(); System.out.println(title); } catch (final MalformedURLException e) { diff --git a/source/net/yacy/document/parser/images/genericImageParser.java b/source/net/yacy/document/parser/images/genericImageParser.java index 78d645fd9..6e656c3a0 100644 --- a/source/net/yacy/document/parser/images/genericImageParser.java +++ b/source/net/yacy/document/parser/images/genericImageParser.java @@ -34,6 +34,7 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Date; @@ -183,7 +184,7 @@ public class genericImageParser extends AbstractParser implements Parser { return new Document[]{new Document( location, mimeType, - "UTF-8", + StandardCharsets.UTF_8.name(), this, languages, keywords == null ? new String[]{} : keywords.split(keywords.indexOf(',') > 0 ? "," : " "), // keywords @@ -302,7 +303,7 @@ public class genericImageParser extends AbstractParser implements Parser { AnchorURL uri; try { uri = new AnchorURL("http://localhost/" + image.getName()); - final Document[] document = parser.parse(uri, "image/" + MultiProtocolURL.getFileExtension(uri.getFileName()), "UTF-8", new VocabularyScraper(), 0, new FileInputStream(image)); + final Document[] document = parser.parse(uri, "image/" + MultiProtocolURL.getFileExtension(uri.getFileName()), StandardCharsets.UTF_8.name(), new VocabularyScraper(), 0, new FileInputStream(image)); System.out.println(document[0].toString()); } catch (final MalformedURLException e) { e.printStackTrace(); diff --git a/source/net/yacy/document/parser/images/svgParser.java b/source/net/yacy/document/parser/images/svgParser.java index 1f2733247..920b85401 100644 --- a/source/net/yacy/document/parser/images/svgParser.java +++ b/source/net/yacy/document/parser/images/svgParser.java @@ -21,6 +21,7 @@ package net.yacy.document.parser.images; import java.io.EOFException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.LinkedHashMap; import javax.xml.parsers.ParserConfigurationException; @@ -118,7 +119,7 @@ public class svgParser extends AbstractParser implements Parser { Document[] docs = new Document[]{new Document( location, mimeType, - "UTF-8", + StandardCharsets.UTF_8.name(), this, null, null, diff --git a/source/net/yacy/document/parser/mmParser.java b/source/net/yacy/document/parser/mmParser.java index 686b9cddb..0799ca97c 100644 --- a/source/net/yacy/document/parser/mmParser.java +++ b/source/net/yacy/document/parser/mmParser.java @@ -26,6 +26,7 @@ package net.yacy.document.parser; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Date; import java.util.List; @@ -109,7 +110,7 @@ public class mmParser extends AbstractParser implements Parser { return new Document[]{new Document( location, mimeType, - "UTF-8", + StandardCharsets.UTF_8.name(), this, null, null, diff --git a/source/net/yacy/document/parser/odtParser.java b/source/net/yacy/document/parser/odtParser.java index fb8493c42..2e96ffbeb 100644 --- a/source/net/yacy/document/parser/odtParser.java +++ b/source/net/yacy/document/parser/odtParser.java @@ -29,6 +29,7 @@ package net.yacy.document.parser; import java.io.File; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Date; import java.util.Enumeration; @@ -188,7 +189,7 @@ public class odtParser extends AbstractParser implements Parser { docs = new Document[]{new Document( location, mimeType, - "UTF-8", + StandardCharsets.UTF_8.name(), this, languages, docKeywords, diff --git a/source/net/yacy/document/parser/ooxmlParser.java b/source/net/yacy/document/parser/ooxmlParser.java index c119f642a..c8cc2505c 100644 --- a/source/net/yacy/document/parser/ooxmlParser.java +++ b/source/net/yacy/document/parser/ooxmlParser.java @@ -29,6 +29,7 @@ package net.yacy.document.parser; import java.io.File; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Date; import java.util.Enumeration; @@ -172,7 +173,7 @@ public class ooxmlParser extends AbstractParser implements Parser { docs = new Document[]{new Document( location, mimeType, - "UTF-8", + StandardCharsets.UTF_8.name(), this, languages, docKeywords, diff --git a/source/net/yacy/document/parser/pdfParser.java b/source/net/yacy/document/parser/pdfParser.java index 5aeb2ed29..5d969f3f1 100644 --- a/source/net/yacy/document/parser/pdfParser.java +++ b/source/net/yacy/document/parser/pdfParser.java @@ -33,6 +33,7 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.lang.reflect.Method; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.Date; @@ -169,7 +170,7 @@ public class pdfParser extends AbstractParser implements Parser { pdflinks = extractPdfLinks(pdfDoc); // get the fulltext (either per document or for each page) - final PDFTextStripper stripper = new PDFTextStripper("UTF-8"); + final PDFTextStripper stripper = new PDFTextStripper(StandardCharsets.UTF_8.name()); if (individualPages) { // this is a hack which stores individual pages of the source pdf into individual index documents @@ -193,7 +194,7 @@ public class pdfParser extends AbstractParser implements Parser { result[page] = new Document( new AnchorURL(loc + (loc.indexOf('?') > 0 ? '&' : '?') + individualPagePropertyname + '=' + (page + 1)), // these are virtual new pages; we cannot combine them with '#' as that would be removed when computing the urlhash mimeType, - "UTF-8", + StandardCharsets.UTF_8.name(), this, null, docKeywords, @@ -243,7 +244,7 @@ public class pdfParser extends AbstractParser implements Parser { result = new Document[]{new Document( location, mimeType, - "UTF-8", + StandardCharsets.UTF_8.name(), this, null, docKeywords, diff --git a/source/net/yacy/document/parser/pptParser.java b/source/net/yacy/document/parser/pptParser.java index b41ff3eac..c3c8c3bdf 100644 --- a/source/net/yacy/document/parser/pptParser.java +++ b/source/net/yacy/document/parser/pptParser.java @@ -29,6 +29,7 @@ package net.yacy.document.parser; import java.io.BufferedInputStream; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; @@ -104,7 +105,7 @@ public class pptParser extends AbstractParser implements Parser { final Document[] docs = new Document[]{new Document( location, mimeType, - "UTF-8", + StandardCharsets.UTF_8.name(), this, null, keywlist, diff --git a/source/net/yacy/document/parser/psParser.java b/source/net/yacy/document/parser/psParser.java index 923df6f96..10f1fd5e1 100644 --- a/source/net/yacy/document/parser/psParser.java +++ b/source/net/yacy/document/parser/psParser.java @@ -34,6 +34,7 @@ import java.io.FileReader; import java.io.FileWriter; import java.io.InputStream; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.Date; import net.yacy.cora.document.id.AnchorURL; @@ -104,7 +105,7 @@ public class psParser extends AbstractParser implements Parser { final Document[] docs = new Document[]{new Document( location, // url mimeType, // mime - "UTF-8", // charset + StandardCharsets.UTF_8.name(), // charset this, null, // languages null, // keywords diff --git a/source/net/yacy/document/parser/rtfParser.java b/source/net/yacy/document/parser/rtfParser.java index e6ea7d334..63a8c7bbc 100644 --- a/source/net/yacy/document/parser/rtfParser.java +++ b/source/net/yacy/document/parser/rtfParser.java @@ -28,6 +28,7 @@ package net.yacy.document.parser; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.Date; import javax.swing.text.DefaultStyledDocument; @@ -73,7 +74,7 @@ public class rtfParser extends AbstractParser implements Parser { return new Document[]{new Document( location, mimeType, - "UTF-8", + StandardCharsets.UTF_8.name(), this, null, null, diff --git a/source/net/yacy/document/parser/sidAudioParser.java b/source/net/yacy/document/parser/sidAudioParser.java index 1eb216a3b..e0c164e21 100644 --- a/source/net/yacy/document/parser/sidAudioParser.java +++ b/source/net/yacy/document/parser/sidAudioParser.java @@ -26,7 +26,7 @@ package net.yacy.document.parser; import java.io.IOException; import java.io.InputStream; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.Date; import java.util.HashMap; import java.util.Map; @@ -88,7 +88,7 @@ public class sidAudioParser extends AbstractParser implements Parser { return new Document[]{new Document( location, mimeType, - "UTF-8", + StandardCharsets.UTF_8.name(), this, null, null, @@ -134,9 +134,9 @@ public class sidAudioParser extends AbstractParser implements Parser { Map ret = new HashMap(); - ret.put("name", new String(name, Charset.forName("ISO-8859-1")).trim()); - ret.put("author", new String(author, Charset.forName("ISO-8859-1")).trim()); - ret.put("publisher", new String(copyright, Charset.forName("ISO-8859-1")).trim()); + ret.put("name", new String(name, StandardCharsets.ISO_8859_1).trim()); + ret.put("author", new String(author, StandardCharsets.ISO_8859_1).trim()); + ret.put("publisher", new String(copyright, StandardCharsets.ISO_8859_1).trim()); return ret; } diff --git a/source/net/yacy/document/parser/swfParser.java b/source/net/yacy/document/parser/swfParser.java index 154a85bf9..47e0ac516 100644 --- a/source/net/yacy/document/parser/swfParser.java +++ b/source/net/yacy/document/parser/swfParser.java @@ -29,6 +29,7 @@ package net.yacy.document.parser; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Date; import java.util.List; @@ -110,7 +111,7 @@ public class swfParser extends AbstractParser implements Parser { return new Document[]{new Document( location, // url of the source document mimeType, // the documents mime type - "UTF-8", // charset of the document text + StandardCharsets.UTF_8.name(), // charset of the document text this, null, null, //keywords diff --git a/source/net/yacy/document/parser/torrentParser.java b/source/net/yacy/document/parser/torrentParser.java index 3b096ebf1..bed4d56d3 100644 --- a/source/net/yacy/document/parser/torrentParser.java +++ b/source/net/yacy/document/parser/torrentParser.java @@ -28,6 +28,7 @@ import java.io.ByteArrayInputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.Date; import java.util.List; import java.util.Map; @@ -126,7 +127,7 @@ public class torrentParser extends AbstractParser implements Parser { try { byte[] b = FileUtils.read(new File(args[0])); torrentParser parser = new torrentParser(); - Document[] d = parser.parse(new AnchorURL("http://localhost/test.torrent"), null, "UTF-8", new VocabularyScraper(), 0, new ByteArrayInputStream(b)); + Document[] d = parser.parse(new AnchorURL("http://localhost/test.torrent"), null, StandardCharsets.UTF_8.name(), new VocabularyScraper(), 0, new ByteArrayInputStream(b)); Condenser c = new Condenser(d[0], null, true, true, LibraryProvider.dymLib, false, false, 0); Map w = c.words(); for (Map.Entry e: w.entrySet()) System.out.println("Word: " + e.getKey() + " - " + e.getValue().posInText); diff --git a/source/net/yacy/document/parser/vsdParser.java b/source/net/yacy/document/parser/vsdParser.java index c826f8bd6..94f036ed1 100644 --- a/source/net/yacy/document/parser/vsdParser.java +++ b/source/net/yacy/document/parser/vsdParser.java @@ -28,6 +28,7 @@ package net.yacy.document.parser; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Date; import java.util.List; @@ -112,7 +113,7 @@ public class vsdParser extends AbstractParser implements Parser { return new Document[]{new Document( location, // url of the source document mimeType, // the documents mime type - "UTF-8", // charset of the document text + StandardCharsets.UTF_8.name(), // charset of the document text this, null, // language keywords, diff --git a/source/net/yacy/document/parser/xlsParser.java b/source/net/yacy/document/parser/xlsParser.java index cf178c85e..3005c0bab 100644 --- a/source/net/yacy/document/parser/xlsParser.java +++ b/source/net/yacy/document/parser/xlsParser.java @@ -28,6 +28,7 @@ package net.yacy.document.parser; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.Date; import net.yacy.cora.document.id.AnchorURL; @@ -123,7 +124,7 @@ public class xlsParser extends AbstractParser implements Parser { return new Document[]{new Document( location, mimeType, - "UTF-8", + StandardCharsets.UTF_8.name(), this, null, null, diff --git a/source/net/yacy/http/servlets/SolrServlet.java b/source/net/yacy/http/servlets/SolrServlet.java index 020ad8840..3b2966154 100644 --- a/source/net/yacy/http/servlets/SolrServlet.java +++ b/source/net/yacy/http/servlets/SolrServlet.java @@ -23,7 +23,7 @@ package net.yacy.http.servlets; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.Writer; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import javax.servlet.ServletException; import javax.servlet.ServletRequest; @@ -31,12 +31,6 @@ import javax.servlet.ServletResponse; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; -import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector; -import net.yacy.cora.util.ConcurrentLog; -import net.yacy.search.Switchboard; -import net.yacy.search.schema.CollectionSchema; -import net.yacy.search.schema.WebgraphSchema; - import org.apache.solr.common.params.MultiMapSolrParams; import org.apache.solr.common.util.ContentStreamBase; import org.apache.solr.request.SolrQueryRequest; @@ -48,10 +42,15 @@ import org.apache.solr.servlet.SolrRequestParsers; import org.apache.solr.servlet.cache.Method; import org.apache.solr.util.FastWriter; +import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector; +import net.yacy.cora.util.ConcurrentLog; +import net.yacy.search.Switchboard; +import net.yacy.search.schema.CollectionSchema; +import net.yacy.search.schema.WebgraphSchema; + public class SolrServlet extends HttpServlet { private static final long serialVersionUID = 1L; - private static final Charset UTF8 = Charset.forName("UTF-8"); @Override public void service(ServletRequest request, ServletResponse response) throws IOException, ServletException { @@ -89,8 +88,8 @@ public class SolrServlet extends HttpServlet { binWriter.write(response.getOutputStream(), solrReq, solrRsp); } else { String charset = ContentStreamBase.getCharsetFromContentType(ct); - Writer out = (charset == null || charset.equalsIgnoreCase("UTF-8")) - ? new OutputStreamWriter(response.getOutputStream(), UTF8) + Writer out = (charset == null || charset.equalsIgnoreCase(StandardCharsets.UTF_8.name())) + ? new OutputStreamWriter(response.getOutputStream(), StandardCharsets.UTF_8) : new OutputStreamWriter(response.getOutputStream(), charset); out = new FastWriter(out); responseWriter.write(out, solrReq, solrRsp); diff --git a/source/net/yacy/http/servlets/UrlProxyServlet.java b/source/net/yacy/http/servlets/UrlProxyServlet.java index 85e00aa62..66c84c2b1 100644 --- a/source/net/yacy/http/servlets/UrlProxyServlet.java +++ b/source/net/yacy/http/servlets/UrlProxyServlet.java @@ -7,6 +7,7 @@ import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.net.URLDecoder; +import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.regex.PatternSyntaxException; import javax.servlet.Servlet; @@ -321,7 +322,7 @@ public class UrlProxyServlet extends ProxyServlet implements Servlet { if (b == -1) { return null; } - return buf.toString("UTF-8"); + return buf.toString(StandardCharsets.UTF_8.name()); } /** diff --git a/source/net/yacy/http/servlets/YaCyDefaultServlet.java b/source/net/yacy/http/servlets/YaCyDefaultServlet.java index 502a5fe45..ef849300b 100644 --- a/source/net/yacy/http/servlets/YaCyDefaultServlet.java +++ b/source/net/yacy/http/servlets/YaCyDefaultServlet.java @@ -34,6 +34,7 @@ import java.lang.reflect.Array; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.util.AbstractMap; import java.util.Enumeration; import java.util.Iterator; @@ -478,7 +479,7 @@ public class YaCyDefaultServlet extends HttpServlet { return; } - byte[] data = dir.getBytes("UTF-8"); + byte[] data = dir.getBytes(StandardCharsets.UTF_8); response.setContentType(MimeTypes.Type.TEXT_HTML_UTF_8.asString()); response.setContentLength(data.length); response.setHeader(HeaderFramework.CACHE_CONTROL, "no-cache, no-store"); @@ -1093,7 +1094,7 @@ public class YaCyDefaultServlet extends HttpServlet { // simple text if (item.getContentType() == null || !item.getContentType().contains("charset")) { // old yacy clients use their local default charset, on most systems UTF-8 (I hope ;) - args.add(item.getFieldName(), item.getString("UTF-8")); + args.add(item.getFieldName(), item.getString(StandardCharsets.UTF_8.name())); } else { // use default encoding (given as header or ISO-8859-1) args.add(item.getFieldName(), item.getString()); diff --git a/source/net/yacy/http/servlets/YaCyProxyServlet.java b/source/net/yacy/http/servlets/YaCyProxyServlet.java index 690debcfe..1b42cbc81 100644 --- a/source/net/yacy/http/servlets/YaCyProxyServlet.java +++ b/source/net/yacy/http/servlets/YaCyProxyServlet.java @@ -8,6 +8,7 @@ import java.io.StringWriter; import java.net.MalformedURLException; import java.net.URL; import java.net.URLDecoder; +import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.StringTokenizer; import java.util.regex.Matcher; @@ -299,7 +300,7 @@ public class YaCyProxyServlet extends ProxyServlet implements Servlet { if (b == -1) { return null; } - return buf.toString("UTF-8"); + return buf.toString(StandardCharsets.UTF_8.name()); } /** diff --git a/source/net/yacy/kelondro/util/FileUtils.java b/source/net/yacy/kelondro/util/FileUtils.java index 4e78cb69f..7911c97dc 100644 --- a/source/net/yacy/kelondro/util/FileUtils.java +++ b/source/net/yacy/kelondro/util/FileUtils.java @@ -44,6 +44,7 @@ import java.io.Reader; import java.io.UnsupportedEncodingException; import java.io.Writer; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -430,7 +431,7 @@ public final class FileUtils { PrintWriter pw = null; final File tf = new File(file.toString() + "." + (System.currentTimeMillis() % 1000)); try { - pw = new PrintWriter(tf, "UTF-8"); + pw = new PrintWriter(tf, StandardCharsets.UTF_8.name()); pw.println("# " + comment); String key, value; for ( final Map.Entry entry : props.entrySet() ) { @@ -518,11 +519,7 @@ public final class FileUtils { if ( a == null ) { return new ArrayList().iterator(); } - try { - return new StringsIterator(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(a), "UTF-8"))); - } catch (final UnsupportedEncodingException e ) { - return null; - } + return new StringsIterator(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(a), StandardCharsets.UTF_8))); } /** @@ -536,7 +533,7 @@ public final class FileUtils { final ArrayList list = new ArrayList(); BufferedReader br = null; try { - br = new BufferedReader(new InputStreamReader(new FileInputStream(listFile), "UTF-8")); + br = new BufferedReader(new InputStreamReader(new FileInputStream(listFile), StandardCharsets.UTF_8)); while ( (line = br.readLine()) != null ) { list.add(line); diff --git a/source/net/yacy/search/query/AccessTracker.java b/source/net/yacy/search/query/AccessTracker.java index 07c06b15d..7f8587e03 100644 --- a/source/net/yacy/search/query/AccessTracker.java +++ b/source/net/yacy/search/query/AccessTracker.java @@ -32,6 +32,7 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import java.io.RandomAccessFile; +import java.nio.charset.StandardCharsets; import java.text.ParseException; import java.util.ArrayList; import java.util.Date; @@ -240,7 +241,7 @@ public class AccessTracker { raf.readFully(buffer); // we make a copy because that dramatically speeds up reading lines; RandomAccessFile.readLine is very slow raf.close(); ByteArrayInputStream bais = new ByteArrayInputStream(buffer); - BufferedReader reader = new BufferedReader(new InputStreamReader(bais, "UTF-8")); + BufferedReader reader = new BufferedReader(new InputStreamReader(bais, StandardCharsets.UTF_8)); String line; while ((line = reader.readLine()) != null) { // parse the line diff --git a/source/net/yacy/search/query/QueryGoal.java b/source/net/yacy/search/query/QueryGoal.java index e3338dbd2..c23b7e170 100644 --- a/source/net/yacy/search/query/QueryGoal.java +++ b/source/net/yacy/search/query/QueryGoal.java @@ -23,6 +23,7 @@ package net.yacy.search.query; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; @@ -209,7 +210,7 @@ public class QueryGoal { String ret; if (encodeHTML){ try { - ret = URLEncoder.encode(this.query_original, "UTF-8"); + ret = URLEncoder.encode(this.query_original, StandardCharsets.UTF_8.name()); } catch (final UnsupportedEncodingException e) { ret = this.query_original; } diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java index 7094a1a51..bd2fd4716 100644 --- a/source/net/yacy/search/schema/CollectionConfiguration.java +++ b/source/net/yacy/search/schema/CollectionConfiguration.java @@ -30,6 +30,7 @@ import java.io.Serializable; import java.lang.reflect.Array; import java.net.InetAddress; import java.net.MalformedURLException; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.Date; @@ -318,7 +319,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri if (allAttr || contains(CollectionSchema.linkscount_i)) add(doc, CollectionSchema.linkscount_i, md.llocal() + md.lother()); if (allAttr || contains(CollectionSchema.inboundlinkscount_i)) add(doc, CollectionSchema.inboundlinkscount_i, md.llocal()); if (allAttr || contains(CollectionSchema.outboundlinkscount_i)) add(doc, CollectionSchema.outboundlinkscount_i, md.lother()); - if (allAttr || contains(CollectionSchema.charset_s)) add(doc, CollectionSchema.charset_s, "UTF-8"); + if (allAttr || contains(CollectionSchema.charset_s)) add(doc, CollectionSchema.charset_s, StandardCharsets.UTF_8.name()); // coordinates if (md.lat() != 0.0 && md.lon() != 0.0) { diff --git a/source/net/yacy/server/http/ChunkedInputStream.java b/source/net/yacy/server/http/ChunkedInputStream.java index 961b2bb35..eef30c530 100644 --- a/source/net/yacy/server/http/ChunkedInputStream.java +++ b/source/net/yacy/server/http/ChunkedInputStream.java @@ -36,7 +36,10 @@ package net.yacy.server.http; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; -import java.io.UnsupportedEncodingException; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; + +import org.eclipse.jetty.http.HttpMethod; /** @@ -292,16 +295,12 @@ public class ChunkedInputStream extends InputStream { * * @since 3.0 */ - private static String getAsciiString(final byte[] data) throws IOException { + private static String getAsciiString(final byte[] data) { if (data == null) { throw new IllegalArgumentException("Parameter may not be null"); } - try { - return new String(data, 0, data.length, "US-ASCII"); - } catch (final UnsupportedEncodingException e) { - throw new IOException("HttpClient requires ASCII support"); - } + return new String(data, 0, data.length, StandardCharsets.US_ASCII); } /** @@ -310,7 +309,7 @@ public class ChunkedInputStream extends InputStream { */ private void skipTrailerHeaders() throws IOException { for (; ;) { - String line = readLine(this.in, "US-ASCII"); + String line = readLine(this.in, StandardCharsets.US_ASCII); if ((line == null) || (line.trim().length() < 1)) break; } } @@ -330,7 +329,7 @@ public class ChunkedInputStream extends InputStream { * * @since 3.0 */ - private static String readLine(InputStream inputStream, String charset) throws IOException { + private static String readLine(InputStream inputStream, Charset charset) throws IOException { byte[] rawdata = readRawLine(inputStream); if (rawdata == null) { return null; @@ -354,9 +353,7 @@ public class ChunkedInputStream extends InputStream { /** - * Converts the byte array of HTTP content characters to a string. If - * the specified charset is not supported, default system encoding - * is used. + * Converts the byte array of HTTP content characters to a string. * * @param data the byte array to be encoded * @param offset the index of the first byte to encode @@ -370,22 +367,18 @@ public class ChunkedInputStream extends InputStream { final byte[] data, int offset, int length, - String charset + Charset charset ) { if (data == null) { throw new IllegalArgumentException("Parameter may not be null"); } - if (charset == null || charset.isEmpty()) { - throw new IllegalArgumentException("charset may not be null or empty"); + if (charset == null) { + throw new IllegalArgumentException("charset may not be null"); } - try { - return new String(data, offset, length, charset); - } catch (final UnsupportedEncodingException e) { - return new String(data, offset, length); - } + return new String(data, offset, length, charset); } /** diff --git a/source/net/yacy/server/http/TemplateEngine.java b/source/net/yacy/server/http/TemplateEngine.java index 570ca5cec..5158772b2 100644 --- a/source/net/yacy/server/http/TemplateEngine.java +++ b/source/net/yacy/server/http/TemplateEngine.java @@ -55,6 +55,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; import java.io.PushbackInputStream; +import java.nio.charset.StandardCharsets; import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.encoding.UTF8; @@ -415,7 +416,7 @@ public final class TemplateEngine { BufferedReader br = null; try{ //br = new BufferedReader(new InputStreamReader(new FileInputStream( filename ))); //Simple Include - br = new BufferedReader( new InputStreamReader(new FileInputStream( HTTPDFileHandler.getLocalizedFile(UTF8.String(filename))),"UTF-8") ); //YaCy (with Locales) + br = new BufferedReader( new InputStreamReader(new FileInputStream( HTTPDFileHandler.getLocalizedFile(UTF8.String(filename))), StandardCharsets.UTF_8) ); //YaCy (with Locales) //Read the Include String line = ""; while ((line = br.readLine()) != null) { diff --git a/test/net/yacy/document/parser/htmlParserTest.java b/test/net/yacy/document/parser/htmlParserTest.java index d18a38c99..97ce36717 100644 --- a/test/net/yacy/document/parser/htmlParserTest.java +++ b/test/net/yacy/document/parser/htmlParserTest.java @@ -5,6 +5,7 @@ import java.io.FileInputStream; import java.io.FileNotFoundException; import java.net.MalformedURLException; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.List; import junit.framework.TestCase; import net.yacy.cora.document.id.AnchorURL; @@ -24,15 +25,15 @@ public class htmlParserTest extends TestCase { new String[]{null,null}, new String[]{"windows1250","windows-1250"}, new String[]{"windows_1250","windows-1250"}, - new String[]{"ISO-8859-1","ISO-8859-1"}, - new String[]{"ISO8859-1","ISO-8859-1"}, - new String[]{"ISO-88591","ISO-8859-1"}, - new String[]{"ISO88591","ISO-8859-1"}, - new String[]{"iso_8859_1","ISO-8859-1"}, + new String[]{"ISO-8859-1", StandardCharsets.ISO_8859_1.name()}, + new String[]{"ISO8859-1", StandardCharsets.ISO_8859_1.name()}, + new String[]{"ISO-88591", StandardCharsets.ISO_8859_1.name()}, + new String[]{"ISO88591", StandardCharsets.ISO_8859_1.name()}, + new String[]{"iso_8859_1", StandardCharsets.ISO_8859_1.name()}, new String[]{"cp-1252","windows-1252"}, new String[]{"gb_2312","gb2312"}, // was: x-EUC-CN new String[]{"gb_2312-80","gb2312"}, // was: x-EUC-CN - new String[]{"UTF-8;","UTF-8"} + new String[]{"UTF-8;", StandardCharsets.UTF_8.name()} }; for (int i=0; i < testStrings.length; i++) { @@ -93,7 +94,7 @@ public class htmlParserTest extends TestCase { // test link with inline html in text // expectation to deliver pure text as it is possibly indexed in outboundlinks_anchortext_txt/inboundlinks_anchortext_txt final AnchorURL url = new AnchorURL("http://localhost/"); - final String charset = "UTF-8"; + final String charset = StandardCharsets.UTF_8.name(); final String testhtml = "" + "testtext" // "testtext" + " Start" // "Start" @@ -126,7 +127,7 @@ public class htmlParserTest extends TestCase { @Test public void testParseToScraper_TagTest() throws Exception { final AnchorURL url = new AnchorURL("http://localhost/"); - final String charset = "UTF-8"; + final String charset = StandardCharsets.UTF_8.name(); final String textSource = "test text"; final String testhtml = "" + ""