Merge pull request #38 from luccioman/master

Refactoring : use StandardCharsets instead of hardcoded charset names
pull/41/head
Michael Peter Christen 9 years ago
commit 9a25751850

@ -27,6 +27,7 @@ import java.net.MalformedURLException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.federate.yacy.CacheStrategy;
@ -87,10 +88,17 @@ public class CacheResource_p {
// because a servlet html variable is used for display,
// which is internally processed using utf-8, we need to convert the encoding of the cached resource
final String charset = responseHeader.getCharacterEncoding();
if (charset != null && !charset.equalsIgnoreCase("utf-8")) {
CharBuffer cb = Charset.forName(charset).decode(ByteBuffer.wrap(resource));
ByteBuffer x = Charset.forName("UTF-16").encode(cb); // encode to a default java string (which uses utf-16 and is handled correct for servlet content)
final String charsetName = responseHeader.getCharacterEncoding();
if (charsetName != null && !charsetName.equalsIgnoreCase(StandardCharsets.UTF_8.name())) {
Charset decoderCharset;
/* Specified charset might be incorrect or not supported */
if(Charset.isSupported(charsetName)) {
decoderCharset = Charset.forName(charsetName);
} else {
decoderCharset = StandardCharsets.UTF_8;
}
CharBuffer cb = decoderCharset.decode(ByteBuffer.wrap(resource));
ByteBuffer x = StandardCharsets.UTF_16.encode(cb); // encode to a default java string (which uses utf-16 and is handled correct for servlet content)
prop.put("resource", x.asCharBuffer().toString());
} else {

@ -25,6 +25,7 @@ import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
@ -82,7 +83,7 @@ public class Vocabulary_p {
final boolean discoverFromAuthor = post.get("discovermethod", "").equals("author");
final boolean discoverFromCSV = post.get("discovermethod", "").equals("csv");
final String discoverFromCSVPath = post.get("discoverpath", "").replaceAll("%20", " ");
final String discoverFromCSVCharset = post.get("charset", "UTF-8");
final String discoverFromCSVCharset = post.get("charset", StandardCharsets.UTF_8.name());
final int discovercolumnliteral = post.getInt("discovercolumnliteral", 0);
final int discovercolumnsynonyms = post.getInt("discovercolumnsynonyms", -1);
final int discovercolumnobjectlink = post.getInt("discovercolumnobjectlink", -1);

@ -5,13 +5,15 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import org.xml.sax.SAXException;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.RequestHeader;
@ -36,8 +38,6 @@ import net.yacy.search.Switchboard;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
import org.xml.sax.SAXException;
public class import_ymark {
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
@ -106,15 +106,7 @@ public class import_ymark {
prop.put("status", "1");
*/
} else {
MonitoredReader reader = null;
try {
reader = new MonitoredReader(new InputStreamReader(stream,"UTF-8"), 1024*16, bytes.length);
} catch (final UnsupportedEncodingException e1) {
//TODO: display an error message
ConcurrentLog.logException(e1);
prop.put("status", "0");
return prop;
}
MonitoredReader reader = new MonitoredReader(new InputStreamReader(stream, StandardCharsets.UTF_8), 1024*16, bytes.length);
if(post.get("importer").equals("html") && reader != null) {
final YMarkHTMLImporter htmlImporter = new YMarkHTMLImporter(reader, queueSize, root);
InstantBusyThread.oneTimeJob(htmlImporter, 0);
@ -206,7 +198,7 @@ public class import_ymark {
final File in = new File(sb.workPath, "content.rdf.u8.gz");
final InputStream gzip = new FileInputStream(in);
final InputStream content = new GZIPInputStream(gzip);
final InputStreamReader reader = new InputStreamReader(content, "UTF-8");
final InputStreamReader reader = new InputStreamReader(content, StandardCharsets.UTF_8);
final BufferedReader breader = new BufferedReader(reader);
final MonitoredReader mreader = new MonitoredReader(breader, 1024*1024, in.length());

@ -4,6 +4,7 @@ import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.protocol.ClientIdentification;
@ -145,7 +146,7 @@ public class SMWListSyncThread {
InputStreamReader reader = null;
try {
reader = new InputStreamReader(
urlImport.openStream(), "UTF-8");
urlImport.openStream(), StandardCharsets.UTF_8);
} catch (final Exception e) {
ConcurrentLog.logException(e);
this.runningjob = false;

@ -26,6 +26,7 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.ConcurrentModificationException;
import java.util.HashSet;
@ -73,7 +74,7 @@ public class WordCache {
if (file.getName().endsWith(".gz")) {
is = new GZIPInputStream(is);
}
final BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
final BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
String l;
StringBuilder sb;
try {

@ -25,6 +25,7 @@
package net.yacy.cora.document.encoding;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Comparator;
import org.apache.http.entity.ContentType;
@ -43,7 +44,7 @@ public class UTF8 implements Comparator<String> {
public final static Charset charset;
static {
charset = Charset.forName("UTF-8");
charset = StandardCharsets.UTF_8;
}
private final static ContentType contentType = ContentType.TEXT_PLAIN.withCharset(charset);

@ -21,6 +21,7 @@ package net.yacy.cora.federate.opensearch;
import java.io.IOException;
import java.net.MalformedURLException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@ -63,8 +64,8 @@ public class OpenSearchConnector extends AbstractFederateSearchConnector impleme
tmps = tmps.replace("{startPage}", "");
tmps = tmps.replace("{count}", Integer.toString(rows));
tmps = tmps.replace("{language}", "");
tmps = tmps.replace("{inputEncoding}", "UTF-8");
tmps = tmps.replace("{outputEncoding}", "UTF-8");
tmps = tmps.replace("{inputEncoding}", StandardCharsets.UTF_8.name());
tmps = tmps.replace("{outputEncoding}", StandardCharsets.UTF_8.name());
return tmps.replace("{searchTerms}", query);
}

@ -24,6 +24,7 @@ package net.yacy.cora.federate.solr.connector;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Map;
import java.util.Set;
@ -204,7 +205,7 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
// during the solr query we set the thread name to the query string to get more debugging info in thread dumps
String threadname = Thread.currentThread().getName();
String ql = ""; try {ql = URLDecoder.decode(req.getParams().toString(), "UTF-8");} catch (UnsupportedEncodingException e) {}
String ql = ""; try {ql = URLDecoder.decode(req.getParams().toString(), StandardCharsets.UTF_8.name());} catch (UnsupportedEncodingException e) {}
Thread.currentThread().setName("solr query: " + ql); // for debugging in Threaddump
ConcurrentLog.info("EmbeddedSolrConnector.query", "QUERY: " + ql);
//System.out.println("EmbeddedSolrConnector.query * QUERY: " + ql); System.out.println("STACKTRACE: " + ConcurrentLog.stackTrace());
@ -330,7 +331,7 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
if (this.server == null) throw new IOException("server disconnected");
// during the solr query we set the thread name to the query string to get more debugging info in thread dumps
String threadname = Thread.currentThread().getName();
String ql = ""; try {ql = URLDecoder.decode(params.toString(), "UTF-8");} catch (UnsupportedEncodingException e) {}
String ql = ""; try {ql = URLDecoder.decode(params.toString(), StandardCharsets.UTF_8.name());} catch (UnsupportedEncodingException e) {}
Thread.currentThread().setName("solr query: q=" + ql);
ConcurrentLog.info("EmbeddedSolrConnector.getResponseByParams", "QUERY: " + ql);
//System.out.println("EmbeddedSolrConnector.getResponseByParams * QUERY: " + ql); System.out.println("STACKTRACE: " + ConcurrentLog.stackTrace());

@ -22,6 +22,7 @@ package net.yacy.cora.federate.solr.responsewriter;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
@ -178,8 +179,8 @@ public class GSAResponseWriter implements QueryResponseWriter {
OpensearchResponseWriter.solitaireTag(writer, "Q", query);
paramTag(writer, "sort", sort);
paramTag(writer, "output", "xml_no_dtd");
paramTag(writer, "ie", "UTF-8");
paramTag(writer, "oe", "UTF-8");
paramTag(writer, "ie", StandardCharsets.UTF_8.name());
paramTag(writer, "oe", StandardCharsets.UTF_8.name());
paramTag(writer, "client", client);
paramTag(writer, "q", query);
paramTag(writer, "site", site);

@ -27,6 +27,7 @@ import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@ -92,7 +93,7 @@ public class GeonamesLocation implements Locations {
entryName = entryName.substring(0, entryName.length() - 3) + "txt";
final ZipEntry ze = zf.getEntry(entryName);
final InputStream is = zf.getInputStream(ze);
reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
} catch (final IOException e ) {
log.warn(e);
return;

@ -28,6 +28,7 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@ -79,7 +80,7 @@ public class OpenGeoDBLocation implements Locations
if ( file.getName().endsWith(".gz") ) {
is = new GZIPInputStream(is);
}
reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
String line;
// read lines

@ -49,6 +49,7 @@ import java.net.InetSocketAddress;
import java.net.ServerSocket;
import java.net.Socket;
import java.net.SocketException;
import java.nio.charset.StandardCharsets;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
@ -1967,7 +1968,7 @@ public class FTPClient {
private void send(final String buf) throws IOException {
if (this.clientOutput == null) return;
byte[] b = buf.getBytes("UTF-8");
byte[] b = buf.getBytes(StandardCharsets.UTF_8);
this.clientOutput.write(b, 0, b.length);
this.clientOutput.write('\r');
this.clientOutput.write('\n');

@ -34,6 +34,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.zip.GZIPInputStream;
@ -78,7 +79,7 @@ public class Files {
public static BlockingQueue<String> concurentLineReader(final File f) throws IOException {
final BlockingQueue<String> q = new LinkedBlockingQueue<String>();
final InputStream is = read(f);
final BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
final BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
Thread t = new Thread() {
@Override
public void run() {

@ -31,6 +31,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.RandomAccessFile;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
@ -59,7 +60,7 @@ public class KeyList implements Iterable<String> {
if (file.getName().endsWith(".gz")) {
is = new GZIPInputStream(is);
}
final BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
final BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
String l;
try {
while ((l = reader.readLine()) != null) {

@ -26,6 +26,7 @@
package net.yacy.crawler.retrieval;
import java.nio.charset.StandardCharsets;
import java.util.Date;
import net.yacy.cora.document.analysis.Classification;
@ -858,7 +859,7 @@ public class Response {
final String supportError = TextParser.supports(url(), this.responseHeader == null ? null : this.responseHeader.getContentType());
if (supportError != null) throw new Parser.Failure("no parser support:" + supportError, url());
try {
return TextParser.parseSource(new AnchorURL(url()), this.responseHeader == null ? null : this.responseHeader.getContentType(), this.responseHeader == null ? "UTF-8" : this.responseHeader.getCharacterEncoding(), new VocabularyScraper(), this.request.timezoneOffset(), this.request.depth(), this.content);
return TextParser.parseSource(new AnchorURL(url()), this.responseHeader == null ? null : this.responseHeader.getContentType(), this.responseHeader == null ? StandardCharsets.UTF_8.name() : this.responseHeader.getCharacterEncoding(), new VocabularyScraper(), this.request.timezoneOffset(), this.request.depth(), this.content);
} catch (final Exception e) {
return null;
}

@ -28,9 +28,9 @@ import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.net.MalformedURLException;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collection;
@ -44,6 +44,12 @@ import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8;
@ -58,12 +64,6 @@ import net.yacy.document.parser.html.TransformerWriter;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.util.FileUtils;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public class BookmarkHelper {
@ -117,16 +117,12 @@ public class BookmarkHelper {
// --------------------------------------
public static int importFromBookmarks(final BookmarksDB db, final DigestURL baseURL, final String input, final String tag, final boolean importPublic){
try {
// convert string to input stream
final ByteArrayInputStream byteIn = new ByteArrayInputStream(UTF8.getBytes(input));
final InputStreamReader reader = new InputStreamReader(byteIn,"UTF-8");
// import stream
return importFromBookmarks(db, baseURL, reader, tag, importPublic);
} catch (final UnsupportedEncodingException e) {
return 0;
}
// convert string to input stream
final ByteArrayInputStream byteIn = new ByteArrayInputStream(UTF8.getBytes(input));
final InputStreamReader reader = new InputStreamReader(byteIn, StandardCharsets.UTF_8);
// import stream
return importFromBookmarks(db, baseURL, reader, tag, importPublic);
}
private static int importFromBookmarks(final BookmarksDB db, final DigestURL baseURL, final InputStreamReader input, final String tag, final boolean importPublic){

@ -39,6 +39,7 @@ import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
@ -153,7 +154,7 @@ public class Translator {
StringBuilder content = new StringBuilder();
BufferedReader br = null;
try{
br = new BufferedReader(new InputStreamReader(new FileInputStream(sourceFile),"UTF-8"));
br = new BufferedReader(new InputStreamReader(new FileInputStream(sourceFile), StandardCharsets.UTF_8));
String line = null;
while( (line = br.readLine()) != null){
content.append(line).append(net.yacy.server.serverCore.CRLF_STRING);
@ -172,7 +173,7 @@ public class Translator {
String processedContent = translate(content.toString(), translationList);
BufferedWriter bw = null;
try{
bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(destFile),"UTF-8"));
bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(destFile), StandardCharsets.UTF_8));
bw.write(processedContent);
bw.close();
}catch(final IOException e){

@ -31,6 +31,7 @@ import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
abstract class AbstractWikiParser implements WikiParser {
@ -50,7 +51,7 @@ abstract class AbstractWikiParser implements WikiParser {
@Override
public String transform(String hostport, final byte[] content) throws UnsupportedEncodingException {
return transform(hostport, content, "UTF-8");
return transform(hostport, content, StandardCharsets.UTF_8.name());
}
@Override

@ -33,6 +33,7 @@ import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@ -805,7 +806,7 @@ dc_rights
public String toString() {
final ByteArrayOutputStream baos = new ByteArrayOutputStream();
try {
final Writer osw = new OutputStreamWriter(baos, "UTF-8");
final Writer osw = new OutputStreamWriter(baos, StandardCharsets.UTF_8);
writeXML(osw);
osw.close();
return UTF8.String(baos.toByteArray());

@ -31,6 +31,7 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.MalformedURLException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
@ -280,7 +281,7 @@ public class LibraryProvider {
BufferedReader reader = null;
try {
reader = new BufferedReader(new InputStreamReader(derewoTxtEntry, "UTF-8"));
reader = new BufferedReader(new InputStreamReader(derewoTxtEntry, StandardCharsets.UTF_8));
String line;
// read until text starts

@ -24,6 +24,7 @@ package net.yacy.document;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.HashMap;
@ -61,7 +62,7 @@ public class ProbabilisticClassifier {
public Context(String context_name, Map<String, File> categoryExampleLinesFiles, File negativeExampleLines) throws IOException {
this.context_name = context_name;
int requiredSize = 0;
Charset charset = Charset.forName("UTF-8");
Charset charset = StandardCharsets.UTF_8;
Map<String, List<String>> categoryBuffer = new HashMap<>();
for (Map.Entry<String, File> category: categoryExampleLinesFiles.entrySet()) {
List<String> list = Files.readAllLines(category.getValue().toPath(), charset);

@ -28,6 +28,7 @@ package net.yacy.document.content;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.nio.charset.StandardCharsets;
import java.text.Collator;
import java.text.ParseException;
import java.util.ArrayList;
@ -332,7 +333,7 @@ public class DCEntry extends MultiMapSolrParams {
return new Document(
getIdentifier(true),
"text/html",
"UTF-8",
StandardCharsets.UTF_8.name(),
this,
languages,
getSubject(), // might be null

@ -32,6 +32,8 @@ import java.io.PushbackInputStream;
import java.io.Reader;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
@ -83,7 +85,7 @@ public class SurrogateReader extends DefaultHandler implements Runnable {
private final CrawlStacker crawlStacker;
private final CollectionConfiguration configuration;
private final int concurrency;
private String charsetName = "UTF-8";
private Charset charset = StandardCharsets.UTF_8;
private static final ThreadLocal<SAXParser> tlSax = new ThreadLocal<SAXParser>();
private static SAXParser getParser() throws SAXException {
@ -113,9 +115,9 @@ public class SurrogateReader extends DefaultHandler implements Runnable {
this.elementName = null;
this.surrogates = new ArrayBlockingQueue<>(queueSize);
Reader reader = new BufferedReader(new InputStreamReader(stream, this.charsetName));
Reader reader = new BufferedReader(new InputStreamReader(stream, this.charset));
this.inputSource = new InputSource(reader);
this.inputSource.setEncoding(this.charsetName);
this.inputSource.setEncoding(this.charset.name());
this.inputStream = stream;
try {
@ -131,7 +133,7 @@ public class SurrogateReader extends DefaultHandler implements Runnable {
// test the syntax of the stream by reading parts of the beginning
try {
if (isSolrDump()) {
BufferedReader br = new BufferedReader(new InputStreamReader(this.inputStream, this.charsetName));
BufferedReader br = new BufferedReader(new InputStreamReader(this.inputStream, this.charset));
String line;
while ((line = br.readLine()) != null) {
if (!line.startsWith("<doc>")) continue;
@ -191,7 +193,7 @@ public class SurrogateReader extends DefaultHandler implements Runnable {
try {
nbRead = this.inputStream.read(b);
if(nbRead > 0) {
String s = new String(b, 0, nbRead, this.charsetName);
String s = new String(b, 0, nbRead, this.charset);
if ((s.contains("<response>") && s.contains("<result>")) || s.startsWith("<doc>")) {
res = true;
}

@ -31,6 +31,7 @@ import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.nio.charset.StandardCharsets;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
@ -285,7 +286,7 @@ public class PhpBB3Dao implements Dao {
outputfile = new File(targetdir, targethost + "." + versioninfo + "." + fc + ".xml");
if (outputfiletmp.exists()) outputfiletmp.delete();
if (outputfile.exists()) outputfile.delete();
osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(outputfiletmp)), "UTF-8");
osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(outputfiletmp)), StandardCharsets.UTF_8);
osw.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" + SurrogateReader.SURROGATES_MAIN_ELEMENT_OPEN + "\n");
}
e.writeXML(osw);

@ -39,6 +39,7 @@ import java.io.RandomAccessFile;
import java.io.UnsupportedEncodingException;
import java.lang.reflect.Array;
import java.net.MalformedURLException;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
@ -161,7 +162,7 @@ public class MediawikiImporter extends Thread implements Importer {
} else if (this.sourcefile.getName().endsWith(".gz")) {
is = new GZIPInputStream(is);
}
final BufferedReader r = new BufferedReader(new java.io.InputStreamReader(is, "UTF-8"), 4 * 1024 * 1024);
final BufferedReader r = new BufferedReader(new java.io.InputStreamReader(is, StandardCharsets.UTF_8), 4 * 1024 * 1024);
String t;
StringBuilder sb = new StringBuilder();
boolean page = false, text = false;
@ -520,7 +521,7 @@ public class MediawikiImporter extends Thread implements Importer {
public void genDocument() throws Parser.Failure {
try {
this.url = new AnchorURL(this.urlStub + this.title);
final Document[] parsed = TextParser.parseSource(this.url, "text/html", "UTF-8", new VocabularyScraper(), 0, 1, UTF8.getBytes(this.html));
final Document[] parsed = TextParser.parseSource(this.url, "text/html", StandardCharsets.UTF_8.name(), new VocabularyScraper(), 0, 1, UTF8.getBytes(this.html));
this.document = Document.mergeDocuments(this.url, "text/html", parsed);
// the wiki parser is not able to find the proper title in the source text, so it must be set here
this.document.setTitle(this.title);
@ -712,7 +713,7 @@ public class MediawikiImporter extends Thread implements Importer {
if (this.osw == null) {
// start writing a new file
this.outputfilename = this.targetstub + "." + this.fc + ".xml.prt";
this.osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(new File(this.targetdir, this.outputfilename))), "UTF-8");
this.osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(new File(this.targetdir, this.outputfilename))), StandardCharsets.UTF_8);
this.osw.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" + SurrogateReader.SURROGATES_MAIN_ELEMENT_OPEN + "\n");
}
ConcurrentLog.info("WIKITRANSLATION", "[CONSUME] Title: " + record.title);
@ -726,7 +727,7 @@ public class MediawikiImporter extends Thread implements Importer {
this.rc = 0;
this.fc++;
this.outputfilename = this.targetstub + "." + this.fc + ".xml.prt";
this.osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(new File(this.targetdir, this.outputfilename))), "UTF-8");
this.osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(new File(this.targetdir, this.outputfilename))), StandardCharsets.UTF_8);
this.osw.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" + SurrogateReader.SURROGATES_MAIN_ELEMENT_OPEN + "\n");
}
}

@ -26,6 +26,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
@ -411,7 +412,7 @@ public class apkParser extends AbstractParser implements Parser {
final byte[] asa = new byte[arscStream.available()];
arscStream.read(asa);
int pos = 0;
final Charset charset = Charset.forName("UTF-8");
final Charset charset = StandardCharsets.UTF_8;
final List<String> s = new ArrayList<>();
parseloop: while (pos < asa.length) {
while (pos < asa.length && asa[pos] != 0) pos++;

@ -28,6 +28,7 @@
package net.yacy.document.parser;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
@ -111,7 +112,7 @@ public class docParser extends AbstractParser implements Parser {
docs = new Document[]{new Document(
location,
mimeType,
"UTF-8",
StandardCharsets.UTF_8.name(),
this,
null,
keywlist,

@ -28,6 +28,7 @@ import java.io.Writer;
import java.lang.reflect.Array;
import java.net.MalformedURLException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Date;
@ -1154,7 +1155,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
if (page == null) throw new IOException("no content in file " + file.toString());
// scrape document to look up charset
final ScraperInputStream htmlFilter = new ScraperInputStream(new ByteArrayInputStream(page), "UTF-8", new VocabularyScraper(), new DigestURL("http://localhost"), null, false, maxLinks, timezoneOffset);
final ScraperInputStream htmlFilter = new ScraperInputStream(new ByteArrayInputStream(page), StandardCharsets.UTF_8.name(), new VocabularyScraper(), new DigestURL("http://localhost"), null, false, maxLinks, timezoneOffset);
String charset = htmlParser.patchCharsetEncoding(htmlFilter.detectCharset());
htmlFilter.close();
if (charset == null) charset = Charset.defaultCharset().toString();

@ -33,6 +33,7 @@ import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.Properties;
import net.yacy.cora.document.id.DigestURL;
@ -77,12 +78,7 @@ public class ScraperInputStream extends InputStream implements ScraperListener {
try {
this.reader = (inputStreamCharset == null) ? new InputStreamReader(this) : new InputStreamReader(this,inputStreamCharset);
} catch (final UnsupportedEncodingException e) {
try {
this.reader = new InputStreamReader(this, "UTF-8");
} catch (final UnsupportedEncodingException e1) {
// how is that possible?
this.reader = new InputStreamReader(this);
}
this.reader = new InputStreamReader(this, StandardCharsets.UTF_8);
}
this.writer = new TransformerWriter(null,null,scraper,transformer,passbyIfBinarySuspect);
}

@ -32,6 +32,7 @@ import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnsupportedCharsetException;
import java.util.HashSet;
import java.util.LinkedHashMap;
@ -291,8 +292,8 @@ public class htmlParser extends AbstractParser implements Parser {
encoding = CommonPattern.UNDERSCORE.matcher(encoding).replaceAll("-");
if (encoding.matches("GB[_-]?2312([-_]80)?")) return "GB2312";
if (encoding.matches(".*UTF[-_]?8.*")) return "UTF-8";
if (encoding.startsWith("US")) return "US-ASCII";
if (encoding.matches(".*UTF[-_]?8.*")) return StandardCharsets.UTF_8.name();
if (encoding.startsWith("US")) return StandardCharsets.US_ASCII.name();
if (encoding.startsWith("KOI")) return "KOI8-R";
// patch missing '-'
@ -385,7 +386,7 @@ public class htmlParser extends AbstractParser implements Parser {
try {
url = new AnchorURL(args[0]);
final byte[] content = url.get(ClientIdentification.yacyInternetCrawlerAgent, null, null);
final Document[] document = new htmlParser().parse(url, "text/html", "utf-8", new VocabularyScraper(), 0, new ByteArrayInputStream(content));
final Document[] document = new htmlParser().parse(url, "text/html", StandardCharsets.UTF_8.name(), new VocabularyScraper(), 0, new ByteArrayInputStream(content));
final String title = document[0].dc_title();
System.out.println(title);
} catch (final MalformedURLException e) {

@ -34,6 +34,7 @@ import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
@ -183,7 +184,7 @@ public class genericImageParser extends AbstractParser implements Parser {
return new Document[]{new Document(
location,
mimeType,
"UTF-8",
StandardCharsets.UTF_8.name(),
this,
languages,
keywords == null ? new String[]{} : keywords.split(keywords.indexOf(',') > 0 ? "," : " "), // keywords
@ -302,7 +303,7 @@ public class genericImageParser extends AbstractParser implements Parser {
AnchorURL uri;
try {
uri = new AnchorURL("http://localhost/" + image.getName());
final Document[] document = parser.parse(uri, "image/" + MultiProtocolURL.getFileExtension(uri.getFileName()), "UTF-8", new VocabularyScraper(), 0, new FileInputStream(image));
final Document[] document = parser.parse(uri, "image/" + MultiProtocolURL.getFileExtension(uri.getFileName()), StandardCharsets.UTF_8.name(), new VocabularyScraper(), 0, new FileInputStream(image));
System.out.println(document[0].toString());
} catch (final MalformedURLException e) {
e.printStackTrace();

@ -21,6 +21,7 @@ package net.yacy.document.parser.images;
import java.io.EOFException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.LinkedHashMap;
import javax.xml.parsers.ParserConfigurationException;
@ -118,7 +119,7 @@ public class svgParser extends AbstractParser implements Parser {
Document[] docs = new Document[]{new Document(
location,
mimeType,
"UTF-8",
StandardCharsets.UTF_8.name(),
this,
null,
null,

@ -26,6 +26,7 @@ package net.yacy.document.parser;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
@ -109,7 +110,7 @@ public class mmParser extends AbstractParser implements Parser {
return new Document[]{new Document(
location,
mimeType,
"UTF-8",
StandardCharsets.UTF_8.name(),
this,
null,
null,

@ -29,6 +29,7 @@ package net.yacy.document.parser;
import java.io.File;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Date;
import java.util.Enumeration;
@ -188,7 +189,7 @@ public class odtParser extends AbstractParser implements Parser {
docs = new Document[]{new Document(
location,
mimeType,
"UTF-8",
StandardCharsets.UTF_8.name(),
this,
languages,
docKeywords,

@ -29,6 +29,7 @@ package net.yacy.document.parser;
import java.io.File;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Date;
import java.util.Enumeration;
@ -172,7 +173,7 @@ public class ooxmlParser extends AbstractParser implements Parser {
docs = new Document[]{new Document(
location,
mimeType,
"UTF-8",
StandardCharsets.UTF_8.name(),
this,
languages,
docKeywords,

@ -33,6 +33,7 @@ import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Method;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
@ -169,7 +170,7 @@ public class pdfParser extends AbstractParser implements Parser {
pdflinks = extractPdfLinks(pdfDoc);
// get the fulltext (either per document or for each page)
final PDFTextStripper stripper = new PDFTextStripper("UTF-8");
final PDFTextStripper stripper = new PDFTextStripper(StandardCharsets.UTF_8.name());
if (individualPages) {
// this is a hack which stores individual pages of the source pdf into individual index documents
@ -193,7 +194,7 @@ public class pdfParser extends AbstractParser implements Parser {
result[page] = new Document(
new AnchorURL(loc + (loc.indexOf('?') > 0 ? '&' : '?') + individualPagePropertyname + '=' + (page + 1)), // these are virtual new pages; we cannot combine them with '#' as that would be removed when computing the urlhash
mimeType,
"UTF-8",
StandardCharsets.UTF_8.name(),
this,
null,
docKeywords,
@ -243,7 +244,7 @@ public class pdfParser extends AbstractParser implements Parser {
result = new Document[]{new Document(
location,
mimeType,
"UTF-8",
StandardCharsets.UTF_8.name(),
this,
null,
docKeywords,

@ -29,6 +29,7 @@ package net.yacy.document.parser;
import java.io.BufferedInputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
@ -104,7 +105,7 @@ public class pptParser extends AbstractParser implements Parser {
final Document[] docs = new Document[]{new Document(
location,
mimeType,
"UTF-8",
StandardCharsets.UTF_8.name(),
this,
null,
keywlist,

@ -34,6 +34,7 @@ import java.io.FileReader;
import java.io.FileWriter;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Date;
import net.yacy.cora.document.id.AnchorURL;
@ -104,7 +105,7 @@ public class psParser extends AbstractParser implements Parser {
final Document[] docs = new Document[]{new Document(
location, // url
mimeType, // mime
"UTF-8", // charset
StandardCharsets.UTF_8.name(), // charset
this,
null, // languages
null, // keywords

@ -28,6 +28,7 @@
package net.yacy.document.parser;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Date;
import javax.swing.text.DefaultStyledDocument;
@ -73,7 +74,7 @@ public class rtfParser extends AbstractParser implements Parser {
return new Document[]{new Document(
location,
mimeType,
"UTF-8",
StandardCharsets.UTF_8.name(),
this,
null,
null,

@ -26,7 +26,7 @@ package net.yacy.document.parser;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
@ -88,7 +88,7 @@ public class sidAudioParser extends AbstractParser implements Parser {
return new Document[]{new Document(
location,
mimeType,
"UTF-8",
StandardCharsets.UTF_8.name(),
this,
null,
null,
@ -134,9 +134,9 @@ public class sidAudioParser extends AbstractParser implements Parser {
Map<String, String> ret = new HashMap<String, String>();
ret.put("name", new String(name, Charset.forName("ISO-8859-1")).trim());
ret.put("author", new String(author, Charset.forName("ISO-8859-1")).trim());
ret.put("publisher", new String(copyright, Charset.forName("ISO-8859-1")).trim());
ret.put("name", new String(name, StandardCharsets.ISO_8859_1).trim());
ret.put("author", new String(author, StandardCharsets.ISO_8859_1).trim());
ret.put("publisher", new String(copyright, StandardCharsets.ISO_8859_1).trim());
return ret;
}

@ -29,6 +29,7 @@ package net.yacy.document.parser;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
@ -110,7 +111,7 @@ public class swfParser extends AbstractParser implements Parser {
return new Document[]{new Document(
location, // url of the source document
mimeType, // the documents mime type
"UTF-8", // charset of the document text
StandardCharsets.UTF_8.name(), // charset of the document text
this,
null,
null, //keywords

@ -28,6 +28,7 @@ import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Date;
import java.util.List;
import java.util.Map;
@ -126,7 +127,7 @@ public class torrentParser extends AbstractParser implements Parser {
try {
byte[] b = FileUtils.read(new File(args[0]));
torrentParser parser = new torrentParser();
Document[] d = parser.parse(new AnchorURL("http://localhost/test.torrent"), null, "UTF-8", new VocabularyScraper(), 0, new ByteArrayInputStream(b));
Document[] d = parser.parse(new AnchorURL("http://localhost/test.torrent"), null, StandardCharsets.UTF_8.name(), new VocabularyScraper(), 0, new ByteArrayInputStream(b));
Condenser c = new Condenser(d[0], null, true, true, LibraryProvider.dymLib, false, false, 0);
Map<String, Word> w = c.words();
for (Map.Entry<String, Word> e: w.entrySet()) System.out.println("Word: " + e.getKey() + " - " + e.getValue().posInText);

@ -28,6 +28,7 @@
package net.yacy.document.parser;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
@ -112,7 +113,7 @@ public class vsdParser extends AbstractParser implements Parser {
return new Document[]{new Document(
location, // url of the source document
mimeType, // the documents mime type
"UTF-8", // charset of the document text
StandardCharsets.UTF_8.name(), // charset of the document text
this,
null, // language
keywords,

@ -28,6 +28,7 @@
package net.yacy.document.parser;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Date;
import net.yacy.cora.document.id.AnchorURL;
@ -123,7 +124,7 @@ public class xlsParser extends AbstractParser implements Parser {
return new Document[]{new Document(
location,
mimeType,
"UTF-8",
StandardCharsets.UTF_8.name(),
this,
null,
null,

@ -23,7 +23,7 @@ package net.yacy.http.servlets;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
@ -31,12 +31,6 @@ import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.search.Switchboard;
import net.yacy.search.schema.CollectionSchema;
import net.yacy.search.schema.WebgraphSchema;
import org.apache.solr.common.params.MultiMapSolrParams;
import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.request.SolrQueryRequest;
@ -48,10 +42,15 @@ import org.apache.solr.servlet.SolrRequestParsers;
import org.apache.solr.servlet.cache.Method;
import org.apache.solr.util.FastWriter;
import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.search.Switchboard;
import net.yacy.search.schema.CollectionSchema;
import net.yacy.search.schema.WebgraphSchema;
public class SolrServlet extends HttpServlet {
private static final long serialVersionUID = 1L;
private static final Charset UTF8 = Charset.forName("UTF-8");
@Override
public void service(ServletRequest request, ServletResponse response) throws IOException, ServletException {
@ -89,8 +88,8 @@ public class SolrServlet extends HttpServlet {
binWriter.write(response.getOutputStream(), solrReq, solrRsp);
} else {
String charset = ContentStreamBase.getCharsetFromContentType(ct);
Writer out = (charset == null || charset.equalsIgnoreCase("UTF-8"))
? new OutputStreamWriter(response.getOutputStream(), UTF8)
Writer out = (charset == null || charset.equalsIgnoreCase(StandardCharsets.UTF_8.name()))
? new OutputStreamWriter(response.getOutputStream(), StandardCharsets.UTF_8)
: new OutputStreamWriter(response.getOutputStream(), charset);
out = new FastWriter(out);
responseWriter.write(out, solrReq, solrRsp);

@ -7,6 +7,7 @@ import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.regex.PatternSyntaxException;
import javax.servlet.Servlet;
@ -321,7 +322,7 @@ public class UrlProxyServlet extends ProxyServlet implements Servlet {
if (b == -1) {
return null;
}
return buf.toString("UTF-8");
return buf.toString(StandardCharsets.UTF_8.name());
}
/**

@ -34,6 +34,7 @@ import java.lang.reflect.Array;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.AbstractMap;
import java.util.Enumeration;
import java.util.Iterator;
@ -478,7 +479,7 @@ public class YaCyDefaultServlet extends HttpServlet {
return;
}
byte[] data = dir.getBytes("UTF-8");
byte[] data = dir.getBytes(StandardCharsets.UTF_8);
response.setContentType(MimeTypes.Type.TEXT_HTML_UTF_8.asString());
response.setContentLength(data.length);
response.setHeader(HeaderFramework.CACHE_CONTROL, "no-cache, no-store");
@ -1093,7 +1094,7 @@ public class YaCyDefaultServlet extends HttpServlet {
// simple text
if (item.getContentType() == null || !item.getContentType().contains("charset")) {
// old yacy clients use their local default charset, on most systems UTF-8 (I hope ;)
args.add(item.getFieldName(), item.getString("UTF-8"));
args.add(item.getFieldName(), item.getString(StandardCharsets.UTF_8.name()));
} else {
// use default encoding (given as header or ISO-8859-1)
args.add(item.getFieldName(), item.getString());

@ -8,6 +8,7 @@ import java.io.StringWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
@ -299,7 +300,7 @@ public class YaCyProxyServlet extends ProxyServlet implements Servlet {
if (b == -1) {
return null;
}
return buf.toString("UTF-8");
return buf.toString(StandardCharsets.UTF_8.name());
}
/**

@ -44,6 +44,7 @@ import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
@ -430,7 +431,7 @@ public final class FileUtils {
PrintWriter pw = null;
final File tf = new File(file.toString() + "." + (System.currentTimeMillis() % 1000));
try {
pw = new PrintWriter(tf, "UTF-8");
pw = new PrintWriter(tf, StandardCharsets.UTF_8.name());
pw.println("# " + comment);
String key, value;
for ( final Map.Entry<String, String> entry : props.entrySet() ) {
@ -518,11 +519,7 @@ public final class FileUtils {
if ( a == null ) {
return new ArrayList<String>().iterator();
}
try {
return new StringsIterator(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(a), "UTF-8")));
} catch (final UnsupportedEncodingException e ) {
return null;
}
return new StringsIterator(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(a), StandardCharsets.UTF_8)));
}
/**
@ -536,7 +533,7 @@ public final class FileUtils {
final ArrayList<String> list = new ArrayList<String>();
BufferedReader br = null;
try {
br = new BufferedReader(new InputStreamReader(new FileInputStream(listFile), "UTF-8"));
br = new BufferedReader(new InputStreamReader(new FileInputStream(listFile), StandardCharsets.UTF_8));
while ( (line = br.readLine()) != null ) {
list.add(line);

@ -32,6 +32,7 @@ import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.RandomAccessFile;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Date;
@ -240,7 +241,7 @@ public class AccessTracker {
raf.readFully(buffer); // we make a copy because that dramatically speeds up reading lines; RandomAccessFile.readLine is very slow
raf.close();
ByteArrayInputStream bais = new ByteArrayInputStream(buffer);
BufferedReader reader = new BufferedReader(new InputStreamReader(bais, "UTF-8"));
BufferedReader reader = new BufferedReader(new InputStreamReader(bais, StandardCharsets.UTF_8));
String line;
while ((line = reader.readLine()) != null) {
// parse the line

@ -23,6 +23,7 @@ package net.yacy.search.query;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
@ -209,7 +210,7 @@ public class QueryGoal {
String ret;
if (encodeHTML){
try {
ret = URLEncoder.encode(this.query_original, "UTF-8");
ret = URLEncoder.encode(this.query_original, StandardCharsets.UTF_8.name());
} catch (final UnsupportedEncodingException e) {
ret = this.query_original;
}

@ -30,6 +30,7 @@ import java.io.Serializable;
import java.lang.reflect.Array;
import java.net.InetAddress;
import java.net.MalformedURLException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
@ -318,7 +319,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
if (allAttr || contains(CollectionSchema.linkscount_i)) add(doc, CollectionSchema.linkscount_i, md.llocal() + md.lother());
if (allAttr || contains(CollectionSchema.inboundlinkscount_i)) add(doc, CollectionSchema.inboundlinkscount_i, md.llocal());
if (allAttr || contains(CollectionSchema.outboundlinkscount_i)) add(doc, CollectionSchema.outboundlinkscount_i, md.lother());
if (allAttr || contains(CollectionSchema.charset_s)) add(doc, CollectionSchema.charset_s, "UTF-8");
if (allAttr || contains(CollectionSchema.charset_s)) add(doc, CollectionSchema.charset_s, StandardCharsets.UTF_8.name());
// coordinates
if (md.lat() != 0.0 && md.lon() != 0.0) {

@ -36,7 +36,10 @@ package net.yacy.server.http;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import org.eclipse.jetty.http.HttpMethod;
/**
@ -292,16 +295,12 @@ public class ChunkedInputStream extends InputStream {
*
* @since 3.0
*/
private static String getAsciiString(final byte[] data) throws IOException {
private static String getAsciiString(final byte[] data) {
if (data == null) {
throw new IllegalArgumentException("Parameter may not be null");
}
try {
return new String(data, 0, data.length, "US-ASCII");
} catch (final UnsupportedEncodingException e) {
throw new IOException("HttpClient requires ASCII support");
}
return new String(data, 0, data.length, StandardCharsets.US_ASCII);
}
/**
@ -310,7 +309,7 @@ public class ChunkedInputStream extends InputStream {
*/
private void skipTrailerHeaders() throws IOException {
for (; ;) {
String line = readLine(this.in, "US-ASCII");
String line = readLine(this.in, StandardCharsets.US_ASCII);
if ((line == null) || (line.trim().length() < 1)) break;
}
}
@ -330,7 +329,7 @@ public class ChunkedInputStream extends InputStream {
*
* @since 3.0
*/
private static String readLine(InputStream inputStream, String charset) throws IOException {
private static String readLine(InputStream inputStream, Charset charset) throws IOException {
byte[] rawdata = readRawLine(inputStream);
if (rawdata == null) {
return null;
@ -354,9 +353,7 @@ public class ChunkedInputStream extends InputStream {
/**
* Converts the byte array of HTTP content characters to a string. If
* the specified charset is not supported, default system encoding
* is used.
* Converts the byte array of HTTP content characters to a string.
*
* @param data the byte array to be encoded
* @param offset the index of the first byte to encode
@ -370,22 +367,18 @@ public class ChunkedInputStream extends InputStream {
final byte[] data,
int offset,
int length,
String charset
Charset charset
) {
if (data == null) {
throw new IllegalArgumentException("Parameter may not be null");
}
if (charset == null || charset.isEmpty()) {
throw new IllegalArgumentException("charset may not be null or empty");
if (charset == null) {
throw new IllegalArgumentException("charset may not be null");
}
try {
return new String(data, offset, length, charset);
} catch (final UnsupportedEncodingException e) {
return new String(data, offset, length);
}
return new String(data, offset, length, charset);
}
/**

@ -55,6 +55,7 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PushbackInputStream;
import java.nio.charset.StandardCharsets;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8;
@ -415,7 +416,7 @@ public final class TemplateEngine {
BufferedReader br = null;
try{
//br = new BufferedReader(new InputStreamReader(new FileInputStream( filename ))); //Simple Include
br = new BufferedReader( new InputStreamReader(new FileInputStream( HTTPDFileHandler.getLocalizedFile(UTF8.String(filename))),"UTF-8") ); //YaCy (with Locales)
br = new BufferedReader( new InputStreamReader(new FileInputStream( HTTPDFileHandler.getLocalizedFile(UTF8.String(filename))), StandardCharsets.UTF_8) ); //YaCy (with Locales)
//Read the Include
String line = "";
while ((line = br.readLine()) != null) {

@ -5,6 +5,7 @@ import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.net.MalformedURLException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.List;
import junit.framework.TestCase;
import net.yacy.cora.document.id.AnchorURL;
@ -24,15 +25,15 @@ public class htmlParserTest extends TestCase {
new String[]{null,null},
new String[]{"windows1250","windows-1250"},
new String[]{"windows_1250","windows-1250"},
new String[]{"ISO-8859-1","ISO-8859-1"},
new String[]{"ISO8859-1","ISO-8859-1"},
new String[]{"ISO-88591","ISO-8859-1"},
new String[]{"ISO88591","ISO-8859-1"},
new String[]{"iso_8859_1","ISO-8859-1"},
new String[]{"ISO-8859-1", StandardCharsets.ISO_8859_1.name()},
new String[]{"ISO8859-1", StandardCharsets.ISO_8859_1.name()},
new String[]{"ISO-88591", StandardCharsets.ISO_8859_1.name()},
new String[]{"ISO88591", StandardCharsets.ISO_8859_1.name()},
new String[]{"iso_8859_1", StandardCharsets.ISO_8859_1.name()},
new String[]{"cp-1252","windows-1252"},
new String[]{"gb_2312","gb2312"}, // was: x-EUC-CN
new String[]{"gb_2312-80","gb2312"}, // was: x-EUC-CN
new String[]{"UTF-8;","UTF-8"}
new String[]{"UTF-8;", StandardCharsets.UTF_8.name()}
};
for (int i=0; i < testStrings.length; i++) {
@ -93,7 +94,7 @@ public class htmlParserTest extends TestCase {
// test link with inline html in text
// expectation to deliver pure text as it is possibly indexed in outboundlinks_anchortext_txt/inboundlinks_anchortext_txt
final AnchorURL url = new AnchorURL("http://localhost/");
final String charset = "UTF-8";
final String charset = StandardCharsets.UTF_8.name();
final String testhtml = "<html><body>"
+ "<a href='x1.html'><span>testtext</span></a>" // "testtext"
+ "<a href=\"http://localhost/x2.html\"> <i id=\"home-icon\" class=\"img-sprite\"></i>Start</a>" // "Start"
@ -126,7 +127,7 @@ public class htmlParserTest extends TestCase {
@Test
public void testParseToScraper_TagTest() throws Exception {
final AnchorURL url = new AnchorURL("http://localhost/");
final String charset = "UTF-8";
final String charset = StandardCharsets.UTF_8.name();
final String textSource = "test text";
final String testhtml = "<html>"
+ "<head><style type=\"text/css\"> h1 { color: #ffffff; }</style></head>"

Loading…
Cancel
Save