diff --git a/source/de/anomic/data/URLFetcherStack.java b/source/de/anomic/data/URLFetcherStack.java index 28303531e..376b84f18 100644 --- a/source/de/anomic/data/URLFetcherStack.java +++ b/source/de/anomic/data/URLFetcherStack.java @@ -67,7 +67,7 @@ public class URLFetcherStack { public boolean push(final yacyURL url) { try { this.db.push(this.db.row().newEntry( - new byte[][] { url.toNormalform(true, true).getBytes() } + new byte[][] { url.toNormalform(true, true).getBytes("UTF-8") } )); this.pushed++; return true; diff --git a/source/de/anomic/data/blogBoard.java b/source/de/anomic/data/blogBoard.java index da22463c0..3b11ec711 100644 --- a/source/de/anomic/data/blogBoard.java +++ b/source/de/anomic/data/blogBoard.java @@ -158,7 +158,7 @@ public class blogBoard { final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); try { final DocumentBuilder builder = factory.newDocumentBuilder(); - final Document doc = builder.parse(new ByteArrayInputStream(input.getBytes())); + final Document doc = builder.parse(new ByteArrayInputStream(input.getBytes("UTF-8"))); return parseXMLimport(doc); } catch (final ParserConfigurationException e) { } catch (final SAXException e) { diff --git a/source/de/anomic/http/httpTemplate.java b/source/de/anomic/http/httpTemplate.java index 4a9b9d589..8cd96b08a 100644 --- a/source/de/anomic/http/httpTemplate.java +++ b/source/de/anomic/http/httpTemplate.java @@ -506,10 +506,10 @@ public final class httpTemplate { public static void main(final String[] args) { // arg1 = test input; arg2 = replacement for pattern 'test'; arg3 = default replacement try { - final InputStream i = new ByteArrayInputStream(args[0].getBytes()); + final InputStream i = new ByteArrayInputStream(args[0].getBytes("UTF-8")); final HashMap h = new HashMap(); h.put("test", args[1]); - writeTemplate(new PushbackInputStream(i, 100), System.out, h, args[2].getBytes()); + writeTemplate(new PushbackInputStream(i, 100), System.out, h, args[2].getBytes("UTF-8")); System.out.flush(); } catch (final Exception e) { e.printStackTrace(); diff --git a/source/de/anomic/http/httpdFileHandler.java b/source/de/anomic/http/httpdFileHandler.java index 46ff20202..782fb4c17 100644 --- a/source/de/anomic/http/httpdFileHandler.java +++ b/source/de/anomic/http/httpdFileHandler.java @@ -475,7 +475,7 @@ public final class httpdFileHandler { // write the list to the client httpd.sendRespondHeader(conProp, out, httpVersion, 200, null, "text/html", aBuffer.length(), new Date(dir.lastModified()), null, new httpResponseHeader(), null, null, true); if (!method.equals(httpHeader.METHOD_HEAD)) { - out.write(aBuffer.toString().getBytes()); + out.write(aBuffer.toString().getBytes("UTF-8")); } return; } diff --git a/source/de/anomic/index/indexURLReference.java b/source/de/anomic/index/indexURLReference.java index e9a048973..f01675e2f 100644 --- a/source/de/anomic/index/indexURLReference.java +++ b/source/de/anomic/index/indexURLReference.java @@ -26,6 +26,7 @@ package de.anomic.index; +import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.text.ParseException; import java.util.ArrayList; @@ -146,13 +147,21 @@ public class indexURLReference { encodeDate(col_mod, mod); encodeDate(col_load, load); encodeDate(col_fresh, fresh); - this.entry.setCol(col_referrer, (referrer == null) ? null : referrer.getBytes()); + try { + this.entry.setCol(col_referrer, (referrer == null) ? null : referrer.getBytes("UTF-8")); + } catch (UnsupportedEncodingException e) { + this.entry.setCol(col_referrer, (referrer == null) ? null : referrer.getBytes()); + } this.entry.setCol(col_md5, md5); this.entry.setCol(col_size, size); this.entry.setCol(col_wc, wc); this.entry.setCol(col_dt, new byte[]{(byte) dt}); this.entry.setCol(col_flags, flags.bytes()); - this.entry.setCol(col_lang, lang.getBytes()); + try { + this.entry.setCol(col_lang, lang.getBytes("UTF-8")); + } catch (UnsupportedEncodingException e) { + this.entry.setCol(col_lang, lang.getBytes()); + } this.entry.setCol(col_llocal, llocal); this.entry.setCol(col_lother, lother); this.entry.setCol(col_limage, limage); @@ -181,7 +190,11 @@ public class indexURLReference { s.append(dc_creator).append(10); s.append(dc_subject).append(10); s.append(ETag).append(10); - return s.toString().getBytes(); + try { + return s.toString().getBytes("UTF-8"); + } catch (UnsupportedEncodingException e) { + return s.toString().getBytes(); + } } public indexURLReference(final kelondroRow.Entry entry, final indexRWIEntry searchedWord, final long ranking) { @@ -224,14 +237,22 @@ public class indexURLReference { } catch (final ParseException e) { encodeDate(col_fresh, new Date()); } - this.entry.setCol(col_referrer, prop.getProperty("referrer", "").getBytes()); + try { + this.entry.setCol(col_referrer, prop.getProperty("referrer", "").getBytes("UTF-8")); + } catch (UnsupportedEncodingException e1) { + this.entry.setCol(col_referrer, prop.getProperty("referrer", "").getBytes()); + } this.entry.setCol(col_md5, serverCodings.decodeHex(prop.getProperty("md5", ""))); this.entry.setCol(col_size, Integer.parseInt(prop.getProperty("size", "0"))); this.entry.setCol(col_wc, Integer.parseInt(prop.getProperty("wc", "0"))); this.entry.setCol(col_dt, new byte[]{(byte) prop.getProperty("dt", "t").charAt(0)}); final String flags = prop.getProperty("flags", "AAAAAA"); this.entry.setCol(col_flags, (flags.length() > 6) ? plasmaSearchQuery.empty_constraint.bytes() : (new kelondroBitfield(4, flags)).bytes()); - this.entry.setCol(col_lang, prop.getProperty("lang", "uk").getBytes()); + try { + this.entry.setCol(col_lang, prop.getProperty("lang", "uk").getBytes("UTF-8")); + } catch (UnsupportedEncodingException e) { + this.entry.setCol(col_lang, prop.getProperty("lang", "uk").getBytes()); + } this.entry.setCol(col_llocal, Integer.parseInt(prop.getProperty("llocal", "0"))); this.entry.setCol(col_lother, Integer.parseInt(prop.getProperty("lother", "0"))); this.entry.setCol(col_limage, Integer.parseInt(prop.getProperty("limage", "0"))); diff --git a/source/de/anomic/plasma/plasmaCondenser.java b/source/de/anomic/plasma/plasmaCondenser.java index 9c39e7aa6..524a1ce6a 100644 --- a/source/de/anomic/plasma/plasmaCondenser.java +++ b/source/de/anomic/plasma/plasmaCondenser.java @@ -222,7 +222,7 @@ public final class plasmaCondenser { indexWord wprop; sievedWordsEnum wordenum; try { - wordenum = new sievedWordsEnum(new ByteArrayInputStream(text.getBytes())); + wordenum = new sievedWordsEnum(new ByteArrayInputStream(text.getBytes("UTF-8"))); } catch (final UnsupportedEncodingException e) { return; } @@ -494,7 +494,7 @@ public final class plasmaCondenser { public static Enumeration wordTokenizer(final String s, final String charset) { try { - return new sievedWordsEnum(new ByteArrayInputStream(s.getBytes())); + return new sievedWordsEnum(new ByteArrayInputStream(s.getBytes("UTF-8"))); } catch (final Exception e) { return null; } @@ -706,17 +706,15 @@ public final class plasmaCondenser { return s; } - public static Map getWords(final byte[] text, final String charset) throws UnsupportedEncodingException { - // returns a word/indexWord relation map - if (text == null) return null; - final ByteArrayInputStream buffer = new ByteArrayInputStream(text); - return new plasmaCondenser(buffer, "UTF-8", 2, 1).words(); - } - public static Map getWords(final String text) { // returns a word/indexWord relation map if (text == null) return null; - final ByteArrayInputStream buffer = new ByteArrayInputStream(text.getBytes()); + ByteArrayInputStream buffer; + try { + buffer = new ByteArrayInputStream(text.getBytes("UTF-8")); + } catch (UnsupportedEncodingException e1) { + buffer = new ByteArrayInputStream(text.getBytes()); + } try { return new plasmaCondenser(buffer, "UTF-8", 2, 1).words(); } catch (final UnsupportedEncodingException e) { diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java index 314ebed24..49c168aa6 100644 --- a/source/de/anomic/plasma/plasmaHTCache.java +++ b/source/de/anomic/plasma/plasmaHTCache.java @@ -225,7 +225,7 @@ public final class plasmaHTCache { public static void storeFile(yacyURL url, byte[] file) { try { - fileDB.put(url.hash().getBytes(), file); + fileDB.put(url.hash().getBytes("UTF-8"), file); } catch (IOException e) { e.printStackTrace(); } @@ -271,7 +271,7 @@ public final class plasmaHTCache { public static byte[] getResourceContent(final yacyURL url) { // load the url as resource from the cache try { - return fileDB.get(url.hash().getBytes()); + return fileDB.get(url.hash().getBytes("UTF-8")); } catch (IOException e) { e.printStackTrace(); return null; @@ -281,7 +281,7 @@ public final class plasmaHTCache { public static long getResourceContentLength(final yacyURL url) { // load the url as resource from the cache try { - return fileDB.length(url.hash().getBytes()); + return fileDB.length(url.hash().getBytes("UTF-8")); } catch (IOException e) { e.printStackTrace(); return -1; @@ -290,6 +290,6 @@ public final class plasmaHTCache { public static void deleteFromCache(yacyURL url) throws IOException { responseHeaderDB.remove(url.hash()); - fileDB.remove(url.hash().getBytes()); + fileDB.remove(url.hash().getBytes("UTF-8")); } } diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java index d46efb4df..e937c4295 100644 --- a/source/de/anomic/plasma/plasmaSearchEvent.java +++ b/source/de/anomic/plasma/plasmaSearchEvent.java @@ -768,20 +768,16 @@ public final class plasmaSearchEvent { final String filename = urlcomps.url().getFile(); String address = null; if ((seed == null) || ((address = seed.getPublicAddress()) == null)) { - // seed is not known from here - try { - wordIndex.removeWordReferences( - plasmaCondenser.getWords( - ("yacyshare " + - filename.replace('?', ' ') + - " " + - urlcomps.dc_title()).getBytes(), "UTF-8").keySet(), - urlentry.hash()); - wordIndex.removeURL(urlentry.hash()); // clean up - throw new RuntimeException("index void"); - } catch (final UnsupportedEncodingException e) { - throw new RuntimeException("parser failed: " + e.getMessage()); - } + // seed is not known from here + wordIndex.removeWordReferences( + plasmaCondenser.getWords( + ("yacyshare " + + filename.replace('?', ' ') + + " " + + urlcomps.dc_title())).keySet(), + urlentry.hash()); + wordIndex.removeURL(urlentry.hash()); // clean up + throw new RuntimeException("index void"); } alternative_urlstring = "http://" + address + "/" + host.substring(0, p) + filename; alternative_urlname = "http://share." + seed.getName() + ".yacy" + filename; diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index 862684830..5e9e2676f 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -623,7 +623,12 @@ public final class yacyClient { synchronized (abstractCache) { singleAbstract = abstractCache.get(wordhash); // a mapping from url-hashes to a string of peer-hashes if (singleAbstract == null) singleAbstract = new TreeMap(); - ci = new serverByteBuffer(entry.getValue().getBytes()); + try { + ci = new serverByteBuffer(entry.getValue().getBytes("UTF-8")); + } catch (UnsupportedEncodingException e) { + e.printStackTrace(); + return null; + } //System.out.println("DEBUG-ABSTRACTFETCH: for word hash " + wordhash + " received " + ci.toString()); indexContainer.decompressIndex(singleAbstract, ci, target.hash); abstractCache.put(wordhash, singleAbstract);