* added explicit UTF-8 encoding to many getBytes() calls

* UTF-8 should work now


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5323 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
f1ori 17 years ago
parent fad044fb54
commit 7e1fe05e3c

@ -67,7 +67,7 @@ public class URLFetcherStack {
public boolean push(final yacyURL url) {
try {
this.db.push(this.db.row().newEntry(
new byte[][] { url.toNormalform(true, true).getBytes() }
new byte[][] { url.toNormalform(true, true).getBytes("UTF-8") }
));
this.pushed++;
return true;

@ -158,7 +158,7 @@ public class blogBoard {
final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
try {
final DocumentBuilder builder = factory.newDocumentBuilder();
final Document doc = builder.parse(new ByteArrayInputStream(input.getBytes()));
final Document doc = builder.parse(new ByteArrayInputStream(input.getBytes("UTF-8")));
return parseXMLimport(doc);
} catch (final ParserConfigurationException e) {
} catch (final SAXException e) {

@ -506,10 +506,10 @@ public final class httpTemplate {
public static void main(final String[] args) {
// arg1 = test input; arg2 = replacement for pattern 'test'; arg3 = default replacement
try {
final InputStream i = new ByteArrayInputStream(args[0].getBytes());
final InputStream i = new ByteArrayInputStream(args[0].getBytes("UTF-8"));
final HashMap<String, String> h = new HashMap<String, String>();
h.put("test", args[1]);
writeTemplate(new PushbackInputStream(i, 100), System.out, h, args[2].getBytes());
writeTemplate(new PushbackInputStream(i, 100), System.out, h, args[2].getBytes("UTF-8"));
System.out.flush();
} catch (final Exception e) {
e.printStackTrace();

@ -475,7 +475,7 @@ public final class httpdFileHandler {
// write the list to the client
httpd.sendRespondHeader(conProp, out, httpVersion, 200, null, "text/html", aBuffer.length(), new Date(dir.lastModified()), null, new httpResponseHeader(), null, null, true);
if (!method.equals(httpHeader.METHOD_HEAD)) {
out.write(aBuffer.toString().getBytes());
out.write(aBuffer.toString().getBytes("UTF-8"));
}
return;
}

@ -26,6 +26,7 @@
package de.anomic.index;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.text.ParseException;
import java.util.ArrayList;
@ -146,13 +147,21 @@ public class indexURLReference {
encodeDate(col_mod, mod);
encodeDate(col_load, load);
encodeDate(col_fresh, fresh);
this.entry.setCol(col_referrer, (referrer == null) ? null : referrer.getBytes());
try {
this.entry.setCol(col_referrer, (referrer == null) ? null : referrer.getBytes("UTF-8"));
} catch (UnsupportedEncodingException e) {
this.entry.setCol(col_referrer, (referrer == null) ? null : referrer.getBytes());
}
this.entry.setCol(col_md5, md5);
this.entry.setCol(col_size, size);
this.entry.setCol(col_wc, wc);
this.entry.setCol(col_dt, new byte[]{(byte) dt});
this.entry.setCol(col_flags, flags.bytes());
this.entry.setCol(col_lang, lang.getBytes());
try {
this.entry.setCol(col_lang, lang.getBytes("UTF-8"));
} catch (UnsupportedEncodingException e) {
this.entry.setCol(col_lang, lang.getBytes());
}
this.entry.setCol(col_llocal, llocal);
this.entry.setCol(col_lother, lother);
this.entry.setCol(col_limage, limage);
@ -181,7 +190,11 @@ public class indexURLReference {
s.append(dc_creator).append(10);
s.append(dc_subject).append(10);
s.append(ETag).append(10);
return s.toString().getBytes();
try {
return s.toString().getBytes("UTF-8");
} catch (UnsupportedEncodingException e) {
return s.toString().getBytes();
}
}
public indexURLReference(final kelondroRow.Entry entry, final indexRWIEntry searchedWord, final long ranking) {
@ -224,14 +237,22 @@ public class indexURLReference {
} catch (final ParseException e) {
encodeDate(col_fresh, new Date());
}
this.entry.setCol(col_referrer, prop.getProperty("referrer", "").getBytes());
try {
this.entry.setCol(col_referrer, prop.getProperty("referrer", "").getBytes("UTF-8"));
} catch (UnsupportedEncodingException e1) {
this.entry.setCol(col_referrer, prop.getProperty("referrer", "").getBytes());
}
this.entry.setCol(col_md5, serverCodings.decodeHex(prop.getProperty("md5", "")));
this.entry.setCol(col_size, Integer.parseInt(prop.getProperty("size", "0")));
this.entry.setCol(col_wc, Integer.parseInt(prop.getProperty("wc", "0")));
this.entry.setCol(col_dt, new byte[]{(byte) prop.getProperty("dt", "t").charAt(0)});
final String flags = prop.getProperty("flags", "AAAAAA");
this.entry.setCol(col_flags, (flags.length() > 6) ? plasmaSearchQuery.empty_constraint.bytes() : (new kelondroBitfield(4, flags)).bytes());
this.entry.setCol(col_lang, prop.getProperty("lang", "uk").getBytes());
try {
this.entry.setCol(col_lang, prop.getProperty("lang", "uk").getBytes("UTF-8"));
} catch (UnsupportedEncodingException e) {
this.entry.setCol(col_lang, prop.getProperty("lang", "uk").getBytes());
}
this.entry.setCol(col_llocal, Integer.parseInt(prop.getProperty("llocal", "0")));
this.entry.setCol(col_lother, Integer.parseInt(prop.getProperty("lother", "0")));
this.entry.setCol(col_limage, Integer.parseInt(prop.getProperty("limage", "0")));

@ -222,7 +222,7 @@ public final class plasmaCondenser {
indexWord wprop;
sievedWordsEnum wordenum;
try {
wordenum = new sievedWordsEnum(new ByteArrayInputStream(text.getBytes()));
wordenum = new sievedWordsEnum(new ByteArrayInputStream(text.getBytes("UTF-8")));
} catch (final UnsupportedEncodingException e) {
return;
}
@ -494,7 +494,7 @@ public final class plasmaCondenser {
public static Enumeration<StringBuffer> wordTokenizer(final String s, final String charset) {
try {
return new sievedWordsEnum(new ByteArrayInputStream(s.getBytes()));
return new sievedWordsEnum(new ByteArrayInputStream(s.getBytes("UTF-8")));
} catch (final Exception e) {
return null;
}
@ -706,17 +706,15 @@ public final class plasmaCondenser {
return s;
}
public static Map<String, indexWord> getWords(final byte[] text, final String charset) throws UnsupportedEncodingException {
// returns a word/indexWord relation map
if (text == null) return null;
final ByteArrayInputStream buffer = new ByteArrayInputStream(text);
return new plasmaCondenser(buffer, "UTF-8", 2, 1).words();
}
public static Map<String, indexWord> getWords(final String text) {
// returns a word/indexWord relation map
if (text == null) return null;
final ByteArrayInputStream buffer = new ByteArrayInputStream(text.getBytes());
ByteArrayInputStream buffer;
try {
buffer = new ByteArrayInputStream(text.getBytes("UTF-8"));
} catch (UnsupportedEncodingException e1) {
buffer = new ByteArrayInputStream(text.getBytes());
}
try {
return new plasmaCondenser(buffer, "UTF-8", 2, 1).words();
} catch (final UnsupportedEncodingException e) {

@ -225,7 +225,7 @@ public final class plasmaHTCache {
public static void storeFile(yacyURL url, byte[] file) {
try {
fileDB.put(url.hash().getBytes(), file);
fileDB.put(url.hash().getBytes("UTF-8"), file);
} catch (IOException e) {
e.printStackTrace();
}
@ -271,7 +271,7 @@ public final class plasmaHTCache {
public static byte[] getResourceContent(final yacyURL url) {
// load the url as resource from the cache
try {
return fileDB.get(url.hash().getBytes());
return fileDB.get(url.hash().getBytes("UTF-8"));
} catch (IOException e) {
e.printStackTrace();
return null;
@ -281,7 +281,7 @@ public final class plasmaHTCache {
public static long getResourceContentLength(final yacyURL url) {
// load the url as resource from the cache
try {
return fileDB.length(url.hash().getBytes());
return fileDB.length(url.hash().getBytes("UTF-8"));
} catch (IOException e) {
e.printStackTrace();
return -1;
@ -290,6 +290,6 @@ public final class plasmaHTCache {
public static void deleteFromCache(yacyURL url) throws IOException {
responseHeaderDB.remove(url.hash());
fileDB.remove(url.hash().getBytes());
fileDB.remove(url.hash().getBytes("UTF-8"));
}
}

@ -768,20 +768,16 @@ public final class plasmaSearchEvent {
final String filename = urlcomps.url().getFile();
String address = null;
if ((seed == null) || ((address = seed.getPublicAddress()) == null)) {
// seed is not known from here
try {
wordIndex.removeWordReferences(
plasmaCondenser.getWords(
("yacyshare " +
filename.replace('?', ' ') +
" " +
urlcomps.dc_title()).getBytes(), "UTF-8").keySet(),
urlentry.hash());
wordIndex.removeURL(urlentry.hash()); // clean up
throw new RuntimeException("index void");
} catch (final UnsupportedEncodingException e) {
throw new RuntimeException("parser failed: " + e.getMessage());
}
// seed is not known from here
wordIndex.removeWordReferences(
plasmaCondenser.getWords(
("yacyshare " +
filename.replace('?', ' ') +
" " +
urlcomps.dc_title())).keySet(),
urlentry.hash());
wordIndex.removeURL(urlentry.hash()); // clean up
throw new RuntimeException("index void");
}
alternative_urlstring = "http://" + address + "/" + host.substring(0, p) + filename;
alternative_urlname = "http://share." + seed.getName() + ".yacy" + filename;

@ -623,7 +623,12 @@ public final class yacyClient {
synchronized (abstractCache) {
singleAbstract = abstractCache.get(wordhash); // a mapping from url-hashes to a string of peer-hashes
if (singleAbstract == null) singleAbstract = new TreeMap<String, String>();
ci = new serverByteBuffer(entry.getValue().getBytes());
try {
ci = new serverByteBuffer(entry.getValue().getBytes("UTF-8"));
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
return null;
}
//System.out.println("DEBUG-ABSTRACTFETCH: for word hash " + wordhash + " received " + ci.toString());
indexContainer.decompressIndex(singleAbstract, ci, target.hash);
abstractCache.put(wordhash, singleAbstract);

Loading…
Cancel
Save