* added explicit UTF-8 encoding to many getBytes() calls

* UTF-8 should work now


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5323 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
f1ori 17 years ago
parent fad044fb54
commit 7e1fe05e3c

@ -67,7 +67,7 @@ public class URLFetcherStack {
public boolean push(final yacyURL url) { public boolean push(final yacyURL url) {
try { try {
this.db.push(this.db.row().newEntry( this.db.push(this.db.row().newEntry(
new byte[][] { url.toNormalform(true, true).getBytes() } new byte[][] { url.toNormalform(true, true).getBytes("UTF-8") }
)); ));
this.pushed++; this.pushed++;
return true; return true;

@ -158,7 +158,7 @@ public class blogBoard {
final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
try { try {
final DocumentBuilder builder = factory.newDocumentBuilder(); final DocumentBuilder builder = factory.newDocumentBuilder();
final Document doc = builder.parse(new ByteArrayInputStream(input.getBytes())); final Document doc = builder.parse(new ByteArrayInputStream(input.getBytes("UTF-8")));
return parseXMLimport(doc); return parseXMLimport(doc);
} catch (final ParserConfigurationException e) { } catch (final ParserConfigurationException e) {
} catch (final SAXException e) { } catch (final SAXException e) {

@ -506,10 +506,10 @@ public final class httpTemplate {
public static void main(final String[] args) { public static void main(final String[] args) {
// arg1 = test input; arg2 = replacement for pattern 'test'; arg3 = default replacement // arg1 = test input; arg2 = replacement for pattern 'test'; arg3 = default replacement
try { try {
final InputStream i = new ByteArrayInputStream(args[0].getBytes()); final InputStream i = new ByteArrayInputStream(args[0].getBytes("UTF-8"));
final HashMap<String, String> h = new HashMap<String, String>(); final HashMap<String, String> h = new HashMap<String, String>();
h.put("test", args[1]); h.put("test", args[1]);
writeTemplate(new PushbackInputStream(i, 100), System.out, h, args[2].getBytes()); writeTemplate(new PushbackInputStream(i, 100), System.out, h, args[2].getBytes("UTF-8"));
System.out.flush(); System.out.flush();
} catch (final Exception e) { } catch (final Exception e) {
e.printStackTrace(); e.printStackTrace();

@ -475,7 +475,7 @@ public final class httpdFileHandler {
// write the list to the client // write the list to the client
httpd.sendRespondHeader(conProp, out, httpVersion, 200, null, "text/html", aBuffer.length(), new Date(dir.lastModified()), null, new httpResponseHeader(), null, null, true); httpd.sendRespondHeader(conProp, out, httpVersion, 200, null, "text/html", aBuffer.length(), new Date(dir.lastModified()), null, new httpResponseHeader(), null, null, true);
if (!method.equals(httpHeader.METHOD_HEAD)) { if (!method.equals(httpHeader.METHOD_HEAD)) {
out.write(aBuffer.toString().getBytes()); out.write(aBuffer.toString().getBytes("UTF-8"));
} }
return; return;
} }

@ -26,6 +26,7 @@
package de.anomic.index; package de.anomic.index;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.text.ParseException; import java.text.ParseException;
import java.util.ArrayList; import java.util.ArrayList;
@ -146,13 +147,21 @@ public class indexURLReference {
encodeDate(col_mod, mod); encodeDate(col_mod, mod);
encodeDate(col_load, load); encodeDate(col_load, load);
encodeDate(col_fresh, fresh); encodeDate(col_fresh, fresh);
this.entry.setCol(col_referrer, (referrer == null) ? null : referrer.getBytes()); try {
this.entry.setCol(col_referrer, (referrer == null) ? null : referrer.getBytes("UTF-8"));
} catch (UnsupportedEncodingException e) {
this.entry.setCol(col_referrer, (referrer == null) ? null : referrer.getBytes());
}
this.entry.setCol(col_md5, md5); this.entry.setCol(col_md5, md5);
this.entry.setCol(col_size, size); this.entry.setCol(col_size, size);
this.entry.setCol(col_wc, wc); this.entry.setCol(col_wc, wc);
this.entry.setCol(col_dt, new byte[]{(byte) dt}); this.entry.setCol(col_dt, new byte[]{(byte) dt});
this.entry.setCol(col_flags, flags.bytes()); this.entry.setCol(col_flags, flags.bytes());
this.entry.setCol(col_lang, lang.getBytes()); try {
this.entry.setCol(col_lang, lang.getBytes("UTF-8"));
} catch (UnsupportedEncodingException e) {
this.entry.setCol(col_lang, lang.getBytes());
}
this.entry.setCol(col_llocal, llocal); this.entry.setCol(col_llocal, llocal);
this.entry.setCol(col_lother, lother); this.entry.setCol(col_lother, lother);
this.entry.setCol(col_limage, limage); this.entry.setCol(col_limage, limage);
@ -181,7 +190,11 @@ public class indexURLReference {
s.append(dc_creator).append(10); s.append(dc_creator).append(10);
s.append(dc_subject).append(10); s.append(dc_subject).append(10);
s.append(ETag).append(10); s.append(ETag).append(10);
return s.toString().getBytes(); try {
return s.toString().getBytes("UTF-8");
} catch (UnsupportedEncodingException e) {
return s.toString().getBytes();
}
} }
public indexURLReference(final kelondroRow.Entry entry, final indexRWIEntry searchedWord, final long ranking) { public indexURLReference(final kelondroRow.Entry entry, final indexRWIEntry searchedWord, final long ranking) {
@ -224,14 +237,22 @@ public class indexURLReference {
} catch (final ParseException e) { } catch (final ParseException e) {
encodeDate(col_fresh, new Date()); encodeDate(col_fresh, new Date());
} }
this.entry.setCol(col_referrer, prop.getProperty("referrer", "").getBytes()); try {
this.entry.setCol(col_referrer, prop.getProperty("referrer", "").getBytes("UTF-8"));
} catch (UnsupportedEncodingException e1) {
this.entry.setCol(col_referrer, prop.getProperty("referrer", "").getBytes());
}
this.entry.setCol(col_md5, serverCodings.decodeHex(prop.getProperty("md5", ""))); this.entry.setCol(col_md5, serverCodings.decodeHex(prop.getProperty("md5", "")));
this.entry.setCol(col_size, Integer.parseInt(prop.getProperty("size", "0"))); this.entry.setCol(col_size, Integer.parseInt(prop.getProperty("size", "0")));
this.entry.setCol(col_wc, Integer.parseInt(prop.getProperty("wc", "0"))); this.entry.setCol(col_wc, Integer.parseInt(prop.getProperty("wc", "0")));
this.entry.setCol(col_dt, new byte[]{(byte) prop.getProperty("dt", "t").charAt(0)}); this.entry.setCol(col_dt, new byte[]{(byte) prop.getProperty("dt", "t").charAt(0)});
final String flags = prop.getProperty("flags", "AAAAAA"); final String flags = prop.getProperty("flags", "AAAAAA");
this.entry.setCol(col_flags, (flags.length() > 6) ? plasmaSearchQuery.empty_constraint.bytes() : (new kelondroBitfield(4, flags)).bytes()); this.entry.setCol(col_flags, (flags.length() > 6) ? plasmaSearchQuery.empty_constraint.bytes() : (new kelondroBitfield(4, flags)).bytes());
this.entry.setCol(col_lang, prop.getProperty("lang", "uk").getBytes()); try {
this.entry.setCol(col_lang, prop.getProperty("lang", "uk").getBytes("UTF-8"));
} catch (UnsupportedEncodingException e) {
this.entry.setCol(col_lang, prop.getProperty("lang", "uk").getBytes());
}
this.entry.setCol(col_llocal, Integer.parseInt(prop.getProperty("llocal", "0"))); this.entry.setCol(col_llocal, Integer.parseInt(prop.getProperty("llocal", "0")));
this.entry.setCol(col_lother, Integer.parseInt(prop.getProperty("lother", "0"))); this.entry.setCol(col_lother, Integer.parseInt(prop.getProperty("lother", "0")));
this.entry.setCol(col_limage, Integer.parseInt(prop.getProperty("limage", "0"))); this.entry.setCol(col_limage, Integer.parseInt(prop.getProperty("limage", "0")));

@ -222,7 +222,7 @@ public final class plasmaCondenser {
indexWord wprop; indexWord wprop;
sievedWordsEnum wordenum; sievedWordsEnum wordenum;
try { try {
wordenum = new sievedWordsEnum(new ByteArrayInputStream(text.getBytes())); wordenum = new sievedWordsEnum(new ByteArrayInputStream(text.getBytes("UTF-8")));
} catch (final UnsupportedEncodingException e) { } catch (final UnsupportedEncodingException e) {
return; return;
} }
@ -494,7 +494,7 @@ public final class plasmaCondenser {
public static Enumeration<StringBuffer> wordTokenizer(final String s, final String charset) { public static Enumeration<StringBuffer> wordTokenizer(final String s, final String charset) {
try { try {
return new sievedWordsEnum(new ByteArrayInputStream(s.getBytes())); return new sievedWordsEnum(new ByteArrayInputStream(s.getBytes("UTF-8")));
} catch (final Exception e) { } catch (final Exception e) {
return null; return null;
} }
@ -706,17 +706,15 @@ public final class plasmaCondenser {
return s; return s;
} }
public static Map<String, indexWord> getWords(final byte[] text, final String charset) throws UnsupportedEncodingException {
// returns a word/indexWord relation map
if (text == null) return null;
final ByteArrayInputStream buffer = new ByteArrayInputStream(text);
return new plasmaCondenser(buffer, "UTF-8", 2, 1).words();
}
public static Map<String, indexWord> getWords(final String text) { public static Map<String, indexWord> getWords(final String text) {
// returns a word/indexWord relation map // returns a word/indexWord relation map
if (text == null) return null; if (text == null) return null;
final ByteArrayInputStream buffer = new ByteArrayInputStream(text.getBytes()); ByteArrayInputStream buffer;
try {
buffer = new ByteArrayInputStream(text.getBytes("UTF-8"));
} catch (UnsupportedEncodingException e1) {
buffer = new ByteArrayInputStream(text.getBytes());
}
try { try {
return new plasmaCondenser(buffer, "UTF-8", 2, 1).words(); return new plasmaCondenser(buffer, "UTF-8", 2, 1).words();
} catch (final UnsupportedEncodingException e) { } catch (final UnsupportedEncodingException e) {

@ -225,7 +225,7 @@ public final class plasmaHTCache {
public static void storeFile(yacyURL url, byte[] file) { public static void storeFile(yacyURL url, byte[] file) {
try { try {
fileDB.put(url.hash().getBytes(), file); fileDB.put(url.hash().getBytes("UTF-8"), file);
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
} }
@ -271,7 +271,7 @@ public final class plasmaHTCache {
public static byte[] getResourceContent(final yacyURL url) { public static byte[] getResourceContent(final yacyURL url) {
// load the url as resource from the cache // load the url as resource from the cache
try { try {
return fileDB.get(url.hash().getBytes()); return fileDB.get(url.hash().getBytes("UTF-8"));
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
return null; return null;
@ -281,7 +281,7 @@ public final class plasmaHTCache {
public static long getResourceContentLength(final yacyURL url) { public static long getResourceContentLength(final yacyURL url) {
// load the url as resource from the cache // load the url as resource from the cache
try { try {
return fileDB.length(url.hash().getBytes()); return fileDB.length(url.hash().getBytes("UTF-8"));
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
return -1; return -1;
@ -290,6 +290,6 @@ public final class plasmaHTCache {
public static void deleteFromCache(yacyURL url) throws IOException { public static void deleteFromCache(yacyURL url) throws IOException {
responseHeaderDB.remove(url.hash()); responseHeaderDB.remove(url.hash());
fileDB.remove(url.hash().getBytes()); fileDB.remove(url.hash().getBytes("UTF-8"));
} }
} }

@ -768,20 +768,16 @@ public final class plasmaSearchEvent {
final String filename = urlcomps.url().getFile(); final String filename = urlcomps.url().getFile();
String address = null; String address = null;
if ((seed == null) || ((address = seed.getPublicAddress()) == null)) { if ((seed == null) || ((address = seed.getPublicAddress()) == null)) {
// seed is not known from here // seed is not known from here
try { wordIndex.removeWordReferences(
wordIndex.removeWordReferences( plasmaCondenser.getWords(
plasmaCondenser.getWords( ("yacyshare " +
("yacyshare " + filename.replace('?', ' ') +
filename.replace('?', ' ') + " " +
" " + urlcomps.dc_title())).keySet(),
urlcomps.dc_title()).getBytes(), "UTF-8").keySet(), urlentry.hash());
urlentry.hash()); wordIndex.removeURL(urlentry.hash()); // clean up
wordIndex.removeURL(urlentry.hash()); // clean up throw new RuntimeException("index void");
throw new RuntimeException("index void");
} catch (final UnsupportedEncodingException e) {
throw new RuntimeException("parser failed: " + e.getMessage());
}
} }
alternative_urlstring = "http://" + address + "/" + host.substring(0, p) + filename; alternative_urlstring = "http://" + address + "/" + host.substring(0, p) + filename;
alternative_urlname = "http://share." + seed.getName() + ".yacy" + filename; alternative_urlname = "http://share." + seed.getName() + ".yacy" + filename;

@ -623,7 +623,12 @@ public final class yacyClient {
synchronized (abstractCache) { synchronized (abstractCache) {
singleAbstract = abstractCache.get(wordhash); // a mapping from url-hashes to a string of peer-hashes singleAbstract = abstractCache.get(wordhash); // a mapping from url-hashes to a string of peer-hashes
if (singleAbstract == null) singleAbstract = new TreeMap<String, String>(); if (singleAbstract == null) singleAbstract = new TreeMap<String, String>();
ci = new serverByteBuffer(entry.getValue().getBytes()); try {
ci = new serverByteBuffer(entry.getValue().getBytes("UTF-8"));
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
return null;
}
//System.out.println("DEBUG-ABSTRACTFETCH: for word hash " + wordhash + " received " + ci.toString()); //System.out.println("DEBUG-ABSTRACTFETCH: for word hash " + wordhash + " received " + ci.toString());
indexContainer.decompressIndex(singleAbstract, ci, target.hash); indexContainer.decompressIndex(singleAbstract, ci, target.hash);
abstractCache.put(wordhash, singleAbstract); abstractCache.put(wordhash, singleAbstract);

Loading…
Cancel
Save