From 30aed9824a8b363454b5b85981b305d6a08d2980 Mon Sep 17 00:00:00 2001 From: orbiter Date: Thu, 10 Mar 2011 12:35:32 +0000 Subject: [PATCH] moved getBytes() to UTF8.getBytes() to use a default String encoding git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7580 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/IndexControlRWIs_p.java | 2 +- htroot/Supporter.java | 8 ++-- htroot/Surftips.java | 8 ++-- source/de/anomic/crawler/ZURL.java | 4 +- .../de/anomic/crawler/retrieval/Request.java | 6 +-- source/de/anomic/search/DocumentIndex.java | 3 +- .../de/anomic/search/MetadataRepository.java | 14 +------ source/de/anomic/search/ReferenceOrder.java | 11 ++---- source/de/anomic/search/SearchEvent.java | 2 +- source/de/anomic/search/Segments.java | 1 + source/de/anomic/yacy/yacyNewsDB.java | 8 ++-- source/de/anomic/yacy/yacyNewsQueue.java | 2 +- source/net/yacy/cora/document/UTF8.java | 5 +++ source/net/yacy/dbtest.java | 2 +- .../document/parser/html/ContentScraper.java | 7 ---- source/net/yacy/kelondro/blob/Compressor.java | 2 +- .../net/yacy/kelondro/blob/MapDataMining.java | 2 +- source/net/yacy/kelondro/blob/MapHeap.java | 4 +- .../net/yacy/kelondro/blob/ObjectBuffer.java | 9 +++-- source/net/yacy/kelondro/blob/Tables.java | 16 ++++---- .../data/citation/CitationReferenceRow.java | 5 ++- .../data/image/ImageReferenceRow.java | 5 ++- .../yacy/kelondro/data/meta/DigestURI.java | 2 +- .../kelondro/data/meta/URIMetadataRow.java | 37 ++++--------------- .../navigation/NavigationReferenceRow.java | 8 ++-- .../kelondro/data/word/WordReference.java | 2 +- .../kelondro/data/word/WordReferenceRow.java | 14 +++---- .../kelondro/data/word/WordReferenceVars.java | 10 +++-- source/net/yacy/kelondro/index/IndexTest.java | 2 +- source/net/yacy/kelondro/index/Row.java | 30 +++------------ .../yacy/kelondro/index/RowCollection.java | 4 +- source/net/yacy/kelondro/index/RowSet.java | 5 ++- .../net/yacy/kelondro/io/AbstractWriter.java | 3 +- source/net/yacy/kelondro/io/CharBuffer.java | 13 ++----- source/net/yacy/kelondro/order/Digest.java | 8 +--- .../net/yacy/kelondro/order/StringOrder.java | 4 +- source/net/yacy/kelondro/table/Relations.java | 4 +- source/net/yacy/kelondro/table/SQLTable.java | 4 +- source/net/yacy/kelondro/util/BDecoder.java | 10 ++--- source/net/yacy/kelondro/util/ByteBuffer.java | 15 +++----- source/net/yacy/kelondro/util/OS.java | 3 +- .../net/yacy/repository/LoaderDispatcher.java | 5 ++- 42 files changed, 125 insertions(+), 184 deletions(-) diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java index 86bf79944..f69970cbc 100644 --- a/htroot/IndexControlRWIs_p.java +++ b/htroot/IndexControlRWIs_p.java @@ -535,7 +535,7 @@ public class IndexControlRWIs_p { public static RankingProcess genSearchresult(final serverObjects prop, final Switchboard sb, Segment segment, final byte[] keyhash, final Bitfield filter) { final QueryParams query = new QueryParams(UTF8.String(keyhash), -1, filter, segment, sb.getRanking(), "IndexControlRWIs_p"); - final ReferenceOrder order = new ReferenceOrder(query.ranking, query.targetlang); + final ReferenceOrder order = new ReferenceOrder(query.ranking, UTF8.getBytes(query.targetlang)); final RankingProcess ranked = new RankingProcess(query, order, Integer.MAX_VALUE); ranked.run(); diff --git a/htroot/Supporter.java b/htroot/Supporter.java index 88ffff12b..58c9fbe37 100644 --- a/htroot/Supporter.java +++ b/htroot/Supporter.java @@ -128,14 +128,14 @@ public class Supporter { row = Supporter.get(urlhash); if (row == null) continue; - url = row.getColString(0, null); + url = row.getColString(0); try { if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS, new DigestURI(url, urlhash.getBytes()))) continue; } catch(final MalformedURLException e) {continue;} - title = row.getColString(1,"UTF-8"); - description = row.getColString(2,"UTF-8"); + title = row.getColString(1); + description = row.getColString(2); if ((url == null) || (title == null) || (description == null)) continue; - refid = row.getColString(3, null); + refid = row.getColString(3); voted = (sb.peers.newsPool.getSpecific(yacyNewsPool.OUTGOING_DB, yacyNewsPool.CATEGORY_SURFTIPP_VOTE_ADD, "refid", refid) != null) || (sb.peers.newsPool.getSpecific(yacyNewsPool.PUBLISHED_DB, yacyNewsPool.CATEGORY_SURFTIPP_VOTE_ADD, "refid", refid) != null); prop.put("supporter_results_" + i + "_authorized", authenticated ? "1" : "0"); diff --git a/htroot/Surftips.java b/htroot/Surftips.java index a0620e402..80b96c7cb 100644 --- a/htroot/Surftips.java +++ b/htroot/Surftips.java @@ -136,15 +136,15 @@ public class Surftips { row = surftips.get(urlhash); if (row == null) continue; - url = row.getColString(0, null); + url = row.getColString(0); try{ if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS ,new DigestURI(url))) continue; }catch(final MalformedURLException e){continue;}; - title = row.getColString(1,"UTF-8"); - description = row.getColString(2,"UTF-8"); + title = row.getColString(1); + description = row.getColString(2); if ((url == null) || (title == null) || (description == null)) continue; - refid = row.getColString(3, null); + refid = row.getColString(3); voted = (sb.peers.newsPool.getSpecific(yacyNewsPool.OUTGOING_DB, yacyNewsPool.CATEGORY_SURFTIPP_VOTE_ADD, "refid", refid) != null) || (sb.peers.newsPool.getSpecific(yacyNewsPool.PUBLISHED_DB, yacyNewsPool.CATEGORY_SURFTIPP_VOTE_ADD, "refid", refid) != null); prop.put("surftips_results_" + i + "_authorized", (authenticated) ? "1" : "0"); diff --git a/source/de/anomic/crawler/ZURL.java b/source/de/anomic/crawler/ZURL.java index 6576144d7..b5943a3d7 100755 --- a/source/de/anomic/crawler/ZURL.java +++ b/source/de/anomic/crawler/ZURL.java @@ -251,7 +251,7 @@ public class ZURL implements Iterable { this.executor = entry.getColBytes(1, true); this.workdate = new Date(entry.getColLong(2)); this.workcount = (int) entry.getColLong(3); - this.anycause = entry.getColString(4, "UTF-8"); + this.anycause = entry.getColString(4); this.bentry = new Request(Request.rowdef.newEntry(entry.getColBytes(5, false))); assert (Base64Order.enhancedCoder.equal(entry.getPrimaryKeyBytes(), bentry.url().hash())); this.stored = true; @@ -310,7 +310,7 @@ public class ZURL implements Iterable { try { return new Entry(e); } catch (final IOException ex) { - throw new RuntimeException("error '" + ex.getMessage() + "' for hash " + e.getColString(0, null)); + throw new RuntimeException("error '" + ex.getMessage() + "' for hash " + e.getColString(0)); } } diff --git a/source/de/anomic/crawler/retrieval/Request.java b/source/de/anomic/crawler/retrieval/Request.java index 1bded5807..ab024e4f3 100755 --- a/source/de/anomic/crawler/retrieval/Request.java +++ b/source/de/anomic/crawler/retrieval/Request.java @@ -136,15 +136,15 @@ public class Request extends WorkflowJob { } private void insertEntry(final Row.Entry entry) throws IOException { - final String urlstring = entry.getColString(2, null); + final String urlstring = entry.getColString(2); if (urlstring == null) throw new IOException ("url string is null"); this.initiator = entry.getColBytes(1, true); this.initiator = (initiator == null) ? null : ((initiator.length == 0) ? null : initiator); this.url = new DigestURI(urlstring, entry.getPrimaryKeyBytes()); this.refhash = (entry.empty(3)) ? null : entry.getColBytes(3, true); - this.name = (entry.empty(4)) ? "" : entry.getColString(4, "UTF-8").trim(); + this.name = (entry.empty(4)) ? "" : entry.getColString(4).trim(); this.appdate = entry.getColLong(5); - this.profileHandle = (entry.empty(6)) ? null : entry.getColString(6, null).trim(); + this.profileHandle = (entry.empty(6)) ? null : entry.getColString(6).trim(); this.depth = (int) entry.getColLong(7); this.anchors = (int) entry.getColLong(8); this.forkfactor = (int) entry.getColLong(9); diff --git a/source/de/anomic/search/DocumentIndex.java b/source/de/anomic/search/DocumentIndex.java index df3b4eae5..7394e2c85 100644 --- a/source/de/anomic/search/DocumentIndex.java +++ b/source/de/anomic/search/DocumentIndex.java @@ -36,6 +36,7 @@ import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; +import net.yacy.cora.document.UTF8; import net.yacy.document.Condenser; import net.yacy.document.Document; import net.yacy.document.LibraryProvider; @@ -193,7 +194,7 @@ public class DocumentIndex extends Segment { public ArrayList find(String querystring, int count) { // make a query and start a search QueryParams query = new QueryParams(querystring, count, null, this, textRankingDefault, "DocumentIndex"); - ReferenceOrder order = new ReferenceOrder(query.ranking, query.targetlang); + ReferenceOrder order = new ReferenceOrder(query.ranking, UTF8.getBytes(query.targetlang)); RankingProcess rankedCache = new RankingProcess(query, order, SearchEvent.max_results_preparation); rankedCache.start(); diff --git a/source/de/anomic/search/MetadataRepository.java b/source/de/anomic/search/MetadataRepository.java index 91ccd87d4..1ce97788d 100644 --- a/source/de/anomic/search/MetadataRepository.java +++ b/source/de/anomic/search/MetadataRepository.java @@ -283,7 +283,7 @@ public final class MetadataRepository implements Iterable { final Row.Entry entry = urlIndexFile.get(urlHashBytes); // getting the wrong url string - oldUrlStr = entry.getColString(1, null).trim(); + oldUrlStr = entry.getColString(1).trim(); int pos = -1; if ((pos = oldUrlStr.indexOf("://")) != -1) { @@ -291,18 +291,6 @@ public final class MetadataRepository implements Iterable { final String newUrlStr = "http://" + oldUrlStr.substring(pos + 3); final DigestURI newUrl = new DigestURI(newUrlStr); - // doing a http head request to test if the url is correct -// final Client client = new Client(10000); -// ResponseContainer res = null; -// try { -// res = client.HEAD(newUrl.toString()); -// } finally { -// if(res != null) { -// // release connection -// res.closeStream(); -// } -// } - if (client.HEADResponse(newUrl.toString()) != null && client.getHttpResponse().getStatusLine().getStatusCode() == 200) { entry.setCol(1, newUrl.toString().getBytes()); diff --git a/source/de/anomic/search/ReferenceOrder.java b/source/de/anomic/search/ReferenceOrder.java index 7adb3b6e2..2bb71ac88 100644 --- a/source/de/anomic/search/ReferenceOrder.java +++ b/source/de/anomic/search/ReferenceOrder.java @@ -45,6 +45,7 @@ import net.yacy.kelondro.data.word.WordReferenceVars; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Bitfield; import net.yacy.kelondro.rwi.ReferenceContainer; +import net.yacy.kelondro.util.ByteBuffer; public class ReferenceOrder { @@ -55,9 +56,9 @@ public class ReferenceOrder { private WordReferenceVars min, max; private final DynamicScore doms; // collected for "authority" heuristic private final RankingProfile ranking; - private final String language; + private final byte[] language; - public ReferenceOrder(final RankingProfile profile, String language) { + public ReferenceOrder(final RankingProfile profile, byte[] language) { this.min = null; this.max = null; this.ranking = profile; @@ -234,7 +235,7 @@ public class ReferenceOrder { + ((flags.get(Condenser.flag_cat_hasaudio)) ? 255 << ranking.coeff_cathasaudio : 0) + ((flags.get(Condenser.flag_cat_hasvideo)) ? 255 << ranking.coeff_cathasvideo : 0) + ((flags.get(Condenser.flag_cat_hasapp)) ? 255 << ranking.coeff_cathasapp : 0) - + ((patchUK(t.language).equals(this.language)) ? 255 << ranking.coeff_language : 0) + + ((ByteBuffer.equals(t.language, this.language)) ? 255 << ranking.coeff_language : 0) + ((DigestURI.probablyRootURL(t.metadataHash())) ? 15 << ranking.coeff_urllength : 0); //if (searchWords != null) r += (yacyURL.probablyWordURL(t.urlHash(), searchWords) != null) ? 256 << ranking.coeff_appurl : 0; @@ -242,8 +243,4 @@ public class ReferenceOrder { return r; // the higher the number the better the ranking. } - private static final String patchUK(String l) { - // this is to patch a bad language name setting that was used in 0.60 and before - if (l == null || l.equals("uk")) return "en"; else return l; - } } diff --git a/source/de/anomic/search/SearchEvent.java b/source/de/anomic/search/SearchEvent.java index 1a83903bc..d94978818 100644 --- a/source/de/anomic/search/SearchEvent.java +++ b/source/de/anomic/search/SearchEvent.java @@ -108,7 +108,7 @@ public final class SearchEvent { this.IAmaxcounthash = null; this.IAneardhthash = null; this.localSearchThread = null; - this.order = new ReferenceOrder(query.ranking, query.targetlang); + this.order = new ReferenceOrder(query.ranking, UTF8.getBytes(query.targetlang)); boolean remote = (query.domType == QueryParams.SEARCHDOM_GLOBALDHT) || (query.domType == QueryParams.SEARCHDOM_CLUSTERALL); if (remote && peers.sizeConnected() == 0) remote = false; final long start = System.currentTimeMillis(); diff --git a/source/de/anomic/search/Segments.java b/source/de/anomic/search/Segments.java index c44cc8862..f80b25ae1 100644 --- a/source/de/anomic/search/Segments.java +++ b/source/de/anomic/search/Segments.java @@ -129,6 +129,7 @@ public class Segments implements Iterable { } public Segment segment(final String segmentName) { + if (segments == null) return null; Segment segment = segments.get(segmentName); if (segment == null) { // generate the segment diff --git a/source/de/anomic/yacy/yacyNewsDB.java b/source/de/anomic/yacy/yacyNewsDB.java index 0590519c0..5458267da 100644 --- a/source/de/anomic/yacy/yacyNewsDB.java +++ b/source/de/anomic/yacy/yacyNewsDB.java @@ -164,11 +164,11 @@ public class yacyNewsDB { private Record b2r(final Row.Entry b) { if (b == null) return null; return new yacyNewsDB.Record( - b.getColString(0, null), - b.getColString(1, "UTF-8"), - (b.empty(2)) ? null : my_SHORT_SECOND_FORMATTER.parse(b.getColString(2, null), GenericFormatter.UTCDiffString()), + b.getColString(0), + b.getColString(1), + (b.empty(2)) ? null : my_SHORT_SECOND_FORMATTER.parse(b.getColString(2), GenericFormatter.UTCDiffString()), (int) b.getColLong(3), - MapTools.string2map(b.getColString(4, "UTF-8"), ",") + MapTools.string2map(b.getColString(4), ",") ); } diff --git a/source/de/anomic/yacy/yacyNewsQueue.java b/source/de/anomic/yacy/yacyNewsQueue.java index 06e44cb22..5b0e02efc 100644 --- a/source/de/anomic/yacy/yacyNewsQueue.java +++ b/source/de/anomic/yacy/yacyNewsQueue.java @@ -158,7 +158,7 @@ public class yacyNewsQueue { yacyNewsDB.Record b2r(final Row.Entry b) throws IOException { if (b == null) return null; - final String id = b.getColString(0, null); + final String id = b.getColString(0); //Date touched = yacyCore.parseUniversalDate(UTF8.String(b[1])); return newsDB.get(id); } diff --git a/source/net/yacy/cora/document/UTF8.java b/source/net/yacy/cora/document/UTF8.java index 1958ff5e0..7d3690d73 100644 --- a/source/net/yacy/cora/document/UTF8.java +++ b/source/net/yacy/cora/document/UTF8.java @@ -71,4 +71,9 @@ public class UTF8 { return new String(bytes, offset, length, charset); } + public final static byte[] getBytes(final String s) { + if (s == null) return null; + return s.getBytes(charset); + } + } diff --git a/source/net/yacy/dbtest.java b/source/net/yacy/dbtest.java index 41831d4f0..7683f34a9 100644 --- a/source/net/yacy/dbtest.java +++ b/source/net/yacy/dbtest.java @@ -400,7 +400,7 @@ public class dbtest { Row.Entry row; while (i.hasNext()) { row = i.next(); - for (int j = 0; j < row.columns(); j++) System.out.print(row.getColString(j, null) + ","); + for (int j = 0; j < row.columns(); j++) System.out.print(row.getColString(j) + ","); System.out.println(); } } diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java index 6f391abae..ea4543221 100644 --- a/source/net/yacy/document/parser/html/ContentScraper.java +++ b/source/net/yacy/document/parser/html/ContentScraper.java @@ -28,7 +28,6 @@ import java.io.ByteArrayInputStream; import java.io.CharArrayReader; import java.io.File; import java.io.IOException; -import java.io.UnsupportedEncodingException; import java.io.Writer; import java.net.MalformedURLException; import java.nio.charset.Charset; @@ -346,13 +345,7 @@ public class ContentScraper extends AbstractScraper implements Scraper { } public byte[] getText() { - return this.getText("UTF-8"); - } - - public byte[] getText(final String charSet) { try { - return content.getBytes(charSet); - } catch (final UnsupportedEncodingException e) { return content.getBytes(); } catch (final OutOfMemoryError e) { Log.logException(e); diff --git a/source/net/yacy/kelondro/blob/Compressor.java b/source/net/yacy/kelondro/blob/Compressor.java index 01956728c..d305a3889 100644 --- a/source/net/yacy/kelondro/blob/Compressor.java +++ b/source/net/yacy/kelondro/blob/Compressor.java @@ -112,7 +112,7 @@ public class Compressor implements BLOB { try { while ((entry = writeQueue.take()) != poisonWorkerEntry) { try { - Compressor.this.backend.insert(entry.getKey().getBytes(), compress(entry.getValue())); + Compressor.this.backend.insert(UTF8.getBytes(entry.getKey()), compress(entry.getValue())); } catch (IOException e) { Log.logException(e); buffer.put(entry.getKey(), entry.getValue()); diff --git a/source/net/yacy/kelondro/blob/MapDataMining.java b/source/net/yacy/kelondro/blob/MapDataMining.java index 2faec210b..91bb0c09b 100644 --- a/source/net/yacy/kelondro/blob/MapDataMining.java +++ b/source/net/yacy/kelondro/blob/MapDataMining.java @@ -312,7 +312,7 @@ public class MapDataMining extends MapHeap { public byte[] next() { final String r = s.next(); if (r == null) return null; - return r.getBytes(); + return UTF8.getBytes(r); } public void remove() { diff --git a/source/net/yacy/kelondro/blob/MapHeap.java b/source/net/yacy/kelondro/blob/MapHeap.java index aacc4ac2d..515bbc415 100644 --- a/source/net/yacy/kelondro/blob/MapHeap.java +++ b/source/net/yacy/kelondro/blob/MapHeap.java @@ -147,7 +147,7 @@ public class MapHeap implements Map> { key = normalizeKey(key); String s = map2string(newMap, "W" + my_SHORT_SECOND_FORMATTER.format() + " "); assert s != null; - byte[] sb = s.getBytes(); + byte[] sb = UTF8.getBytes(s); if (cache == null) { // write entry if (blob != null) blob.insert(key, sb); @@ -243,7 +243,7 @@ public class MapHeap implements Map> { if (key == null) return null; try { if (key instanceof byte[]) return get((byte[]) key); - if (key instanceof String) return get(((String) key).getBytes()); + if (key instanceof String) return get(UTF8.getBytes((String) key)); } catch (IOException e) { Log.logException(e); } catch (RowSpaceExceededException e) { diff --git a/source/net/yacy/kelondro/blob/ObjectBuffer.java b/source/net/yacy/kelondro/blob/ObjectBuffer.java index ecaa5ce3f..ec82b9e13 100644 --- a/source/net/yacy/kelondro/blob/ObjectBuffer.java +++ b/source/net/yacy/kelondro/blob/ObjectBuffer.java @@ -56,6 +56,7 @@ package net.yacy.kelondro.blob; +import net.yacy.cora.document.UTF8; import net.yacy.kelondro.order.NaturalOrder; public class ObjectBuffer { @@ -123,12 +124,12 @@ public class ObjectBuffer { public void put(final String key, final Object value) { if ((key == null) || (value == null)) return; synchronized(this) { - if (NaturalOrder.naturalOrder.equal(this.key, key.getBytes())){ + if (NaturalOrder.naturalOrder.equal(this.key, UTF8.getBytes(key))){ this.writeDouble++; } else { this.writeUnique++; } - this.key = key.getBytes(); + this.key = UTF8.getBytes(key); this.value = value; } } @@ -149,7 +150,7 @@ public class ObjectBuffer { public Object get(final String key) { if (key == null) return null; synchronized(this) { - if (NaturalOrder.naturalOrder.equal(this.key, key.getBytes())){ + if (NaturalOrder.naturalOrder.equal(this.key, UTF8.getBytes(key))){ this.readHit++; return this.value; } else { @@ -172,7 +173,7 @@ public class ObjectBuffer { public void remove(final String key) { if (key == null) return; synchronized(this) { - if (NaturalOrder.naturalOrder.equal(this.key, key.getBytes())){ + if (NaturalOrder.naturalOrder.equal(this.key, UTF8.getBytes(key))){ this.key = null; this.value = null; } diff --git a/source/net/yacy/kelondro/blob/Tables.java b/source/net/yacy/kelondro/blob/Tables.java index b8c5e72b1..853d71036 100644 --- a/source/net/yacy/kelondro/blob/Tables.java +++ b/source/net/yacy/kelondro/blob/Tables.java @@ -140,10 +140,10 @@ public class Tables { } private byte[] ukey(String tablename) throws IOException, RowSpaceExceededException { - Row row = select(system_table_pkcounter, tablename.getBytes()); + Row row = select(system_table_pkcounter, UTF8.getBytes(tablename)); if (row == null) { // table counter entry in pkcounter table does not exist: make a new table entry - row = new Row(tablename.getBytes(), system_table_pkcounter_counterName, int2key(0).getBytes()); + row = new Row(UTF8.getBytes(tablename), system_table_pkcounter_counterName, UTF8.getBytes(int2key(0))); update(system_table_pkcounter, row); } byte[] pk = row.get(system_table_pkcounter_counterName); @@ -154,7 +154,7 @@ public class Tables { pki = (int) (ByteArray.parseDecimal(pk) + 1); } while (true) { - pk = int2key(pki).getBytes(); + pk = UTF8.getBytes(int2key(pki)); if (!has(tablename, pk)) break; pki++; } @@ -181,7 +181,7 @@ public class Tables { byte[] uk = ukey(tablename); update(tablename, uk, map); BEncodedHeap heap = getHeap(system_table_pkcounter); - heap.insert(tablename.getBytes(), system_table_pkcounter_counterName, uk); + heap.insert(UTF8.getBytes(tablename), system_table_pkcounter_counterName, uk); return uk; } @@ -400,19 +400,19 @@ public class Tables { } public void put(String colname, String value) { - super.put(colname, value.getBytes()); + super.put(colname, UTF8.getBytes(value)); } public void put(String colname, int value) { - super.put(colname, Integer.toString(value).getBytes()); + super.put(colname, UTF8.getBytes(Integer.toString(value))); } public void put(String colname, long value) { - super.put(colname, Long.toString(value).getBytes()); + super.put(colname, UTF8.getBytes(Long.toString(value))); } public void put(String colname, Date value) { - super.put(colname, my_SHORT_MILSEC_FORMATTER.format(value).getBytes()); + super.put(colname, UTF8.getBytes(my_SHORT_MILSEC_FORMATTER.format(value))); } public byte[] get(String colname, byte[] dflt) { diff --git a/source/net/yacy/kelondro/data/citation/CitationReferenceRow.java b/source/net/yacy/kelondro/data/citation/CitationReferenceRow.java index af046ba08..5ef1dcda7 100644 --- a/source/net/yacy/kelondro/data/citation/CitationReferenceRow.java +++ b/source/net/yacy/kelondro/data/citation/CitationReferenceRow.java @@ -28,6 +28,7 @@ package net.yacy.kelondro.data.citation; import java.util.Collection; +import net.yacy.cora.document.UTF8; import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.index.Column; import net.yacy.kelondro.index.Row; @@ -86,7 +87,7 @@ public final class CitationReferenceRow implements Reference /*, Cloneable*/ { this.entry = citationRow.newEntry(); final int mddlm = MicroDate.microDateDays(lastmodified); final int mddct = MicroDate.microDateDays(updatetime); - this.entry.setCol(col_urlhash, urlHash, null); + this.entry.setCol(col_urlhash, urlHash); this.entry.setCol(col_lastModified, mddlm); this.entry.setCol(col_lastAccessed, mddct); this.entry.setCol(col_posintext, posintext); @@ -100,7 +101,7 @@ public final class CitationReferenceRow implements Reference /*, Cloneable*/ { public CitationReferenceRow(final String urlHash, final String code) { // the code is the external form of the row minus the leading urlHash entry - this.entry = citationRow.newEntry((urlHash + code).getBytes()); + this.entry = citationRow.newEntry(UTF8.getBytes((urlHash + code))); } public CitationReferenceRow(final String external) { diff --git a/source/net/yacy/kelondro/data/image/ImageReferenceRow.java b/source/net/yacy/kelondro/data/image/ImageReferenceRow.java index 99d2cc52c..42ebe6ad8 100644 --- a/source/net/yacy/kelondro/data/image/ImageReferenceRow.java +++ b/source/net/yacy/kelondro/data/image/ImageReferenceRow.java @@ -29,6 +29,7 @@ package net.yacy.kelondro.data.image; import java.util.ArrayList; import java.util.Collection; +import net.yacy.cora.document.UTF8; import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.index.Column; import net.yacy.kelondro.index.Row; @@ -163,7 +164,7 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag this.entry = urlEntryRow.newEntry(); final int mddlm = MicroDate.microDateDays(lastmodified); final int mddct = MicroDate.microDateDays(updatetime); - this.entry.setCol(col_urlhash, urlHash, null); + this.entry.setCol(col_urlhash, urlHash); this.entry.setCol(col_lastModified, mddlm); this.entry.setCol(col_freshUntil, Math.max(0, mddlm + (mddct - mddlm) * 2)); // TTL computation this.entry.setCol(col_doctype, new byte[]{(byte) doctype}); @@ -175,7 +176,7 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag public ImageReferenceRow(final String urlHash, final String code) { // the code is the external form of the row minus the leading urlHash entry - this.entry = urlEntryRow.newEntry((urlHash + code).getBytes()); + this.entry = urlEntryRow.newEntry(UTF8.getBytes((urlHash + code))); } public ImageReferenceRow(final String external) { diff --git a/source/net/yacy/kelondro/data/meta/DigestURI.java b/source/net/yacy/kelondro/data/meta/DigestURI.java index cbab5aaf0..4cae92a57 100644 --- a/source/net/yacy/kelondro/data/meta/DigestURI.java +++ b/source/net/yacy/kelondro/data/meta/DigestURI.java @@ -220,7 +220,7 @@ public class DigestURI extends MultiProtocolURI implements Serializable { assert hashs.length() == 12; // return result hash - byte[] b = hashs.toString().getBytes(); + byte[] b = UTF8.getBytes(hashs.toString()); assert b.length == 12; return b; } diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java index da4b42fad..2a8a2b0e7 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java @@ -26,7 +26,6 @@ package net.yacy.kelondro.data.meta; -import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.text.ParseException; import java.util.Date; @@ -145,21 +144,13 @@ public class URIMetadataRow implements URIMetadata { encodeDate(col_mod, mod); encodeDate(col_load, load); encodeDate(col_fresh, fresh); - try { - this.entry.setCol(col_referrer, (referrer == null) ? null : referrer.getBytes("UTF-8")); - } catch (UnsupportedEncodingException e) { - this.entry.setCol(col_referrer, (referrer == null) ? null : referrer.getBytes()); - } + this.entry.setCol(col_referrer, (referrer == null) ? null : UTF8.getBytes(referrer)); this.entry.setCol(col_md5, md5); this.entry.setCol(col_size, size); this.entry.setCol(col_wc, wc); this.entry.setCol(col_dt, new byte[]{(byte) dt}); this.entry.setCol(col_flags, flags.bytes()); - try { - this.entry.setCol(col_lang, lang.getBytes("UTF-8")); - } catch (UnsupportedEncodingException e) { - this.entry.setCol(col_lang, lang.getBytes()); - } + this.entry.setCol(col_lang, UTF8.getBytes(lang)); this.entry.setCol(col_llocal, llocal); this.entry.setCol(col_lother, lother); this.entry.setCol(col_limage, limage); @@ -195,11 +186,7 @@ public class URIMetadataRow implements URIMetadata { s.append(dc_creator).append(10); s.append(dc_subject).append(10); s.append(dc_publisher).append(10); - try { - return s.toString().getBytes("UTF-8"); - } catch (UnsupportedEncodingException e) { - return s.toString().getBytes(); - } + return UTF8.getBytes(s.toString()); } public URIMetadataRow(final Row.Entry entry, final WordReferenceVars searchedWord, final long ranking) { @@ -216,7 +203,7 @@ public class URIMetadataRow implements URIMetadata { //System.out.println("DEBUG-ENTRY: prop=" + prop.toString()); DigestURI url; try { - url = new DigestURI(crypt.simpleDecode(prop.getProperty("url", ""), null), prop.getProperty("hash").getBytes()); + url = new DigestURI(crypt.simpleDecode(prop.getProperty("url", ""), null), UTF8.getBytes(prop.getProperty("hash"))); } catch (final MalformedURLException e) { url = null; } @@ -247,11 +234,7 @@ public class URIMetadataRow implements URIMetadata { } catch (final ParseException e) { encodeDate(col_fresh, new Date()); } - try { - this.entry.setCol(col_referrer, prop.getProperty("referrer", "").getBytes("UTF-8")); - } catch (UnsupportedEncodingException e1) { - this.entry.setCol(col_referrer, prop.getProperty("referrer", "").getBytes()); - } + this.entry.setCol(col_referrer, UTF8.getBytes(prop.getProperty("referrer", ""))); this.entry.setCol(col_md5, Digest.decodeHex(prop.getProperty("md5", ""))); this.entry.setCol(col_size, Integer.parseInt(prop.getProperty("size", "0"))); this.entry.setCol(col_wc, Integer.parseInt(prop.getProperty("wc", "0"))); @@ -259,11 +242,7 @@ public class URIMetadataRow implements URIMetadata { this.entry.setCol(col_dt, dt.length() > 0 ? new byte[]{(byte) dt.charAt(0)} : new byte[]{(byte) 't'}); final String flags = prop.getProperty("flags", "AAAAAA"); this.entry.setCol(col_flags, (flags.length() > 6) ? QueryParams.empty_constraint.bytes() : (new Bitfield(4, flags)).bytes()); - try { - this.entry.setCol(col_lang, prop.getProperty("lang", "uk").getBytes("UTF-8")); - } catch (UnsupportedEncodingException e) { - this.entry.setCol(col_lang, prop.getProperty("lang", "uk").getBytes()); - } + this.entry.setCol(col_lang, UTF8.getBytes(prop.getProperty("lang", "uk"))); this.entry.setCol(col_llocal, Integer.parseInt(prop.getProperty("llocal", "0"))); this.entry.setCol(col_lother, Integer.parseInt(prop.getProperty("lother", "0"))); this.entry.setCol(col_limage, Integer.parseInt(prop.getProperty("limage", "0"))); @@ -431,7 +410,7 @@ public class URIMetadataRow implements URIMetadata { } public String language() { - return this.entry.getColString(col_lang, null); + return this.entry.getColString(col_lang); } public int size() { @@ -511,7 +490,7 @@ public class URIMetadataRow implements URIMetadata { public Request toBalancerEntry(final String initiatorHash) { return new Request( - initiatorHash.getBytes(), + UTF8.getBytes(initiatorHash), metadata().url(), referrerHash(), metadata().dc_title(), diff --git a/source/net/yacy/kelondro/data/navigation/NavigationReferenceRow.java b/source/net/yacy/kelondro/data/navigation/NavigationReferenceRow.java index f3a52a488..dd7ed4ff6 100644 --- a/source/net/yacy/kelondro/data/navigation/NavigationReferenceRow.java +++ b/source/net/yacy/kelondro/data/navigation/NavigationReferenceRow.java @@ -89,7 +89,7 @@ public final class NavigationReferenceRow extends AbstractReference implements N assert (termhash.length == 12) : "termhash = " + UTF8.String(termhash); assert (refhash.length == 12) : "refhash = " + UTF8.String(refhash); this.entry = navEntryRow.newEntry(); - this.entry.setCol(col_navhash, UTF8.String(termhash) + UTF8.String(refhash), null); + this.entry.setCol(col_navhash, UTF8.String(termhash) + UTF8.String(refhash)); this.entry.setCol(col_count, count); this.entry.setCol(col_pos, pos); this.entry.setCol(col_flags, flags); @@ -119,15 +119,15 @@ public final class NavigationReferenceRow extends AbstractReference implements N } public String navigationHash() { - return this.entry.getColString(col_navhash, null); + return this.entry.getColString(col_navhash); } public byte[] metadataHash() { - return navigationHash().substring(12).getBytes(); + return UTF8.getBytes(navigationHash().substring(12)); } public byte[] termHash() { - return navigationHash().substring(0, 12).getBytes(); + return UTF8.getBytes(navigationHash().substring(0, 12)); } public int hitcount() { diff --git a/source/net/yacy/kelondro/data/word/WordReference.java b/source/net/yacy/kelondro/data/word/WordReference.java index bbb0d5287..f30812afe 100644 --- a/source/net/yacy/kelondro/data/word/WordReference.java +++ b/source/net/yacy/kelondro/data/word/WordReference.java @@ -43,7 +43,7 @@ public interface WordReference extends Reference { public int phrasesintext(); - public String getLanguage(); + public byte[] getLanguage(); public char getType(); diff --git a/source/net/yacy/kelondro/data/word/WordReferenceRow.java b/source/net/yacy/kelondro/data/word/WordReferenceRow.java index 2efb0eaba..c3f8ccfa8 100644 --- a/source/net/yacy/kelondro/data/word/WordReferenceRow.java +++ b/source/net/yacy/kelondro/data/word/WordReferenceRow.java @@ -129,7 +129,7 @@ public final class WordReferenceRow extends AbstractReference implements WordRef final int posofphrase, // number of the phrase where word appears final long lastmodified, // last-modified time of the document where word appears final long updatetime, // update time; this is needed to compute a TTL for the word, so it can be removed easily if the TTL is short - final String language, // (guessed) language of document + final byte[] language, // (guessed) language of document final char doctype, // type of document final int outlinksSame, // outlinks to same domain final int outlinksOther, // outlinks to other domain @@ -147,7 +147,7 @@ public final class WordReferenceRow extends AbstractReference implements WordRef this.entry.setCol(col_wordsInText, wordcount); this.entry.setCol(col_phrasesInText, phrasecount); this.entry.setCol(col_doctype, new byte[]{(byte) doctype}); - this.entry.setCol(col_language, ((language == null) || (language.length() != urlEntryRow.width(col_language))) ? "uk" : language, null); + this.entry.setCol(col_language, (language == null || language.length != urlEntryRow.width(col_language)) ? WordReferenceVars.default_language : language); this.entry.setCol(col_llocal, outlinksSame); this.entry.setCol(col_lother, outlinksOther); this.entry.setCol(col_urlLength, urlLength); @@ -179,14 +179,14 @@ public final class WordReferenceRow extends AbstractReference implements WordRef this.entry = urlEntryRow.newEntry(); final int mddlm = MicroDate.microDateDays(lastmodified); final int mddct = MicroDate.microDateDays(updatetime); - this.entry.setCol(col_urlhash, urlHash, null); + this.entry.setCol(col_urlhash, urlHash); this.entry.setCol(col_lastModified, mddlm); this.entry.setCol(col_freshUntil, Math.max(0, mddlm + (mddct - mddlm) * 2)); // TTL computation this.entry.setCol(col_wordsInTitle, titleLength / 6); // word count estimation; TODO: change value handover to number of words this.entry.setCol(col_wordsInText, wordcount); this.entry.setCol(col_phrasesInText, phrasecount); this.entry.setCol(col_doctype, new byte[]{(byte) doctype}); - this.entry.setCol(col_language, ((language == null) || (language.length() != urlEntryRow.width(col_language))) ? "uk" : language, null); + this.entry.setCol(col_language, ((language == null) || (language.length() != urlEntryRow.width(col_language))) ? "uk" : language); this.entry.setCol(col_llocal, outlinksSame); this.entry.setCol(col_lother, outlinksOther); this.entry.setCol(col_urlLength, urlLength); @@ -206,7 +206,7 @@ public final class WordReferenceRow extends AbstractReference implements WordRef public WordReferenceRow(final String urlHash, final String code) { // the code is the external form of the row minus the leading urlHash entry - this.entry = urlEntryRow.newEntry((urlHash + code).getBytes()); + this.entry = urlEntryRow.newEntry(UTF8.getBytes((urlHash + code))); } public WordReferenceRow(final String external) { @@ -286,8 +286,8 @@ public final class WordReferenceRow extends AbstractReference implements WordRef return (int) this.entry.getColLong(col_phrasesInText); } - public String getLanguage() { - return this.entry.getColString(col_language, null); + public byte[] getLanguage() { + return this.entry.getColBytes(col_language, true); } public char getType() { diff --git a/source/net/yacy/kelondro/data/word/WordReferenceVars.java b/source/net/yacy/kelondro/data/word/WordReferenceVars.java index 60fe4dda4..a20031359 100644 --- a/source/net/yacy/kelondro/data/word/WordReferenceVars.java +++ b/source/net/yacy/kelondro/data/word/WordReferenceVars.java @@ -33,6 +33,7 @@ import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.Semaphore; +import net.yacy.cora.document.UTF8; import net.yacy.kelondro.index.Row.Entry; import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.Bitfield; @@ -51,10 +52,11 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc */ public static final WordReferenceVars poison = new WordReferenceVars(); private static int cores = Runtime.getRuntime().availableProcessors(); + public static final byte[] default_language = UTF8.getBytes("uk"); public Bitfield flags; public long lastModified; - public String language; + public byte[] language; public byte[] urlHash; public char type; public int hitcount, llocal, lother, phrasesintext, @@ -77,14 +79,14 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc final int posofphrase, // number of the phrase where word appears final long lastmodified, // last-modified time of the document where word appears final long updatetime, // update time; this is needed to compute a TTL for the word, so it can be removed easily if the TTL is short - String language, // (guessed) language of document + byte[] language, // (guessed) language of document final char doctype, // type of document final int outlinksSame, // outlinks to same domain final int outlinksOther, // outlinks to other domain final Bitfield flags, // attributes to the url and to the word according the url final double termfrequency ) { - if ((language == null) || (language.length() != 2)) language = "uk"; + if (language == null || language.length != 2) language = default_language; final int mddlm = MicroDate.microDateDays(lastmodified); //final int mddct = MicroDate.microDateDays(updatetime); this.flags = flags; @@ -199,7 +201,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc return freshUntil; } */ - public String getLanguage() { + public byte[] getLanguage() { return language; } diff --git a/source/net/yacy/kelondro/index/IndexTest.java b/source/net/yacy/kelondro/index/IndexTest.java index c431509a5..068ba8e7d 100644 --- a/source/net/yacy/kelondro/index/IndexTest.java +++ b/source/net/yacy/kelondro/index/IndexTest.java @@ -49,7 +49,7 @@ public class IndexTest { // so we construct a generic Hash using two long values final String s = (Base64Order.enhancedCoder.encodeLongSB(Math.abs(r0), 6).toString() + Base64Order.enhancedCoder.encodeLongSB(Math.abs(r1), 6).toString()); - return s.getBytes(); + return UTF8.getBytes(s); } public static byte[] randomHash(final Random r) { diff --git a/source/net/yacy/kelondro/index/Row.java b/source/net/yacy/kelondro/index/Row.java index 734419bf1..196c986eb 100644 --- a/source/net/yacy/kelondro/index/Row.java +++ b/source/net/yacy/kelondro/index/Row.java @@ -27,7 +27,6 @@ package net.yacy.kelondro.index; -import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Comparator; import java.util.Iterator; @@ -313,7 +312,7 @@ public final class Row { } else if ((decimalCardinal) && (row[i].celltype == Column.celltype_bitfield)) { setCol(nick, (new Bitfield(row[i].cellwidth, elts[i].substring(p + 1).trim())).bytes()); } else { - setCol(nick, elts[i].substring(p + 1).trim().getBytes()); + setCol(nick, UTF8.getBytes(elts[i].substring(p + 1).trim())); } } } @@ -430,18 +429,7 @@ public final class Row { } public final void setCol(final int column, final String cell) { - setCol(column, cell.getBytes()); - } - - public final void setCol(final int column, final String cell, final String encoding) { - if (encoding == null) - setCol(column, cell.getBytes()); - else - try { - setCol(column, (cell == null) ? null : cell.getBytes(encoding)); - } catch (final UnsupportedEncodingException e) { - Log.logSevere("Row", "", e); - } + setCol(column, UTF8.getBytes(cell)); } @Deprecated @@ -491,23 +479,17 @@ public final class Row { throw new kelondroException("ROW", "addCol did not find appropriate encoding"); } - public final String getColString(final int column, final String encoding) { - return getColString(colstart[column], row[column].cellwidth, encoding); + public final String getColString(final int column) { + return getColString(colstart[column], row[column].cellwidth); } - private final String getColString(final int clstrt, int length, final String encoding) { + private final String getColString(final int clstrt, int length) { if (rowinstance[offset + clstrt] == 0) return null; assert length <= rowinstance.length - offset - clstrt; if (length > rowinstance.length - offset - clstrt) length = rowinstance.length - offset - clstrt; while ((length > 0) && (rowinstance[offset + clstrt + length - 1] == 0)) length--; if (length == 0) return null; - try { - if ((encoding == null) || (encoding.length() == 0)) - return UTF8.String(rowinstance, offset + clstrt, length); - return new String(rowinstance, offset + clstrt, length, encoding); - } catch (final UnsupportedEncodingException e) { - return ""; - } + return UTF8.String(rowinstance, offset + clstrt, length); } public final long getColLong(final int column) { diff --git a/source/net/yacy/kelondro/index/RowCollection.java b/source/net/yacy/kelondro/index/RowCollection.java index 3925e2a6e..aa28f7d4c 100644 --- a/source/net/yacy/kelondro/index/RowCollection.java +++ b/source/net/yacy/kelondro/index/RowCollection.java @@ -132,7 +132,7 @@ public class RowCollection implements Iterable, Cloneable { this.chunkcount = chunkcachelength / rowdef.objectsize; // patch problem } this.lastTimeWrote = (exportedCollection.getColLong(exp_last_wrote) + 10957) * day; - final String sortOrderKey = exportedCollection.getColString(exp_order_type, null); + final String sortOrderKey = exportedCollection.getColString(exp_order_type); ByteOrder oldOrder = null; if ((sortOrderKey == null) || (sortOrderKey.equals("__"))) { oldOrder = null; @@ -241,7 +241,7 @@ public class RowCollection implements Iterable, Cloneable { entry.setCol(exp_chunkcount, this.chunkcount); entry.setCol(exp_last_read, daysSince2000(System.currentTimeMillis())); entry.setCol(exp_last_wrote, daysSince2000(this.lastTimeWrote)); - entry.setCol(exp_order_type, (this.rowdef.objectOrder == null) ? "__".getBytes() :this.rowdef.objectOrder.signature().getBytes()); + entry.setCol(exp_order_type, (this.rowdef.objectOrder == null) ? UTF8.getBytes("__") : UTF8.getBytes(this.rowdef.objectOrder.signature())); entry.setCol(exp_order_bound, this.sortBound); entry.setCol(exp_collection, this.chunkcache); return entry.bytes(); diff --git a/source/net/yacy/kelondro/index/RowSet.java b/source/net/yacy/kelondro/index/RowSet.java index 84ea31657..42ee64a2d 100644 --- a/source/net/yacy/kelondro/index/RowSet.java +++ b/source/net/yacy/kelondro/index/RowSet.java @@ -660,8 +660,9 @@ public class RowSet extends RowCollection implements Index, Iterable public static byte[] randomHash(final long r0, final long r1) { // a long can have 64 bit, but a 12-byte hash can have 6 * 12 = 72 bits // so we construct a generic Hash using two long values - return (Base64Order.enhancedCoder.encodeLongSB(Math.abs(r0), 11).substring(5) + - Base64Order.enhancedCoder.encodeLongSB(Math.abs(r1), 11).substring(5)).getBytes(); + return UTF8.getBytes( + Base64Order.enhancedCoder.encodeLongSB(Math.abs(r0), 11).substring(5) + + Base64Order.enhancedCoder.encodeLongSB(Math.abs(r1), 11).substring(5)); } public static byte[] randomHash(final Random r) { return randomHash(r.nextLong(), r.nextLong()); diff --git a/source/net/yacy/kelondro/io/AbstractWriter.java b/source/net/yacy/kelondro/io/AbstractWriter.java index 480e9a30d..a1e5d1192 100644 --- a/source/net/yacy/kelondro/io/AbstractWriter.java +++ b/source/net/yacy/kelondro/io/AbstractWriter.java @@ -32,6 +32,7 @@ import java.util.HashMap; import java.util.Iterator; import java.util.Map; +import net.yacy.cora.document.UTF8; import net.yacy.kelondro.util.ByteBuffer; @@ -87,7 +88,7 @@ public abstract class AbstractWriter extends AbstractReader implements Writer { public final void writeLine(final String line) throws IOException { final byte[] b = new byte[line.length() + 2]; - System.arraycopy(line.getBytes(), 0, b, 0, line.length()); + System.arraycopy(UTF8.getBytes(line), 0, b, 0, line.length()); b[b.length - 2] = cr; b[b.length - 1] = lf; this.write(b); diff --git a/source/net/yacy/kelondro/io/CharBuffer.java b/source/net/yacy/kelondro/io/CharBuffer.java index 5c9b0477a..15c20a9c3 100644 --- a/source/net/yacy/kelondro/io/CharBuffer.java +++ b/source/net/yacy/kelondro/io/CharBuffer.java @@ -28,10 +28,11 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; -import java.io.UnsupportedEncodingException; import java.io.Writer; import java.util.Properties; +import net.yacy.cora.document.UTF8; + public final class CharBuffer extends Writer { public static final char singlequote = '\''; @@ -271,16 +272,8 @@ public final class CharBuffer extends Writer { return tmp; } - public byte[] getBytes(String charset) throws UnsupportedEncodingException { - return new String(buffer, offset, length).getBytes(charset); - } - public byte[] getBytes() { - try { - return getBytes("UTF-8"); - } catch (UnsupportedEncodingException e) { - return null; - } + return UTF8.getBytes(new String(buffer, offset, length)); } public CharBuffer trim(final int start) { diff --git a/source/net/yacy/kelondro/order/Digest.java b/source/net/yacy/kelondro/order/Digest.java index cbc9e72c6..4d5dc592b 100644 --- a/source/net/yacy/kelondro/order/Digest.java +++ b/source/net/yacy/kelondro/order/Digest.java @@ -32,7 +32,6 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.RandomAccessFile; -import java.io.UnsupportedEncodingException; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.concurrent.ArrayBlockingQueue; @@ -44,6 +43,7 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.LinkedBlockingQueue; +import net.yacy.cora.document.UTF8; import net.yacy.kelondro.logging.Log; @@ -128,11 +128,7 @@ public class Digest { fromPool = false; } byte[] keyBytes; - try { - keyBytes = key.getBytes("UTF-8"); - } catch (UnsupportedEncodingException e) { - keyBytes = key.getBytes(); - } + keyBytes = UTF8.getBytes(key); digest.update(keyBytes); byte[] result = digest.digest(); digest.reset(); diff --git a/source/net/yacy/kelondro/order/StringOrder.java b/source/net/yacy/kelondro/order/StringOrder.java index 93049b205..0106e2454 100644 --- a/source/net/yacy/kelondro/order/StringOrder.java +++ b/source/net/yacy/kelondro/order/StringOrder.java @@ -29,6 +29,8 @@ package net.yacy.kelondro.order; import java.util.Comparator; +import net.yacy.cora.document.UTF8; + public class StringOrder implements Comparator { public ByteOrder baseOrder; @@ -41,6 +43,6 @@ public class StringOrder implements Comparator { } public final int compare(final String s1, final String s2) { - return baseOrder.compare(s1.getBytes(), s2.getBytes()); + return baseOrder.compare(UTF8.getBytes(s1), UTF8.getBytes(s2)); } } diff --git a/source/net/yacy/kelondro/table/Relations.java b/source/net/yacy/kelondro/table/Relations.java index b9046e0d6..4dd2e51d6 100755 --- a/source/net/yacy/kelondro/table/Relations.java +++ b/source/net/yacy/kelondro/table/Relations.java @@ -137,7 +137,7 @@ public class Relations { } public String putRelation(final String name, final String key, final String value) throws IOException, RowSpaceExceededException { - final byte[] r = putRelation(name, key.getBytes(), value.getBytes()); + final byte[] r = putRelation(name, UTF8.getBytes(key), UTF8.getBytes(value)); if (r == null) return null; return UTF8.String(r); } @@ -156,7 +156,7 @@ public class Relations { } public String getRelation(final String name, final String key) throws IOException, RowSpaceExceededException { - final byte[] r = getRelation(name, key.getBytes()); + final byte[] r = getRelation(name, UTF8.getBytes(key)); if (r == null) return null; return UTF8.String(r); } diff --git a/source/net/yacy/kelondro/table/SQLTable.java b/source/net/yacy/kelondro/table/SQLTable.java index cab0a8316..fc9257324 100644 --- a/source/net/yacy/kelondro/table/SQLTable.java +++ b/source/net/yacy/kelondro/table/SQLTable.java @@ -205,7 +205,7 @@ public class SQLTable implements Index, Iterable { final PreparedStatement sqlStatement = this.theDBConnection.prepareStatement(sqlQuery); - sqlStatement.setString(1, row.getColString(0, null)); + sqlStatement.setString(1, row.getColString(0)); sqlStatement.setBytes(2, row.bytes()); sqlStatement.execute(); @@ -226,7 +226,7 @@ public class SQLTable implements Index, Iterable { final PreparedStatement sqlStatement = this.theDBConnection.prepareStatement(sqlQuery); - sqlStatement.setString(1, row.getColString(0, null)); + sqlStatement.setString(1, row.getColString(0)); sqlStatement.setBytes(2, row.bytes()); sqlStatement.execute(); diff --git a/source/net/yacy/kelondro/util/BDecoder.java b/source/net/yacy/kelondro/util/BDecoder.java index 49a3ab563..eb34422df 100644 --- a/source/net/yacy/kelondro/util/BDecoder.java +++ b/source/net/yacy/kelondro/util/BDecoder.java @@ -116,19 +116,19 @@ public class BDecoder { return UTF8.String(this.b); } public void toStream(OutputStream os) throws IOException { - os.write(Integer.toString(this.b.length).getBytes()); + os.write(UTF8.getBytes(Integer.toString(this.b.length))); os.write(_p); os.write(this.b); } public static void toStream(OutputStream os, byte[] b) throws IOException { - os.write(Integer.toString(b.length).getBytes()); + os.write(UTF8.getBytes(Integer.toString(b.length))); os.write(_p); os.write(b); } public static void toStream(OutputStream os, String s) throws IOException { - os.write(Integer.toString(s.length()).getBytes()); + os.write(UTF8.getBytes(Integer.toString(s.length()))); os.write(_p); - os.write(s.getBytes()); + os.write(UTF8.getBytes(s)); } } @@ -218,7 +218,7 @@ public class BDecoder { } public void toStream(OutputStream os) throws IOException { os.write(_i); - os.write(Long.toString(this.i).getBytes()); + os.write(UTF8.getBytes(Long.toString(this.i))); os.write(_e); } } diff --git a/source/net/yacy/kelondro/util/ByteBuffer.java b/source/net/yacy/kelondro/util/ByteBuffer.java index 8dc6802e2..75fc4fb32 100644 --- a/source/net/yacy/kelondro/util/ByteBuffer.java +++ b/source/net/yacy/kelondro/util/ByteBuffer.java @@ -207,7 +207,7 @@ public final class ByteBuffer extends OutputStream { try { return append(s.getBytes("UTF-8")); } catch (UnsupportedEncodingException e) { - return append(s.getBytes()); + return append(UTF8.getBytes(s)); } } @@ -400,14 +400,6 @@ public final class ByteBuffer extends OutputStream { public String toString() { return UTF8.String(buffer, offset, length); } - - public String toString(final String charsetName) { - try { - return new String(this.getBytes(),charsetName); - } catch (final UnsupportedEncodingException e) { - return UTF8.String(this.getBytes()); - } - } public String toString(final int left, final int length) { return UTF8.String(buffer, offset + left, length); @@ -491,7 +483,10 @@ public final class ByteBuffer extends OutputStream { } public static boolean equals(final byte[] buffer, final byte[] pattern) { - return equals(buffer, 0, pattern); + // compares two byte arrays: true, if pattern appears completely at offset position + if (buffer.length < pattern.length) return false; + for (int i = 0; i < pattern.length; i++) if (buffer[i] != pattern[i]) return false; + return true; } public static boolean equals(final byte[] buffer, final int offset, final byte[] pattern) { diff --git a/source/net/yacy/kelondro/util/OS.java b/source/net/yacy/kelondro/util/OS.java index 86a1a42fe..aecc7e146 100644 --- a/source/net/yacy/kelondro/util/OS.java +++ b/source/net/yacy/kelondro/util/OS.java @@ -33,6 +33,7 @@ import java.util.Vector; import de.anomic.server.serverCore; +import net.yacy.cora.document.UTF8; import net.yacy.kelondro.logging.Log; @@ -149,7 +150,7 @@ public final class OS { } public static void deployScript(final File scriptFile, final String theScript) throws IOException { - FileUtils.copy(theScript.getBytes(), scriptFile); + FileUtils.copy(UTF8.getBytes(theScript), scriptFile); if(!isWindows){ // set executable try { Runtime.getRuntime().exec("chmod 755 " + scriptFile.getAbsolutePath().replaceAll(" ", "\\ ")).waitFor(); diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java index d1e8f8493..d16665ef7 100644 --- a/source/net/yacy/repository/LoaderDispatcher.java +++ b/source/net/yacy/repository/LoaderDispatcher.java @@ -41,6 +41,7 @@ import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import net.yacy.cora.document.MultiProtocolURI; +import net.yacy.cora.document.UTF8; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.ResponseHeader; @@ -114,7 +115,7 @@ public final class LoaderDispatcher { final boolean global ) { return new Request( - sb.peers.mySeed().hash.getBytes(), + UTF8.getBytes(sb.peers.mySeed().hash), url, null, "", @@ -185,7 +186,7 @@ public final class LoaderDispatcher { final String host = url.getHost(); // check if we have the page in the cache - final CrawlProfile crawlProfile = sb.crawler.getActive(request.profileHandle().getBytes()); + final CrawlProfile crawlProfile = sb.crawler.getActive(UTF8.getBytes(request.profileHandle())); if (crawlProfile != null && cacheStrategy != CrawlProfile.CacheStrategy.NOCACHE) { // we have passed a first test if caching is allowed // now see if there is a cache entry