replaced calls to String.getBytes() with UTF8.getBytes() so that a single default String encoding is used

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7580 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent cb6d307bba
commit 30aed9824a

@ -535,7 +535,7 @@ public class IndexControlRWIs_p {
public static RankingProcess genSearchresult(final serverObjects prop, final Switchboard sb, Segment segment, final byte[] keyhash, final Bitfield filter) {
final QueryParams query = new QueryParams(UTF8.String(keyhash), -1, filter, segment, sb.getRanking(), "IndexControlRWIs_p");
final ReferenceOrder order = new ReferenceOrder(query.ranking, query.targetlang);
final ReferenceOrder order = new ReferenceOrder(query.ranking, UTF8.getBytes(query.targetlang));
final RankingProcess ranked = new RankingProcess(query, order, Integer.MAX_VALUE);
ranked.run();

@ -128,14 +128,14 @@ public class Supporter {
row = Supporter.get(urlhash);
if (row == null) continue;
url = row.getColString(0, null);
url = row.getColString(0);
try {
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS, new DigestURI(url, urlhash.getBytes()))) continue;
} catch(final MalformedURLException e) {continue;}
title = row.getColString(1,"UTF-8");
description = row.getColString(2,"UTF-8");
title = row.getColString(1);
description = row.getColString(2);
if ((url == null) || (title == null) || (description == null)) continue;
refid = row.getColString(3, null);
refid = row.getColString(3);
voted = (sb.peers.newsPool.getSpecific(yacyNewsPool.OUTGOING_DB, yacyNewsPool.CATEGORY_SURFTIPP_VOTE_ADD, "refid", refid) != null) ||
(sb.peers.newsPool.getSpecific(yacyNewsPool.PUBLISHED_DB, yacyNewsPool.CATEGORY_SURFTIPP_VOTE_ADD, "refid", refid) != null);
prop.put("supporter_results_" + i + "_authorized", authenticated ? "1" : "0");

@ -136,15 +136,15 @@ public class Surftips {
row = surftips.get(urlhash);
if (row == null) continue;
url = row.getColString(0, null);
url = row.getColString(0);
try{
if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS ,new DigestURI(url)))
continue;
}catch(final MalformedURLException e){continue;};
title = row.getColString(1,"UTF-8");
description = row.getColString(2,"UTF-8");
title = row.getColString(1);
description = row.getColString(2);
if ((url == null) || (title == null) || (description == null)) continue;
refid = row.getColString(3, null);
refid = row.getColString(3);
voted = (sb.peers.newsPool.getSpecific(yacyNewsPool.OUTGOING_DB, yacyNewsPool.CATEGORY_SURFTIPP_VOTE_ADD, "refid", refid) != null) ||
(sb.peers.newsPool.getSpecific(yacyNewsPool.PUBLISHED_DB, yacyNewsPool.CATEGORY_SURFTIPP_VOTE_ADD, "refid", refid) != null);
prop.put("surftips_results_" + i + "_authorized", (authenticated) ? "1" : "0");

@ -251,7 +251,7 @@ public class ZURL implements Iterable<ZURL.Entry> {
this.executor = entry.getColBytes(1, true);
this.workdate = new Date(entry.getColLong(2));
this.workcount = (int) entry.getColLong(3);
this.anycause = entry.getColString(4, "UTF-8");
this.anycause = entry.getColString(4);
this.bentry = new Request(Request.rowdef.newEntry(entry.getColBytes(5, false)));
assert (Base64Order.enhancedCoder.equal(entry.getPrimaryKeyBytes(), bentry.url().hash()));
this.stored = true;
@ -310,7 +310,7 @@ public class ZURL implements Iterable<ZURL.Entry> {
try {
return new Entry(e);
} catch (final IOException ex) {
throw new RuntimeException("error '" + ex.getMessage() + "' for hash " + e.getColString(0, null));
throw new RuntimeException("error '" + ex.getMessage() + "' for hash " + e.getColString(0));
}
}

@ -136,15 +136,15 @@ public class Request extends WorkflowJob {
}
private void insertEntry(final Row.Entry entry) throws IOException {
final String urlstring = entry.getColString(2, null);
final String urlstring = entry.getColString(2);
if (urlstring == null) throw new IOException ("url string is null");
this.initiator = entry.getColBytes(1, true);
this.initiator = (initiator == null) ? null : ((initiator.length == 0) ? null : initiator);
this.url = new DigestURI(urlstring, entry.getPrimaryKeyBytes());
this.refhash = (entry.empty(3)) ? null : entry.getColBytes(3, true);
this.name = (entry.empty(4)) ? "" : entry.getColString(4, "UTF-8").trim();
this.name = (entry.empty(4)) ? "" : entry.getColString(4).trim();
this.appdate = entry.getColLong(5);
this.profileHandle = (entry.empty(6)) ? null : entry.getColString(6, null).trim();
this.profileHandle = (entry.empty(6)) ? null : entry.getColString(6).trim();
this.depth = (int) entry.getColLong(7);
this.anchors = (int) entry.getColLong(8);
this.forkfactor = (int) entry.getColLong(9);

@ -36,6 +36,7 @@ import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import net.yacy.cora.document.UTF8;
import net.yacy.document.Condenser;
import net.yacy.document.Document;
import net.yacy.document.LibraryProvider;
@ -193,7 +194,7 @@ public class DocumentIndex extends Segment {
public ArrayList<DigestURI> find(String querystring, int count) {
// make a query and start a search
QueryParams query = new QueryParams(querystring, count, null, this, textRankingDefault, "DocumentIndex");
ReferenceOrder order = new ReferenceOrder(query.ranking, query.targetlang);
ReferenceOrder order = new ReferenceOrder(query.ranking, UTF8.getBytes(query.targetlang));
RankingProcess rankedCache = new RankingProcess(query, order, SearchEvent.max_results_preparation);
rankedCache.start();

@ -283,7 +283,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
final Row.Entry entry = urlIndexFile.get(urlHashBytes);
// getting the wrong url string
oldUrlStr = entry.getColString(1, null).trim();
oldUrlStr = entry.getColString(1).trim();
int pos = -1;
if ((pos = oldUrlStr.indexOf("://")) != -1) {
@ -291,18 +291,6 @@ public final class MetadataRepository implements Iterable<byte[]> {
final String newUrlStr = "http://" + oldUrlStr.substring(pos + 3);
final DigestURI newUrl = new DigestURI(newUrlStr);
// doing a http head request to test if the url is correct
// final Client client = new Client(10000);
// ResponseContainer res = null;
// try {
// res = client.HEAD(newUrl.toString());
// } finally {
// if(res != null) {
// // release connection
// res.closeStream();
// }
// }
if (client.HEADResponse(newUrl.toString()) != null
&& client.getHttpResponse().getStatusLine().getStatusCode() == 200) {
entry.setCol(1, newUrl.toString().getBytes());

@ -45,6 +45,7 @@ import net.yacy.kelondro.data.word.WordReferenceVars;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Bitfield;
import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.util.ByteBuffer;
public class ReferenceOrder {
@ -55,9 +56,9 @@ public class ReferenceOrder {
private WordReferenceVars min, max;
private final DynamicScore<String> doms; // collected for "authority" heuristic
private final RankingProfile ranking;
private final String language;
private final byte[] language;
public ReferenceOrder(final RankingProfile profile, String language) {
public ReferenceOrder(final RankingProfile profile, byte[] language) {
this.min = null;
this.max = null;
this.ranking = profile;
@ -234,7 +235,7 @@ public class ReferenceOrder {
+ ((flags.get(Condenser.flag_cat_hasaudio)) ? 255 << ranking.coeff_cathasaudio : 0)
+ ((flags.get(Condenser.flag_cat_hasvideo)) ? 255 << ranking.coeff_cathasvideo : 0)
+ ((flags.get(Condenser.flag_cat_hasapp)) ? 255 << ranking.coeff_cathasapp : 0)
+ ((patchUK(t.language).equals(this.language)) ? 255 << ranking.coeff_language : 0)
+ ((ByteBuffer.equals(t.language, this.language)) ? 255 << ranking.coeff_language : 0)
+ ((DigestURI.probablyRootURL(t.metadataHash())) ? 15 << ranking.coeff_urllength : 0);
//if (searchWords != null) r += (yacyURL.probablyWordURL(t.urlHash(), searchWords) != null) ? 256 << ranking.coeff_appurl : 0;
@ -242,8 +243,4 @@ public class ReferenceOrder {
return r; // the higher the number the better the ranking.
}
private static final String patchUK(String l) {
// this is to patch a bad language name setting that was used in 0.60 and before
if (l == null || l.equals("uk")) return "en"; else return l;
}
}

@ -108,7 +108,7 @@ public final class SearchEvent {
this.IAmaxcounthash = null;
this.IAneardhthash = null;
this.localSearchThread = null;
this.order = new ReferenceOrder(query.ranking, query.targetlang);
this.order = new ReferenceOrder(query.ranking, UTF8.getBytes(query.targetlang));
boolean remote = (query.domType == QueryParams.SEARCHDOM_GLOBALDHT) || (query.domType == QueryParams.SEARCHDOM_CLUSTERALL);
if (remote && peers.sizeConnected() == 0) remote = false;
final long start = System.currentTimeMillis();

@ -129,6 +129,7 @@ public class Segments implements Iterable<Segment> {
}
public Segment segment(final String segmentName) {
if (segments == null) return null;
Segment segment = segments.get(segmentName);
if (segment == null) {
// generate the segment

@ -164,11 +164,11 @@ public class yacyNewsDB {
private Record b2r(final Row.Entry b) {
if (b == null) return null;
return new yacyNewsDB.Record(
b.getColString(0, null),
b.getColString(1, "UTF-8"),
(b.empty(2)) ? null : my_SHORT_SECOND_FORMATTER.parse(b.getColString(2, null), GenericFormatter.UTCDiffString()),
b.getColString(0),
b.getColString(1),
(b.empty(2)) ? null : my_SHORT_SECOND_FORMATTER.parse(b.getColString(2), GenericFormatter.UTCDiffString()),
(int) b.getColLong(3),
MapTools.string2map(b.getColString(4, "UTF-8"), ",")
MapTools.string2map(b.getColString(4), ",")
);
}

@ -158,7 +158,7 @@ public class yacyNewsQueue {
yacyNewsDB.Record b2r(final Row.Entry b) throws IOException {
if (b == null) return null;
final String id = b.getColString(0, null);
final String id = b.getColString(0);
//Date touched = yacyCore.parseUniversalDate(UTF8.String(b[1]));
return newsDB.get(id);
}

@ -71,4 +71,9 @@ public class UTF8 {
return new String(bytes, offset, length, charset);
}
/**
 * Encodes a string into a byte array using this class's {@code charset}.
 *
 * @param s the string to encode; may be {@code null}
 * @return the encoded bytes, or {@code null} when {@code s} is {@code null}
 */
public final static byte[] getBytes(final String s) {
    // null is passed through unchanged so callers can hand in optional values
    return (s == null) ? null : s.getBytes(charset);
}
}

@ -400,7 +400,7 @@ public class dbtest {
Row.Entry row;
while (i.hasNext()) {
row = i.next();
for (int j = 0; j < row.columns(); j++) System.out.print(row.getColString(j, null) + ",");
for (int j = 0; j < row.columns(); j++) System.out.print(row.getColString(j) + ",");
System.out.println();
}
}

@ -28,7 +28,6 @@ import java.io.ByteArrayInputStream;
import java.io.CharArrayReader;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.net.MalformedURLException;
import java.nio.charset.Charset;
@ -346,13 +345,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
}
public byte[] getText() {
return this.getText("UTF-8");
}
public byte[] getText(final String charSet) {
try {
return content.getBytes(charSet);
} catch (final UnsupportedEncodingException e) {
return content.getBytes();
} catch (final OutOfMemoryError e) {
Log.logException(e);

@ -112,7 +112,7 @@ public class Compressor implements BLOB {
try {
while ((entry = writeQueue.take()) != poisonWorkerEntry) {
try {
Compressor.this.backend.insert(entry.getKey().getBytes(), compress(entry.getValue()));
Compressor.this.backend.insert(UTF8.getBytes(entry.getKey()), compress(entry.getValue()));
} catch (IOException e) {
Log.logException(e);
buffer.put(entry.getKey(), entry.getValue());

@ -312,7 +312,7 @@ public class MapDataMining extends MapHeap {
public byte[] next() {
final String r = s.next();
if (r == null) return null;
return r.getBytes();
return UTF8.getBytes(r);
}
public void remove() {

@ -147,7 +147,7 @@ public class MapHeap implements Map<byte[], Map<String, String>> {
key = normalizeKey(key);
String s = map2string(newMap, "W" + my_SHORT_SECOND_FORMATTER.format() + " ");
assert s != null;
byte[] sb = s.getBytes();
byte[] sb = UTF8.getBytes(s);
if (cache == null) {
// write entry
if (blob != null) blob.insert(key, sb);
@ -243,7 +243,7 @@ public class MapHeap implements Map<byte[], Map<String, String>> {
if (key == null) return null;
try {
if (key instanceof byte[]) return get((byte[]) key);
if (key instanceof String) return get(((String) key).getBytes());
if (key instanceof String) return get(UTF8.getBytes((String) key));
} catch (IOException e) {
Log.logException(e);
} catch (RowSpaceExceededException e) {

@ -56,6 +56,7 @@
package net.yacy.kelondro.blob;
import net.yacy.cora.document.UTF8;
import net.yacy.kelondro.order.NaturalOrder;
public class ObjectBuffer {
@ -123,12 +124,12 @@ public class ObjectBuffer {
public void put(final String key, final Object value) {
if ((key == null) || (value == null)) return;
synchronized(this) {
if (NaturalOrder.naturalOrder.equal(this.key, key.getBytes())){
if (NaturalOrder.naturalOrder.equal(this.key, UTF8.getBytes(key))){
this.writeDouble++;
} else {
this.writeUnique++;
}
this.key = key.getBytes();
this.key = UTF8.getBytes(key);
this.value = value;
}
}
@ -149,7 +150,7 @@ public class ObjectBuffer {
public Object get(final String key) {
if (key == null) return null;
synchronized(this) {
if (NaturalOrder.naturalOrder.equal(this.key, key.getBytes())){
if (NaturalOrder.naturalOrder.equal(this.key, UTF8.getBytes(key))){
this.readHit++;
return this.value;
} else {
@ -172,7 +173,7 @@ public class ObjectBuffer {
public void remove(final String key) {
if (key == null) return;
synchronized(this) {
if (NaturalOrder.naturalOrder.equal(this.key, key.getBytes())){
if (NaturalOrder.naturalOrder.equal(this.key, UTF8.getBytes(key))){
this.key = null;
this.value = null;
}

@ -140,10 +140,10 @@ public class Tables {
}
private byte[] ukey(String tablename) throws IOException, RowSpaceExceededException {
Row row = select(system_table_pkcounter, tablename.getBytes());
Row row = select(system_table_pkcounter, UTF8.getBytes(tablename));
if (row == null) {
// table counter entry in pkcounter table does not exist: make a new table entry
row = new Row(tablename.getBytes(), system_table_pkcounter_counterName, int2key(0).getBytes());
row = new Row(UTF8.getBytes(tablename), system_table_pkcounter_counterName, UTF8.getBytes(int2key(0)));
update(system_table_pkcounter, row);
}
byte[] pk = row.get(system_table_pkcounter_counterName);
@ -154,7 +154,7 @@ public class Tables {
pki = (int) (ByteArray.parseDecimal(pk) + 1);
}
while (true) {
pk = int2key(pki).getBytes();
pk = UTF8.getBytes(int2key(pki));
if (!has(tablename, pk)) break;
pki++;
}
@ -181,7 +181,7 @@ public class Tables {
byte[] uk = ukey(tablename);
update(tablename, uk, map);
BEncodedHeap heap = getHeap(system_table_pkcounter);
heap.insert(tablename.getBytes(), system_table_pkcounter_counterName, uk);
heap.insert(UTF8.getBytes(tablename), system_table_pkcounter_counterName, uk);
return uk;
}
@ -400,19 +400,19 @@ public class Tables {
}
public void put(String colname, String value) {
super.put(colname, value.getBytes());
super.put(colname, UTF8.getBytes(value));
}
public void put(String colname, int value) {
super.put(colname, Integer.toString(value).getBytes());
super.put(colname, UTF8.getBytes(Integer.toString(value)));
}
public void put(String colname, long value) {
super.put(colname, Long.toString(value).getBytes());
super.put(colname, UTF8.getBytes(Long.toString(value)));
}
public void put(String colname, Date value) {
super.put(colname, my_SHORT_MILSEC_FORMATTER.format(value).getBytes());
super.put(colname, UTF8.getBytes(my_SHORT_MILSEC_FORMATTER.format(value)));
}
public byte[] get(String colname, byte[] dflt) {

@ -28,6 +28,7 @@ package net.yacy.kelondro.data.citation;
import java.util.Collection;
import net.yacy.cora.document.UTF8;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.Column;
import net.yacy.kelondro.index.Row;
@ -86,7 +87,7 @@ public final class CitationReferenceRow implements Reference /*, Cloneable*/ {
this.entry = citationRow.newEntry();
final int mddlm = MicroDate.microDateDays(lastmodified);
final int mddct = MicroDate.microDateDays(updatetime);
this.entry.setCol(col_urlhash, urlHash, null);
this.entry.setCol(col_urlhash, urlHash);
this.entry.setCol(col_lastModified, mddlm);
this.entry.setCol(col_lastAccessed, mddct);
this.entry.setCol(col_posintext, posintext);
@ -100,7 +101,7 @@ public final class CitationReferenceRow implements Reference /*, Cloneable*/ {
public CitationReferenceRow(final String urlHash, final String code) {
// the code is the external form of the row minus the leading urlHash entry
this.entry = citationRow.newEntry((urlHash + code).getBytes());
this.entry = citationRow.newEntry(UTF8.getBytes((urlHash + code)));
}
public CitationReferenceRow(final String external) {

@ -29,6 +29,7 @@ package net.yacy.kelondro.data.image;
import java.util.ArrayList;
import java.util.Collection;
import net.yacy.cora.document.UTF8;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.Column;
import net.yacy.kelondro.index.Row;
@ -163,7 +164,7 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag
this.entry = urlEntryRow.newEntry();
final int mddlm = MicroDate.microDateDays(lastmodified);
final int mddct = MicroDate.microDateDays(updatetime);
this.entry.setCol(col_urlhash, urlHash, null);
this.entry.setCol(col_urlhash, urlHash);
this.entry.setCol(col_lastModified, mddlm);
this.entry.setCol(col_freshUntil, Math.max(0, mddlm + (mddct - mddlm) * 2)); // TTL computation
this.entry.setCol(col_doctype, new byte[]{(byte) doctype});
@ -175,7 +176,7 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag
public ImageReferenceRow(final String urlHash, final String code) {
// the code is the external form of the row minus the leading urlHash entry
this.entry = urlEntryRow.newEntry((urlHash + code).getBytes());
this.entry = urlEntryRow.newEntry(UTF8.getBytes((urlHash + code)));
}
public ImageReferenceRow(final String external) {

@ -220,7 +220,7 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
assert hashs.length() == 12;
// return result hash
byte[] b = hashs.toString().getBytes();
byte[] b = UTF8.getBytes(hashs.toString());
assert b.length == 12;
return b;
}

@ -26,7 +26,6 @@
package net.yacy.kelondro.data.meta;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.text.ParseException;
import java.util.Date;
@ -145,21 +144,13 @@ public class URIMetadataRow implements URIMetadata {
encodeDate(col_mod, mod);
encodeDate(col_load, load);
encodeDate(col_fresh, fresh);
try {
this.entry.setCol(col_referrer, (referrer == null) ? null : referrer.getBytes("UTF-8"));
} catch (UnsupportedEncodingException e) {
this.entry.setCol(col_referrer, (referrer == null) ? null : referrer.getBytes());
}
this.entry.setCol(col_referrer, (referrer == null) ? null : UTF8.getBytes(referrer));
this.entry.setCol(col_md5, md5);
this.entry.setCol(col_size, size);
this.entry.setCol(col_wc, wc);
this.entry.setCol(col_dt, new byte[]{(byte) dt});
this.entry.setCol(col_flags, flags.bytes());
try {
this.entry.setCol(col_lang, lang.getBytes("UTF-8"));
} catch (UnsupportedEncodingException e) {
this.entry.setCol(col_lang, lang.getBytes());
}
this.entry.setCol(col_lang, UTF8.getBytes(lang));
this.entry.setCol(col_llocal, llocal);
this.entry.setCol(col_lother, lother);
this.entry.setCol(col_limage, limage);
@ -195,11 +186,7 @@ public class URIMetadataRow implements URIMetadata {
s.append(dc_creator).append(10);
s.append(dc_subject).append(10);
s.append(dc_publisher).append(10);
try {
return s.toString().getBytes("UTF-8");
} catch (UnsupportedEncodingException e) {
return s.toString().getBytes();
}
return UTF8.getBytes(s.toString());
}
public URIMetadataRow(final Row.Entry entry, final WordReferenceVars searchedWord, final long ranking) {
@ -216,7 +203,7 @@ public class URIMetadataRow implements URIMetadata {
//System.out.println("DEBUG-ENTRY: prop=" + prop.toString());
DigestURI url;
try {
url = new DigestURI(crypt.simpleDecode(prop.getProperty("url", ""), null), prop.getProperty("hash").getBytes());
url = new DigestURI(crypt.simpleDecode(prop.getProperty("url", ""), null), UTF8.getBytes(prop.getProperty("hash")));
} catch (final MalformedURLException e) {
url = null;
}
@ -247,11 +234,7 @@ public class URIMetadataRow implements URIMetadata {
} catch (final ParseException e) {
encodeDate(col_fresh, new Date());
}
try {
this.entry.setCol(col_referrer, prop.getProperty("referrer", "").getBytes("UTF-8"));
} catch (UnsupportedEncodingException e1) {
this.entry.setCol(col_referrer, prop.getProperty("referrer", "").getBytes());
}
this.entry.setCol(col_referrer, UTF8.getBytes(prop.getProperty("referrer", "")));
this.entry.setCol(col_md5, Digest.decodeHex(prop.getProperty("md5", "")));
this.entry.setCol(col_size, Integer.parseInt(prop.getProperty("size", "0")));
this.entry.setCol(col_wc, Integer.parseInt(prop.getProperty("wc", "0")));
@ -259,11 +242,7 @@ public class URIMetadataRow implements URIMetadata {
this.entry.setCol(col_dt, dt.length() > 0 ? new byte[]{(byte) dt.charAt(0)} : new byte[]{(byte) 't'});
final String flags = prop.getProperty("flags", "AAAAAA");
this.entry.setCol(col_flags, (flags.length() > 6) ? QueryParams.empty_constraint.bytes() : (new Bitfield(4, flags)).bytes());
try {
this.entry.setCol(col_lang, prop.getProperty("lang", "uk").getBytes("UTF-8"));
} catch (UnsupportedEncodingException e) {
this.entry.setCol(col_lang, prop.getProperty("lang", "uk").getBytes());
}
this.entry.setCol(col_lang, UTF8.getBytes(prop.getProperty("lang", "uk")));
this.entry.setCol(col_llocal, Integer.parseInt(prop.getProperty("llocal", "0")));
this.entry.setCol(col_lother, Integer.parseInt(prop.getProperty("lother", "0")));
this.entry.setCol(col_limage, Integer.parseInt(prop.getProperty("limage", "0")));
@ -431,7 +410,7 @@ public class URIMetadataRow implements URIMetadata {
}
public String language() {
return this.entry.getColString(col_lang, null);
return this.entry.getColString(col_lang);
}
public int size() {
@ -511,7 +490,7 @@ public class URIMetadataRow implements URIMetadata {
public Request toBalancerEntry(final String initiatorHash) {
return new Request(
initiatorHash.getBytes(),
UTF8.getBytes(initiatorHash),
metadata().url(),
referrerHash(),
metadata().dc_title(),

@ -89,7 +89,7 @@ public final class NavigationReferenceRow extends AbstractReference implements N
assert (termhash.length == 12) : "termhash = " + UTF8.String(termhash);
assert (refhash.length == 12) : "refhash = " + UTF8.String(refhash);
this.entry = navEntryRow.newEntry();
this.entry.setCol(col_navhash, UTF8.String(termhash) + UTF8.String(refhash), null);
this.entry.setCol(col_navhash, UTF8.String(termhash) + UTF8.String(refhash));
this.entry.setCol(col_count, count);
this.entry.setCol(col_pos, pos);
this.entry.setCol(col_flags, flags);
@ -119,15 +119,15 @@ public final class NavigationReferenceRow extends AbstractReference implements N
}
public String navigationHash() {
return this.entry.getColString(col_navhash, null);
return this.entry.getColString(col_navhash);
}
public byte[] metadataHash() {
return navigationHash().substring(12).getBytes();
return UTF8.getBytes(navigationHash().substring(12));
}
public byte[] termHash() {
return navigationHash().substring(0, 12).getBytes();
return UTF8.getBytes(navigationHash().substring(0, 12));
}
public int hitcount() {

@ -43,7 +43,7 @@ public interface WordReference extends Reference {
public int phrasesintext();
public String getLanguage();
public byte[] getLanguage();
public char getType();

@ -129,7 +129,7 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
final int posofphrase, // number of the phrase where word appears
final long lastmodified, // last-modified time of the document where word appears
final long updatetime, // update time; this is needed to compute a TTL for the word, so it can be removed easily if the TTL is short
final String language, // (guessed) language of document
final byte[] language, // (guessed) language of document
final char doctype, // type of document
final int outlinksSame, // outlinks to same domain
final int outlinksOther, // outlinks to other domain
@ -147,7 +147,7 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
this.entry.setCol(col_wordsInText, wordcount);
this.entry.setCol(col_phrasesInText, phrasecount);
this.entry.setCol(col_doctype, new byte[]{(byte) doctype});
this.entry.setCol(col_language, ((language == null) || (language.length() != urlEntryRow.width(col_language))) ? "uk" : language, null);
this.entry.setCol(col_language, (language == null || language.length != urlEntryRow.width(col_language)) ? WordReferenceVars.default_language : language);
this.entry.setCol(col_llocal, outlinksSame);
this.entry.setCol(col_lother, outlinksOther);
this.entry.setCol(col_urlLength, urlLength);
@ -179,14 +179,14 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
this.entry = urlEntryRow.newEntry();
final int mddlm = MicroDate.microDateDays(lastmodified);
final int mddct = MicroDate.microDateDays(updatetime);
this.entry.setCol(col_urlhash, urlHash, null);
this.entry.setCol(col_urlhash, urlHash);
this.entry.setCol(col_lastModified, mddlm);
this.entry.setCol(col_freshUntil, Math.max(0, mddlm + (mddct - mddlm) * 2)); // TTL computation
this.entry.setCol(col_wordsInTitle, titleLength / 6); // word count estimation; TODO: change value handover to number of words
this.entry.setCol(col_wordsInText, wordcount);
this.entry.setCol(col_phrasesInText, phrasecount);
this.entry.setCol(col_doctype, new byte[]{(byte) doctype});
this.entry.setCol(col_language, ((language == null) || (language.length() != urlEntryRow.width(col_language))) ? "uk" : language, null);
this.entry.setCol(col_language, ((language == null) || (language.length() != urlEntryRow.width(col_language))) ? "uk" : language);
this.entry.setCol(col_llocal, outlinksSame);
this.entry.setCol(col_lother, outlinksOther);
this.entry.setCol(col_urlLength, urlLength);
@ -206,7 +206,7 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
public WordReferenceRow(final String urlHash, final String code) {
// the code is the external form of the row minus the leading urlHash entry
this.entry = urlEntryRow.newEntry((urlHash + code).getBytes());
this.entry = urlEntryRow.newEntry(UTF8.getBytes((urlHash + code)));
}
public WordReferenceRow(final String external) {
@ -286,8 +286,8 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
return (int) this.entry.getColLong(col_phrasesInText);
}
public String getLanguage() {
return this.entry.getColString(col_language, null);
public byte[] getLanguage() {
return this.entry.getColBytes(col_language, true);
}
public char getType() {

@ -33,6 +33,7 @@ import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.Semaphore;
import net.yacy.cora.document.UTF8;
import net.yacy.kelondro.index.Row.Entry;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.Bitfield;
@ -51,10 +52,11 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
*/
public static final WordReferenceVars poison = new WordReferenceVars();
private static int cores = Runtime.getRuntime().availableProcessors();
public static final byte[] default_language = UTF8.getBytes("uk");
public Bitfield flags;
public long lastModified;
public String language;
public byte[] language;
public byte[] urlHash;
public char type;
public int hitcount, llocal, lother, phrasesintext,
@ -77,14 +79,14 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
final int posofphrase, // number of the phrase where word appears
final long lastmodified, // last-modified time of the document where word appears
final long updatetime, // update time; this is needed to compute a TTL for the word, so it can be removed easily if the TTL is short
String language, // (guessed) language of document
byte[] language, // (guessed) language of document
final char doctype, // type of document
final int outlinksSame, // outlinks to same domain
final int outlinksOther, // outlinks to other domain
final Bitfield flags, // attributes to the url and to the word according the url
final double termfrequency
) {
if ((language == null) || (language.length() != 2)) language = "uk";
if (language == null || language.length != 2) language = default_language;
final int mddlm = MicroDate.microDateDays(lastmodified);
//final int mddct = MicroDate.microDateDays(updatetime);
this.flags = flags;
@ -199,7 +201,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
return freshUntil;
}
*/
public String getLanguage() {
public byte[] getLanguage() {
return language;
}

@ -49,7 +49,7 @@ public class IndexTest {
// so we construct a generic Hash using two long values
final String s = (Base64Order.enhancedCoder.encodeLongSB(Math.abs(r0), 6).toString() +
Base64Order.enhancedCoder.encodeLongSB(Math.abs(r1), 6).toString());
return s.getBytes();
return UTF8.getBytes(s);
}
public static byte[] randomHash(final Random r) {

@ -27,7 +27,6 @@
package net.yacy.kelondro.index;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
@ -313,7 +312,7 @@ public final class Row {
} else if ((decimalCardinal) && (row[i].celltype == Column.celltype_bitfield)) {
setCol(nick, (new Bitfield(row[i].cellwidth, elts[i].substring(p + 1).trim())).bytes());
} else {
setCol(nick, elts[i].substring(p + 1).trim().getBytes());
setCol(nick, UTF8.getBytes(elts[i].substring(p + 1).trim()));
}
}
}
@ -430,18 +429,7 @@ public final class Row {
}
public final void setCol(final int column, final String cell) {
setCol(column, cell.getBytes());
}
public final void setCol(final int column, final String cell, final String encoding) {
if (encoding == null)
setCol(column, cell.getBytes());
else
try {
setCol(column, (cell == null) ? null : cell.getBytes(encoding));
} catch (final UnsupportedEncodingException e) {
Log.logSevere("Row", "", e);
}
setCol(column, UTF8.getBytes(cell));
}
@Deprecated
@ -491,23 +479,17 @@ public final class Row {
throw new kelondroException("ROW", "addCol did not find appropriate encoding");
}
public final String getColString(final int column, final String encoding) {
return getColString(colstart[column], row[column].cellwidth, encoding);
public final String getColString(final int column) {
return getColString(colstart[column], row[column].cellwidth);
}
private final String getColString(final int clstrt, int length, final String encoding) {
private final String getColString(final int clstrt, int length) {
if (rowinstance[offset + clstrt] == 0) return null;
assert length <= rowinstance.length - offset - clstrt;
if (length > rowinstance.length - offset - clstrt) length = rowinstance.length - offset - clstrt;
while ((length > 0) && (rowinstance[offset + clstrt + length - 1] == 0)) length--;
if (length == 0) return null;
try {
if ((encoding == null) || (encoding.length() == 0))
return UTF8.String(rowinstance, offset + clstrt, length);
return new String(rowinstance, offset + clstrt, length, encoding);
} catch (final UnsupportedEncodingException e) {
return "";
}
return UTF8.String(rowinstance, offset + clstrt, length);
}
public final long getColLong(final int column) {

@ -132,7 +132,7 @@ public class RowCollection implements Iterable<Row.Entry>, Cloneable {
this.chunkcount = chunkcachelength / rowdef.objectsize; // patch problem
}
this.lastTimeWrote = (exportedCollection.getColLong(exp_last_wrote) + 10957) * day;
final String sortOrderKey = exportedCollection.getColString(exp_order_type, null);
final String sortOrderKey = exportedCollection.getColString(exp_order_type);
ByteOrder oldOrder = null;
if ((sortOrderKey == null) || (sortOrderKey.equals("__"))) {
oldOrder = null;
@ -241,7 +241,7 @@ public class RowCollection implements Iterable<Row.Entry>, Cloneable {
entry.setCol(exp_chunkcount, this.chunkcount);
entry.setCol(exp_last_read, daysSince2000(System.currentTimeMillis()));
entry.setCol(exp_last_wrote, daysSince2000(this.lastTimeWrote));
entry.setCol(exp_order_type, (this.rowdef.objectOrder == null) ? "__".getBytes() :this.rowdef.objectOrder.signature().getBytes());
entry.setCol(exp_order_type, (this.rowdef.objectOrder == null) ? UTF8.getBytes("__") : UTF8.getBytes(this.rowdef.objectOrder.signature()));
entry.setCol(exp_order_bound, this.sortBound);
entry.setCol(exp_collection, this.chunkcache);
return entry.bytes();

@ -660,8 +660,9 @@ public class RowSet extends RowCollection implements Index, Iterable<Row.Entry>
/**
 * Builds a hash from two long values and returns it as bytes.
 * Each value goes through Math.abs and Base64Order.enhancedCoder.encodeLongSB(.., 11);
 * of each encoding only the part starting at index 5 is kept, and the two
 * remainders are concatenated.
 * NOTE(review): Math.abs(Long.MIN_VALUE) is still negative -- confirm that
 * encodeLongSB tolerates a negative argument.
 *
 * @param r0 source value for the first half of the hash
 * @param r1 source value for the second half of the hash
 * @return the bytes of the concatenated string
 */
public static byte[] randomHash(final long r0, final long r1) {
// a long can have 64 bit, but a 12-byte hash can have 6 * 12 = 72 bits
// so we construct a generic Hash using two long values
return (Base64Order.enhancedCoder.encodeLongSB(Math.abs(r0), 11).substring(5) +
Base64Order.enhancedCoder.encodeLongSB(Math.abs(r1), 11).substring(5)).getBytes();
return UTF8.getBytes(
Base64Order.enhancedCoder.encodeLongSB(Math.abs(r0), 11).substring(5) +
Base64Order.enhancedCoder.encodeLongSB(Math.abs(r1), 11).substring(5));
}
public static byte[] randomHash(final Random r) {
return randomHash(r.nextLong(), r.nextLong());

@ -32,6 +32,7 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import net.yacy.cora.document.UTF8;
import net.yacy.kelondro.util.ByteBuffer;
@ -87,7 +88,7 @@ public abstract class AbstractWriter extends AbstractReader implements Writer {
/**
 * Writes one line followed by the cr and lf terminator bytes in a single write call.
 * The buffer is sized from the encoded byte count rather than line.length():
 * a non-ASCII character encodes to more than one UTF-8 byte, so sizing by
 * characters would cut off the end of the line.
 *
 * @param line text to write, without line terminator
 * @throws IOException if the underlying write fails
 */
public final void writeLine(final String line) throws IOException {
    // encode once and let the byte array, not the char count, drive all sizes
    final byte[] encoded = UTF8.getBytes(line);
    final byte[] b = new byte[encoded.length + 2];
    System.arraycopy(encoded, 0, b, 0, encoded.length);
    b[b.length - 2] = cr;
    b[b.length - 1] = lf;
    this.write(b);

@ -28,10 +28,11 @@ import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.Properties;
import net.yacy.cora.document.UTF8;
public final class CharBuffer extends Writer {
public static final char singlequote = '\'';
@ -271,16 +272,8 @@ public final class CharBuffer extends Writer {
return tmp;
}
/**
 * Encodes the characters buffer[offset .. offset+length) with the named charset.
 *
 * @param charset name of the charset to encode with
 * @return the encoded bytes
 * @throws UnsupportedEncodingException if the charset name is not supported
 */
public byte[] getBytes(String charset) throws UnsupportedEncodingException {
return new String(buffer, offset, length).getBytes(charset);
}
/**
 * Encodes the characters buffer[offset .. offset+length) as UTF-8 bytes.
 *
 * @return the encoded bytes
 */
public byte[] getBytes() {
try {
return getBytes("UTF-8");
} catch (UnsupportedEncodingException e) {
// this path hands null to the caller when the charset lookup fails
return null;
}
return UTF8.getBytes(new String(buffer, offset, length));
}
public CharBuffer trim(final int start) {

@ -32,7 +32,6 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.concurrent.ArrayBlockingQueue;
@ -44,6 +43,7 @@ import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import net.yacy.cora.document.UTF8;
import net.yacy.kelondro.logging.Log;
@ -128,11 +128,7 @@ public class Digest {
fromPool = false;
}
byte[] keyBytes;
try {
keyBytes = key.getBytes("UTF-8");
} catch (UnsupportedEncodingException e) {
keyBytes = key.getBytes();
}
keyBytes = UTF8.getBytes(key);
digest.update(keyBytes);
byte[] result = digest.digest();
digest.reset();

@ -29,6 +29,8 @@ package net.yacy.kelondro.order;
import java.util.Comparator;
import net.yacy.cora.document.UTF8;
public class StringOrder implements Comparator<String> {
public ByteOrder baseOrder;
@ -41,6 +43,6 @@ public class StringOrder implements Comparator<String> {
}
/**
 * Orders two strings by converting each to bytes and handing the byte arrays
 * to baseOrder.
 *
 * @param s1 first string
 * @param s2 second string
 * @return the result of baseOrder.compare on the two byte arrays
 */
public final int compare(final String s1, final String s2) {
return baseOrder.compare(s1.getBytes(), s2.getBytes());
return baseOrder.compare(UTF8.getBytes(s1), UTF8.getBytes(s2));
}
}

@ -137,7 +137,7 @@ public class Relations {
}
/**
 * String convenience form of putRelation: converts key and value to bytes,
 * delegates to the byte[] overload and converts its result back to a String.
 *
 * @param name  passed through unchanged to the byte[] overload
 * @param key   key text, converted to bytes before delegating
 * @param value value text, converted to bytes before delegating
 * @return the delegate's result decoded with UTF8.String, or null if the delegate returned null
 */
public String putRelation(final String name, final String key, final String value) throws IOException, RowSpaceExceededException {
final byte[] r = putRelation(name, key.getBytes(), value.getBytes());
final byte[] r = putRelation(name, UTF8.getBytes(key), UTF8.getBytes(value));
if (r == null) return null;
return UTF8.String(r);
}
@ -156,7 +156,7 @@ public class Relations {
}
/**
 * String convenience form of getRelation: converts the key to bytes,
 * delegates to the byte[] overload and converts its result back to a String.
 *
 * @param name passed through unchanged to the byte[] overload
 * @param key  key text, converted to bytes before delegating
 * @return the delegate's result decoded with UTF8.String, or null if the delegate returned null
 */
public String getRelation(final String name, final String key) throws IOException, RowSpaceExceededException {
final byte[] r = getRelation(name, key.getBytes());
final byte[] r = getRelation(name, UTF8.getBytes(key));
if (r == null) return null;
return UTF8.String(r);
}

@ -205,7 +205,7 @@ public class SQLTable implements Index, Iterable<Row.Entry> {
final PreparedStatement sqlStatement = this.theDBConnection.prepareStatement(sqlQuery);
sqlStatement.setString(1, row.getColString(0, null));
sqlStatement.setString(1, row.getColString(0));
sqlStatement.setBytes(2, row.bytes());
sqlStatement.execute();
@ -226,7 +226,7 @@ public class SQLTable implements Index, Iterable<Row.Entry> {
final PreparedStatement sqlStatement = this.theDBConnection.prepareStatement(sqlQuery);
sqlStatement.setString(1, row.getColString(0, null));
sqlStatement.setString(1, row.getColString(0));
sqlStatement.setBytes(2, row.bytes());
sqlStatement.execute();

@ -116,19 +116,19 @@ public class BDecoder {
return UTF8.String(this.b);
}
/**
 * Serializes this object's byte array to the stream in three parts: the
 * decimal text of this.b.length, the _p marker, then the bytes of this.b.
 *
 * @param os stream to write to
 * @throws IOException if a write fails
 */
public void toStream(OutputStream os) throws IOException {
os.write(Integer.toString(this.b.length).getBytes());
os.write(UTF8.getBytes(Integer.toString(this.b.length)));
os.write(_p);
os.write(this.b);
}
/**
 * Serializes a byte array to the stream in three parts: the decimal text of
 * b.length, the _p marker, then the bytes of b.
 *
 * @param os stream to write to
 * @param b  payload bytes
 * @throws IOException if a write fails
 */
public static void toStream(OutputStream os, byte[] b) throws IOException {
os.write(Integer.toString(b.length).getBytes());
os.write(UTF8.getBytes(Integer.toString(b.length)));
os.write(_p);
os.write(b);
}
/**
 * Serializes a string to the stream in three parts: the decimal length of
 * the encoded payload, the _p marker, then the UTF-8 bytes of the string.
 * The length prefix counts encoded bytes, matching the byte[] overload;
 * s.length() counts chars and is too small as soon as s contains a
 * non-ASCII character, which would make the output unreadable for a decoder.
 *
 * @param os stream to write to
 * @param s  text payload
 * @throws IOException if a write fails
 */
public static void toStream(OutputStream os, String s) throws IOException {
    // encode first so that the prefix describes exactly the bytes that follow
    final byte[] encoded = UTF8.getBytes(s);
    os.write(UTF8.getBytes(Integer.toString(encoded.length)));
    os.write(_p);
    os.write(encoded);
}
}
@ -218,7 +218,7 @@ public class BDecoder {
}
/**
 * Serializes this object's integer value to the stream in three parts: the
 * _i marker, the decimal text of this.i, then the _e marker.
 *
 * @param os stream to write to
 * @throws IOException if a write fails
 */
public void toStream(OutputStream os) throws IOException {
os.write(_i);
os.write(Long.toString(this.i).getBytes());
os.write(UTF8.getBytes(Long.toString(this.i)));
os.write(_e);
}
}

@ -207,7 +207,7 @@ public final class ByteBuffer extends OutputStream {
try {
return append(s.getBytes("UTF-8"));
} catch (UnsupportedEncodingException e) {
return append(s.getBytes());
return append(UTF8.getBytes(s));
}
}
@ -400,14 +400,6 @@ public final class ByteBuffer extends OutputStream {
/**
 * Decodes the bytes buffer[offset .. offset+length) with UTF8.String.
 *
 * @return the decoded text
 */
public String toString() {
return UTF8.String(buffer, offset, length);
}
/**
 * Decodes the result of getBytes() with the named charset.
 *
 * @param charsetName name of the charset to decode with
 * @return the decoded text; decoded with UTF8.String instead when the charset name is not supported
 */
public String toString(final String charsetName) {
try {
return new String(this.getBytes(),charsetName);
} catch (final UnsupportedEncodingException e) {
// unknown charset name: fall back to UTF8.String
return UTF8.String(this.getBytes());
}
}
public String toString(final int left, final int length) {
return UTF8.String(buffer, offset + left, length);
@ -491,7 +483,10 @@ public final class ByteBuffer extends OutputStream {
}
/**
 * Tests whether buffer starts with pattern.
 * Despite the name this is not a strict equality test: buffer may be longer
 * than pattern, and only the first pattern.length bytes are compared.
 *
 * @param buffer bytes to inspect
 * @param pattern bytes expected at the start of buffer
 * @return true if every byte of pattern matches the byte at the same index of buffer
 */
public static boolean equals(final byte[] buffer, final byte[] pattern) {
return equals(buffer, 0, pattern);
// compares two byte arrays: true, if pattern appears completely at the start of buffer
// a buffer shorter than the pattern can never contain it
if (buffer.length < pattern.length) return false;
for (int i = 0; i < pattern.length; i++) if (buffer[i] != pattern[i]) return false;
return true;
}
public static boolean equals(final byte[] buffer, final int offset, final byte[] pattern) {

@ -33,6 +33,7 @@ import java.util.Vector;
import de.anomic.server.serverCore;
import net.yacy.cora.document.UTF8;
import net.yacy.kelondro.logging.Log;
@ -149,7 +150,7 @@ public final class OS {
}
public static void deployScript(final File scriptFile, final String theScript) throws IOException {
FileUtils.copy(theScript.getBytes(), scriptFile);
FileUtils.copy(UTF8.getBytes(theScript), scriptFile);
if(!isWindows){ // set executable
try {
Runtime.getRuntime().exec("chmod 755 " + scriptFile.getAbsolutePath().replaceAll(" ", "\\ ")).waitFor();

@ -41,6 +41,7 @@ import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.ResponseHeader;
@ -114,7 +115,7 @@ public final class LoaderDispatcher {
final boolean global
) {
return new Request(
sb.peers.mySeed().hash.getBytes(),
UTF8.getBytes(sb.peers.mySeed().hash),
url,
null,
"",
@ -185,7 +186,7 @@ public final class LoaderDispatcher {
final String host = url.getHost();
// check if we have the page in the cache
final CrawlProfile crawlProfile = sb.crawler.getActive(request.profileHandle().getBytes());
final CrawlProfile crawlProfile = sb.crawler.getActive(UTF8.getBytes(request.profileHandle()));
if (crawlProfile != null && cacheStrategy != CrawlProfile.CacheStrategy.NOCACHE) {
// we have passed a first test if caching is allowed
// now see if there is a cache entry

Loading…
Cancel
Save