avoid string conversion

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7584 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent 694fa3a2a5
commit dc0db3550e

@ -102,7 +102,7 @@ public class yacydoc {
prop.putXML("dc_date", entry.moddate().toString()); prop.putXML("dc_date", entry.moddate().toString());
prop.putXML("dc_type", String.valueOf(entry.doctype())); prop.putXML("dc_type", String.valueOf(entry.doctype()));
prop.putXML("dc_identifier", metadata.url().toNormalform(false, true)); prop.putXML("dc_identifier", metadata.url().toNormalform(false, true));
prop.putXML("dc_language", entry.language()); prop.putXML("dc_language", UTF8.String(entry.language()));
prop.put("yacy_urlhash", metadata.url().hash()); prop.put("yacy_urlhash", metadata.url().hash());
prop.putXML("yacy_loaddate", entry.loaddate().toString()); prop.putXML("yacy_loaddate", entry.loaddate().toString());

@ -221,7 +221,7 @@ public final class ResultURLs {
public static void main(final String[] args) { public static void main(final String[] args) {
try { try {
final DigestURI url = new DigestURI("http", "www.yacy.net", 80, "/"); final DigestURI url = new DigestURI("http", "www.yacy.net", 80, "/");
final URIMetadataRow urlRef = new URIMetadataRow(url, "YaCy Homepage", "", "", "", new Date(), new Date(), new Date(), "", new byte[] {}, 123, 42, '?', new Bitfield(), "de", 0, 0, 0, 0, 0, 0); final URIMetadataRow urlRef = new URIMetadataRow(url, "YaCy Homepage", "", "", "", new Date(), new Date(), new Date(), "", new byte[] {}, 123, 42, '?', new Bitfield(), "de".getBytes(), 0, 0, 0, 0, 0, 0);
EventOrigin stackNo = EventOrigin.LOCAL_CRAWLING; EventOrigin stackNo = EventOrigin.LOCAL_CRAWLING;
System.out.println("valid test:\n======="); System.out.println("valid test:\n=======");
// add // add

@ -429,7 +429,7 @@ public class YMarkTables {
metadata.put(METADATA.SNIPPET, String.valueOf(urlEntry.snippet())); metadata.put(METADATA.SNIPPET, String.valueOf(urlEntry.snippet()));
metadata.put(METADATA.WORDCOUNT, String.valueOf(urlEntry.wordCount())); metadata.put(METADATA.WORDCOUNT, String.valueOf(urlEntry.wordCount()));
metadata.put(METADATA.MIMETYPE, String.valueOf(urlEntry.doctype())); metadata.put(METADATA.MIMETYPE, String.valueOf(urlEntry.doctype()));
metadata.put(METADATA.LANGUAGE, urlEntry.language()); metadata.put(METADATA.LANGUAGE, UTF8.String(urlEntry.language()));
final URIMetadataRow.Components meta = urlEntry.metadata(); final URIMetadataRow.Components meta = urlEntry.metadata();
if (meta != null) { if (meta != null) {

@ -218,13 +218,13 @@ public class Segment {
Map.Entry<String, Word> wentry; Map.Entry<String, Word> wentry;
String word; String word;
int len = (document == null) ? urlLength : document.dc_title().length(); int len = (document == null) ? urlLength : document.dc_title().length();
WordReferenceRow ientry = new WordReferenceRow(UTF8.String(url.hash()), WordReferenceRow ientry = new WordReferenceRow(url.hash(),
urlLength, urlComps, len, urlLength, urlComps, len,
condenser.RESULT_NUMB_WORDS, condenser.RESULT_NUMB_WORDS,
condenser.RESULT_NUMB_SENTENCES, condenser.RESULT_NUMB_SENTENCES,
urlModified.getTime(), urlModified.getTime(),
System.currentTimeMillis(), System.currentTimeMillis(),
language, UTF8.getBytes(language),
doctype, doctype,
outlinksSame, outlinksOther); outlinksSame, outlinksOther);
Word wprop; Word wprop;
@ -247,10 +247,10 @@ public class Segment {
try { try {
container = ReferenceContainer.emptyContainer(Segment.wordReferenceFactory, wordhash, 1); container = ReferenceContainer.emptyContainer(Segment.wordReferenceFactory, wordhash, 1);
container.add(ientry); container.add(ientry);
rankingProcess.add(container, true, sourceName, -1);
} catch (RowSpaceExceededException e) { } catch (RowSpaceExceededException e) {
continue; continue;
} }
rankingProcess.add(container, true, sourceName, -1);
} }
} }
if (rankingProcess != null) rankingProcess.oneFeederTerminated(); if (rankingProcess != null) rankingProcess.oneFeederTerminated();
@ -339,7 +339,7 @@ public class Segment {
condenser.RESULT_NUMB_WORDS, // word count condenser.RESULT_NUMB_WORDS, // word count
Response.docType(document.dc_format()), // doctype Response.docType(document.dc_format()), // doctype
condenser.RESULT_FLAGS, // flags condenser.RESULT_FLAGS, // flags
language, // language UTF8.getBytes(language), // language
document.inboundLinks(), // inbound links document.inboundLinks(), // inbound links
document.outboundLinks(), // outbound links document.outboundLinks(), // outbound links
document.getAudiolinks().size(), // laudio document.getAudiolinks().size(), // laudio

@ -73,7 +73,7 @@ public final class CitationReferenceRow implements Reference /*, Cloneable*/ {
private final Row.Entry entry; private final Row.Entry entry;
public CitationReferenceRow( public CitationReferenceRow(
final String urlHash, final byte[] urlHash,
final long lastmodified, // last-modified time of the document where word appears final long lastmodified, // last-modified time of the document where word appears
final long updatetime, // update time final long updatetime, // update time
final int posintext, // occurrence of url; counts the url final int posintext, // occurrence of url; counts the url
@ -83,7 +83,7 @@ public final class CitationReferenceRow implements Reference /*, Cloneable*/ {
final int urlComps, // number of path components final int urlComps, // number of path components
final byte typeofurl // outlinks to same domain final byte typeofurl // outlinks to same domain
) { ) {
assert (urlHash.length() == 12) : "urlhash = " + urlHash; assert (urlHash.length == 12) : "urlhash = " + UTF8.String(urlHash);
this.entry = citationRow.newEntry(); this.entry = citationRow.newEntry();
final int mddlm = MicroDate.microDateDays(lastmodified); final int mddlm = MicroDate.microDateDays(lastmodified);
final int mddct = MicroDate.microDateDays(updatetime); final int mddct = MicroDate.microDateDays(updatetime);

@ -147,7 +147,7 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag
this.entry.setCol(col_reserve2, 0); this.entry.setCol(col_reserve2, 0);
} }
public ImageReferenceRow(final String urlHash, public ImageReferenceRow(final byte[] urlHash,
final int urlLength, // byte-length of complete URL final int urlLength, // byte-length of complete URL
final int urlComps, // number of path components final int urlComps, // number of path components
final int titleLength, // length of description/length (longer are better?) final int titleLength, // length of description/length (longer are better?)
@ -160,7 +160,7 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag
final int outlinksSame, // outlinks to same domain final int outlinksSame, // outlinks to same domain
final int outlinksOther // outlinks to other domain final int outlinksOther // outlinks to other domain
) { ) {
assert (urlHash.length() == 12) : "urlhash = " + urlHash; assert (urlHash.length == 12) : "urlhash = " + UTF8.String(urlHash);
this.entry = urlEntryRow.newEntry(); this.entry = urlEntryRow.newEntry();
final int mddlm = MicroDate.microDateDays(lastmodified); final int mddlm = MicroDate.microDateDays(lastmodified);
final int mddct = MicroDate.microDateDays(updatetime); final int mddct = MicroDate.microDateDays(updatetime);

@ -54,7 +54,7 @@ public interface URIMetadata {
public char doctype(); public char doctype();
public String language(); public byte[] language();
public int size(); public int size();

@ -68,7 +68,7 @@ public class URIMetadataRow implements URIMetadata {
"Cardinal wc-3 {b256}, " + // size of file by number of words; for video and audio: seconds "Cardinal wc-3 {b256}, " + // size of file by number of words; for video and audio: seconds
"byte[] dt-1, " + // doctype, taken from extension or any other heuristic "byte[] dt-1, " + // doctype, taken from extension or any other heuristic
"Bitfield flags-4, " + // flags; any stuff (see Word-Entity definition) "Bitfield flags-4, " + // flags; any stuff (see Word-Entity definition)
"String lang-2, " + // language "byte[] lang-2, " + // language
"Cardinal llocal-2 {b256}, " + // # of outlinks to same domain; for video and image: width "Cardinal llocal-2 {b256}, " + // # of outlinks to same domain; for video and image: width
"Cardinal lother-2 {b256}, " + // # of outlinks to outside domain; for video and image: height "Cardinal lother-2 {b256}, " + // # of outlinks to outside domain; for video and image: height
"Cardinal limage-2 {b256}, " + // # of embedded image links "Cardinal limage-2 {b256}, " + // # of embedded image links
@ -130,7 +130,7 @@ public class URIMetadataRow implements URIMetadata {
final int wc, final int wc,
final char dt, final char dt,
final Bitfield flags, final Bitfield flags,
final String lang, final byte[] lang,
final int llocal, final int llocal,
final int lother, final int lother,
final int laudio, final int laudio,
@ -150,7 +150,7 @@ public class URIMetadataRow implements URIMetadata {
this.entry.setCol(col_wc, wc); this.entry.setCol(col_wc, wc);
this.entry.setCol(col_dt, new byte[]{(byte) dt}); this.entry.setCol(col_dt, new byte[]{(byte) dt});
this.entry.setCol(col_flags, flags.bytes()); this.entry.setCol(col_flags, flags.bytes());
this.entry.setCol(col_lang, UTF8.getBytes(lang)); this.entry.setCol(col_lang, lang);
this.entry.setCol(col_llocal, llocal); this.entry.setCol(col_llocal, llocal);
this.entry.setCol(col_lother, lother); this.entry.setCol(col_lother, lother);
this.entry.setCol(col_limage, limage); this.entry.setCol(col_limage, limage);
@ -409,8 +409,8 @@ public class URIMetadataRow implements URIMetadata {
return (char) entry.getColByte(col_dt); return (char) entry.getColByte(col_dt);
} }
public String language() { public byte[] language() {
return this.entry.getColString(col_lang); return this.entry.getColBytes(col_lang, true);
} }
public int size() { public int size() {

@ -162,7 +162,7 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
this.entry.setCol(col_reserve2, 0); this.entry.setCol(col_reserve2, 0);
} }
public WordReferenceRow(final String urlHash, public WordReferenceRow(final byte[] urlHash,
final int urlLength, // byte-length of complete URL final int urlLength, // byte-length of complete URL
final int urlComps, // number of path components final int urlComps, // number of path components
final int titleLength, // length of description/length (longer are better?) final int titleLength, // length of description/length (longer are better?)
@ -170,12 +170,12 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
final int phrasecount, // total number of phrases final int phrasecount, // total number of phrases
final long lastmodified, // last-modified time of the document where word appears final long lastmodified, // last-modified time of the document where word appears
final long updatetime, // update time; this is needed to compute a TTL for the word, so it can be removed easily if the TTL is short final long updatetime, // update time; this is needed to compute a TTL for the word, so it can be removed easily if the TTL is short
final String language, // (guessed) language of document final byte[] language, // (guessed) language of document
final char doctype, // type of document final char doctype, // type of document
final int outlinksSame, // outlinks to same domain final int outlinksSame, // outlinks to same domain
final int outlinksOther // outlinks to other domain final int outlinksOther // outlinks to other domain
) { ) {
assert (urlHash.length() == 12) : "urlhash = " + urlHash; assert (urlHash.length == 12) : "urlhash = " + UTF8.String(urlHash);
this.entry = urlEntryRow.newEntry(); this.entry = urlEntryRow.newEntry();
final int mddlm = MicroDate.microDateDays(lastmodified); final int mddlm = MicroDate.microDateDays(lastmodified);
final int mddct = MicroDate.microDateDays(updatetime); final int mddct = MicroDate.microDateDays(updatetime);
@ -186,7 +186,7 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
this.entry.setCol(col_wordsInText, wordcount); this.entry.setCol(col_wordsInText, wordcount);
this.entry.setCol(col_phrasesInText, phrasecount); this.entry.setCol(col_phrasesInText, phrasecount);
this.entry.setCol(col_doctype, new byte[]{(byte) doctype}); this.entry.setCol(col_doctype, new byte[]{(byte) doctype});
this.entry.setCol(col_language, ((language == null) || (language.length() != urlEntryRow.width(col_language))) ? "uk" : language); this.entry.setCol(col_language, ((language == null) || (language.length != urlEntryRow.width(col_language))) ? WordReferenceVars.default_language : language);
this.entry.setCol(col_llocal, outlinksSame); this.entry.setCol(col_llocal, outlinksSame);
this.entry.setCol(col_lother, outlinksOther); this.entry.setCol(col_lother, outlinksOther);
this.entry.setCol(col_urlLength, urlLength); this.entry.setCol(col_urlLength, urlLength);
@ -204,11 +204,6 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
this.entry.setCol(col_posofphrase, word.numOfPhrase); this.entry.setCol(col_posofphrase, word.numOfPhrase);
} }
public WordReferenceRow(final String urlHash, final String code) {
// the code is the external form of the row minus the leading urlHash entry
this.entry = urlEntryRow.newEntry(UTF8.getBytes((urlHash + code)));
}
public WordReferenceRow(final String external) { public WordReferenceRow(final String external) {
this.entry = urlEntryRow.newEntry(external, true); this.entry = urlEntryRow.newEntry(external, true);
} }

@ -478,10 +478,8 @@ public final class Row {
} }
public final String getColString(final int column) { public final String getColString(final int column) {
return getColString(colstart[column], row[column].cellwidth); final int clstrt = colstart[column];
} int length = row[column].cellwidth;
private final String getColString(final int clstrt, int length) {
if (rowinstance[offset + clstrt] == 0) return null; if (rowinstance[offset + clstrt] == 0) return null;
assert length <= rowinstance.length - offset - clstrt; assert length <= rowinstance.length - offset - clstrt;
if (length > rowinstance.length - offset - clstrt) length = rowinstance.length - offset - clstrt; if (length > rowinstance.length - offset - clstrt) length = rowinstance.length - offset - clstrt;

Loading…
Cancel
Save