Fixed the word position storage problem.

The word position is now stored properly.
It is not used yet, but it can be used later for better ranking.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1378 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent 0371494010
commit 90b940e90e

@ -138,7 +138,7 @@ public final class plasmaWordIndexAssortment {
for (int i = 0; i < assortmentLength; i++) {
entry = (plasmaWordIndexEntry) entries.next();
row[3 + 2 * i] = entry.getUrlHash().getBytes();
row[4 + 2 * i] = entry.toEncodedForm(1).getBytes();
row[4 + 2 * i] = entry.toEncodedForm().getBytes();
}
byte[][] oldrow = null;
try {

@ -153,7 +153,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
row[1] = kelondroRecords.long2bytes(container.size(), 4);
row[2] = kelondroRecords.long2bytes(updateTime, 8);
row[3] = wordEntry.getUrlHash().getBytes();
row[4] = wordEntry.toEncodedForm(1).getBytes();
row[4] = wordEntry.toEncodedForm().getBytes();
dumpArray.set((int) urlcount++, row);
}
}

@ -95,10 +95,10 @@ public final class plasmaWordIndexEntity {
kt = new kelondroTree(theLocation, cacheSize);
} catch (IOException e) {
theLocation.delete();
kt = new kelondroTree(theLocation, cacheSize, plasmaURL.urlHashLength, plasmaWordIndexEntry.attrSpaceShort, false);
kt = new kelondroTree(theLocation, cacheSize, plasmaURL.urlHashLength, plasmaWordIndexEntry.attrSpaceLong, false);
} else {
// create new index file
kt = new kelondroTree(theLocation, cacheSize, plasmaURL.urlHashLength, plasmaWordIndexEntry.attrSpaceShort, false);
kt = new kelondroTree(theLocation, cacheSize, plasmaURL.urlHashLength, plasmaWordIndexEntry.attrSpaceLong, false);
}
return kt; // everyone who get this should close it when finished!
}
@ -167,11 +167,11 @@ public final class plasmaWordIndexEntity {
public boolean addEntry(plasmaWordIndexEntry entry) throws IOException {
if (entry == null) return false;
if (theTmpMap == null) {
return (theIndex.put(entry.getUrlHash().getBytes(), entry.toEncodedForm(0).getBytes()) == null);
} else {
return (theTmpMap.put(entry.getUrlHash(), entry) == null);
}
if (theTmpMap == null) {
return (theIndex.put(entry.getUrlHash().getBytes(), entry.toEncodedForm().getBytes()) == null);
} else {
return (theTmpMap.put(entry.getUrlHash(), entry) == null);
}
}
public int addEntries(plasmaWordIndexEntryContainer container) throws IOException {

@ -67,7 +67,7 @@ public final class plasmaWordIndexEntry {
public static final int urlHashLength = yacySeedDB.commonHashLength; // 12
// the size of the index entry attributes
public static final int attrSpaceShort = 12;
//public static final int attrSpaceShort = 12;
public static final int attrSpaceLong = 18;
// the associated hash
@ -257,10 +257,10 @@ public final class plasmaWordIndexEntry {
this.localflag = pr.getProperty("f", ""+LT_LOCAL).charAt(0);
}
public String toEncodedForm(int outputFormat) {
// attention: this integrates NOT the URL into the encoding
public String toEncodedForm() {
// attention: this integrates NOT the URL hash into the encoding
// if you need a complete dump, use toExternalForm()
StringBuffer buf = new StringBuffer((outputFormat >= 1) ? 18 : 12);
StringBuffer buf = new StringBuffer(attrSpaceLong);
buf.append(kelondroBase64Order.enhancedCoder.encodeLongSmart(this.quality, plasmaURL.urlQualityLength))
.append(kelondroBase64Order.enhancedCoder.encodeLongSmart(plasmaWordIndex.microDateDays(this.lastModified), 3))
@ -269,7 +269,7 @@ public final class plasmaWordIndexEntry {
.append(this.doctype)
.append(this.localflag); // 3 + 3 + 2 + 2 + 1 + 1 = 12 bytes
if (outputFormat >= 1)
buf.append(kelondroBase64Order.enhancedCoder.encodeLongSmart(this.posintext, 2))
.append(kelondroBase64Order.enhancedCoder.encodeLongSmart(this.posinphrase, 2))
.append(kelondroBase64Order.enhancedCoder.encodeLongSmart(this.posofphrase, 2));

Loading…
Cancel
Save