- fixed highslide usage

- some enhancements to index management, better types

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4497 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent 2327451653
commit f4c73d8c68

@ -3,7 +3,7 @@ javacSource=1.5
javacTarget=1.5 javacTarget=1.5
# Release Configuration # Release Configuration
releaseVersion=0.571 releaseVersion=0.572
stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
embReleaseFile=yacy_emb_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz embReleaseFile=yacy_emb_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz

Binary file not shown.

After

Width:  |  Height:  |  Size: 884 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 838 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 854 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 209 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 668 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 673 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 70 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 326 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 326 B

File diff suppressed because it is too large Load Diff

@ -51,7 +51,6 @@ import java.util.Iterator;
import java.util.List; import java.util.List;
import de.anomic.http.httpHeader; import de.anomic.http.httpHeader;
import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexRWIRowEntry; import de.anomic.index.indexRWIRowEntry;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.urlPattern.plasmaURLPattern; import de.anomic.plasma.urlPattern.plasmaURLPattern;
@ -149,7 +148,7 @@ public final class transferRWI {
int p; int p;
String wordHash; String wordHash;
String urlHash; String urlHash;
indexRWIEntry iEntry; indexRWIRowEntry iEntry;
final HashSet<String> unknownURL = new HashSet<String>(); final HashSet<String> unknownURL = new HashSet<String>();
final HashSet<String> knownURL = new HashSet<String>(); final HashSet<String> knownURL = new HashSet<String>();
String[] wordhashes = new String[v.size()]; String[] wordhashes = new String[v.size()];

@ -14,7 +14,7 @@ Released for free under a Creative Commons Attribution 2.5 License
<script type="text/javascript" src="/js/ajax.js"></script> <script type="text/javascript" src="/js/ajax.js"></script>
<script type="text/javascript" src="/js/xml.js"></script> <script type="text/javascript" src="/js/xml.js"></script>
<script type="text/javascript" src="/js/yacysearch.js"></script> <script type="text/javascript" src="/js/yacysearch.js"></script>
<script type="text/javascript" src="/js/highslide.js"></script> <script type="text/javascript" src="/js/highslide/highslide.js"></script>
<link rel="stylesheet" type="text/css" media="screen" href="/env/highslide.css" /> <link rel="stylesheet" type="text/css" media="screen" href="/env/highslide.css" />
<script type="text/javascript">hs.outlineType = 'rounded-white';</script> <script type="text/javascript">hs.outlineType = 'rounded-white';</script>
</head> </head>

@ -7,7 +7,7 @@
<script type="text/javascript" src="/js/ajax.js"></script> <script type="text/javascript" src="/js/ajax.js"></script>
<script type="text/javascript" src="/js/xml.js"></script> <script type="text/javascript" src="/js/xml.js"></script>
<script type="text/javascript" src="/js/yacysearch.js"></script> <script type="text/javascript" src="/js/yacysearch.js"></script>
<script type="text/javascript" src="/js/highslide.js"></script> <script type="text/javascript" src="/js/highslide/highslide.js"></script>
<link rel="stylesheet" type="text/css" media="screen" href="/env/highslide.css" /> <link rel="stylesheet" type="text/css" media="screen" href="/env/highslide.css" />
<script type="text/javascript">hs.outlineType = 'rounded-white';</script> <script type="text/javascript">hs.outlineType = 'rounded-white';</script>
</head> </head>

@ -73,7 +73,7 @@ public class indexContainer extends kelondroRowSet {
return wordHash; return wordHash;
} }
public void add(indexRWIEntry entry) { public void add(indexRWIRowEntry entry) {
// add without double-occurrence test // add without double-occurrence test
assert entry.toKelondroEntry().objectsize() == super.rowdef.objectsize; assert entry.toKelondroEntry().objectsize() == super.rowdef.objectsize;
this.addUnique(entry.toKelondroEntry()); this.addUnique(entry.toKelondroEntry());
@ -81,8 +81,12 @@ public class indexContainer extends kelondroRowSet {
public void add(indexRWIEntry entry, long updateTime) { public void add(indexRWIEntry entry, long updateTime) {
// add without double-occurrence test // add without double-occurrence test
assert entry.toKelondroEntry().objectsize() == super.rowdef.objectsize; if (entry instanceof indexRWIRowEntry) {
this.add(entry); assert ((indexRWIRowEntry) entry).toKelondroEntry().objectsize() == super.rowdef.objectsize;
this.add((indexRWIRowEntry) entry);
} else {
this.add(((indexRWIVarEntry) entry).toRowEntry());
}
this.lastTimeWrote = updateTime; this.lastTimeWrote = updateTime;
} }
@ -101,21 +105,21 @@ public class indexContainer extends kelondroRowSet {
return c; return c;
} }
public indexRWIEntry put(indexRWIEntry entry) { public indexRWIEntry put(indexRWIRowEntry entry) {
assert entry.toKelondroEntry().objectsize() == super.rowdef.objectsize; assert entry.toKelondroEntry().objectsize() == super.rowdef.objectsize;
kelondroRow.Entry r = super.put(entry.toKelondroEntry()); kelondroRow.Entry r = super.put(entry.toKelondroEntry());
if (r == null) return null; if (r == null) return null;
return new indexRWIRowEntry(r); return new indexRWIRowEntry(r);
} }
public boolean putRecent(indexRWIEntry entry) { public boolean putRecent(indexRWIRowEntry entry) {
assert entry.toKelondroEntry().objectsize() == super.rowdef.objectsize; assert entry.toKelondroEntry().objectsize() == super.rowdef.objectsize;
// returns true if the new entry was added, false if it already existed // returns true if the new entry was added, false if it already existed
kelondroRow.Entry oldEntryRow = this.put(entry.toKelondroEntry()); kelondroRow.Entry oldEntryRow = this.put(entry.toKelondroEntry());
if (oldEntryRow == null) { if (oldEntryRow == null) {
return true; return true;
} else { } else {
indexRWIEntry oldEntry = new indexRWIRowEntry(oldEntryRow); indexRWIRowEntry oldEntry = new indexRWIRowEntry(oldEntryRow);
if (entry.isOlder(oldEntry)) { // A more recent Entry is already in this container if (entry.isOlder(oldEntry)) { // A more recent Entry is already in this container
this.put(oldEntry.toKelondroEntry()); // put it back this.put(oldEntry.toKelondroEntry()); // put it back
return false; return false;
@ -134,7 +138,7 @@ public class indexContainer extends kelondroRowSet {
Iterator<indexRWIRowEntry> i = c.entries(); Iterator<indexRWIRowEntry> i = c.entries();
while (i.hasNext()) { while (i.hasNext()) {
try { try {
if (putRecent((indexRWIEntry) i.next())) x++; if (putRecent(i.next())) x++;
} catch (ConcurrentModificationException e) { } catch (ConcurrentModificationException e) {
e.printStackTrace(); e.printStackTrace();
} }
@ -317,16 +321,17 @@ public class indexContainer extends kelondroRowSet {
assert (keylength == large.rowdef.width(0)); assert (keylength == large.rowdef.width(0));
indexContainer conj = new indexContainer(null, small.rowdef, 0); // start with empty search result indexContainer conj = new indexContainer(null, small.rowdef, 0); // start with empty search result
Iterator<indexRWIRowEntry> se = small.entries(); Iterator<indexRWIRowEntry> se = small.entries();
indexRWIEntry ie0, ie1; indexRWIVarEntry ie0;
indexRWIEntry ie1;
while (se.hasNext()) { while (se.hasNext()) {
ie0 = (indexRWIEntry) se.next(); ie0 = new indexRWIVarEntry(se.next());
ie1 = large.get(ie0.urlHash()); ie1 = large.get(ie0.urlHash());
if ((ie0 != null) && (ie1 != null)) { if ((ie0 != null) && (ie1 != null)) {
assert (ie0.urlHash().length() == keylength) : "ie0.urlHash() = " + ie0.urlHash(); assert (ie0.urlHash().length() == keylength) : "ie0.urlHash() = " + ie0.urlHash();
assert (ie1.urlHash().length() == keylength) : "ie1.urlHash() = " + ie1.urlHash(); assert (ie1.urlHash().length() == keylength) : "ie1.urlHash() = " + ie1.urlHash();
// this is a hit. Calculate word distance: // this is a hit. Calculate word distance:
ie0.join(ie1); ie0.join(ie1);
if (ie0.worddistance() <= maxDistance) conj.add(ie0); if (ie0.worddistance() <= maxDistance) conj.add(ie0.toRowEntry());
} }
} }
return conj; return conj;
@ -344,10 +349,10 @@ public class indexContainer extends kelondroRowSet {
Iterator<indexRWIRowEntry> e2 = i2.entries(); Iterator<indexRWIRowEntry> e2 = i2.entries();
int c; int c;
if ((e1.hasNext()) && (e2.hasNext())) { if ((e1.hasNext()) && (e2.hasNext())) {
indexRWIEntry ie1; indexRWIVarEntry ie1;
indexRWIEntry ie2; indexRWIEntry ie2;
ie1 = (indexRWIEntry) e1.next(); ie1 = new indexRWIVarEntry(e1.next());
ie2 = (indexRWIEntry) e2.next(); ie2 = e2.next();
while (true) { while (true) {
assert (ie1.urlHash().length() == keylength) : "ie1.urlHash() = " + ie1.urlHash(); assert (ie1.urlHash().length() == keylength) : "ie1.urlHash() = " + ie1.urlHash();
@ -355,15 +360,15 @@ public class indexContainer extends kelondroRowSet {
c = i1.rowdef.getOrdering().compare(ie1.urlHash().getBytes(), ie2.urlHash().getBytes()); c = i1.rowdef.getOrdering().compare(ie1.urlHash().getBytes(), ie2.urlHash().getBytes());
//System.out.println("** '" + ie1.getUrlHash() + "'.compareTo('" + ie2.getUrlHash() + "')="+c); //System.out.println("** '" + ie1.getUrlHash() + "'.compareTo('" + ie2.getUrlHash() + "')="+c);
if (c < 0) { if (c < 0) {
if (e1.hasNext()) ie1 = (indexRWIEntry) e1.next(); else break; if (e1.hasNext()) ie1 = new indexRWIVarEntry(e1.next()); else break;
} else if (c > 0) { } else if (c > 0) {
if (e2.hasNext()) ie2 = (indexRWIEntry) e2.next(); else break; if (e2.hasNext()) ie2 = e2.next(); else break;
} else { } else {
// we have found the same urls in different searches! // we have found the same urls in different searches!
ie1.join(ie2); ie1.join(ie2);
if (ie1.worddistance() <= maxDistance) conj.add(ie1); if (ie1.worddistance() <= maxDistance) conj.add(ie1.toRowEntry());
if (e1.hasNext()) ie1 = (indexRWIEntry) e1.next(); else break; if (e1.hasNext()) ie1 = new indexRWIVarEntry(e1.next()); else break;
if (e2.hasNext()) ie2 = (indexRWIEntry) e2.next(); else break; if (e2.hasNext()) ie2 = e2.next(); else break;
} }
} }
} }
@ -419,10 +424,10 @@ public class indexContainer extends kelondroRowSet {
Iterator<indexRWIRowEntry> e2 = excl.entries(); Iterator<indexRWIRowEntry> e2 = excl.entries();
int c; int c;
if ((e1.hasNext()) && (e2.hasNext())) { if ((e1.hasNext()) && (e2.hasNext())) {
indexRWIEntry ie1; indexRWIVarEntry ie1;
indexRWIEntry ie2; indexRWIEntry ie2;
ie1 = (indexRWIEntry) e1.next(); ie1 = new indexRWIVarEntry(e1.next());
ie2 = (indexRWIEntry) e2.next(); ie2 = e2.next();
while (true) { while (true) {
assert (ie1.urlHash().length() == keylength) : "ie1.urlHash() = " + ie1.urlHash(); assert (ie1.urlHash().length() == keylength) : "ie1.urlHash() = " + ie1.urlHash();
@ -430,15 +435,15 @@ public class indexContainer extends kelondroRowSet {
c = pivot.rowdef.getOrdering().compare(ie1.urlHash().getBytes(), ie2.urlHash().getBytes()); c = pivot.rowdef.getOrdering().compare(ie1.urlHash().getBytes(), ie2.urlHash().getBytes());
//System.out.println("** '" + ie1.getUrlHash() + "'.compareTo('" + ie2.getUrlHash() + "')="+c); //System.out.println("** '" + ie1.getUrlHash() + "'.compareTo('" + ie2.getUrlHash() + "')="+c);
if (c < 0) { if (c < 0) {
if (e1.hasNext()) ie1 = (indexRWIEntry) e1.next(); else break; if (e1.hasNext()) ie1 = new indexRWIVarEntry(e1.next()); else break;
} else if (c > 0) { } else if (c > 0) {
if (e2.hasNext()) ie2 = (indexRWIEntry) e2.next(); else break; if (e2.hasNext()) ie2 = e2.next(); else break;
} else { } else {
// we have found the same urls in different searches! // we have found the same urls in different searches!
ie1.join(ie2); ie1.join(ie2);
e1.remove(); e1.remove();
if (e1.hasNext()) ie1 = (indexRWIEntry) e1.next(); else break; if (e1.hasNext()) ie1 = new indexRWIVarEntry(e1.next()); else break;
if (e2.hasNext()) ie2 = (indexRWIEntry) e2.next(); else break; if (e2.hasNext()) ie2 = e2.next(); else break;
} }
} }
} }

@ -129,7 +129,7 @@ public final class indexRAMRI implements indexRI {
String wordHash; String wordHash;
indexContainer container; indexContainer container;
long updateTime; long updateTime;
indexRWIEntry iEntry; indexRWIRowEntry iEntry;
kelondroRow.Entry row = dumpArray.row().newEntry(); kelondroRow.Entry row = dumpArray.row().newEntry();
byte[] occ, time; byte[] occ, time;
@ -207,7 +207,7 @@ public final class indexRAMRI implements indexRI {
Iterator<EntryIndex> i = dumpArray.contentRows(-1); Iterator<EntryIndex> i = dumpArray.contentRows(-1);
String wordHash; String wordHash;
//long creationTime; //long creationTime;
indexRWIEntry wordEntry; indexRWIRowEntry wordEntry;
kelondroRow.EntryIndex row; kelondroRow.EntryIndex row;
//Runtime rt = Runtime.getRuntime(); //Runtime rt = Runtime.getRuntime();
while (i.hasNext()) { while (i.hasNext()) {
@ -503,7 +503,7 @@ public final class indexRAMRI implements indexRI {
entries = null; entries = null;
} }
public synchronized void addEntry(String wordHash, indexRWIEntry newEntry, long updateTime, boolean dhtCase) { public synchronized void addEntry(String wordHash, indexRWIRowEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = (indexContainer) cache.get(wordHash); indexContainer container = (indexContainer) cache.get(wordHash);
if (container == null) container = new indexContainer(wordHash, this.payloadrow, 1); if (container == null) container = new indexContainer(wordHash, this.payloadrow, 1);
container.put(newEntry); container.put(newEntry);

@ -27,7 +27,6 @@
package de.anomic.index; package de.anomic.index;
import de.anomic.kelondro.kelondroBitfield; import de.anomic.kelondro.kelondroBitfield;
import de.anomic.kelondro.kelondroRow.Entry;
public interface indexRWIEntry { public interface indexRWIEntry {
@ -43,8 +42,6 @@ public interface indexRWIEntry {
public String toPropertyForm(); public String toPropertyForm();
public Entry toKelondroEntry();
public String urlHash(); public String urlHash();
public int virtualAge(); public int virtualAge();
@ -85,10 +82,6 @@ public interface indexRWIEntry {
public String toString(); public String toString();
public void join(indexRWIEntry oe);
public int worddistance();
public boolean isNewer(indexRWIEntry other); public boolean isNewer(indexRWIEntry other);
public boolean isOlder(indexRWIEntry other); public boolean isOlder(indexRWIEntry other);

@ -85,10 +85,8 @@ public final class indexRWIRowEntry implements indexRWIEntry {
private static final int col_posintext = 15; // t 2 first appearance of word in text private static final int col_posintext = 15; // t 2 first appearance of word in text
private static final int col_posinphrase = 16; // r 1 position of word in its phrase private static final int col_posinphrase = 16; // r 1 position of word in its phrase
private static final int col_posofphrase = 17; // o 1 number of the phrase where word appears private static final int col_posofphrase = 17; // o 1 number of the phrase where word appears
private static final int col_worddistance = 18; // i 1 initial zero; may be used as reserve: is filled during search private static final int col_reserve1 = 18; // i 1 reserve1
private static final int col_reserve = 19; // k 1 reserve private static final int col_reserve2 = 19; // k 1 reserve2
public double termFrequency;
private kelondroRow.Entry entry; private kelondroRow.Entry entry;
@ -102,15 +100,13 @@ public final class indexRWIRowEntry implements indexRWIEntry {
int posintext, // position of word in all words int posintext, // position of word in all words
int posinphrase, // position of word in its phrase int posinphrase, // position of word in its phrase
int posofphrase, // number of the phrase where word appears int posofphrase, // number of the phrase where word appears
int worddistance, // word distance; this is 0 by default, and set to the difference of posintext from two indexes if these are combined (simultanous search). If stored, this shows that the result was obtained by remote search
long lastmodified, // last-modified time of the document where word appears long lastmodified, // last-modified time of the document where word appears
long updatetime, // update time; this is needed to compute a TTL for the word, so it can be removed easily if the TTL is short long updatetime, // update time; this is needed to compute a TTL for the word, so it can be removed easily if the TTL is short
String language, // (guessed) language of document String language, // (guessed) language of document
char doctype, // type of document char doctype, // type of document
int outlinksSame, // outlinks to same domain int outlinksSame, // outlinks to same domain
int outlinksOther, // outlinks to other domain int outlinksOther, // outlinks to other domain
kelondroBitfield flags, // attributes to the url and to the word according the url kelondroBitfield flags // attributes to the url and to the word according the url
double termFrequency
) { ) {
assert (urlHash.length() == 12) : "urlhash = " + urlHash; assert (urlHash.length() == 12) : "urlhash = " + urlHash;
@ -136,9 +132,8 @@ public final class indexRWIRowEntry implements indexRWIEntry {
this.entry.setCol(col_posintext, posintext); this.entry.setCol(col_posintext, posintext);
this.entry.setCol(col_posinphrase, posinphrase); this.entry.setCol(col_posinphrase, posinphrase);
this.entry.setCol(col_posofphrase, posofphrase); this.entry.setCol(col_posofphrase, posofphrase);
this.entry.setCol(col_worddistance, worddistance); this.entry.setCol(col_reserve1, 0);
this.entry.setCol(col_reserve, 0); this.entry.setCol(col_reserve2, 0);
this.termFrequency = termFrequency;
} }
public indexRWIRowEntry(String urlHash, String code) { public indexRWIRowEntry(String urlHash, String code) {
@ -255,36 +250,13 @@ public final class indexRWIRowEntry implements indexRWIEntry {
} }
public double termFrequency() { public double termFrequency() {
if (this.termFrequency == 0.0) this.termFrequency = (((double) this.hitcount()) / ((double) (this.wordsintext() + this.wordsintitle() + 1))); return (((double) this.hitcount()) / ((double) (this.wordsintext() + this.wordsintitle() + 1)));
return this.termFrequency;
} }
public String toString() { public String toString() {
return toPropertyForm(); return toPropertyForm();
} }
public static indexRWIEntry join(indexRWIRowEntry ie1, indexRWIEntry ie2) {
// returns a modified entry of the first argument
// combine the distance
ie1.entry.setCol(col_worddistance, ie1.worddistance() + ie2.worddistance() + Math.abs(ie1.posintext() - ie2.posintext()));
ie1.entry.setCol(col_posintext, Math.min(ie1.posintext(), ie2.posintext()));
ie1.entry.setCol(col_posinphrase, (ie1.posofphrase() == ie2.posofphrase()) ? Math.min(ie1.posinphrase(), ie2.posinphrase()) : 0 /*unknown*/);
ie1.entry.setCol(col_posofphrase, Math.min(ie1.posofphrase(), ie2.posofphrase()));
// combine term frequency
ie1.entry.setCol(col_wordsInText, ie1.wordsintext() + ie2.wordsintext());
return ie1;
}
public void join(indexRWIEntry oe) {
join(this, oe);
}
public int worddistance() {
return (int) this.entry.getColLong(col_worddistance);
}
public boolean isNewer(indexRWIEntry other) { public boolean isNewer(indexRWIEntry other) {
if (other == null) return true; if (other == null) return true;
if (this.lastModified() > other.lastModified()) return true; if (this.lastModified() > other.lastModified()) return true;

@ -27,7 +27,6 @@
package de.anomic.index; package de.anomic.index;
import de.anomic.kelondro.kelondroBitfield; import de.anomic.kelondro.kelondroBitfield;
import de.anomic.kelondro.kelondroRow.Entry;
public class indexRWIVarEntry implements indexRWIEntry { public class indexRWIVarEntry implements indexRWIEntry {
@ -58,13 +57,13 @@ public class indexRWIVarEntry implements indexRWIEntry {
this.urlcomps = e.urlcomps(); this.urlcomps = e.urlcomps();
this.urllength = e.urllength(); this.urllength = e.urllength();
this.virtualAge = e.virtualAge(); this.virtualAge = e.virtualAge();
this.worddistance = e.worddistance(); this.worddistance = 0;
this.wordsintext = e.wordsintext(); this.wordsintext = e.wordsintext();
this.wordsintitle = e.wordsintitle(); this.wordsintitle = e.wordsintitle();
this.termFrequency = e.termFrequency(); this.termFrequency = 0.0;
} }
public void join(indexRWIEntry oe) { public void join(indexRWIVarEntry oe) {
// combine the distance // combine the distance
this.worddistance = this.worddistance() + oe.worddistance() + Math.abs(this.posintext() - oe.posintext()); this.worddistance = this.worddistance() + oe.worddistance() + Math.abs(this.posintext() - oe.posintext());
this.posintext = Math.min(this.posintext(), oe.posintext()); this.posintext = Math.min(this.posintext(), oe.posintext());
@ -133,7 +132,7 @@ public class indexRWIVarEntry implements indexRWIEntry {
return posofphrase; return posofphrase;
} }
private indexRWIRowEntry toRowEntry() { public indexRWIRowEntry toRowEntry() {
return new indexRWIRowEntry( return new indexRWIRowEntry(
urlHash, urlHash,
urllength, // byte-length of complete URL urllength, // byte-length of complete URL
@ -145,23 +144,16 @@ public class indexRWIVarEntry implements indexRWIEntry {
posintext, // position of word in all words posintext, // position of word in all words
posinphrase, // position of word in its phrase posinphrase, // position of word in its phrase
posofphrase, // number of the phrase where word appears posofphrase, // number of the phrase where word appears
worddistance, // word distance
lastModified, // last-modified time of the document where word appears lastModified, // last-modified time of the document where word appears
System.currentTimeMillis(), // update time; System.currentTimeMillis(), // update time;
language, // (guessed) language of document language, // (guessed) language of document
type, // type of document type, // type of document
llocal, // outlinks to same domain llocal, // outlinks to same domain
lother, // outlinks to other domain lother, // outlinks to other domain
flags, // attributes to the url and to the word according the url flags // attributes to the url and to the word according the url
termFrequency
); );
} }
public Entry toKelondroEntry() {
assert false; // should not be used
return null;
}
public String toPropertyForm() { public String toPropertyForm() {
return toRowEntry().toPropertyForm(); return toRowEntry().toPropertyForm();
} }
@ -199,7 +191,7 @@ public class indexRWIVarEntry implements indexRWIEntry {
return this.termFrequency; return this.termFrequency;
} }
public static final void min(indexRWIVarEntry t, indexRWIEntry other) { public static final void min(indexRWIVarEntry t, indexRWIVarEntry other) {
int v; int v;
long w; long w;
double d; double d;
@ -221,7 +213,7 @@ public class indexRWIVarEntry implements indexRWIEntry {
if (t.termFrequency > (d = other.termFrequency())) t.termFrequency = d; if (t.termFrequency > (d = other.termFrequency())) t.termFrequency = d;
} }
public static final void max(indexRWIVarEntry t, indexRWIEntry other) { public static final void max(indexRWIVarEntry t, indexRWIVarEntry other) {
int v; int v;
long w; long w;
double d; double d;
@ -243,4 +235,22 @@ public class indexRWIVarEntry implements indexRWIEntry {
if (t.termFrequency < (d = other.termFrequency())) t.termFrequency = d; if (t.termFrequency < (d = other.termFrequency())) t.termFrequency = d;
} }
/**
 * Folds the position and frequency values of {@code ie2} into {@code ie1}.
 * Nothing is returned: the fields of {@code ie1} are overwritten in place,
 * while only accessor methods are called on {@code ie2}.
 *
 * @param ie1 the entry whose fields are updated with the combined values
 * @param ie2 the entry whose values are merged into {@code ie1}
 */
public static void join(indexRWIVarEntry ie1, indexRWIEntry ie2) {
// modifies the first argument in place (the method is void, nothing is returned)
// combine the distance: previous distance of ie1, plus the distance carried by ie2
// (only read when ie2 is an indexRWIVarEntry, otherwise 0 is used), plus the
// absolute gap between the two posintext values
ie1.worddistance = ie1.worddistance + ((ie2 instanceof indexRWIVarEntry) ? ((indexRWIVarEntry) ie2).worddistance() : 0) + Math.abs(ie1.posintext() - ie2.posintext());
// keep the smaller text position; the distance above was computed before this assignment
ie1.posintext = Math.min(ie1.posintext(), ie2.posintext());
// posinphrase: minimum of both when the posofphrase values are equal, otherwise 0;
// this comparison runs before posofphrase is overwritten on the next line
ie1.posinphrase = (ie1.posofphrase() == ie2.posofphrase()) ? Math.min(ie1.posinphrase(), ie2.posinphrase()) : 0;
ie1.posofphrase = Math.min(ie1.posofphrase(), ie2.posofphrase());
// combine term frequency: both the term frequency and the wordsintext count are summed
ie1.termFrequency = ie1.termFrequency + ie2.termFrequency();
ie1.wordsintext = ie1.wordsintext() + ie2.wordsintext();
}
/**
 * Joins the given entry into this entry by delegating to the static
 * two-argument {@code join}, passing this instance as the first argument.
 *
 * @param oe the other entry to combine with this entry
 */
public void join(indexRWIEntry oe) {
indexRWIVarEntry.join(this, oe);
}
} }

@ -158,7 +158,7 @@ public class plasmaSearchAPI {
prop.putNum("genUrlList_urlList_"+i+"_urlExists_llocal", entry.word().llocal()); prop.putNum("genUrlList_urlList_"+i+"_urlExists_llocal", entry.word().llocal());
prop.putNum("genUrlList_urlList_"+i+"_urlExists_lother", entry.word().lother()); prop.putNum("genUrlList_urlList_"+i+"_urlExists_lother", entry.word().lother());
prop.putNum("genUrlList_urlList_"+i+"_urlExists_hitcount", entry.word().hitcount()); prop.putNum("genUrlList_urlList_"+i+"_urlExists_hitcount", entry.word().hitcount());
prop.putNum("genUrlList_urlList_"+i+"_urlExists_worddistance", entry.word().worddistance()); prop.putNum("genUrlList_urlList_"+i+"_urlExists_worddistance", 0);
prop.putNum("genUrlList_urlList_"+i+"_urlExists_pos", entry.word().posintext()); prop.putNum("genUrlList_urlList_"+i+"_urlExists_pos", entry.word().posintext());
prop.putNum("genUrlList_urlList_"+i+"_urlExists_phrase", entry.word().posofphrase()); prop.putNum("genUrlList_urlList_"+i+"_urlExists_phrase", entry.word().posofphrase());
prop.putNum("genUrlList_urlList_"+i+"_urlExists_posinphrase", entry.word().posinphrase()); prop.putNum("genUrlList_urlList_"+i+"_urlExists_posinphrase", entry.word().posinphrase());

@ -124,7 +124,6 @@ import de.anomic.http.httpc;
import de.anomic.http.httpd; import de.anomic.http.httpd;
import de.anomic.http.httpdRobotsTxtConfig; import de.anomic.http.httpdRobotsTxtConfig;
import de.anomic.index.indexContainer; import de.anomic.index.indexContainer;
import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexRWIRowEntry; import de.anomic.index.indexRWIRowEntry;
import de.anomic.index.indexURLEntry; import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBitfield; import de.anomic.kelondro.kelondroBitfield;
@ -2313,7 +2312,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
String word = wentry.getKey(); String word = wentry.getKey();
wordStat = wentry.getValue(); wordStat = wentry.getValue();
String wordHash = plasmaCondenser.word2hash(word); String wordHash = plasmaCondenser.word2hash(word);
indexRWIEntry wordIdxEntry = new indexRWIRowEntry( indexRWIRowEntry wordIdxEntry = new indexRWIRowEntry(
urlHash, urlHash,
urlLength, urlComps, urlLength, urlComps,
wordStat.count, wordStat.count,
@ -2323,15 +2322,13 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
wordStat.posInText, wordStat.posInText,
wordStat.posInPhrase, wordStat.posInPhrase,
wordStat.numOfPhrase, wordStat.numOfPhrase,
0,
docDate.getTime(), docDate.getTime(),
System.currentTimeMillis(), System.currentTimeMillis(),
language, language,
doctype, doctype,
ioLinks[0].intValue(), ioLinks[0].intValue(),
ioLinks[1].intValue(), ioLinks[1].intValue(),
condenser.RESULT_FLAGS, condenser.RESULT_FLAGS
0.0
); );
indexContainer wordIdxContainer = plasmaWordIndex.emptyContainer(wordHash, 1); indexContainer wordIdxContainer = plasmaWordIndex.emptyContainer(wordHash, 1);
wordIdxContainer.add(wordIdxEntry); wordIdxContainer.add(wordIdxEntry);

@ -42,7 +42,6 @@ import de.anomic.index.indexContainer;
import de.anomic.index.indexContainerOrder; import de.anomic.index.indexContainerOrder;
import de.anomic.index.indexRAMRI; import de.anomic.index.indexRAMRI;
import de.anomic.index.indexRI; import de.anomic.index.indexRI;
import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexRWIRowEntry; import de.anomic.index.indexRWIRowEntry;
import de.anomic.index.indexURLEntry; import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroBase64Order;
@ -174,7 +173,7 @@ public final class plasmaWordIndex implements indexRI {
return new indexContainer(wordHash, indexRWIRowEntry.urlEntryRow, elementCount); return new indexContainer(wordHash, indexRWIRowEntry.urlEntryRow, elementCount);
} }
public void addEntry(String wordHash, indexRWIEntry entry, long updateTime, boolean dhtInCase) { public void addEntry(String wordHash, indexRWIRowEntry entry, long updateTime, boolean dhtInCase) {
// set dhtInCase depending on wordHash // set dhtInCase depending on wordHash
if ((!dhtInCase) && (yacyDHTAction.shallBeOwnWord(wordHash))) dhtInCase = true; if ((!dhtInCase) && (yacyDHTAction.shallBeOwnWord(wordHash))) dhtInCase = true;
@ -298,7 +297,7 @@ public final class plasmaWordIndex implements indexRI {
Iterator<Map.Entry<String, plasmaCondenser.wordStatProp>> i = condenser.words().entrySet().iterator(); Iterator<Map.Entry<String, plasmaCondenser.wordStatProp>> i = condenser.words().entrySet().iterator();
Map.Entry<String, plasmaCondenser.wordStatProp> wentry; Map.Entry<String, plasmaCondenser.wordStatProp> wentry;
String word; String word;
indexRWIEntry ientry; indexRWIRowEntry ientry;
plasmaCondenser.wordStatProp wprop; plasmaCondenser.wordStatProp wprop;
while (i.hasNext()) { while (i.hasNext()) {
wentry = i.next(); wentry = i.next();
@ -313,14 +312,12 @@ public final class plasmaWordIndex implements indexRI {
wprop.posInText, wprop.posInText,
wprop.posInPhrase, wprop.posInPhrase,
wprop.numOfPhrase, wprop.numOfPhrase,
0,
urlModified.getTime(), urlModified.getTime(),
System.currentTimeMillis(), System.currentTimeMillis(),
language, language,
doctype, doctype,
outlinksSame, outlinksOther, outlinksSame, outlinksOther,
wprop.flags, wprop.flags);
0.0);
addEntry(plasmaCondenser.word2hash(word), ientry, System.currentTimeMillis(), false); addEntry(plasmaCondenser.word2hash(word), ientry, System.currentTimeMillis(), false);
wordCount++; wordCount++;
} }

Loading…
Cancel
Save