diff --git a/htroot/IndexControl_p.java b/htroot/IndexControl_p.java index 714166efb..63013c174 100644 --- a/htroot/IndexControl_p.java +++ b/htroot/IndexControl_p.java @@ -59,12 +59,12 @@ import java.util.TreeMap; import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.http.httpHeader; import de.anomic.index.indexContainer; +import de.anomic.index.indexEntry; import de.anomic.index.indexEntryAttribute; import de.anomic.index.indexURL; import de.anomic.plasma.plasmaCrawlLURL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaWordIndex; -import de.anomic.index.indexURLEntry; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.yacy.yacyClient; @@ -154,7 +154,7 @@ public class IndexControl_p { int i = 0; urlx = new String[index.size()]; while (en.hasNext()) { - urlx[i++] = ((indexURLEntry) en.next()).urlHash(); + urlx[i++] = ((indexEntry) en.next()).urlHash(); } index = null; } @@ -257,20 +257,20 @@ public class IndexControl_p { Iterator urlIter = index.entries(); HashMap knownURLs = new HashMap(); HashSet unknownURLEntries = new HashSet(); - indexURLEntry indexEntry; + indexEntry iEntry; plasmaCrawlLURL.Entry lurl; while (urlIter.hasNext()) { - indexEntry = (indexURLEntry) urlIter.next(); + iEntry = (indexEntry) urlIter.next(); try { - lurl = switchboard.urlPool.loadedURL.getEntry(indexEntry.urlHash(), null); + lurl = switchboard.urlPool.loadedURL.getEntry(iEntry.urlHash(), null); if (lurl.toString() == null) { - unknownURLEntries.add(indexEntry.urlHash()); + unknownURLEntries.add(iEntry.urlHash()); urlIter.remove(); } else { - knownURLs.put(indexEntry.urlHash(), lurl); + knownURLs.put(iEntry.urlHash(), lurl); } } catch (IOException e) { - unknownURLEntries.add(indexEntry.urlHash()); + unknownURLEntries.add(iEntry.urlHash()); } } // use whats remaining @@ -439,9 +439,9 @@ public class IndexControl_p { int i = 0; final TreeMap tm = new TreeMap(); - indexURLEntry xi; + indexEntry xi; while 
(en.hasNext()) { - xi = (indexURLEntry) en.next(); + xi = (indexEntry) en.next(); uh = new String[]{xi.urlHash(), Integer.toString(xi.posintext())}; try { us = switchboard.urlPool.loadedURL.getEntry(uh[0], null).url().toString(); diff --git a/htroot/yacy/transferRWI.java b/htroot/yacy/transferRWI.java index 1b3840462..126628761 100644 --- a/htroot/yacy/transferRWI.java +++ b/htroot/yacy/transferRWI.java @@ -51,8 +51,9 @@ import java.util.Iterator; import java.util.LinkedList; import de.anomic.http.httpHeader; +import de.anomic.index.indexEntry; +import de.anomic.index.indexURLEntryNew; import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.index.indexURLEntry; import de.anomic.server.serverCore; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -132,7 +133,7 @@ public final class transferRWI { int p; String wordHash; String urlHash; - indexURLEntry entry; + indexEntry iEntry; int wordhashesSize = v.size(); final HashSet unknownURL = new HashSet(); String[] wordhashes = new String[v.size()]; @@ -145,11 +146,11 @@ public final class transferRWI { if (p > 0) { wordHash = estring.substring(0, p); wordhashes[received] = wordHash; - entry = new indexURLEntry(estring.substring(p)); - sb.wordIndex.addEntry(wordHash, entry, System.currentTimeMillis(), true); + iEntry = new indexURLEntryNew(estring.substring(p)); + sb.wordIndex.addEntry(wordHash, iEntry, System.currentTimeMillis(), true); serverCore.checkInterruption(); - urlHash = entry.urlHash(); + urlHash = iEntry.urlHash(); try { if ((!(unknownURL.contains(urlHash))) && (!(sb.urlPool.loadedURL.exists(urlHash)))) { diff --git a/source/de/anomic/index/indexAbstractEntry.java b/source/de/anomic/index/indexAbstractEntry.java deleted file mode 100644 index 0fd747897..000000000 --- a/source/de/anomic/index/indexAbstractEntry.java +++ /dev/null @@ -1,153 +0,0 @@ -// indexbstractEntry.java -// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. 
M., Germany -// first published 20.05.2006 on http://www.anomic.de -// -// This is a part of YaCy, a peer-to-peer based web search engine -// -// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ -// $LastChangedRevision: 1986 $ -// $LastChangedBy: orbiter $ -// -// LICENSE -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package de.anomic.index; - -import de.anomic.plasma.plasmaWordIndex; - -public abstract class indexAbstractEntry implements indexEntry { - - // the associated hash - protected String urlHash; - - // discrete values - protected int hitcount; // number of this words in file - protected int wordcount; // number of all words in the file - protected int phrasecount; // number of all phrases in the file - protected int posintext; // first position of the word in text as number of word; 0=unknown or irrelevant position - protected int posinphrase; // position within a phrase of the word - protected int posofphrase; // position of the phrase in the text as count of sentences; 0=unknown; 1=path; 2=keywords; 3=headline; >4: in text - protected int worddistance;// distance between the words, only used if the index is artificial (from a conjunction) - protected long lastModified;// calculated by using last-modified - protected int quality; // result of a heuristic on the source 
file - protected byte[] language; // essentially the country code (the TLD as heuristic), two letters lowercase only - protected char doctype; // type of source - protected char localflag; // indicates if the index was created locally - - public abstract Object clone(); - - public abstract String toEncodedStringForm(); - - public abstract byte[] toEncodedByteArrayForm(); - - public abstract String toPropertyForm(); - - public void combineDistance(indexEntry oe) { - this.worddistance = this.worddistance + ((indexAbstractEntry) oe).worddistance + Math.abs(this.posintext - ((indexAbstractEntry) oe).posintext); - this.posintext = Math.min(this.posintext, ((indexAbstractEntry) oe).posintext); - if (this.posofphrase != ((indexAbstractEntry) oe).posofphrase) this.posinphrase = 0; // (unknown) - this.posofphrase = Math.min(this.posofphrase, ((indexAbstractEntry) oe).posofphrase); - this.wordcount = (this.wordcount + ((indexAbstractEntry) oe).wordcount) / 2; - } - - public void min(indexEntry other) { - if (this.hitcount > ((indexAbstractEntry) other).hitcount) this.hitcount = ((indexAbstractEntry) other).hitcount; - if (this.wordcount > ((indexAbstractEntry) other).wordcount) this.wordcount = ((indexAbstractEntry) other).wordcount; - if (this.phrasecount > ((indexAbstractEntry) other).phrasecount) this.phrasecount = ((indexAbstractEntry) other).phrasecount; - if (this.posintext > ((indexAbstractEntry) other).posintext) this.posintext = ((indexAbstractEntry) other).posintext; - if (this.posinphrase > ((indexAbstractEntry) other).posinphrase) this.posinphrase = ((indexAbstractEntry) other).posinphrase; - if (this.posofphrase > ((indexAbstractEntry) other).posofphrase) this.posofphrase = ((indexAbstractEntry) other).posofphrase; - if (this.worddistance > ((indexAbstractEntry) other).worddistance) this.worddistance = ((indexAbstractEntry) other).worddistance; - if (this.lastModified > ((indexAbstractEntry) other).lastModified) this.lastModified = ((indexAbstractEntry) 
other).lastModified; - if (this.quality > ((indexAbstractEntry) other).quality) this.quality = ((indexAbstractEntry) other).quality; - } - - public void max(indexEntry other) { - if (this.hitcount < ((indexAbstractEntry) other).hitcount) this.hitcount = ((indexAbstractEntry) other).hitcount; - if (this.wordcount < ((indexAbstractEntry) other).wordcount) this.wordcount = ((indexAbstractEntry) other).wordcount; - if (this.phrasecount < ((indexAbstractEntry) other).phrasecount) this.phrasecount = ((indexAbstractEntry) other).phrasecount; - if (this.posintext < ((indexAbstractEntry) other).posintext) this.posintext = ((indexAbstractEntry) other).posintext; - if (this.posinphrase < ((indexAbstractEntry) other).posinphrase) this.posinphrase = ((indexAbstractEntry) other).posinphrase; - if (this.posofphrase < ((indexAbstractEntry) other).posofphrase) this.posofphrase = ((indexAbstractEntry) other).posofphrase; - if (this.worddistance < ((indexAbstractEntry) other).worddistance) this.worddistance = ((indexAbstractEntry) other).worddistance; - if (this.lastModified < ((indexAbstractEntry) other).lastModified) this.lastModified = ((indexAbstractEntry) other).lastModified; - if (this.quality < ((indexAbstractEntry) other).quality) this.quality = ((indexAbstractEntry) other).quality; - } - - public void normalize(indexEntry mi, indexEntry ma) { - indexAbstractEntry min = (indexAbstractEntry) mi; - indexAbstractEntry max = (indexAbstractEntry) ma; - this.hitcount = (this.hitcount == 0) ? 0 : 1 + 255 * (this.hitcount - min.hitcount ) / (1 + max.hitcount - min.hitcount); - this.wordcount = (this.wordcount == 0) ? 0 : 1 + 255 * (this.wordcount - min.wordcount ) / (1 + max.wordcount - min.wordcount); - this.phrasecount = (this.phrasecount == 0) ? 0 : 1 + 255 * (this.phrasecount - min.phrasecount ) / (1 + max.phrasecount - min.phrasecount); - this.posintext = (this.posintext == 0) ? 
0 : 1 + 255 * (this.posintext - min.posintext ) / (1 + max.posintext - min.posintext); - this.posinphrase = (this.posinphrase == 0) ? 0 : 1 + 255 * (this.posinphrase - min.posinphrase ) / (1 + max.posinphrase - min.posinphrase); - this.posofphrase = (this.posofphrase == 0) ? 0 : 1 + 255 * (this.posofphrase - min.posofphrase ) / (1 + max.posofphrase - min.posofphrase); - this.worddistance = (this.worddistance == 0) ? 0 : 1 + 255 * (this.worddistance - min.worddistance) / (1 + max.worddistance - min.worddistance); - this.lastModified = (this.lastModified == 0) ? 0 : 1 + 255 * (this.lastModified - min.lastModified) / (1 + max.lastModified - min.lastModified); - this.quality = (this.quality == 0) ? 0 : 1 + 255 * (this.quality - min.quality ) / (1 + max.quality - min.quality); - } - - public indexEntry generateNormalized(indexEntry min, indexEntry max) { - indexEntry e = (indexAbstractEntry) this.clone(); - e.normalize(min, max); - return e; - } - - public String urlHash() { return urlHash; } - public int quality() { return quality; } - public int virtualAge() { return plasmaWordIndex.microDateDays(lastModified); } - public long lastModified() { return lastModified; } - public int hitcount() { return hitcount; } - public int posintext() { return posintext; } - public int posinphrase() { return posinphrase; } - public int posofphrase() { return posofphrase; } - public int worddistance() { return worddistance; } - public int wordcount() { return wordcount; } - public int phrasecount() { return phrasecount; } - public String getLanguage() { return new String(language); } - public char getType() { return doctype; } - public boolean isLocal() { return localflag == indexEntryAttribute.LT_LOCAL; } - - public boolean isNewer(indexEntry other) { - if (other == null) return true; - if (this.lastModified > ((indexAbstractEntry) other).lastModified) return true; - if (this.lastModified == ((indexAbstractEntry) other).lastModified()) { - if (this.quality > ((indexAbstractEntry) 
other).quality) return true; - } - return false; - } - - public boolean isOlder(indexEntry other) { - if (other == null) return false; - if (this.lastModified < ((indexAbstractEntry) other).lastModified()) return true; - if (this.lastModified == ((indexAbstractEntry) other).lastModified()) { - if (this.quality < ((indexAbstractEntry) other).quality) return true; - } - return false; - } - - public int domlengthNormalized() { - return 255 * indexURL.domLengthEstimation(this.urlHash) / 30; - } - - public static void main(String[] args) { - // outputs the word hash to a given word - if (args.length != 1) System.exit(0); - System.out.println("WORDHASH: " + indexEntryAttribute.word2hash(args[0])); - } - -} diff --git a/source/de/anomic/index/indexEntry.java b/source/de/anomic/index/indexEntry.java index 72e2c343a..04272abe8 100644 --- a/source/de/anomic/index/indexEntry.java +++ b/source/de/anomic/index/indexEntry.java @@ -31,8 +31,7 @@ import de.anomic.kelondro.kelondroRow; public interface indexEntry { public Object clone(); - public String toEncodedStringForm(); - public byte[] toEncodedByteArrayForm(); // shall be replaced by toKelondroEntry() + public byte[] toEncodedByteArrayForm(boolean includeUrlHash); // shall be replaced by toKelondroEntry() public String toPropertyForm(); public kelondroRow.Entry toKelondroEntry(); diff --git a/source/de/anomic/index/indexRAMCacheRI.java b/source/de/anomic/index/indexRAMCacheRI.java index e1f3aa188..8f5532566 100644 --- a/source/de/anomic/index/indexRAMCacheRI.java +++ b/source/de/anomic/index/indexRAMCacheRI.java @@ -103,7 +103,7 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI { String wordHash; indexTreeMapContainer container; long updateTime; - indexURLEntry wordEntry; + indexEntry iEntry; kelondroRow.Entry row = dumpArray.row().newEntry(); // write kCache, this will be melted with the wCache upon load @@ -116,12 +116,12 @@ public final class indexRAMCacheRI extends indexAbstractRI 
implements indexRI { if (container != null) { Iterator ci = container.entries(); while (ci.hasNext()) { - wordEntry = (indexURLEntry) ci.next(); + iEntry = (indexEntry) ci.next(); row.setCol(0, container.getWordHash().getBytes()); row.setCol(1, kelondroNaturalOrder.encodeLong(container.size(), 4)); row.setCol(2, kelondroNaturalOrder.encodeLong(container.updated(), 8)); - row.setCol(3, wordEntry.urlHash().getBytes()); - row.setCol(4, wordEntry.toEncodedStringForm().getBytes()); + row.setCol(3, iEntry.urlHash().getBytes()); + row.setCol(4, iEntry.toEncodedByteArrayForm(false)); dumpArray.set((int) urlcount++, row); } } @@ -145,12 +145,12 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI { if (container != null) { Iterator ci = container.entries(); while (ci.hasNext()) { - wordEntry = (indexURLEntry) ci.next(); + iEntry = (indexEntry) ci.next(); row.setCol(0, wordHash.getBytes()); row.setCol(1, kelondroNaturalOrder.encodeLong(container.size(), 4)); row.setCol(2, kelondroNaturalOrder.encodeLong(updateTime, 8)); - row.setCol(3, wordEntry.urlHash().getBytes()); - row.setCol(4, wordEntry.toEncodedStringForm().getBytes()); + row.setCol(3, iEntry.urlHash().getBytes()); + row.setCol(4, iEntry.toEncodedByteArrayForm(false)); dumpArray.set((int) urlcount++, row); } } @@ -184,7 +184,7 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI { int i = dumpArray.size(); String wordHash; //long creationTime; - indexURLEntry wordEntry; + indexEntry wordEntry; kelondroRow.Entry row; //Runtime rt = Runtime.getRuntime(); while (i-- > 0) { @@ -193,7 +193,7 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI { if ((row == null) || (row.empty(0)) || (row.empty(3)) || (row.empty(4))) continue; wordHash = row.getColString(0, "UTF-8"); //creationTime = kelondroRecords.bytes2long(row[2]); - wordEntry = new indexURLEntry(row.getColString(3, "UTF-8"), row.getColString(4, "UTF-8")); + wordEntry = new 
indexURLEntryNew(row.getColString(3, null), row.getColString(4, null)); // store to cache addEntry(wordHash, wordEntry, startTime, false); urlCount++; diff --git a/source/de/anomic/index/indexRowSetContainer.java b/source/de/anomic/index/indexRowSetContainer.java index af180822e..eb1eba4a7 100644 --- a/source/de/anomic/index/indexRowSetContainer.java +++ b/source/de/anomic/index/indexRowSetContainer.java @@ -27,6 +27,7 @@ package de.anomic.index; import java.lang.reflect.Method; +import java.util.ConcurrentModificationException; import java.util.Iterator; import java.util.Set; @@ -78,12 +79,34 @@ public class indexRowSetContainer extends kelondroRowSet implements indexContain } public int add(indexContainer c, long maxTime) { - // TODO Auto-generated method stub - return 0; + // returns the number of new elements + long startTime = System.currentTimeMillis(); + if (c == null) return 0; + int x = 0; + synchronized (c) { + Iterator i = c.entries(); + while ((i.hasNext()) && ((maxTime < 0) || ((startTime + maxTime) > System.currentTimeMillis()))) { + try { + if (addi((indexEntry) i.next())) x++; + } catch (ConcurrentModificationException e) {} + } + } + this.lastTimeWrote = java.lang.Math.max(this.lastTimeWrote, c.updated()); + return x; + } + + private boolean addi(indexEntry entry) { + // returns true if the new entry was added, false if it already existed + de.anomic.kelondro.kelondroRow.Entry oldRow = this.put(entry.toKelondroEntry()); // null if no row for this url hash existed before; FIXME: see if cloning is necessary + if ((oldRow != null) && (entry.isOlder(new indexURLEntryNew(oldRow)))) { // A more recent Entry is already in this container + this.put(oldRow); // put it back + return false; + } + return (oldRow == null); } public boolean contains(String urlHash) { - // TODO Auto-generated method stub +// TODO Auto-generated method stub return false; } diff --git a/source/de/anomic/index/indexTreeMapContainer.java b/source/de/anomic/index/indexTreeMapContainer.java index d52adb046..f69c33fec 100644 ---
a/source/de/anomic/index/indexTreeMapContainer.java +++ b/source/de/anomic/index/indexTreeMapContainer.java @@ -130,7 +130,7 @@ public final class indexTreeMapContainer extends indexAbstractContainer implemen Iterator i = c.entries(); while ((i.hasNext()) && ((maxTime < 0) || ((startTime + maxTime) > System.currentTimeMillis()))) { try { - if (addi((indexURLEntry) i.next())) x++; + if (addi((indexEntry) i.next())) x++; } catch (ConcurrentModificationException e) {} } } @@ -140,7 +140,7 @@ public final class indexTreeMapContainer extends indexAbstractContainer implemen private boolean addi(indexEntry entry) { // returns true if the new entry was added, false if it already existed - indexURLEntry oldEntry = (indexURLEntry) container.put(entry.urlHash(), entry); + indexEntry oldEntry = (indexEntry) container.put(entry.urlHash(), entry); if ((oldEntry != null) && (entry.isOlder(oldEntry))) { // A more recent Entry is already in this container container.put(entry.urlHash(), oldEntry); // put it back return false; @@ -153,15 +153,15 @@ public final class indexTreeMapContainer extends indexAbstractContainer implemen } public indexEntry get(String urlHash) { - return (indexURLEntry) container.get(urlHash); + return (indexEntry) container.get(urlHash); } public indexEntry[] getEntryArray() { - return (indexURLEntry[]) container.values().toArray(); + return (indexEntry[]) container.values().toArray(new indexEntry[container.size()]); } public indexEntry remove(String urlHash) { - return (indexURLEntry) container.remove(urlHash); + return (indexEntry) container.remove(urlHash); } public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) { @@ -290,25 +290,25 @@ public final class indexTreeMapContainer extends indexAbstractContainer implemen Iterator e2 = i2.entries(); int c; if ((e1.hasNext()) && (e2.hasNext())) { - indexURLEntry ie1; - indexURLEntry ie2; - ie1 = (indexURLEntry) e1.next(); - ie2 = (indexURLEntry) e2.next(); + indexEntry ie1; + indexEntry ie2; + ie1 = (indexEntry)
e1.next(); + ie2 = (indexEntry) e2.next(); long stamp = System.currentTimeMillis(); while ((System.currentTimeMillis() - stamp) < time) { c = i1.getOrdering().compare(ie1.urlHash(), ie2.urlHash()); //System.out.println("** '" + ie1.getUrlHash() + "'.compareTo('" + ie2.getUrlHash() + "')="+c); if (c < 0) { - if (e1.hasNext()) ie1 = (indexURLEntry) e1.next(); else break; + if (e1.hasNext()) ie1 = (indexEntry) e1.next(); else break; } else if (c > 0) { - if (e2.hasNext()) ie2 = (indexURLEntry) e2.next(); else break; + if (e2.hasNext()) ie2 = (indexEntry) e2.next(); else break; } else { // we have found the same urls in different searches! ie1.combineDistance(ie2); if (ie1.worddistance() <= maxDistance) conj.add(ie1); - if (e1.hasNext()) ie1 = (indexURLEntry) e1.next(); else break; - if (e2.hasNext()) ie2 = (indexURLEntry) e2.next(); else break; + if (e1.hasNext()) ie1 = (indexEntry) e1.next(); else break; + if (e2.hasNext()) ie2 = (indexEntry) e2.next(); else break; } } } diff --git a/source/de/anomic/index/indexURL.java b/source/de/anomic/index/indexURL.java index df44befc4..85fc7c1d7 100644 --- a/source/de/anomic/index/indexURL.java +++ b/source/de/anomic/index/indexURL.java @@ -523,6 +523,10 @@ public class indexURL { return 20; } + public static int domLengthNormalized(String urlHash) { + return 255 * domLengthEstimation(urlHash) / 30; + } + public static final String oldurlHash(URL url) { if (url == null) return null; String hash = kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(htmlFilterContentScraper.urlNormalform(url))).substring(0, urlHashLength); diff --git a/source/de/anomic/index/indexURLEntry.java b/source/de/anomic/index/indexURLEntry.java deleted file mode 100644 index e966b9833..000000000 --- a/source/de/anomic/index/indexURLEntry.java +++ /dev/null @@ -1,207 +0,0 @@ -// indexURLEntry.java -// (C) 2004, 2005, 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. 
M., Germany -// first published 2004 on http://www.anomic.de -// -// This is a part of YaCy, a peer-to-peer based web search engine -// -// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ -// $LastChangedRevision: 1986 $ -// $LastChangedBy: orbiter $ -// -// LICENSE -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -/* - This class defines the structures of an index entry for URLs -*/ - -package de.anomic.index; - -import java.util.Properties; - -import de.anomic.index.indexEntry; -import de.anomic.index.indexEntryAttribute; -import de.anomic.index.indexAbstractEntry; -import de.anomic.index.indexURL; -import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.kelondro.kelondroRow; -import de.anomic.kelondro.kelondroRow.Entry; -import de.anomic.plasma.plasmaWordIndex; - -public final class indexURLEntry extends indexAbstractEntry implements Cloneable, indexEntry { - - - // the class instantiation can only be done by a plasmaStore method - // therefore they are all public - public indexURLEntry(String urlHash, - int urlLength, // byte-length of complete URL - int urlComps, // number of path components - int titleLength, // length of description/length (longer are better?) 
- int hitcount, //*how often appears this word in the text - int wordcount, //*total number of words - int phrasecount, //*total number of phrases - int posintext, //*position of word in all words - int posinphrase, //*position of word in its phrase - int posofphrase, //*number of the phrase where word appears - int distance, //*word distance; this is 0 by default, and set to the difference of posintext from two indexes if these are combined (simultanous search). If stored, this shows that the result was obtained by remote search - int sizeOfPage, // # of bytes of the page - long lastmodified, //*last-modified time of the document where word appears - long updatetime, // update time; this is needed to compute a TTL for the word, so it can be removed easily if the TTL is short - int quality, //*the entropy value - String language, //*(guessed) language of document - char doctype, //*type of document - int outlinksSame, // outlinks to same domain - int outlinksOther,// outlinks to other domain - boolean local //*flag shows that this index was generated locally; othervise its from a remote peer - ) { - - // more needed attributes: - // - boolean: appearance attributes: title, appears in header, anchor-descr, image-tag etc - // - boolean: URL attributes - - if ((language == null) || (language.length() != indexURL.urlLanguageLength)) language = "uk"; - this.urlHash = urlHash; - this.hitcount = hitcount; - this.wordcount = wordcount; - this.phrasecount = phrasecount; - this.posintext = posintext; - this.posinphrase = posinphrase; - this.posofphrase = posofphrase; - this.worddistance = distance; - this.lastModified = lastmodified; - this.quality = quality; - this.language = language.getBytes(); - this.doctype = doctype; - this.localflag = (local) ? 
indexEntryAttribute.LT_LOCAL : indexEntryAttribute.LT_GLOBAL; - } - - public indexURLEntry(String urlHash, String code) { - // the code is not parsed but used later on - this.urlHash = urlHash; - this.hitcount = (int) kelondroBase64Order.enhancedCoder.decodeLong(code.substring(6, 8)); - this.lastModified = plasmaWordIndex.reverseMicroDateDays((int) kelondroBase64Order.enhancedCoder.decodeLong(code.substring(3, 6))); - this.quality = (int) kelondroBase64Order.enhancedCoder.decodeLong(code.substring(0, 3)); - this.language = code.substring(8, 10).getBytes(); - this.doctype = code.charAt(10); - this.localflag = code.charAt(11); - this.posintext = (code.length() >= 14) ? (int) kelondroBase64Order.enhancedCoder.decodeLong(code.substring(12, 14)) : 0; - this.posinphrase = (code.length() >= 15) ? (int) kelondroBase64Order.enhancedCoder.decodeLong(code.substring(14, 16)) : 0; - this.posofphrase = (code.length() >= 17) ? (int) kelondroBase64Order.enhancedCoder.decodeLong(code.substring(16, 18)) : 0; - this.worddistance = (code.length() >= 19) ? (int) kelondroBase64Order.enhancedCoder.decodeLong(code.substring(18, 20)) : 0; - this.wordcount = (code.length() >= 21) ? (int) kelondroBase64Order.enhancedCoder.decodeLong(code.substring(20, 22)) : 0; - this.phrasecount = (code.length() >= 23) ? 
(int) kelondroBase64Order.enhancedCoder.decodeLong(code.substring(22, 24)) : 0; - if (hitcount == 0) hitcount = 1; - if (wordcount == 0) wordcount = 1000; - if (phrasecount == 0) phrasecount = 100; - } - - public indexURLEntry(String external) { - // parse external form - String[] elts = external.substring(1, external.length() - 1).split(","); - Properties pr = new Properties(); - int p; - for (int i = 0; i < elts.length; i++) { - pr.put(elts[i].substring(0, (p = elts[i].indexOf("="))), elts[i].substring(p + 1)); - } - // set values - this.urlHash = pr.getProperty("h", ""); - this.hitcount = (int) kelondroBase64Order.enhancedCoder.decodeLong(pr.getProperty("c", "A")); - this.wordcount = (int) kelondroBase64Order.enhancedCoder.decodeLong(pr.getProperty("w", "__")); - this.phrasecount = (int) kelondroBase64Order.enhancedCoder.decodeLong(pr.getProperty("p", "__")); - this.posintext = (int) kelondroBase64Order.enhancedCoder.decodeLong(pr.getProperty("t", "__")); - this.posinphrase = (int) kelondroBase64Order.enhancedCoder.decodeLong(pr.getProperty("r", "__")); - this.posofphrase = (int) kelondroBase64Order.enhancedCoder.decodeLong(pr.getProperty("o", "__")); - this.worddistance = (int) kelondroBase64Order.enhancedCoder.decodeLong(pr.getProperty("i", "__")); - this.lastModified = plasmaWordIndex.reverseMicroDateDays((int) kelondroBase64Order.enhancedCoder.decodeLong(pr.getProperty("a", "A"))); - this.quality = (int) kelondroBase64Order.enhancedCoder.decodeLong(pr.getProperty("q", "__")); - this.language = pr.getProperty("l", "uk").getBytes(); - this.doctype = pr.getProperty("d", "u").charAt(0); - this.localflag = pr.getProperty("f", ""+indexEntryAttribute.LT_LOCAL).charAt(0); - } - - public Object clone() { - return new indexURLEntry(this.toPropertyForm()); - } - - public static int encodedStringFormLength() { - // the size of the index entry attributes when encoded to string - return 24; - } - - public String toEncodedStringForm() { - // attention: this integrates NOT 
the URL hash into the encoding - // if you need a complete dump, use toExternalForm() - StringBuffer buf = new StringBuffer(encodedStringFormLength()); - - buf.append(kelondroBase64Order.enhancedCoder.encodeLongSmart(this.quality, indexURL.urlQualityLength)) - .append(kelondroBase64Order.enhancedCoder.encodeLongSmart(plasmaWordIndex.microDateDays(this.lastModified), 3)) - .append(kelondroBase64Order.enhancedCoder.encodeLongSmart(this.hitcount, 2)) - .append(new String(this.language)) - .append(this.doctype) - .append(this.localflag) - .append(kelondroBase64Order.enhancedCoder.encodeLongSmart(this.posintext, 2)) - .append(kelondroBase64Order.enhancedCoder.encodeLongSmart(this.posinphrase, 2)) - .append(kelondroBase64Order.enhancedCoder.encodeLongSmart(this.posofphrase, 2)) - .append(kelondroBase64Order.enhancedCoder.encodeLongSmart(this.worddistance, 2)) - .append(kelondroBase64Order.enhancedCoder.encodeLongSmart(this.wordcount, 2)) - .append(kelondroBase64Order.enhancedCoder.encodeLongSmart(this.phrasecount, 2)); // 3+3+2+2+1+1+2+2+2+2+2+2= 24 bytes - - return buf.toString(); - } - - public static int encodedByteArrayFormLength() { - // the size of the index entry attributes when encoded to string - return encodedStringFormLength(); - } - - public byte[] toEncodedByteArrayForm() { - return toEncodedStringForm().getBytes(); - } - - public Entry toKelondroEntry() { - kelondroRow.Entry entry = indexURLEntryNew.urlEntryRow.newEntry(toEncodedByteArrayForm()); - return entry; - } - - public String toPropertyForm() { - StringBuffer str = new StringBuffer(61); - - str.append("{") - .append( "h=").append(this.urlHash) - .append(",q=").append(kelondroBase64Order.enhancedCoder.encodeLongSmart(this.quality, indexURL.urlQualityLength)) - .append(",a=").append(kelondroBase64Order.enhancedCoder.encodeLongSmart(plasmaWordIndex.microDateDays(this.lastModified), 3)) - .append(",c=").append(kelondroBase64Order.enhancedCoder.encodeLongSmart(this.hitcount, 2)) - 
.append(",l=").append(new String(this.language)) - .append(",d=").append(this.doctype) - .append(",f=").append(this.localflag) - .append(",t=").append(kelondroBase64Order.enhancedCoder.encodeLongSmart(this.posintext, 2)) - .append(",r=").append(kelondroBase64Order.enhancedCoder.encodeLongSmart(this.posinphrase, 2)) - .append(",o=").append(kelondroBase64Order.enhancedCoder.encodeLongSmart(this.posofphrase, 2)) - .append(",i=").append(kelondroBase64Order.enhancedCoder.encodeLongSmart(this.worddistance, 2)) - .append(",w=").append(kelondroBase64Order.enhancedCoder.encodeLongSmart(this.wordcount, 2)) - .append(",p=").append(kelondroBase64Order.enhancedCoder.encodeLongSmart(this.phrasecount, 2)) - .append("}"); - - return str.toString(); - } - - public static void main(String[] args) { - // outputs the word hash to a given word - if (args.length != 1) System.exit(0); - System.out.println("WORDHASH: " + indexEntryAttribute.word2hash(args[0])); - } - -} diff --git a/source/de/anomic/index/indexURLEntryNew.java b/source/de/anomic/index/indexURLEntryNew.java index 1e43e5dbf..b16128ec7 100644 --- a/source/de/anomic/index/indexURLEntryNew.java +++ b/source/de/anomic/index/indexURLEntryNew.java @@ -94,6 +94,7 @@ public class indexURLEntryNew implements Cloneable, indexEntry { // - boolean: URL attributes if ((language == null) || (language.length() != indexURL.urlLanguageLength)) language = "uk"; + this.entry = urlEntryRow.newEntry(); this.entry.setColString(col_urlhash, urlHash, null); this.entry.setColLong(col_quality, quality); this.entry.setColLong(col_lastModified, lastmodified); @@ -122,16 +123,27 @@ public class indexURLEntryNew implements Cloneable, indexEntry { this.entry = urlEntryRow.newEntry(row); } + public indexURLEntryNew(kelondroRow.Entry rentry) { + // FIXME: see if cloning is necessary + this.entry = rentry; + } + public Object clone() { - return new indexURLEntryNew(toEncodedByteArrayForm()); + byte[] b = new byte[urlEntryRow.objectsize()]; + 
System.arraycopy(entry.bytes(), 0, b, 0, urlEntryRow.objectsize()); + return new indexURLEntryNew(b); } - - public String toEncodedStringForm() { - return new String(toEncodedByteArrayForm()); + + public static int encodedByteArrayFormLength(boolean includingHeader) { + // the size of the index entry attributes when encoded to string + return (includingHeader) ? urlEntryRow.objectsize() : urlEntryRow.objectsize() - indexURL.urlHashLength; } - - public byte[] toEncodedByteArrayForm() { - return entry.bytes(); + + public byte[] toEncodedByteArrayForm(boolean includeHash) { + if (includeHash) return entry.bytes(); + byte[] b = new byte[urlEntryRow.objectsize() - indexURL.urlLanguageLength]; + System.arraycopy(entry.bytes(), indexURL.urlLanguageLength, b, 0, b.length); + return b; } public String toPropertyForm() { @@ -278,9 +290,9 @@ public class indexURLEntryNew implements Cloneable, indexEntry { public boolean isOlder(indexEntry other) { if (other == null) return false; - if (this.lastModified() < ((indexAbstractEntry) other).lastModified()) return true; - if (this.lastModified() == ((indexAbstractEntry) other).lastModified()) { - if (this.quality() < ((indexAbstractEntry) other).quality) return true; + if (this.lastModified() < other.lastModified()) return true; + if (this.lastModified() == other.lastModified()) { + if (this.quality() < other.quality()) return true; } return false; } diff --git a/source/de/anomic/kelondro/kelondroRow.java b/source/de/anomic/kelondro/kelondroRow.java index 9109e8bee..bc3d330e3 100644 --- a/source/de/anomic/kelondro/kelondroRow.java +++ b/source/de/anomic/kelondro/kelondroRow.java @@ -182,9 +182,15 @@ public class kelondroRow { public Entry(byte[][] cols) { rowinstance = new byte[objectsize]; - for (int i = 0; i < objectsize; i++) this.rowinstance[i] = 0; - for (int i = 0; i < cols.length; i++) { - if (cols[i] != null) System.arraycopy(cols[i], 0, rowinstance, colstart[i], Math.min(cols[i].length, row[i].cellwidth())); + int ll; + 
for (int i = 0; i < row.length; i++) { + if ((i >= cols.length) || (cols[i] == null)) { + for (int j = 0; j < row[i].cellwidth(); j++) this.rowinstance[colstart[i] + j] = 0; + } else { + ll = Math.min(cols[i].length, row[i].cellwidth()); + System.arraycopy(cols[i], 0, rowinstance, colstart[i], ll); + for (int j = ll; j < row[i].cellwidth(); j++) this.rowinstance[colstart[i] + j] = 0; + } } } diff --git a/source/de/anomic/plasma/dbImport/plasmaDbImporter.java b/source/de/anomic/plasma/dbImport/plasmaDbImporter.java index 23a82caf3..f5b3394f4 100644 --- a/source/de/anomic/plasma/dbImport/plasmaDbImporter.java +++ b/source/de/anomic/plasma/dbImport/plasmaDbImporter.java @@ -7,11 +7,11 @@ import java.util.Iterator; import java.util.TreeSet; import de.anomic.index.indexContainer; +import de.anomic.index.indexEntry; import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.plasma.plasmaCrawlLURL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaWordIndex; -import de.anomic.index.indexURLEntry; import de.anomic.server.serverDate; public class plasmaDbImporter extends AbstractImporter implements dbImporter { @@ -128,13 +128,13 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter { // loop throug the entities of the container and get the // urlhash Iterator importWordIdxEntries = newContainer.entries(); - indexURLEntry importWordIdxEntry; + indexEntry importWordIdxEntry; while (importWordIdxEntries.hasNext()) { // testing if import process was aborted if (isAborted()) break; // getting next word index entry - importWordIdxEntry = (indexURLEntry) importWordIdxEntries.next(); + importWordIdxEntry = (indexEntry) importWordIdxEntries.next(); String urlHash = importWordIdxEntry.urlHash(); entityUrls.add(urlHash); } diff --git a/source/de/anomic/plasma/plasmaCrawlLURL.java b/source/de/anomic/plasma/plasmaCrawlLURL.java index 7c7b496d3..7dce6bb96 100644 --- a/source/de/anomic/plasma/plasmaCrawlLURL.java +++ 
b/source/de/anomic/plasma/plasmaCrawlLURL.java @@ -66,8 +66,9 @@ import java.util.Properties; import de.anomic.http.httpc; import de.anomic.http.httpc.response; +import de.anomic.index.indexEntry; import de.anomic.index.indexURL; -import de.anomic.index.indexURLEntry; +import de.anomic.index.indexURLEntryNew; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroTree; import de.anomic.kelondro.kelondroRow; @@ -161,7 +162,7 @@ public final class plasmaCrawlLURL extends indexURL { gcrawlResultStack.add(urlHash + initiatorHash + executorHash); } - public Entry getEntry(String hash, indexURLEntry searchedWord) throws IOException { + public Entry getEntry(String hash, indexEntry searchedWord) throws IOException { return new Entry(hash, searchedWord); } @@ -416,7 +417,7 @@ public final class plasmaCrawlLURL extends indexURL { private int size; private int wordCount; private String snippet; - private indexURLEntry word; // this is only used if the url is transported via remote search requests + private indexEntry word; // this is only used if the url is transported via remote search requests private boolean stored; // more needed attributes: @@ -451,7 +452,7 @@ public final class plasmaCrawlLURL extends indexURL { this.stored = false; } - public Entry(String urlHash, indexURLEntry searchedWord) throws IOException { + public Entry(String urlHash, indexEntry searchedWord) throws IOException { // generates an plasmaLURLEntry using the url hash // to speed up the access, the url-hashes are buffered // in the hash cache. 
@@ -466,13 +467,13 @@ public final class plasmaCrawlLURL extends indexURL { this.stored = true; } - public Entry(kelondroRow.Entry entry, indexURLEntry searchedWord) throws IOException { + public Entry(kelondroRow.Entry entry, indexEntry searchedWord) throws IOException { assert (entry != null); insertEntry(entry, word); this.stored = false; } - private void insertEntry(kelondroRow.Entry entry, indexURLEntry searchedWord) throws IOException { + private void insertEntry(kelondroRow.Entry entry, indexEntry searchedWord) throws IOException { try { this.urlHash = entry.getColString(0, null); this.url = new URL(entry.getColString(1, "UTF-8").trim()); @@ -522,7 +523,7 @@ public final class plasmaCrawlLURL extends indexURL { this.wordCount = Integer.parseInt(prop.getProperty("wc", "0")); this.snippet = prop.getProperty("snippet", ""); if (snippet.length() == 0) snippet = null; else snippet = crypt.simpleDecode(snippet, null); - this.word = (prop.containsKey("word")) ? new indexURLEntry(kelondroBase64Order.enhancedCoder.decodeString(prop.getProperty("word",""))) : null; + this.word = (prop.containsKey("word")) ? 
new indexURLEntryNew(kelondroBase64Order.enhancedCoder.decodeString(prop.getProperty("word",""))) : null; this.stored = false; //} } catch (Exception e) { @@ -659,7 +660,7 @@ public final class plasmaCrawlLURL extends indexURL { return snippet; } - public indexURLEntry word() { + public indexEntry word() { return word; } diff --git a/source/de/anomic/plasma/plasmaDHTChunk.java b/source/de/anomic/plasma/plasmaDHTChunk.java index 783387568..bdaddedaf 100644 --- a/source/de/anomic/plasma/plasmaDHTChunk.java +++ b/source/de/anomic/plasma/plasmaDHTChunk.java @@ -48,8 +48,8 @@ import java.util.HashSet; import java.util.Iterator; import de.anomic.index.indexContainer; +import de.anomic.index.indexEntry; import de.anomic.index.indexTreeMapContainer; -import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroException; import de.anomic.server.serverCodings; @@ -191,7 +191,7 @@ public class plasmaDHTChunk { Iterator indexContainerIterator = wordIndex.indexContainerSet(hash, resourceLevel, true, maxcount).iterator(); indexContainer container; Iterator urlIter; - indexURLEntry indexEntry; + indexEntry iEntry; plasmaCrawlLURL.Entry lurl; int refcount = 0; int wholesize; @@ -208,29 +208,29 @@ public class plasmaDHTChunk { urlIter = container.entries(); // iterate over indexes to fetch url entries and store them in the urlCache while ((urlIter.hasNext()) && (maxcount > refcount)) { - indexEntry = (indexURLEntry) urlIter.next(); + iEntry = (indexEntry) urlIter.next(); try { - lurl = lurls.getEntry(indexEntry.urlHash(), indexEntry); + lurl = lurls.getEntry(iEntry.urlHash(), iEntry); if ((lurl == null) || (lurl.url() == null)) { - yacyCore.log.logFine("DEBUG selectTransferContainersResource: not-bound url hash '" + indexEntry.urlHash() + "' for word hash " + container.getWordHash()); + yacyCore.log.logFine("DEBUG selectTransferContainersResource: not-bound url hash '" + iEntry.urlHash() + "' for word hash " + 
container.getWordHash()); notBoundCounter++; urlIter.remove(); - wordIndex.removeEntry(container.getWordHash(), indexEntry.urlHash(), true); + wordIndex.removeEntry(container.getWordHash(), iEntry.urlHash(), true); } else { - urlCache.put(indexEntry.urlHash(), lurl); - yacyCore.log.logFine("DEBUG selectTransferContainersResource: added url hash '" + indexEntry.urlHash() + "' to urlCache for word hash " + container.getWordHash()); + urlCache.put(iEntry.urlHash(), lurl); + yacyCore.log.logFine("DEBUG selectTransferContainersResource: added url hash '" + iEntry.urlHash() + "' to urlCache for word hash " + container.getWordHash()); refcount++; } } catch (IOException e) { notBoundCounter++; urlIter.remove(); - wordIndex.removeEntry(container.getWordHash(), indexEntry.urlHash(), true); + wordIndex.removeEntry(container.getWordHash(), iEntry.urlHash(), true); } } // remove all remaining; we have enough while (urlIter.hasNext()) { - indexEntry = (indexURLEntry) urlIter.next(); + iEntry = (indexEntry) urlIter.next(); urlIter.remove(); } @@ -272,7 +272,7 @@ public class plasmaDHTChunk { public int deleteTransferIndexes() { Iterator urlIter; - indexURLEntry indexEntry; + indexEntry iEntry; HashSet urlHashes; int count = 0; @@ -282,8 +282,8 @@ public class plasmaDHTChunk { urlHashes = new HashSet(this.indexContainers[i].size()); urlIter = this.indexContainers[i].entries(); while (urlIter.hasNext()) { - indexEntry = (indexURLEntry) urlIter.next(); - urlHashes.add(indexEntry.urlHash()); + iEntry = (indexEntry) urlIter.next(); + urlHashes.add(iEntry.urlHash()); } count += wordIndex.removeEntries(this.indexContainers[i].getWordHash(), urlHashes, true); log.logFine("Deleted partial index (" + c + " URLs) for word " + this.indexContainers[i].getWordHash() + "; " + this.wordIndex.indexSize(indexContainers[i].getWordHash()) + " entries left"); diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java index 06b98c37f..168f4c1e6 100644 
--- a/source/de/anomic/plasma/plasmaSearchEvent.java +++ b/source/de/anomic/plasma/plasmaSearchEvent.java @@ -52,8 +52,8 @@ import de.anomic.server.logging.serverLog; import de.anomic.server.serverInstantThread; import de.anomic.yacy.yacySearch; import de.anomic.index.indexContainer; +import de.anomic.index.indexEntry; import de.anomic.index.indexTreeMapContainer; -import de.anomic.index.indexURLEntry; public final class plasmaSearchEvent extends Thread implements Runnable { @@ -242,7 +242,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable { //if (searchResult == null) return acc; // strange case where searchResult is not proper: acc is then empty //if (searchResult.size() == 0) return acc; // case that we have nothing to do - indexURLEntry entry; + indexEntry entry; plasmaCrawlLURL.Entry page; int minEntries = profileLocal.getTargetCount(plasmaSearchTimingProfile.PROCESS_POSTSORT); try { diff --git a/source/de/anomic/plasma/plasmaSearchPreOrder.java b/source/de/anomic/plasma/plasmaSearchPreOrder.java index a4859662a..5c60aa696 100644 --- a/source/de/anomic/plasma/plasmaSearchPreOrder.java +++ b/source/de/anomic/plasma/plasmaSearchPreOrder.java @@ -49,8 +49,8 @@ import java.util.Iterator; import de.anomic.server.serverCodings; import de.anomic.server.serverFileUtils; +import de.anomic.index.indexEntry; import de.anomic.index.indexTreeMapContainer; -import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroBinSearch; public final class plasmaSearchPreOrder { @@ -58,7 +58,7 @@ public final class plasmaSearchPreOrder { public static kelondroBinSearch[] ybrTables = null; // block-rank tables private static boolean useYBR = true; - private indexURLEntry entryMin, entryMax; + private indexEntry entryMin, entryMax; private TreeMap pageAcc; // key = order hash; value = plasmaLURL.entry private plasmaSearchQuery query; private plasmaSearchRankingProfile ranking; @@ -118,36 +118,36 @@ public final class plasmaSearchPreOrder { return 
pageAcc.size() > 0; } - public indexURLEntry next() { + public indexEntry next() { Object top = pageAcc.lastKey(); - return (indexURLEntry) pageAcc.remove(top); + return (indexEntry) pageAcc.remove(top); } public void addContainer(indexTreeMapContainer container, long maxTime) { long limitTime = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime; - indexURLEntry indexEntry; + indexEntry iEntry; // first pass: find min/max to obtain limits for normalization Iterator i = container.entries(); int count = 0; while (i.hasNext()) { if (System.currentTimeMillis() > limitTime) break; - indexEntry = (indexURLEntry) i.next(); - if (entryMin == null) entryMin = (indexURLEntry) indexEntry.clone(); else entryMin.min(indexEntry); - if (entryMax == null) entryMax = (indexURLEntry) indexEntry.clone(); else entryMax.max(indexEntry); + iEntry = (indexEntry) i.next(); + if (entryMin == null) entryMin = (indexEntry) iEntry.clone(); else entryMin.min(iEntry); + if (entryMax == null) entryMax = (indexEntry) iEntry.clone(); else entryMax.max(iEntry); count++; } // second pass: normalize entries and get ranking i = container.entries(); for (int j = 0; j < count; j++) { - indexEntry = (indexURLEntry) i.next(); - pageAcc.put(serverCodings.encodeHex(this.ranking.preRanking(indexEntry.generateNormalized(entryMin, entryMax)), 16) + indexEntry.urlHash(), indexEntry); + iEntry = (indexEntry) i.next(); + pageAcc.put(serverCodings.encodeHex(this.ranking.preRanking(iEntry.generateNormalized(entryMin, entryMax)), 16) + iEntry.urlHash(), iEntry); } } - public indexURLEntry[] getNormalizer() { - return new indexURLEntry[] {entryMin, entryMax}; + public indexEntry[] getNormalizer() { + return new indexEntry[] {entryMin, entryMax}; } public static int ybr_p(String urlHash) { diff --git a/source/de/anomic/plasma/plasmaSearchRankingProfile.java b/source/de/anomic/plasma/plasmaSearchRankingProfile.java index a00cd4e20..2f9b97779 100644 --- 
a/source/de/anomic/plasma/plasmaSearchRankingProfile.java +++ b/source/de/anomic/plasma/plasmaSearchRankingProfile.java @@ -47,7 +47,7 @@ import java.util.Map; import java.util.Set; import de.anomic.index.indexEntry; -import de.anomic.index.indexURLEntry; +import de.anomic.index.indexURL; public class plasmaSearchRankingProfile { @@ -164,18 +164,16 @@ public class plasmaSearchRankingProfile { return new String(ext); } - public long preRanking(indexEntry entry) { + public long preRanking(indexEntry normalizedEntry) { + // the normalizedEntry must be a normalized indexEntry long ranking = 0; - if (entry instanceof indexURLEntry) { - indexURLEntry normalizedEntry = (indexURLEntry) entry; - ranking += normalizedEntry.quality() << ((Integer) coeff.get(ENTROPY)).intValue(); - ranking += normalizedEntry.virtualAge() << ((Integer) coeff.get(DATE)).intValue(); - ranking += plasmaSearchPreOrder.ybr_p(normalizedEntry.urlHash()) << ((Integer) coeff.get(YBR)).intValue(); - ranking += (normalizedEntry.posintext() == 0) ? 0 : (255 - normalizedEntry.posintext()) << ((Integer) coeff.get(POSINTEXT)).intValue(); - ranking += (normalizedEntry.worddistance() == 0) ? 0 : (255 - normalizedEntry.worddistance()) << ((Integer) coeff.get(WORDDISTANCE)).intValue(); - ranking += (normalizedEntry.hitcount() == 0) ? 0 : normalizedEntry.hitcount() << ((Integer) coeff.get(HITCOUNT)).intValue(); - ranking += (255 - normalizedEntry.domlengthNormalized()) << ((Integer) coeff.get(DOMLENGTH)).intValue(); - } + ranking += normalizedEntry.quality() << ((Integer) coeff.get(ENTROPY)).intValue(); + ranking += normalizedEntry.virtualAge() << ((Integer) coeff.get(DATE)).intValue(); + ranking += plasmaSearchPreOrder.ybr_p(normalizedEntry.urlHash()) << ((Integer) coeff.get(YBR)).intValue(); + ranking += (normalizedEntry.posintext() == 0) ? 0 : (255 - normalizedEntry.posintext()) << ((Integer) coeff.get(POSINTEXT)).intValue(); + ranking += (normalizedEntry.worddistance() == 0) ? 
0 : (255 - normalizedEntry.worddistance()) << ((Integer) coeff.get(WORDDISTANCE)).intValue(); + ranking += (normalizedEntry.hitcount() == 0) ? 0 : normalizedEntry.hitcount() << ((Integer) coeff.get(HITCOUNT)).intValue(); + ranking += (255 - indexURL.domLengthNormalized(normalizedEntry.urlHash())) << ((Integer) coeff.get(DOMLENGTH)).intValue(); return ranking; } diff --git a/source/de/anomic/plasma/plasmaSearchResult.java b/source/de/anomic/plasma/plasmaSearchResult.java index 14b2f1da3..be8ecfc0f 100644 --- a/source/de/anomic/plasma/plasmaSearchResult.java +++ b/source/de/anomic/plasma/plasmaSearchResult.java @@ -55,13 +55,13 @@ import java.net.MalformedURLException; import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.server.serverCodings; import de.anomic.htmlFilter.htmlFilterContentScraper; +import de.anomic.index.indexEntry; import de.anomic.index.indexEntryAttribute; import de.anomic.index.indexURL; -import de.anomic.index.indexURLEntry; public final class plasmaSearchResult { - private indexURLEntry entryMin, entryMax; + private indexEntry entryMin, entryMax; private TreeMap pageAcc; // key = order hash; value = plasmaLURL.entry private kelondroMScoreCluster ref; // reference score computation for the commonSense heuristic private ArrayList results; // this is a buffer for plasmaWordIndexEntry + plasmaCrawlLURL.entry - objects @@ -108,11 +108,11 @@ public final class plasmaSearchResult { return (plasmaCrawlLURL.Entry) pageAcc.remove(top); } - protected void addResult(indexURLEntry indexEntry, plasmaCrawlLURL.Entry page) { + protected void addResult(indexEntry iEntry, plasmaCrawlLURL.Entry page) { // make min/max for normalization - if (entryMin == null) entryMin = (indexURLEntry) indexEntry.clone(); else entryMin.min(indexEntry); - if (entryMax == null) entryMax = (indexURLEntry) indexEntry.clone(); else entryMax.max(indexEntry); + if (entryMin == null) entryMin = (indexEntry) iEntry.clone(); else entryMin.min(iEntry); + if (entryMax == null) 
entryMax = (indexEntry) iEntry.clone(); else entryMax.max(iEntry); // take out relevant information for reference computation URL url = page.url(); @@ -122,7 +122,7 @@ public final class plasmaSearchResult { String[] descrcomps = descr.toLowerCase().split(htmlFilterContentScraper.splitrex); // words in the description // store everything - Object[] resultVector = new Object[] {indexEntry, page, urlcomps, descrcomps}; + Object[] resultVector = new Object[] {iEntry, page, urlcomps, descrcomps}; results.add(resultVector); // add references @@ -140,18 +140,18 @@ public final class plasmaSearchResult { for (int i = 0; i < references.length; i++) commonSense.add(references[i]); Object[] resultVector; - indexURLEntry indexEntry; + indexEntry iEntry; plasmaCrawlLURL.Entry page; long ranking; for (int i = 0; i < results.size(); i++) { // take out values from result array resultVector = (Object[]) results.get(i); - indexEntry = (indexURLEntry) resultVector[0]; + iEntry = (indexEntry) resultVector[0]; page = (plasmaCrawlLURL.Entry) resultVector[1]; // calculate ranking ranking = this.ranking.postRanking( - indexEntry, + iEntry, query, commonSense, (String[]) resultVector[2], @@ -161,7 +161,7 @@ public final class plasmaSearchResult { // insert value //System.out.println("Ranking " + ranking + ", YBR-" + plasmaSearchPreOrder.ybr(indexEntry.getUrlHash()) + " for URL " + page.url()); - pageAcc.put(serverCodings.encodeHex(ranking, 16) + indexEntry.urlHash(), page); + pageAcc.put(serverCodings.encodeHex(ranking, 16) + iEntry.urlHash(), page); } // flush memory diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 48a58f2fb..6bc52e517 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -130,10 +130,11 @@ import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.http.httpHeader; import de.anomic.http.httpRemoteProxyConfig; import 
de.anomic.http.httpc; +import de.anomic.index.indexEntry; import de.anomic.index.indexEntryAttribute; import de.anomic.index.indexTreeMapContainer; import de.anomic.index.indexURL; -import de.anomic.index.indexURLEntry; +import de.anomic.index.indexURLEntryNew; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroMSetTools; @@ -1487,7 +1488,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser wordStat = (plasmaCondenser.wordStatProp) wentry.getValue(); String wordHash = indexEntryAttribute.word2hash(word); indexTreeMapContainer wordIdxContainer = new indexTreeMapContainer(wordHash); - indexURLEntry wordIdxEntry = new indexURLEntry(urlHash, + indexEntry wordIdxEntry = new indexURLEntryNew(urlHash, urlLength, urlComps, wordStat.count, document.longTitle.length(), diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index 637021d31..ad6b55c9a 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -67,7 +67,7 @@ import de.anomic.index.indexRI; import de.anomic.index.indexAbstractRI; import de.anomic.index.indexRowSetContainer; import de.anomic.index.indexTreeMapContainer; -import de.anomic.index.indexURLEntry; +import de.anomic.index.indexURLEntryNew; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroMergeIterator; @@ -251,7 +251,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { Iterator i = condenser.words(); Map.Entry wentry; String word; - indexURLEntry ientry; + indexEntry ientry; plasmaCondenser.wordStatProp wprop; String wordHash; int urlLength = url.toString().length(); @@ -263,7 +263,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { wprop = (plasmaCondenser.wordStatProp) wentry.getValue(); // if 
((s.length() > 4) && (c > 1)) System.out.println("# " + s + ":" + c); wordHash = indexEntryAttribute.word2hash(word); - ientry = new indexURLEntry(urlHash, + ientry = new indexURLEntryNew(urlHash, urlLength, urlComps, (document == null) ? urlLength : document.longTitle.length(), wprop.count, condenser.RESULT_SIMI_WORDS, @@ -529,11 +529,11 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { // the combined container will fit, read the container try { Iterator entries = entity.elements(true); - indexURLEntry entry; + indexEntry entry; while (entries.hasNext()) { - entry = (indexURLEntry) entries.next(); + entry = (indexEntry) entries.next(); // System.out.println("ENTRY = " + entry.getUrlHash()); - container.add(new indexURLEntry[]{entry}, System.currentTimeMillis()); + container.add(new indexEntry[]{entry}, System.currentTimeMillis()); } // we have read all elements, now delete the entity entity.deleteComplete(); @@ -580,7 +580,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { public void run() { serverLog.logInfo("INDEXCLEANER", "IndexCleaner-Thread started"); indexContainer container = null; - indexURLEntry entry = null; + indexEntry entry = null; URL url = null; HashSet urlHashs = new HashSet(); try { @@ -592,7 +592,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { wordHashNow = container.getWordHash(); while (containerIterator.hasNext() && run) { waiter(); - entry = (indexURLEntry) containerIterator.next(); + entry = (indexEntry) containerIterator.next(); // System.out.println("Wordhash: "+wordHash+" UrlHash: // "+entry.getUrlHash()); try { diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortment.java b/source/de/anomic/plasma/plasmaWordIndexAssortment.java index bf86f3f4d..35d892073 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortment.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortment.java @@ -57,9 +57,10 @@ import java.io.IOException; 
import java.util.Iterator; import de.anomic.index.indexContainer; +import de.anomic.index.indexEntry; import de.anomic.index.indexEntryAttribute; import de.anomic.index.indexTreeMapContainer; -import de.anomic.index.indexURLEntry; +import de.anomic.index.indexURLEntryNew; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroTree; import de.anomic.kelondro.kelondroRow; @@ -70,11 +71,11 @@ public final class plasmaWordIndexAssortment { // environment constants private static final String assortmentFileName = "indexAssortment"; public static final int[] bufferStructureBasis = new int[]{ - indexEntryAttribute.wordHashLength, // a wordHash - 4, // occurrence counter - 8, // timestamp of last access - indexEntryAttribute.urlHashLength, // corresponding URL hash - indexURLEntry.encodedStringFormLength() // URL attributes + indexEntryAttribute.wordHashLength, // a wordHash + 4, // occurrence counter + 8, // timestamp of last access + indexEntryAttribute.urlHashLength, // corresponding URL hash + indexURLEntryNew.encodedByteArrayFormLength(false) // URL attributes }; // class variables @@ -151,11 +152,11 @@ public final class plasmaWordIndexAssortment { row.setColLongB256(1, 1); row.setColLongB256(2, newContainer.updated()); Iterator entries = newContainer.entries(); - indexURLEntry entry; + indexEntry entry; for (int i = 0; i < assortmentLength; i++) { - entry = (indexURLEntry) entries.next(); + entry = (indexEntry) entries.next(); row.setCol(3 + 2 * i, entry.urlHash().getBytes()); - row.setCol(4 + 2 * i, entry.toEncodedStringForm().getBytes()); + row.setCol(4 + 2 * i, entry.toEncodedByteArrayForm(false)); } kelondroRow.Entry oldrow = null; try { @@ -249,7 +250,7 @@ public final class plasmaWordIndexAssortment { int al = assortmentCapacity(row.objectsize()); for (int i = 0; i < al; i++) { container.add( - new indexURLEntry[] { new indexURLEntry( + new indexEntry[] { new indexURLEntryNew( new String(row.getColBytes(3 + 2 * i)), new 
String(row.getColBytes(4 + 2 * i))) }, updateTime); } return container; diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java index 974b023eb..6fa64915e 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java @@ -59,7 +59,6 @@ import de.anomic.index.indexRI; import de.anomic.index.indexAbstractRI; import de.anomic.index.indexRowSetContainer; import de.anomic.index.indexTreeMapContainer; -import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.kelondro.kelondroObjectCache; import de.anomic.kelondro.kelondroRecords; @@ -167,7 +166,7 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl c = new indexTreeMapContainer(newContainer.getWordHash()); for (int k = 0; k < j; k++) { if (i.hasNext()) { - c.add((indexURLEntry) i.next(), newContainer.updated()); + c.add((indexEntry) i.next(), newContainer.updated()); } else { storeForced(c); return; @@ -210,7 +209,7 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl c = new indexTreeMapContainer(newContainer.getWordHash()); for (int k = 0; k <= j; k++) { assert (i.hasNext()); - c.add((indexURLEntry) i.next(), newContainer.updated()); + c.add((indexEntry) i.next(), newContainer.updated()); } storeForced(c); } diff --git a/source/de/anomic/plasma/plasmaWordIndexFile.java b/source/de/anomic/plasma/plasmaWordIndexFile.java index 4fb5a9fa3..0b102bc9c 100644 --- a/source/de/anomic/plasma/plasmaWordIndexFile.java +++ b/source/de/anomic/plasma/plasmaWordIndexFile.java @@ -49,8 +49,9 @@ import java.io.IOException; import java.util.Iterator; import de.anomic.index.indexContainer; +import de.anomic.index.indexEntry; import de.anomic.index.indexURL; -import de.anomic.index.indexURLEntry; +import de.anomic.index.indexURLEntryNew; import 
de.anomic.kelondro.kelondroTree; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroRow; @@ -95,10 +96,10 @@ public final class plasmaWordIndexFile { kt = new kelondroTree(theLocation, cacheSize, 0, kelondroTree.defaultObjectCachePercent); } catch (IOException e) { theLocation.delete(); - kt = new kelondroTree(theLocation, cacheSize, 0, kelondroTree.defaultObjectCachePercent, indexURL.urlHashLength, indexURLEntry.encodedStringFormLength(), false); + kt = new kelondroTree(theLocation, cacheSize, 0, kelondroTree.defaultObjectCachePercent, indexURL.urlHashLength, indexURLEntryNew.encodedByteArrayFormLength(false), false); } else { // create new index file - kt = new kelondroTree(theLocation, cacheSize, 0, kelondroTree.defaultObjectCachePercent, indexURL.urlHashLength, indexURLEntry.encodedStringFormLength(), false); + kt = new kelondroTree(theLocation, cacheSize, 0, kelondroTree.defaultObjectCachePercent, indexURL.urlHashLength, indexURLEntryNew.encodedByteArrayFormLength(false), false); } return kt; // everyone who get this should close it when finished! 
} @@ -137,27 +138,27 @@ public final class plasmaWordIndexFile { } catch (IOException e) {} } - public indexURLEntry getEntry(String urlhash) throws IOException { + public indexEntry getEntry(String urlhash) throws IOException { kelondroRow.Entry n = theIndex.get(urlhash.getBytes()); if (n == null) return null; - return new indexURLEntry(n.getColString(0, null), n.getColString(1, null)); + return new indexURLEntryNew(n.getColString(0, null), n.getColString(1, null)); } public boolean contains(String urlhash) throws IOException { return (theIndex.get(urlhash.getBytes()) != null); } - public boolean contains(indexURLEntry entry) throws IOException { + public boolean contains(indexEntry entry) throws IOException { return (theIndex.get(entry.urlHash().getBytes()) != null); } - public boolean addEntry(indexURLEntry entry) throws IOException { + public boolean addEntry(indexEntry entry) throws IOException { if (entry == null) return false; - indexURLEntry oldEntry = getEntry(entry.urlHash()); + indexEntry oldEntry = getEntry(entry.urlHash()); if ((oldEntry != null) && (entry.isOlder(oldEntry))) { // A more recent Entry is already in this entity return false; } - return (theIndex.put(entry.urlHash().getBytes(), entry.toEncodedStringForm().getBytes()) == null); + return (theIndex.put(entry.urlHash().getBytes(), entry.toEncodedByteArrayForm(false)) == null); } public int addEntries(indexContainer container) throws IOException { @@ -172,7 +173,7 @@ public final class plasmaWordIndexFile { if (container != null) { Iterator i = container.entries(); while (i.hasNext()) { - if (addEntry((indexURLEntry) i.next())) count++; + if (addEntry((indexEntry) i.next())) count++; } } @@ -237,7 +238,7 @@ public final class plasmaWordIndexFile { public Object next() { if (i == null) return null; kelondroRow.Entry n = (kelondroRow.Entry) i.next(); - return new indexURLEntry(n.getColString(0, null), n.getColString(1, null)); + return new indexURLEntryNew(n.getColString(0, null), 
n.getColString(1, null)); } public void remove() { throw new UnsupportedOperationException(); @@ -257,7 +258,7 @@ public final class plasmaWordIndexFile { long timeout = (time == -1) ? Long.MAX_VALUE : System.currentTimeMillis() + time; try { while ((i.hasNext()) && (System.currentTimeMillis() < timeout)) { - addEntry((indexURLEntry) i.next()); + addEntry((indexEntry) i.next()); } } catch (kelondroException e) { serverLog.logSevere("PLASMA", "plasmaWordIndexEntity.merge: " + e.getMessage()); diff --git a/source/de/anomic/plasma/plasmaWordIndexFileCluster.java b/source/de/anomic/plasma/plasmaWordIndexFileCluster.java index 19b5bc791..5a26ecd67 100644 --- a/source/de/anomic/plasma/plasmaWordIndexFileCluster.java +++ b/source/de/anomic/plasma/plasmaWordIndexFileCluster.java @@ -51,10 +51,10 @@ import java.util.Set; import java.util.TreeSet; import de.anomic.index.indexContainer; +import de.anomic.index.indexEntry; import de.anomic.index.indexRI; import de.anomic.index.indexAbstractRI; import de.anomic.index.indexTreeMapContainer; -import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacySeedDB; @@ -231,10 +231,10 @@ public class plasmaWordIndexFileCluster extends indexAbstractRI implements index if (plasmaWordIndexFile.wordHash2path(databaseRoot, wordHash).exists()) { plasmaWordIndexFile entity = this.getEntity(wordHash, deleteIfEmpty, (maxTime < 0) ? 
-1 : maxTime * 9 / 10); indexTreeMapContainer container = new indexTreeMapContainer(wordHash); - indexURLEntry entry; + indexEntry entry; Iterator i = entity.elements(true); while ((i.hasNext()) && (System.currentTimeMillis() < (start + maxTime))) { - entry = (indexURLEntry) i.next(); + entry = (indexEntry) i.next(); container.add(entry); } return container; diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index 1e171abc3..d280522ba 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -54,9 +54,10 @@ import java.util.Iterator; import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.http.httpc; import de.anomic.index.indexContainer; +import de.anomic.index.indexEntry; import de.anomic.index.indexEntryAttribute; import de.anomic.index.indexTreeMapContainer; -import de.anomic.index.indexURLEntry; +import de.anomic.index.indexURLEntryNew; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.plasma.plasmaCrawlLURL; import de.anomic.plasma.plasmaSearchRankingProfile; @@ -484,10 +485,10 @@ public final class yacyClient { urlManager.stackEntry(urlEntry, yacyCore.seedDB.mySeed.hash, targetPeer.hash, 2); // save the url entry - final indexURLEntry entry; + final indexEntry entry; if (urlEntry.word() == null) { // the old way to define words - entry = new indexURLEntry( + entry = new indexURLEntryNew( urlEntry.hash(), urlLength, urlComps, urlEntry.descr().length(), @@ -514,7 +515,7 @@ public final class yacyClient { } // add the url entry to the word indexes for (int m = 0; m < words; m++) { - container[m].add(new indexURLEntry[]{entry}, System.currentTimeMillis()); + container[m].add(new indexEntry[]{entry}, System.currentTimeMillis()); } } @@ -888,11 +889,11 @@ public final class yacyClient { // check if we got all necessary urls in the urlCache (only for debugging) Iterator eenum; - indexURLEntry entry; + indexEntry entry; for (int i = 0; i < indexes.length; 
i++) { eenum = indexes[i].entries(); while (eenum.hasNext()) { - entry = (indexURLEntry) eenum.next(); + entry = (indexEntry) eenum.next(); if (urlCache.get(entry.urlHash()) == null) { yacyCore.log.logFine("DEBUG transferIndex: to-send url hash '" + entry.urlHash() + "' is not contained in urlCache"); } @@ -996,11 +997,11 @@ public final class yacyClient { int indexcount = 0; final StringBuffer entrypost = new StringBuffer(indexes.length*73); Iterator eenum; - indexURLEntry entry; + indexEntry entry; for (int i = 0; i < indexes.length; i++) { eenum = indexes[i].entries(); while (eenum.hasNext()) { - entry = (indexURLEntry) eenum.next(); + entry = (indexEntry) eenum.next(); entrypost.append(indexes[i].getWordHash()) .append(entry.toPropertyForm()) .append(serverCore.crlfString); diff --git a/source/yacy.java b/source/yacy.java index 0b25fb8e0..38427cf2b 100644 --- a/source/yacy.java +++ b/source/yacy.java @@ -73,6 +73,7 @@ import de.anomic.http.httpd; import de.anomic.http.httpdFileHandler; import de.anomic.http.httpdProxyHandler; import de.anomic.index.indexContainer; +import de.anomic.index.indexEntry; import de.anomic.index.indexEntryAttribute; import de.anomic.index.indexURL; import de.anomic.kelondro.kelondroDyn; @@ -86,7 +87,6 @@ import de.anomic.plasma.plasmaURLPool; import de.anomic.plasma.plasmaWordIndex; import de.anomic.plasma.plasmaWordIndexAssortmentCluster; import de.anomic.plasma.plasmaWordIndexFile; -import de.anomic.index.indexURLEntry; import de.anomic.server.serverCore; import de.anomic.server.serverDate; import de.anomic.server.serverFileUtils; @@ -717,10 +717,10 @@ public final class yacy { // the combined container will fit, read the container Iterator wordIdxEntries = wordIdxContainer.entries(); - indexURLEntry wordIdxEntry; + indexEntry iEntry; while (wordIdxEntries.hasNext()) { - wordIdxEntry = (indexURLEntry) wordIdxEntries.next(); - String urlHash = wordIdxEntry.urlHash(); + iEntry = (indexEntry) wordIdxEntries.next(); + String urlHash = 
iEntry.urlHash(); if ((currentUrlDB.exists(urlHash)) && (!minimizedUrlDB.exists(urlHash))) try { plasmaCrawlLURL.Entry urlEntry = currentUrlDB.getEntry(urlHash, null); urlCounter++;