changes towards better join-search

- added generation of a compressed index within remote peers during global search
- added selection of specific urls within remote peers during secondary global search


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2539 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 19 years ago
parent 4a494464af
commit 74d1dea30b
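
For orientation: the protocol change below adds a "urls" request parameter to the remote-search servlet (a flat string of preselected 12-character URL hashes) and two new response fields, indexcount.<wordhash> and indexabstract.<wordhash>. A minimal sketch of how a requesting peer might build the "urls" parameter; the class and method names here are hypothetical, only the 12-character hash length and the flat concatenation are taken from the diff:

import java.util.Iterator;
import java.util.Set;

// Hypothetical helper: concatenate preselected 12-character URL hashes into
// the flat "urls" parameter that the search servlet below validates with
// urls.length() % 12 == 0.
public class UrlSelectionCodec {
    public static String encode(Set urlHashes) {
        StringBuffer sb = new StringBuffer(urlHashes.size() * 12);
        Iterator i = urlHashes.iterator();
        while (i.hasNext()) sb.append((String) i.next());
        return sb.toString();
    }
}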

@@ -149,7 +149,7 @@ public class IndexControl_p {
         if (delurl || delurlref) {
             // generate an urlx array
             indexContainer index = null;
-            index = switchboard.wordIndex.getContainer(keyhash, true, -1);
+            index = switchboard.wordIndex.getContainer(keyhash, null, true, -1);
             Iterator en = index.entries();
             int i = 0;
             urlx = new String[index.size()];
@@ -252,7 +252,7 @@ public class IndexControl_p {
         indexContainer index;
         String result;
         long starttime = System.currentTimeMillis();
-        index = switchboard.wordIndex.getContainer(keyhash, true, -1);
+        index = switchboard.wordIndex.getContainer(keyhash, null, true, -1);
         // built urlCache
         Iterator urlIter = index.entries();
         HashMap knownURLs = new HashMap();
@@ -424,7 +424,7 @@ public class IndexControl_p {
         // search for a word hash and generate a list of url links
         indexContainer index = null;
         try {
-            index = switchboard.wordIndex.getContainer(keyhash, true, -1);
+            index = switchboard.wordIndex.getContainer(keyhash, null, true, -1);
             final StringBuffer result = new StringBuffer(1024);
             if (index.size() == 0) {

@@ -8,4 +8,5 @@ references=#[references]#
 joincount=#[joincount]#
 count=#[linkcount]#
 #[links]#
 #[indexcount]#
+#[indexabstract]#

@@ -49,6 +49,7 @@
 import java.util.HashSet;
 import java.util.Iterator;
+import java.util.Map;
 import java.util.Set;

 import de.anomic.http.httpHeader;
@@ -81,7 +82,8 @@ public final class search {
     final String oseed = post.get("myseed", ""); // complete seed of the requesting peer
     // final String youare = post.get("youare", ""); // seed hash of the target peer, used for testing network stability
     final String key = post.get("key", ""); // transmission key for response
-    final String query = post.get("query", ""); // a string of word hashes
+    final String query = post.get("query", ""); // a string of word hashes that shall be searched and combined
+    final String urls = post.get("urls", ""); // a string of url hashes that are preselected for the search: no other may be returned
     // final String fwdep = post.get("fwdep", ""); // forward depth. if "0" then peer may NOT ask another peer for more results
     // final String fwden = post.get("fwden", ""); // forward deny, a list of seed hashes. They may NOT be target of forward hopping
     final long duetime= post.getLong("duetime", 3000);
@@ -117,34 +119,64 @@ public final class search {
         yacyCore.log.logInfo("INIT HASH SEARCH: " + squery.queryHashes + " - " + squery.wantedResults + " links");
         long timestamp1 = System.currentTimeMillis();

+        // prepare a search profile
         plasmaSearchRankingProfile rankingProfile = new plasmaSearchRankingProfile(new String[]{plasmaSearchRankingProfile.ORDER_YBR, plasmaSearchRankingProfile.ORDER_DATE, plasmaSearchRankingProfile.ORDER_QUALITY});
         plasmaSearchTimingProfile localTiming = new plasmaSearchTimingProfile(squery.maximumTime, squery.wantedResults);
         plasmaSearchTimingProfile remoteTiming = null;
-        plasmaSearchEvent theSearch = new plasmaSearchEvent(squery, rankingProfile, localTiming, remoteTiming, true, yacyCore.log, sb.wordIndex, sb.urlPool.loadedURL, sb.snippetCache);
-        Set containers = theSearch.localSearchContainers();
-        indexContainer localResults = theSearch.localSearchJoin(containers);
-        int joincount = localResults.size();
-        plasmaSearchResult acc = theSearch.order(localResults);
-        // set statistic details of search result
-        prop.put("joincount", Integer.toString(joincount));
+
+        // retrieve index containers from search request
+        plasmaSearchEvent theSearch = new plasmaSearchEvent(squery, rankingProfile, localTiming, remoteTiming, true, yacyCore.log, sb.wordIndex, sb.urlPool.loadedURL, sb.snippetCache);
+        Set urlselection = null;
+        if ((urls.length() > 0) && (urls.length() % 12 == 0)) {
+            for (int i = 0; i < (urls.length() / 12); i++) urlselection.add(urls.substring(i * 12, (i + 1 * 12)));
+        }
+        Map containers = theSearch.localSearchContainers(urlselection);
+
+        // set statistic details of search result and find best result index set
+        String maxcounthash = null;
         if (containers == null) {
             prop.put("indexcount", "");
         } else {
-            Iterator ci = containers.iterator();
+            Iterator ci = containers.entrySet().iterator();
             StringBuffer indexcount = new StringBuffer();
+            Map.Entry entry;
+            String wordhash;
+            int maxcount = -1;
             while (ci.hasNext()) {
-                indexContainer container = (indexContainer) ci.next();
+                entry = (Map.Entry) ci.next();
+                wordhash = (String) entry.getKey();
+                indexContainer container = (indexContainer) entry.getValue();
+                if (container.size() > maxcount) maxcounthash = wordhash;
                 indexcount.append("indexcount.").append(container.getWordHash()).append('=').append(Integer.toString(container.size())).append(serverCore.crlfString);
             }
             prop.put("indexcount", new String(indexcount));
         }
+
+        // generate compressed index for maxcounthash
+        // this is not needed if the search is restricted to specific urls, because it is a re-search
+        if ((maxcounthash == null) || (urls.length() != 0)) {
+            prop.put("indexabstract","");
+        } else {
+            String indexabstract = "indexabstract." + maxcounthash + "=" + ((indexContainer) containers.get(maxcounthash)).compressedIndex(1000);
+            yacyCore.log.logFine("DEBUG HASH SEARCH: " + indexabstract);
+            prop.put("indexabstract", indexabstract);
+        }
+
+        // join and order the result
+        indexContainer localResults = theSearch.localSearchJoin(containers.values());
+        int joincount = localResults.size();
+        prop.put("joincount", Integer.toString(joincount));
+        plasmaSearchResult acc = theSearch.order(localResults);
+
+        // prepare result
         if ((joincount == 0) || (acc == null)) {
+            // no results
             prop.put("links", "");
             prop.put("linkcount", "0");
             prop.put("references", "");
         } else {
             // result is a List of urlEntry elements
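
As committed, the "urls" decoding loop above has two problems: urlselection is still null when add() is called, and (i + 1 * 12) evaluates to i + 12 by operator precedence, so every chunk after the first gets the wrong end index. A sketch of the presumably intended decoding (java.util.HashSet is already imported in this file):

// Presumably intended version of the loop above: allocate the set first,
// then cut the flat string into 12-character URL hashes.
Set urlselection = null;
if ((urls.length() > 0) && (urls.length() % 12 == 0)) {
    urlselection = new HashSet();
    for (int i = 0; i < (urls.length() / 12); i++) {
        urlselection.add(urls.substring(i * 12, (i + 1) * 12));
    }
}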

@@ -36,7 +36,7 @@ public abstract class indexAbstractRI implements indexRI {
     }

     public long getUpdateTime(String wordHash) {
-        indexContainer entries = getContainer(wordHash, false, -1);
+        indexContainer entries = getContainer(wordHash, null, false, -1);
         if (entries == null) return 0;
         return entries.updated();
     }

@@ -108,10 +108,11 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
     }

-    public indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxtime) {
+    public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime) {
         try {
             kelondroRowSet collection = collectionIndex.get(wordHash.getBytes(), deleteIfEmpty);
-            if (collection == null) return null;
+            collection.select(urlselection);
+            if ((collection == null) || (collection.size() == 0)) return null;
             return new indexRowSetContainer(wordHash, collection);
         } catch (IOException e) {
             return null;
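
Note that the new body filters before the null check: if the word hash is unknown, collectionIndex.get returns null and collection.select(urlselection) throws a NullPointerException before (collection == null) is ever tested. A sketch of the presumably intended ordering, using only identifiers from the hunk above:

// Presumably intended ordering: null check first, then filter, then drop
// empty results.
kelondroRowSet collection = collectionIndex.get(wordHash.getBytes(), deleteIfEmpty);
if (collection == null) return null;
collection.select(urlselection);
if (collection.size() == 0) return null;
return new indexRowSetContainer(wordHash, collection);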

@@ -32,6 +32,7 @@
 import java.util.Set;

 import de.anomic.kelondro.kelondroOrder;
+import de.anomic.server.serverByteBuffer;

 public interface indexContainer {
@@ -43,7 +44,9 @@ public interface indexContainer {
     public void setWordHash(String newWordHash);
     public String getWordHash();

+    public serverByteBuffer compressedIndex(long maxtime);
+    public void select(Set urlselection);
     public void setOrdering(kelondroOrder newOrder, int newColumn);
     public kelondroOrder order();
     public int orderColumn();

@@ -386,8 +386,14 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
         return (((long) intTime) * (long) 1000) + initTime;
     }

-    public indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxtime_dummy) {
-        return (indexContainer) wCache.get(wordHash);
+    public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime_dummy) {
+        if (urlselection == null) {
+            return (indexContainer) wCache.get(wordHash);
+        } else {
+            indexContainer ic = ((indexContainer) wCache.get(wordHash)).topLevelClone();
+            ic.select(urlselection);
+            return ic;
+        }
     }

     public indexContainer deleteContainer(String wordHash) {

@@ -53,7 +53,7 @@ public interface indexRI {
     public long getUpdateTime(String wordHash);

-    public indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxtime);
+    public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime);
     public indexContainer deleteContainer(String wordHash);
     public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete);

@@ -27,9 +27,11 @@
 package de.anomic.index;

 import java.lang.reflect.Method;
+import java.util.Collection;
 import java.util.ConcurrentModificationException;
 import java.util.Iterator;
 import java.util.Set;
+import java.util.Map;
 import java.util.TreeMap;

 import de.anomic.kelondro.kelondroBase64Order;
@@ -37,6 +39,7 @@ import de.anomic.kelondro.kelondroNaturalOrder;
 import de.anomic.kelondro.kelondroOrder;
 import de.anomic.kelondro.kelondroRow;
 import de.anomic.kelondro.kelondroRowSet;
+import de.anomic.server.serverByteBuffer;

 public class indexRowSetContainer extends kelondroRowSet implements indexContainer {
@@ -64,6 +67,43 @@ public class indexRowSetContainer extends kelondroRowSet implements indexContainer {
         return newContainer;
     }

+    public serverByteBuffer compressedIndex(long maxtime) {
+        // collect references according to domains
+        long timeout = (maxtime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxtime;
+        TreeMap doms = new TreeMap();
+        synchronized(this) {
+            Iterator i = entries();
+            indexEntry iEntry;
+            String dom, paths;
+            while (i.hasNext()) {
+                iEntry = (indexEntry) i.next();
+                dom = iEntry.urlHash().substring(6);
+                if ((paths = (String) doms.get(dom)) == null) {
+                    doms.put(dom, iEntry.urlHash().substring(0, 6));
+                } else {
+                    doms.put(dom, paths + iEntry.urlHash().substring(0, 6));
+                }
+                if (System.currentTimeMillis() > timeout) break;
+            }
+        }
+        // construct a result string
+        serverByteBuffer bb = new serverByteBuffer(this.size() * indexURLEntry.urlEntryRow.width(0) / 2);
+        bb.append('{');
+        Iterator i = doms.entrySet().iterator();
+        Map.Entry entry;
+        while (i.hasNext()) {
+            entry = (Map.Entry) i.next();
+            bb.append((String) entry.getKey());
+            bb.append(':');
+            bb.append((String) entry.getValue());
+            if (System.currentTimeMillis() > timeout) break;
+            if (i.hasNext()) bb.append(',');
+        }
+        bb.append('}');
+        bb.trim();
+        return bb;
+    }
+
     public void setWordHash(String newWordHash) {
         this.wordHash = newWordHash;
     }
@@ -94,15 +134,18 @@ public class indexRowSetContainer extends kelondroRowSet implements indexContainer {
     public int add(indexContainer c, long maxTime) {
         // returns the number of new elements
-        long startTime = System.currentTimeMillis();
+        long timeout = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
         if (c == null) return 0;
         int x = 0;
         synchronized (c) {
             Iterator i = c.entries();
-            while ((i.hasNext()) && ((maxTime < 0) || ((startTime + maxTime) > System.currentTimeMillis()))) {
+            while (i.hasNext()) {
                 try {
                     if (addi((indexEntry) i.next())) x++;
-                } catch (ConcurrentModificationException e) {}
+                } catch (ConcurrentModificationException e) {
+                    e.printStackTrace();
+                }
+                if (System.currentTimeMillis() > timeout) break;
             }
         }
         this.lastTimeWrote = java.lang.Math.max(this.lastTimeWrote, c.updated());
@@ -202,7 +245,7 @@ public class indexRowSetContainer extends kelondroRowSet implements indexContainer {
         return c;
     }

-    public static indexContainer joinContainer(Set containers, long time, int maxDistance) {
+    public static indexContainer joinContainer(Collection containers, long time, int maxDistance) {

         long stamp = System.currentTimeMillis();
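
The compressedIndex method added above groups references by domain: a 12-character URL hash is split into a 6-character path part (characters 0-5) and a 6-character domain part (characters 6-11), and the abstract is serialized as {dom:paths,dom:paths,...} with the path parts of one domain concatenated. A peer receiving such an abstract could expand it back into full URL hashes roughly as follows; this decoder is a sketch and not part of the commit:

import java.util.HashSet;
import java.util.Set;
import java.util.StringTokenizer;

// Sketch: expand a "{dom:paths,dom:paths}" abstract into full URL hashes.
// A full hash is the 6-char path part followed by the 6-char domain part.
public class IndexAbstractDecoder {
    public static Set decode(String abstr) {
        Set urlHashes = new HashSet();
        String body = abstr.substring(1, abstr.length() - 1); // strip '{' and '}'
        StringTokenizer entries = new StringTokenizer(body, ",");
        while (entries.hasMoreTokens()) {
            String entry = entries.nextToken();
            int colon = entry.indexOf(':');
            String dom = entry.substring(0, colon);
            String paths = entry.substring(colon + 1);
            for (int i = 0; i < paths.length() / 6; i++) {
                urlHashes.add(paths.substring(i * 6, (i + 1) * 6) + dom);
            }
        }
        return urlHashes;
    }
}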

@@ -25,6 +25,7 @@
 package de.anomic.kelondro;

 import java.util.Iterator;
+import java.util.Set;

 public class kelondroRowCollection {
@@ -293,6 +294,18 @@ public class kelondroRowCollection {
         }
     }

+    public void select(Set keys) {
+        // removes all entries but the ones given by urlselection
+        if (keys == null) return;
+        synchronized (this) {
+            Iterator i = rows();
+            kelondroRow.Entry row;
+            while (i.hasNext()) {
+                row = (kelondroRow.Entry) i.next();
+                if (!(keys.contains(row.getColString(0, null)))) i.remove();
+            }
+        }
+    }
+
     protected final void sort(kelondroOrder newOrder, int newColumn) {
         if ((this.sortOrder == null) ||
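
select() retains only the rows whose primary key (column 0, the URL hash in this usage) appears in the given set, and treats a null set as "no restriction". The same retain-only-selected-keys pattern on a plain java.util collection, for illustration only:

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

// Illustration of the pattern used by select(): iterate with an Iterator
// so that remove() is safe during traversal.
public class SelectDemo {
    public static void main(String[] args) {
        List rows = new ArrayList();
        rows.add("AAAAAAAAAAAA");
        rows.add("BBBBBBBBBBBB");
        Set keys = new HashSet();
        keys.add("AAAAAAAAAAAA");
        Iterator i = rows.iterator();
        while (i.hasNext()) {
            if (!keys.contains(i.next())) i.remove();
        }
        System.out.println(rows); // prints [AAAAAAAAAAAA]
    }
}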

@@ -42,9 +42,11 @@
 package de.anomic.plasma;

+import java.util.Collection;
 import java.util.Iterator;
-import java.util.Set;
+import java.util.Map;
 import java.util.HashSet;
+import java.util.Set;

 import de.anomic.kelondro.kelondroException;
 import de.anomic.server.logging.serverLog;
@@ -131,7 +133,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
         searchThreads = yacySearch.searchHashes(query.queryHashes, query.prefer, query.urlMask, query.maxDistance, urlStore, rcGlobal, fetchpeers, plasmaSwitchboard.urlBlacklist, snippetCache, profileGlobal, ranking);

         // meanwhile do a local search
-        indexContainer rcLocal = localSearchJoin(localSearchContainers());
+        indexContainer rcLocal = localSearchJoin(localSearchContainers(null).values());
         plasmaSearchResult localResult = orderLocal(rcLocal, timeout);

         // catch up global results:
@@ -161,7 +163,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
             lastEvent = this;
             return result;
         } else {
-            indexContainer rcLocal = localSearchJoin(localSearchContainers());
+            indexContainer rcLocal = localSearchJoin(localSearchContainers(null).values());
             plasmaSearchResult result = order(rcLocal);
             result.localContributions = rcLocal.size();
@@ -173,13 +175,14 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
         }
     }

-    public Set localSearchContainers() {
+    public Map localSearchContainers(Set urlselection) {
         // search for the set of hashes and return the set of containers containing the seach result
         // retrieve entities that belong to the hashes
         profileLocal.startTimer();
-        Set containers = wordIndex.getContainers(
+        Map containers = wordIndex.getContainers(
                 query.queryHashes,
+                urlselection,
                 true,
                 true,
                 profileLocal.getTargetTime(plasmaSearchTimingProfile.PROCESS_COLLECTION));
@@ -190,7 +193,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
         return containers;
     }

-    public indexContainer localSearchJoin(Set containers) {
+    public indexContainer localSearchJoin(Collection containers) {
         // join a search result and return the joincount (number of pages after join)
         // since this is a conjunction we return an empty entity if any word is not known

@@ -49,6 +49,7 @@ package de.anomic.plasma;
 import java.io.File;
 import java.io.IOException;
+import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.HashSet;
@@ -321,11 +322,11 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
         return condenser.RESULT_SIMI_WORDS;
     }

-    public indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) {
+    public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxTime) {
         long start = System.currentTimeMillis();

         // get from cache
-        indexContainer container = ramCache.getContainer(wordHash, true, -1);
+        indexContainer container = ramCache.getContainer(wordHash, urlselection, true, -1);

         // We must not use the container from cache to store everything we find,
         // as that container remains linked to in the cache and might be changed later
@@ -336,18 +337,18 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
         // get from collection index
         if (useCollectionIndex) {
             if (container == null) {
-                container = collections.getContainer(wordHash, true, (maxTime < 0) ? -1 : maxTime);
+                container = collections.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime);
             } else {
-                container.add(collections.getContainer(wordHash, true, (maxTime < 0) ? -1 : maxTime), -1);
+                container.add(collections.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime), -1);
             }
         }

         // get from assortments
         if (container == null) {
-            container = assortmentCluster.getContainer(wordHash, true, (maxTime < 0) ? -1 : maxTime);
+            container = assortmentCluster.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime);
         } else {
             // add containers from assortment cluster
-            container.add(assortmentCluster.getContainer(wordHash, true, (maxTime < 0) ? -1 : maxTime), -1);
+            container.add(assortmentCluster.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime), -1);
         }

         // get from backend
@@ -355,14 +356,14 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
             maxTime = maxTime - (System.currentTimeMillis() - start);
             if (maxTime < 0) maxTime = 100;
         }
-        container.add(backend.getContainer(wordHash, deleteIfEmpty, (maxTime < 0) ? -1 : maxTime), -1);
+        container.add(backend.getContainer(wordHash, urlselection, deleteIfEmpty, (maxTime < 0) ? -1 : maxTime), -1);
         return container;
     }

-    public Set getContainers(Set wordHashes, boolean deleteIfEmpty, boolean interruptIfEmpty, long maxTime) {
+    public Map getContainers(Set wordHashes, Set urlselection, boolean deleteIfEmpty, boolean interruptIfEmpty, long maxTime) {
         // retrieve entities that belong to the hashes
-        HashSet containers = new HashSet();
+        HashMap containers = new HashMap();
         String singleHash;
         indexContainer singleContainer;
         Iterator i = wordHashes.iterator();
@@ -378,12 +379,12 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
             singleHash = (String) i.next();

             // retrieve index
-            singleContainer = getContainer(singleHash, deleteIfEmpty, (maxTime < 0) ? -1 : remaining / (wordHashes.size() - containers.size()));
+            singleContainer = getContainer(singleHash, urlselection, deleteIfEmpty, (maxTime < 0) ? -1 : remaining / (wordHashes.size() - containers.size()));

             // check result
-            if (((singleContainer == null) || (singleContainer.size() == 0)) && (interruptIfEmpty)) return new HashSet();
-            containers.add(singleContainer);
+            if (((singleContainer == null) || (singleContainer.size() == 0)) && (interruptIfEmpty)) return new HashMap();
+            containers.put(singleHash, singleContainer);
         }
         return containers;
     }

@@ -295,17 +295,17 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI implements indexRI {
         return initialSize - urlHashes.size();
     }

-    public indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) {
+    public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxTime) {
         // collect all records from all the assortments and return them
         indexContainer buffer, record = new indexRowSetContainer(wordHash);
-        long limitTime = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
-        long remainingTime;
+        long timeout = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
         for (int i = 0; i < clusterCount; i++) {
             buffer = assortments[i].get(wordHash);
-            remainingTime = limitTime - System.currentTimeMillis();
-            if (0 > remainingTime) break;
-            if (buffer != null) record.add(buffer, remainingTime);
+            if (buffer != null) {
+                buffer.select(urlselection);
+                record.add(buffer, -1);
+            }
+            if (System.currentTimeMillis() > timeout) break;
         }
         return record;
     }

@@ -99,7 +99,7 @@ public class plasmaWordIndexFileCluster extends indexAbstractRI implements indexRI {
         }

         public Object next() {
-            return getContainer((String) wordIterator.next(), true, 100);
+            return getContainer((String) wordIterator.next(), null, true, 100);
         }

         public void remove() {
@@ -225,7 +225,7 @@ public class plasmaWordIndexFileCluster extends indexAbstractRI implements indexRI {
         }
     }

-    public synchronized indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) {
+    public synchronized indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxTime) {
         long start = System.currentTimeMillis();
         if ((maxTime < 0) || (maxTime > 60000)) maxTime=60000; // maximum is one minute
         if (plasmaWordIndexFile.wordHash2path(databaseRoot, wordHash).exists()) {
@@ -235,7 +235,7 @@ public class plasmaWordIndexFileCluster extends indexAbstractRI implements indexRI {
             Iterator i = entity.elements(true);
             while ((i.hasNext()) && (System.currentTimeMillis() < (start + maxTime))) {
                 entry = (indexEntry) i.next();
-                container.add(entry);
+                if ((urlselection == null) || (urlselection.contains(entry.urlHash()))) container.add(entry);
             }
             return container;
         } else {
