- fixed a null pointer exception bug

- switched off more write caches
- re-enabled index-abstracts search

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2885 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 19 years ago
parent 194d42b6a7
commit bd4f43cd66

@ -54,7 +54,7 @@ globalheader();
<p><b>Latest Release:</b>
The latest YaCy release version is 0.48<br>
Nightly builds from compiles out of SVN can be obtained from <a href="http://latest.yacy-forum.de/">http://latest.yacy-forum.de/</a>.<br>
Nightly builds from compiles out of SVN can be obtained from <a href="http://latest.yacy-forum.net">http://latest.yacy-forum.net</a>.<br>
<li>Generic release of YaCy (all platforms with J2SE 1.4.2: Linux, Mac OS X, Windows, Solaris):</li>

@ -56,7 +56,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
if (indexfile.exists()) {
// use existing index file
System.out.println("*** Using File index " + indexfile);
ki = new kelondroCache(kelondroTree.open(indexfile, buffersize / 2, preloadTime, treeIndexRow(rowdef.width(0)), objectOrder, 2, 80), buffersize / 2, true, true);
ki = new kelondroCache(kelondroTree.open(indexfile, buffersize / 2, preloadTime, treeIndexRow(rowdef.width(0)), objectOrder, 2, 80), buffersize / 2, true, false);
RAMIndex = false;
} else if ((preloadTime >= 0) && (stt > preloadTime)) {
// generate new index file
@ -127,21 +127,25 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
private kelondroIndex initializeTreeIndex(File indexfile, long buffersize, long preloadTime, kelondroOrder objectOrder) throws IOException {
kelondroIndex treeindex = new kelondroCache(new kelondroTree(indexfile, buffersize / 2, preloadTime, treeIndexRow(rowdef.width(0)), objectOrder, 2, 80), buffersize / 2, true, true);
kelondroIndex treeindex = new kelondroCache(new kelondroTree(indexfile, buffersize / 2, preloadTime, treeIndexRow(rowdef.width(0)), objectOrder, 2, 80), buffersize / 2, true, false);
Iterator content = super.col[0].contentNodes(-1);
kelondroRecords.Node node;
kelondroRow.Entry indexentry;
int i;
int i, c = 0, all = super.col[0].size();
long start = System.currentTimeMillis();
long last = start;
while (content.hasNext()) {
node = (kelondroRecords.Node) content.next();
i = node.handle().hashCode();
indexentry = treeindex.row().newEntry();
indexentry.setCol(0, node.getValueRow());
indexentry.setCol(1, i);
if ((i % 10000) == 0) {
if (System.currentTimeMillis() - last > 30000) {
System.out.println(".. generated " + c+ " entries, " + ((System.currentTimeMillis() - start) / c * (all - c) / 60000) + " minutes remaining");
last = System.currentTimeMillis();
return treeindex;

@ -44,6 +44,8 @@ public class URL {
public void parseURLString(String url) throws MalformedURLException {
// identify protocol
assert (url != null);
url = url.trim();
int p = url.indexOf(':');
if (p < 0) throw new MalformedURLException("protocol is not given in '" + url + "'");
this.protocol = url.substring(0, p).toLowerCase().trim();
@ -104,6 +106,7 @@ public class URL {
public URL(URL baseURL, String relPath) throws MalformedURLException {
if (baseURL == null) throw new MalformedURLException("base URL is null");
if (relPath == null) throw new MalformedURLException("relPath is null");
int p = relPath.indexOf(':');
String relprotocol = (p < 0) ? null : relPath.substring(0, p).toLowerCase();
if (relprotocol != null) {

@ -117,7 +117,7 @@ public class plasmaCrawlLURLOldEntry implements plasmaCrawlLURLEntry {
public plasmaCrawlLURLOldEntry(kelondroRow.Entry entry, indexEntry searchedWord) throws IOException {
try {
this.urlHash = entry.getColString(0, null);
this.url = new URL(entry.getColString(1, "UTF-8").trim());
this.url = new URL(entry.getColString(1, "UTF-8"));
this.descr = (entry.empty(2)) ? this.url.toString() : entry.getColString(2, "UTF-8").trim();
this.moddate = new Date(86400000 * entry.getColLong(3));
this.loaddate = new Date(86400000 * entry.getColLong(4));

@ -93,7 +93,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
this.snippetCache = snippetCache;
this.rcContainers = new indexContainer(null);
this.rcContainerFlushCount = 0;
this.rcAbstracts = new TreeMap();
this.rcAbstracts = (query.size() > 1) ? new TreeMap() : null; // generate abstracts only for combined searches
this.profileLocal = localTiming;
this.profileGlobal = remoteTiming;
this.postsort = postsort;
@ -134,7 +134,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
// do a global search
// the result of the fetch is then in the rcGlobal
log.logFine("STARTING " + fetchpeers + " THREADS TO CATCH EACH " + profileGlobal.getTargetCount(plasmaSearchTimingProfile.PROCESS_POSTSORT) + " URLs WITHIN " + (profileGlobal.duetime() / 1000) + " SECONDS");
long secondaryTimeout = System.currentTimeMillis() + profileGlobal.duetime() / 2;
long secondaryTimeout = System.currentTimeMillis() + profileGlobal.duetime() / 3 * 2;
long primaryTimeout = System.currentTimeMillis() + profileGlobal.duetime();
primarySearchThreads = yacySearch.primaryRemoteSearches(plasmaSearchQuery.hashSet2hashString(query.queryHashes), "",
query.prefer, query.urlMask, query.maxDistance, urlStore, rcContainers, rcAbstracts,
@ -144,7 +144,8 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
Map searchContainerMap = localSearchContainers(null);
// use the search containers to fill up rcAbstracts locally
if (searchContainerMap != null) {
if ((rcAbstracts != null) && (searchContainerMap != null)) {
Iterator i, ci = searchContainerMap.entrySet().iterator();
Map.Entry entry;
String wordhash;
@ -165,20 +166,19 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
// try to pre-fetch some LURLs if there is enough time
indexContainer rcLocal = localSearchJoin((searchContainerMap == null) ? null : searchContainerMap.values());
prefetchLocal(rcLocal, secondaryTimeout);
// evaluate index abstracts and start a secondary search
// this is temporary debugging code to learn that the index abstracts are fetched correctly
while (System.currentTimeMillis() < secondaryTimeout + 10000) {
while (System.currentTimeMillis() < secondaryTimeout) {
if (yacySearch.remainingWaiting(primarySearchThreads) == 0) break; // all threads have finished
try {Thread.sleep(100);} catch (InterruptedException e) {}
if (query.size() > 1) prepareSecondarySearch();
// evaluate index abstracts and start a secondary search
if (rcAbstracts != null) prepareSecondarySearch();
// catch up global results:
// wait until primary timeout passed
@ -187,6 +187,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
((secondarySearchThreads == null) || (yacySearch.remainingWaiting(secondarySearchThreads) == 0))) break; // all threads have finished
try {Thread.sleep(100);} catch (InterruptedException e) {}
int globalContributions = rcContainers.size();
// finished searching
@ -243,19 +244,22 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
String url, urls, peer, peers;
String mypeerhash = yacyCore.seedDB.mySeed.hash;
boolean mypeerinvolved = false;
int mypeercount;
while (i1.hasNext()) {
entry1 = (Map.Entry) i1.next();
url = (String) entry1.getKey();
peers = (String) entry1.getValue();
System.out.println("DEBUG-INDEXABSTRACT: url " + url + ": from peers " + peers);
mypeercount = 0;
for (int j = 0; j < peers.length(); j = j + 12) {
peer = peers.substring(j, j + 12);
if (peers.indexOf(peer) < j) continue; // avoid doubles that may appear in the abstractJoin
if ((peer.equals(mypeerhash)) && (mypeercount++ > 1)) continue;
//if (peers.indexOf(peer) < j) continue; // avoid doubles that may appear in the abstractJoin
urls = (String) secondarySearchURLs.get(peer);
urls = (urls == null) ? url : urls + url;
secondarySearchURLs.put(peer, urls);
if (peer.equals(mypeerhash)) mypeerinvolved = true;
if (mypeercount == 1) mypeerinvolved = true;
// compute words for secondary search and start the secondary searches
@ -269,8 +273,8 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
if (peer.equals(mypeerhash)) continue; // we dont need to ask ourself
urls = (String) entry1.getValue();
words = wordsFromPeer(peer, urls);
System.out.println("DEBUG-INDEXABSTRACT: peer " + peer + " has urls: " + urls);
System.out.println("DEBUG-INDEXABSTRACT: peer " + peer + " from words: " + words);
System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " has urls: " + urls);
System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " from words: " + words);
secondarySearchThreads[c++] = yacySearch.secondaryRemoteSearch(
words, urls, urlStore, rcContainers, peer, plasmaSwitchboard.urlBlacklist, snippetCache,
profileGlobal, ranking);

@ -433,7 +433,8 @@ public final class yacyClient {
obj.put("maxdist", maxDistance);
obj.put("rankingProfile", rankingProfile.toExternalString());
obj.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date()));
if (abstractCache != null) obj.put("abstracts", "auto");
//yacyCore.log.logDebug("yacyClient.search url=" + url);
final long timestamp = System.currentTimeMillis();
@ -553,26 +554,27 @@ public final class yacyClient {
for (int m = 0; m < words; m++) { containerCache.add(container[m], -1); }
// read index abstract
Iterator i = result.entrySet().iterator();
Map.Entry entry;
TreeMap singleAbstract;
String wordhash;
serverByteBuffer ci;
while (i.hasNext()) {
entry = (Map.Entry) i.next();
if (((String) entry.getKey()).startsWith("indexabstract.")) {
wordhash = ((String) entry.getKey()).substring(14);
synchronized (abstractCache) {
singleAbstract = (TreeMap) abstractCache.get(wordhash); // a mapping from url-hashes to a string of peer-hashes
if (singleAbstract == null) singleAbstract = new TreeMap();
ci = new serverByteBuffer(((String) entry.getValue()).getBytes());
//System.out.println("DEBUG-ABSTRACTFETCH: for word hash " + wordhash + " received " + ci.toString());
indexURL.decompressIndex(singleAbstract, ci, targetPeer.hash);
abstractCache.put(wordhash, singleAbstract);
if (abstractCache != null) {
Iterator i = result.entrySet().iterator();
Map.Entry entry;
TreeMap singleAbstract;
String wordhash;
serverByteBuffer ci;
while (i.hasNext()) {
entry = (Map.Entry) i.next();
if (((String) entry.getKey()).startsWith("indexabstract.")) {
wordhash = ((String) entry.getKey()).substring(14);
synchronized (abstractCache) {
singleAbstract = (TreeMap) abstractCache.get(wordhash); // a mapping from url-hashes to a string of peer-hashes
if (singleAbstract == null) singleAbstract = new TreeMap();
ci = new serverByteBuffer(((String) entry.getValue()).getBytes());
System.out.println("DEBUG-ABSTRACTFETCH: for word hash " + wordhash + " received " + ci.toString());
indexURL.decompressIndex(singleAbstract, ci, targetPeer.hash);
abstractCache.put(wordhash, singleAbstract);
// generate statistics
long searchtime;
try {

@ -237,6 +237,7 @@ public class yacySearch extends Thread {
if (searchThreads == null) return 0;
int alive = 0;
for (int i = 0; i < searchThreads.length; i++) {
if (searchThreads == null) break; // may occur
if (searchThreads[i].isAlive()) alive++;
return alive;
