diff --git a/htroot/CacheAdmin_p.java b/htroot/CacheAdmin_p.java
index e1af6c413..cbaf95ac4 100644
--- a/htroot/CacheAdmin_p.java
+++ b/htroot/CacheAdmin_p.java
@@ -97,7 +97,7 @@ public class CacheAdmin_p {
// generate sorted dir/file listing
final String[] list = dir.list();
- final StringBuffer tree = new StringBuffer((list.length + 2) * 256);
+ final StringBuffer tree = new StringBuffer((list == null) ? 70 : (list.length + 2) * 256);
tree.append("Directory of
").append((pathString.length() == 0) ? "domain list" : linkPathString(pathString)).append("
");
if (list == null) {
tree.append("[empty]");
diff --git a/htroot/IndexControl_p.java b/htroot/IndexControl_p.java
index 2fb4389f4..79126b664 100644
--- a/htroot/IndexControl_p.java
+++ b/htroot/IndexControl_p.java
@@ -47,6 +47,7 @@
// if the shell's current path is HTROOT
import java.io.IOException;
+import java.net.MalformedURLException;
import java.net.URL;
import java.util.Enumeration;
import java.util.HashSet;
@@ -91,10 +92,10 @@ public class IndexControl_p {
}
// default values
- String keystring = ((String) post.get("keystring")).trim();
- String keyhash = ((String) post.get("keyhash")).trim();
- String urlstring = ((String) post.get("urlstring")).trim();
- String urlhash = ((String) post.get("urlhash")).trim();
+ String keystring = ((String) post.get("keystring", "")).trim();
+ String keyhash = ((String) post.get("keyhash", "")).trim();
+ String urlstring = ((String) post.get("urlstring", "")).trim();
+ String urlhash = ((String) post.get("urlhash", "")).trim();
if (!urlstring.startsWith("http://") &&
!urlstring.startsWith("https://")) { urlstring = "http://" + urlstring; }
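The pattern in this hunk: `post.get("keystring")` returns null when the form field is absent, so the immediate `.trim()` threw a NullPointerException. The two-argument `get(key, dflt)` used by the patch is YaCy's `serverObjects` API; a self-contained sketch of the same idea over a plain `Map` (names are illustrative):

```java
import java.util.HashMap;
import java.util.Map;

public class DefaultingGetSketch {
    // A missing form parameter yields null from a plain Map, and null.trim()
    // throws; supplying "" as the fallback keeps the trim() call safe.
    static String get(final Map<String, String> post, final String key, final String dflt) {
        final String value = post.get(key);
        return (value == null) ? dflt : value;
    }

    public static void main(final String[] args) {
        final Map<String, String> post = new HashMap<>();
        final String keystring = get(post, "keystring", "").trim(); // "" instead of an NPE
        System.out.println("keystring=[" + keystring + "]");
    }
}
```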
@@ -166,9 +167,7 @@ public class IndexControl_p {
}
}
if (delurlref) {
- for (int i = 0; i < urlx.length; i++) try {
- switchboard.removeAllUrlReferences(urlx[i], true);
- } catch (IOException e) {}
+ for (int i = 0; i < urlx.length; i++) switchboard.removeAllUrlReferences(urlx[i], true);
}
if (delurl || delurlref) {
for (int i = 0; i < urlx.length; i++) {
@@ -188,9 +187,7 @@ public class IndexControl_p {
// delete selected URLs
if (post.containsKey("keyhashdelete")) {
if (delurlref) {
- for (int i = 0; i < urlx.length; i++) try {
- switchboard.removeAllUrlReferences(urlx[i], true);
- } catch (IOException e) {}
+ for (int i = 0; i < urlx.length; i++) switchboard.removeAllUrlReferences(urlx[i], true);
}
if (delurl || delurlref) {
for (int i = 0; i < urlx.length; i++) {
@@ -211,12 +208,12 @@ public class IndexControl_p {
}
if (post.containsKey("urlhashdeleteall")) {
- try {
+ //try {
int i = switchboard.removeAllUrlReferences(urlhash, true);
prop.put("result", "Deleted URL and " + i + " references from " + i + " word indexes.");
- } catch (IOException e) {
- prop.put("result", "Deleted nothing because the url-hash could not be resolved");
- }
+ //} catch (IOException e) {
+ // prop.put("result", "Deleted nothing because the url-hash could not be resolved");
+ //}
}
if (post.containsKey("urlhashdelete")) {
@@ -311,11 +308,7 @@ public class IndexControl_p {
while (hashIt.hasNext() && i < 256) {
hash = (String) hashIt.next();
result.append("").append(hash).append(" ")
.append(((i + 1) % 8 == 0) ? "
" : "");
i++;
@@ -326,12 +319,15 @@ public class IndexControl_p {
if (post.containsKey("urlstringsearch")) {
try {
URL url = new URL(urlstring);
- urlhash = plasmaURL.urlHash(url);
- prop.put("urlhash", urlhash);
+ urlhash = plasmaURL.urlHash(url);
+ prop.put("urlhash", urlhash);
plasmaCrawlLURL.Entry entry = switchboard.urlPool.loadedURL.getEntry(urlhash);
prop.put("result", genUrlProfile(switchboard, entry, urlhash));
- } catch (Exception e) {
- prop.put("urlstring", "wrong url: " + urlstring);
+ } catch (MalformedURLException e) {
+ prop.put("urlstring", "bad url: " + urlstring);
+ prop.put("urlhash", "");
+ } catch (IOException e) {
+ prop.put("urlstring", "unknown url: " + urlstring);
prop.put("urlhash", "");
}
}
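This hunk replaces a catch-all `catch (Exception e)` with two specific handlers: `MalformedURLException` for a syntactically bad URL, and `IOException` for a well-formed URL whose hash is not in the loaded-URL table (now thrown by `getEntry`, see the plasmaCrawlLURL hunk below). A hedged sketch of the shape, with `getEntry` stubbed:

```java
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;

public class UrlSearchSketch {
    // Stub for switchboard.urlPool.loadedURL.getEntry(), which after this patch
    // throws IOException when the hash is missing from the loaded-URL table.
    static String getEntry(final String urlhash) throws IOException {
        throw new IOException("url hash " + urlhash + " not found in LURL");
    }

    static String search(final String urlstring) {
        try {
            final URL url = new URL(urlstring);                // may throw MalformedURLException
            return "found: " + getEntry(url.toExternalForm()); // may throw IOException
        } catch (final MalformedURLException e) {
            return "bad url: " + urlstring;                    // syntax problem
        } catch (final IOException e) {
            return "unknown url: " + urlstring;                // well-formed but not indexed
        }
    }
}
```

Catch order matters here: `MalformedURLException` extends `IOException`, so the narrower handler must come first.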
@@ -356,7 +352,10 @@ public class IndexControl_p {
int i = 0;
while (hashIt.hasNext() && i < 256) {
hash = (String) hashIt.next();
- result.append("").append(hash).append(" ").append(((i + 1) % 8 == 0) ? "<br>" : "");
+ result.append("").append(hash).append(" ")
+ .append(((i + 1) % 8 == 0) ? "<br>" : "");
i++;
}
prop.put("result", result.toString());
@@ -449,10 +448,10 @@ public class IndexControl_p {
final TreeMap tm = new TreeMap();
while (en.hasNext()) {
uh = ((plasmaWordIndexEntry)en.next()).getUrlHash();
- if (switchboard.urlPool.loadedURL.exists(uh)) {
+ try {
us = switchboard.urlPool.loadedURL.getEntry(uh).url().toString();
tm.put(us, uh);
- } else {
+ } catch (IOException e) {
tm.put("", uh);
}
}
diff --git a/source/de/anomic/plasma/plasmaCrawlLURL.java b/source/de/anomic/plasma/plasmaCrawlLURL.java
index b434ec579..d2a13a6bd 100644
--- a/source/de/anomic/plasma/plasmaCrawlLURL.java
+++ b/source/de/anomic/plasma/plasmaCrawlLURL.java
@@ -468,6 +468,7 @@ public final class plasmaCrawlLURL extends plasmaURL {
// if the url cannot be found, this returns null
this.urlHash = urlHash;
byte[][] entry = plasmaCrawlLURL.this.urlHashCache.get(urlHash.getBytes());
+ if (entry == null) throw new IOException("url hash " + urlHash + " not found in LURL");
try {
if (entry != null) {
this.url = new URL(new String(entry[1]).trim());
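The added throw makes the Entry constructor fail fast on a missing cache row instead of building a half-initialized entry; this is the change that lets every `getEntry` caller in this patch catch `IOException`. Note that the surviving `// if the url cannot be found, this returns null` comment and the `if (entry != null)` test are now dead weight, since a null row no longer gets past the throw. The pattern in isolation:

```java
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

public class FailFastRowSketch {
    static final Map<String, byte[][]> urlHashCache = new HashMap<>();

    // Fail fast: a missing row is an exceptional condition for the caller,
    // not a state the constructed object should have to represent.
    static byte[][] getRow(final String urlHash) throws IOException {
        final byte[][] entry = urlHashCache.get(urlHash);
        if (entry == null) throw new IOException("url hash " + urlHash + " not found in LURL");
        return entry; // every later field access can rely on a non-null row
    }
}
```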
diff --git a/source/de/anomic/plasma/plasmaDbImporter.java b/source/de/anomic/plasma/plasmaDbImporter.java
index 9545906cb..e6170b00a 100644
--- a/source/de/anomic/plasma/plasmaDbImporter.java
+++ b/source/de/anomic/plasma/plasmaDbImporter.java
@@ -1,6 +1,7 @@
package de.anomic.plasma;
import java.io.File;
+import java.io.IOException;
import java.util.Iterator;
import java.util.Vector;
@@ -247,17 +248,16 @@ public class plasmaDbImporter extends Thread {
entryCounter++;
importWordIdxEntry = (plasmaWordIndexEntry) importWordIdxEntries.next();
String urlHash = importWordIdxEntry.getUrlHash();
- if ((this.importUrlDB.exists(urlHash)) && (!this.homeUrlDB.exists(urlHash))) {
- urlCounter++;
-
+ if ((this.importUrlDB.exists(urlHash)) && (!this.homeUrlDB.exists(urlHash))) try {
// importing the new url
plasmaCrawlLURL.Entry urlEntry = this.importUrlDB.getEntry(urlHash);
+ urlCounter++;
this.homeUrlDB.newEntry(urlEntry);
if (urlCounter % 500 == 0) {
this.log.logFine(urlCounter + " URLs processed so far.");
}
- }
+ } catch (IOException e) {}
// adding word index entity to container
newContainer.add(importWordIdxEntry,System.currentTimeMillis());
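Two things change in this import loop: the body is wrapped in a per-entry try/catch so one unresolvable hash no longer aborts the whole import, and `urlCounter++` moves after `getEntry`, so the counter reports URLs actually imported rather than URLs attempted. A self-contained sketch (the `UrlDb` interface is a stand-in for the LURL databases):

```java
import java.io.IOException;
import java.util.Iterator;

public class ImportLoopSketch {
    interface UrlDb {
        boolean exists(String hash);
        Object getEntry(String hash) throws IOException;
        void newEntry(Object entry);
    }

    static int importUrls(final Iterator<String> hashes, final UrlDb importUrlDB, final UrlDb homeUrlDB) {
        int urlCounter = 0;
        while (hashes.hasNext()) {
            final String urlHash = hashes.next();
            if (importUrlDB.exists(urlHash) && !homeUrlDB.exists(urlHash)) try {
                final Object urlEntry = importUrlDB.getEntry(urlHash);
                urlCounter++; // count only after the entry resolved successfully
                homeUrlDB.newEntry(urlEntry);
            } catch (final IOException e) {
                // hash vanished between exists() and getEntry(); skip this entry
            }
        }
        return urlCounter;
    }
}
```

The same rewrite appears again in yacy.java at the end of this patch.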
diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java
index a01130e21..92f1b5b6d 100644
--- a/source/de/anomic/plasma/plasmaSearchEvent.java
+++ b/source/de/anomic/plasma/plasmaSearchEvent.java
@@ -217,27 +217,31 @@ public final class plasmaSearchEvent {
profileLocal.setYieldCount(plasmaSearchProfile.PROCESS_PRESORT, rcLocal.size());
profileLocal.startTimer();
- plasmaSearchResult acc = new plasmaSearchResult(query);
- if (searchResult == null) return acc; // strange case where searchResult is not proper: acc is then empty
+ plasmaSearchResult acc = new plasmaSearchResult(query);
+ if (searchResult == null) return acc; // strange case where searchResult is not proper: acc is then empty
if (searchResult.size() == 0) return acc; // case that we have nothing to do
// start url-fetch
- plasmaWordIndexEntry entry;
+ plasmaWordIndexEntry entry;
long postorderLimitTime = (postorderTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + postorderTime;
plasmaCrawlLURL.Entry page;
int minEntries = profileLocal.getTargetCount(plasmaSearchProfile.PROCESS_POSTSORT);
- try {
- while (preorder.hasNext()) {
+ try {
+ while (preorder.hasNext()) {
if ((acc.sizeFetched() >= minEntries) && (System.currentTimeMillis() >= postorderLimitTime)) break;
entry = preorder.next();
// find the url entry
- page = urlStore.getEntry(entry.getUrlHash());
- // add a result
- acc.addResult(entry, page);
- }
- } catch (kelondroException ee) {
- serverLog.logSevere("PLASMA", "Database Failure during plasmaSearch.order: " + ee.getMessage(), ee);
- }
+ try {
+ page = urlStore.getEntry(entry.getUrlHash());
+ // add a result
+ acc.addResult(entry, page);
+ } catch (IOException e) {
+ // result was not found
+ }
+ }
+ } catch (kelondroException ee) {
+ serverLog.logSevere("PLASMA", "Database Failure during plasmaSearch.order: " + ee.getMessage(), ee);
+ }
profileLocal.setYieldTime(plasmaSearchProfile.PROCESS_URLFETCH);
profileLocal.setYieldCount(plasmaSearchProfile.PROCESS_URLFETCH, acc.sizeFetched());
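Indentation churn aside, the substantive change in this hunk is a second, inner try/catch: an `IOException` from `urlStore.getEntry` now means one missing result that is silently skipped, while the outer `kelondroException` handler still aborts the loop on a real database failure. The shape of the logic, with stand-ins for the YaCy types:

```java
import java.io.IOException;
import java.util.Iterator;

public class ResultFetchSketch {
    static int fetch(final Iterator<String> hashes) {
        int fetched = 0;
        try {
            while (hashes.hasNext()) {
                final String hash = hashes.next();
                try {
                    lookup(hash); // stand-in for page = urlStore.getEntry(...)
                    fetched++;    // stand-in for acc.addResult(entry, page)
                } catch (final IOException e) {
                    // result was not found; skip this entry only
                }
            }
        } catch (final RuntimeException ee) { // stand-in for kelondroException
            System.err.println("database failure during search order: " + ee.getMessage());
        }
        return fetched;
    }

    static void lookup(final String hash) throws IOException {
        if (hash.isEmpty()) throw new IOException("not found");
    }
}
```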
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index 2b038ab45..2d117e851 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -1221,12 +1221,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// create index
String descr = document.getMainLongTitle();
String referrerHash;
- try {
- URL referrerURL = entry.referrerURL();
- referrerHash = plasmaURL.urlHash(referrerURL);
- } catch (IOException e) {
- referrerHash = plasmaURL.dummyHash;
- }
+ URL referrerURL = entry.referrerURL();
+ referrerHash = plasmaURL.urlHash(referrerURL);
+ if (referrerHash == null) referrerHash = plasmaURL.dummyHash;
+
String noIndexReason = "unspecified";
if (processCase == 4) {
// proxy-load
@@ -1825,26 +1823,32 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
// method for index deletion
- public int removeAllUrlReferences(URL url, boolean fetchOnline) throws IOException {
+ public int removeAllUrlReferences(URL url, boolean fetchOnline) {
return removeAllUrlReferences(plasmaURL.urlHash(url), fetchOnline);
}
- public int removeAllUrlReferences(String urlhash, boolean fetchOnline) throws IOException {
+ public int removeAllUrlReferences(String urlhash, boolean fetchOnline) {
// find all the words in a specific resource and remove the url reference from every word index
// finally, delete the url entry
// determine the url string
- plasmaCrawlLURL.Entry entry = urlPool.loadedURL.getEntry(urlhash);
- URL url = entry.url();
- if (url == null) return 0;
- // get set of words
- //Set words = plasmaCondenser.getWords(getText(getResource(url, fetchOnline)));
- Set words = plasmaCondenser.getWords(snippetCache.parseDocument(url, snippetCache.getResource(url, fetchOnline)).getText());
- // delete all word references
- int count = removeReferences(urlhash, words);
- // finally delete the url entry itself
- urlPool.loadedURL.remove(urlhash);
- return count;
+ try {
+ plasmaCrawlLURL.Entry entry = urlPool.loadedURL.getEntry(urlhash);
+ URL url = entry.url();
+ if (url == null)
+ return 0;
+ // get set of words
+ // Set words = plasmaCondenser.getWords(getText(getResource(url,
+ // fetchOnline)));
+ Set words = plasmaCondenser.getWords(snippetCache.parseDocument(url, snippetCache.getResource(url, fetchOnline)).getText());
+ // delete all word references
+ int count = removeReferences(urlhash, words);
+ // finally delete the url entry itself
+ urlPool.loadedURL.remove(urlhash);
+ return count;
+ } catch (IOException e) {
+ return 0;
+ }
}
public int removeReferences(URL url, Set words) {
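This signature change ripples through the whole patch: `removeAllUrlReferences` now absorbs the `IOException` itself and reports 0 removals, which is what allows the bare loops in IndexControl_p above to drop their try/catch wrappers. The trade-off is that callers can no longer tell "nothing referenced" from "hash unresolvable", which is why the old "Deleted nothing because the url-hash could not be resolved" message is now commented out. The pattern in miniature:

```java
import java.io.IOException;

public class SwallowToZeroSketch {
    // Convert the checked exception into a neutral "0 removed" result so that
    // callers may iterate over many hashes without per-call error handling.
    static int removeAllUrlReferences(final String urlhash) {
        try {
            return doRemove(urlhash);
        } catch (final IOException e) {
            return 0; // unresolvable hash: nothing was removed
        }
    }

    static int doRemove(final String urlhash) throws IOException {
        if (urlhash == null || urlhash.isEmpty()) throw new IOException("unresolvable hash");
        return 1; // placeholder for the real reference count
    }
}
```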
diff --git a/source/de/anomic/plasma/plasmaSwitchboardQueue.java b/source/de/anomic/plasma/plasmaSwitchboardQueue.java
index aaf5ca73f..84a9746e1 100644
--- a/source/de/anomic/plasma/plasmaSwitchboardQueue.java
+++ b/source/de/anomic/plasma/plasmaSwitchboardQueue.java
@@ -286,10 +286,15 @@ public class plasmaSwitchboardQueue {
return responseHeader;
}
- public URL referrerURL() throws IOException {
+ public URL referrerURL() {
if (referrerURL == null) {
if ((referrerHash == null) || (referrerHash.equals(plasmaURL.dummyHash))) return null;
- referrerURL = lurls.getEntry(referrerHash).url();
+ try {
+ referrerURL = lurls.getEntry(referrerHash).url();
+ } catch (IOException e) {
+ referrerURL = null;
+ return null;
+ }
}
return referrerURL;
}
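Same conversion as above, this time in a lazy getter: the lookup failure becomes a null return, matching the existing null result for dummy hashes. One subtlety worth noting: the failure is not cached, so a referrer that cannot be resolved will be looked up again on every call, while a successful resolution is cached in the field. A sketch:

```java
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;

public class LazyReferrerSketch {
    private URL referrerURL = null;
    private final String referrerHash;

    LazyReferrerSketch(final String referrerHash) { this.referrerHash = referrerHash; }

    public URL referrerURL() {
        if (referrerURL == null) {
            if (referrerHash == null || referrerHash.isEmpty()) return null;
            try {
                referrerURL = resolve(referrerHash); // stand-in for lurls.getEntry(hash).url()
            } catch (final IOException e) {
                return null; // unresolved; will be retried on the next call
            }
        }
        return referrerURL;
    }

    // Toy resolver standing in for the loaded-URL database lookup.
    static URL resolve(final String hash) throws IOException {
        try {
            return new URL("http://example.net/" + hash);
        } catch (final MalformedURLException e) {
            throw new IOException(e.getMessage());
        }
    }
}
```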
diff --git a/source/de/anomic/plasma/plasmaURL.java b/source/de/anomic/plasma/plasmaURL.java
index 0c8fdaf85..fe4d9bec3 100644
--- a/source/de/anomic/plasma/plasmaURL.java
+++ b/source/de/anomic/plasma/plasmaURL.java
@@ -473,6 +473,7 @@ public class plasmaURL {
}
public static final String urlHash(String url) {
+ if ((url == null) || (url.length() == 0)) return null;
try {
return urlHash(new URL(url));
} catch (MalformedURLException e) {
@@ -542,7 +543,7 @@ public class plasmaURL {
}
public Iterator urlHashes(String urlHash, boolean up) {
- return urlHashCache.rows(up, false, urlHash.getBytes());
+ return urlHashCache.keys(up, false, urlHash.getBytes());
}
}
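Two small fixes here: `urlHash(String)` now returns null for a null or empty URL instead of failing inside `new URL(...)`, and `urlHashes` iterates `keys(...)` rather than whole `rows(...)`, which looks like the intended behaviour for a hash iterator. The null return composes with the referrer handling in plasmaSwitchboard above, where a null hash is replaced by the dummy hash. A toy illustration (the hash function here is not YaCy's encoding):

```java
public class HashGuardSketch {
    public static final String dummyHash = "------------";

    static String urlHash(final String url) {
        if (url == null || url.length() == 0) return null; // the added guard
        return Integer.toHexString(url.hashCode());        // toy hash for illustration
    }

    public static void main(final String[] args) {
        String referrerHash = urlHash(null);
        if (referrerHash == null) referrerHash = dummyHash; // caller substitutes the dummy
        System.out.println(referrerHash);
    }
}
```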
diff --git a/source/de/anomic/plasma/plasmaURLPool.java b/source/de/anomic/plasma/plasmaURLPool.java
index ed3d96465..dbd5e9a31 100644
--- a/source/de/anomic/plasma/plasmaURLPool.java
+++ b/source/de/anomic/plasma/plasmaURLPool.java
@@ -75,8 +75,10 @@ public class plasmaURLPool {
if (urlhash.equals(plasmaURL.dummyHash)) return null;
plasmaCrawlNURL.Entry ne = noticeURL.getEntry(urlhash);
if (ne != null) return ne.url();
- plasmaCrawlLURL.Entry le = loadedURL.getEntry(urlhash);
- if (le != null) return le.url();
+ try {
+ plasmaCrawlLURL.Entry le = loadedURL.getEntry(urlhash);
+ if (le != null) return le.url();
+ } catch (IOException e) {}
plasmaCrawlEURL.Entry ee = errorURL.getEntry(urlhash);
if (ee != null) return ee.url();
return null;
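`getUrl` tries the notice, loaded, and error URL tables in order; with `getEntry` now throwing, a miss in the loaded table must be caught so the error table still gets its turn. The fall-through shape, with the three lookups stubbed:

```java
import java.io.IOException;

public class PoolLookupSketch {
    static String getUrl(final String urlhash) {
        String url = noticeLookup(urlhash);
        if (url != null) return url;
        try {
            url = loadedLookup(urlhash);
            if (url != null) return url;
        } catch (final IOException e) {
            // not in the loaded-URL table; fall through to the error table
        }
        return errorLookup(urlhash);
    }

    static String noticeLookup(final String hash) { return null; }
    static String loadedLookup(final String hash) throws IOException { throw new IOException("miss"); }
    static String errorLookup(final String hash) { return null; }
}
```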
diff --git a/source/de/anomic/plasma/plasmaWordIndexDistribution.java b/source/de/anomic/plasma/plasmaWordIndexDistribution.java
index b1ff11f12..f32409ebd 100644
--- a/source/de/anomic/plasma/plasmaWordIndexDistribution.java
+++ b/source/de/anomic/plasma/plasmaWordIndexDistribution.java
@@ -330,12 +330,16 @@ public final class plasmaWordIndexDistribution {
urlIter = indexEntity.elements(true);
unknownURLEntries.clear();
while (urlIter.hasNext()) {
- indexEntry = (plasmaWordIndexEntry) urlIter.next();
- lurl = this.urlPool.loadedURL.getEntry(indexEntry.getUrlHash());
- if ((lurl == null) || (lurl.url() == null)) {
+ indexEntry = (plasmaWordIndexEntry) urlIter.next();
+ try {
+ lurl = this.urlPool.loadedURL.getEntry(indexEntry.getUrlHash());
+ if ((lurl == null) || (lurl.url() == null)) {
+ unknownURLEntries.add(indexEntry.getUrlHash());
+ } else {
+ knownURLs.put(indexEntry.getUrlHash(), lurl);
+ }
+ } catch (IOException e) {
unknownURLEntries.add(indexEntry.getUrlHash());
- } else {
- knownURLs.put(indexEntry.getUrlHash(), lurl);
}
}
// now delete all entries that have no url entry
@@ -367,13 +371,17 @@ public final class plasmaWordIndexDistribution {
unknownURLEntries.clear();
while ((urlIter.hasNext()) && (count > 0)) {
indexEntry = (plasmaWordIndexEntry) urlIter.next();
- lurl = this.urlPool.loadedURL.getEntry(indexEntry.getUrlHash());
- if ((lurl == null) || (lurl.url()==null)) {
+ try {
+ lurl = this.urlPool.loadedURL.getEntry(indexEntry.getUrlHash());
+ if ((lurl == null) || (lurl.url()==null)) {
+ unknownURLEntries.add(indexEntry.getUrlHash());
+ } else {
+ knownURLs.put(indexEntry.getUrlHash(), lurl);
+ tmpEntity.addEntry(indexEntry);
+ count--;
+ }
+ } catch (IOException e) {
unknownURLEntries.add(indexEntry.getUrlHash());
- } else {
- knownURLs.put(indexEntry.getUrlHash(), lurl);
- tmpEntity.addEntry(indexEntry);
- count--;
}
}
// now delete all entries that have no url entry
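Both verification loops in this file apply the same rule: a lookup that throws is binned with a lookup that returns null or an entry without a URL, i.e. the hash lands in `unknownURLEntries` and its stale index entries are deleted afterwards. Condensed into one method:

```java
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

public class IndexVerifySketch {
    static Set<String> findUnknown(final Iterable<String> hashes) {
        final Set<String> unknown = new HashSet<>();
        for (final String hash : hashes) {
            try {
                if (lookup(hash) == null) unknown.add(hash); // no URL behind this hash
            } catch (final IOException e) {
                unknown.add(hash);                           // lookup failed: same verdict
            }
        }
        return unknown;
    }

    // Stand-in for urlPool.loadedURL.getEntry(hash).url().
    static String lookup(final String hash) throws IOException { return null; }
}
```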
diff --git a/source/de/anomic/server/serverDate.java b/source/de/anomic/server/serverDate.java
index db2ccb70f..5fc05866d 100644
--- a/source/de/anomic/server/serverDate.java
+++ b/source/de/anomic/server/serverDate.java
@@ -166,6 +166,7 @@ public final class serverDate {
if (utime < utimeyearsacc[years]) years--; // the correction
long remain = utime - utimeyearsacc[years];
months = (int) (remain / (29 * dayMillis)); // a guess
+ if (months > 11) months = 11;
if ((years & 3) == 0) {
if (remain < dimleapacc[months]) months--; // correction
remain = remain - dimleapacc[months];
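Why the clamp is needed: the guess divides the remaining milliseconds of the year by 29 days, and late in December the remainder approaches 365 (or 366) days, so the guess can reach 12, one past the last valid index of the 12-entry month-accumulation arrays. Clamping to 11 keeps the array access safe and lets the existing `months--` correction do its work. The arithmetic:

```java
public class MonthGuessSketch {
    static final long dayMillis = 24L * 60 * 60 * 1000;

    public static void main(final String[] args) {
        // With 364 full days of the year elapsed, the 29-day guess overshoots:
        long remain = 364 * dayMillis;
        int months = (int) (remain / (29 * dayMillis));
        System.out.println("guess: " + months);  // prints 12, an out-of-range month index
        if (months > 11) months = 11;            // the added clamp
        System.out.println("clamped: " + months);
    }
}
```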
diff --git a/source/yacy.java b/source/yacy.java
index 62147f23f..e4cc57b79 100644
--- a/source/yacy.java
+++ b/source/yacy.java
@@ -797,17 +797,16 @@ public final class yacy {
entryCounter++;
importWordIdxEntry = (plasmaWordIndexEntry) importWordIdxEntries.next();
String urlHash = importWordIdxEntry.getUrlHash();
- if ((importUrlDB.exists(urlHash)) && (!homeUrlDB.exists(urlHash))) {
- urlCounter++;
-
+ if ((importUrlDB.exists(urlHash)) && (!homeUrlDB.exists(urlHash))) try {
// importing the new url
plasmaCrawlLURL.Entry urlEntry = importUrlDB.getEntry(urlHash);
+ urlCounter++;
homeUrlDB.newEntry(urlEntry);
if (urlCounter % 500 == 0) {
log.logFine(urlCounter + " URLs processed so far.");
}
- }
+ } catch (IOException e) {}
// adding word index entity to container
newContainer.add(importWordIdxEntry,System.currentTimeMillis());
@@ -906,14 +905,14 @@ public final class yacy {
while (wordIdxEntries.hasNext()) {
wordIdxEntry = (plasmaWordIndexEntry) wordIdxEntries.next();
String urlHash = wordIdxEntry.getUrlHash();
- if ((currentUrlDB.exists(urlHash)) && (!minimizedUrlDB.exists(urlHash))) {
- urlCounter++;
+ if ((currentUrlDB.exists(urlHash)) && (!minimizedUrlDB.exists(urlHash))) try {
plasmaCrawlLURL.Entry urlEntry = currentUrlDB.getEntry(urlHash);
+ urlCounter++;
/*plasmaCrawlLURL.Entry newEntry =*/ minimizedUrlDB.newEntry(urlEntry);
if (urlCounter % 500 == 0) {
log.logInfo(urlCounter + " URLs found so far.");
}
- }
+ } catch (IOException e) {}
}
// we have read all elements, now we can close it
wordIdxEntity.close(); wordIdxEntity = null;