From c7a614830af9d6cca94a0e7ec9de0b2315602c4a Mon Sep 17 00:00:00 2001
From: orbiter
Date: Fri, 15 Jun 2007 17:45:49 +0000
Subject: [PATCH] several bugfixes

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3899 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 htroot/AccessTracker_p.java                     | 12 +++++++++---
 htroot/CrawlStartExpert_p.html                  |  2 +-
 htroot/CrawlStartExpert_p.java                  |  2 +-
 htroot/CrawlStartSimple_p.html                  |  2 +-
 htroot/IndexControl_p.java                      | 16 +++++++++++++---
 source/de/anomic/plasma/plasmaCrawlProfile.java |  4 ++--
 source/de/anomic/plasma/plasmaCrawlStacker.java |  4 ++--
 source/de/anomic/plasma/plasmaSwitchboard.java  |  7 +++++++
 source/de/anomic/server/serverMemory.java       | 10 ++++------
 9 files changed, 40 insertions(+), 19 deletions(-)

diff --git a/htroot/AccessTracker_p.java b/htroot/AccessTracker_p.java
index 7a6874269..169c2f0af 100644
--- a/htroot/AccessTracker_p.java
+++ b/htroot/AccessTracker_p.java
@@ -72,7 +72,7 @@ public class AccessTracker_p {
                 prop.put("page_list_" + entCount + "_countHour", access.tailMap(new Long(System.currentTimeMillis() - 1000 * 60 * 60)).size());
                 entCount++;
             }
-        } catch (ConcurrentModificationException e) {} // we dont want to serialize this
+        } catch (ConcurrentModificationException e) {} // we don't want to synchronize this
         prop.put("page_list", entCount);
         prop.put("page_num", entCount);
     }
@@ -84,6 +84,7 @@ public class AccessTracker_p {
             if (host.length() > 0) {
                 access = switchboard.accessTrack(host);
                 if (access != null) {
+                    try {
                     Iterator ii = access.entrySet().iterator();
                     while (ii.hasNext()) {
                         entry = (Map.Entry) ii.next();
@@ -91,13 +92,15 @@ public class AccessTracker_p {
                         prop.put("page_list_" + entCount + "_date", yacyCore.universalDateShortString(new Date(((Long) entry.getKey()).longValue())));
                         prop.put("page_list_" + entCount + "_path", (String) entry.getValue());
                         entCount++;
-                    }
+                    }} catch (ConcurrentModificationException e) {} // we don't want to synchronize this
+                }
             } else {
                 Iterator i = switchboard.accessHosts();
                 while ((entCount < maxCount) && (i.hasNext())) {
                     host = (String) i.next();
                     access = switchboard.accessTrack(host);
+                    try {
                     Iterator ii = access.entrySet().iterator();
                     while (ii.hasNext()) {
                         entry = (Map.Entry) ii.next();
@@ -105,7 +108,8 @@ public class AccessTracker_p {
                         prop.put("page_list_" + entCount + "_date", yacyCore.universalDateShortString(new Date(((Long) entry.getKey()).longValue())));
                         prop.put("page_list_" + entCount + "_path", (String) entry.getValue());
                         entCount++;
-                    }
+                    }} catch (ConcurrentModificationException e) {} // we don't want to synchronize this
+                }
             }
             prop.put("page_list", entCount);
@@ -149,6 +153,7 @@ public class AccessTracker_p {
             TreeSet handles;
             int entCount = 0;
             Map.Entry entry;
+            try {
             while ((entCount < maxCount) && (i.hasNext())) {
                 entry = (Map.Entry) i.next();
                 host = (String) entry.getKey();
@@ -177,6 +182,7 @@ public class AccessTracker_p {
                 // next
                 entCount++;
             }
+            } catch (ConcurrentModificationException e) {} // we don't want to synchronize this
             prop.put("page_list", entCount);
             prop.put("page_num", entCount);
             prop.put("page_total", (page == 3) ? switchboard.localSearches.size() : switchboard.remoteSearches.size());
diff --git a/htroot/CrawlStartExpert_p.html b/htroot/CrawlStartExpert_p.html
index 60e0fa073..f5ca9628b 100644
--- a/htroot/CrawlStartExpert_p.html
+++ b/htroot/CrawlStartExpert_p.html
@@ -16,7 +16,7 @@
 You can define URLs as start points for Web page crawling and start crawling here.
"Crawling" means that YaCy will download the given website, extract all links in it and then download the content behind these links. This is repeated as long as specified under "Crawling Depth".

-
+
diff --git a/htroot/CrawlStartExpert_p.java b/htroot/CrawlStartExpert_p.java
index dc825b869..cfa5cb92b 100644
--- a/htroot/CrawlStartExpert_p.java
+++ b/htroot/CrawlStartExpert_p.java
@@ -48,7 +48,7 @@ public class CrawlStartExpert_p {
         prop.put("crawlingIfOlderUnitHourCheck", 0);
         prop.put("crawlingIfOlderUnitMinuteCheck", 0);
         if ((crawlingIfOlder == -1) || (crawlingIfOlder == Integer.MAX_VALUE)) {
-            prop.put("crawlingIfOlderNumber", -1);
+            prop.put("crawlingIfOlderNumber", 1);
             prop.put("crawlingIfOlderUnitYearCheck", 1);
         } else if (crawlingIfOlder >= 60*24*365) {
             prop.put("crawlingIfOlderNumber", Math.round((float)crawlingIfOlder / (float)(60*24*365)));
diff --git a/htroot/CrawlStartSimple_p.html b/htroot/CrawlStartSimple_p.html
index 57b85eb1e..a86c0a4de 100644
--- a/htroot/CrawlStartSimple_p.html
+++ b/htroot/CrawlStartSimple_p.html
@@ -19,7 +19,7 @@
 This is repeated as long as specified under "Crawling Depth".

-
+
diff --git a/htroot/IndexControl_p.java b/htroot/IndexControl_p.java
index 4aa95927a..e2e8ca31c 100644
--- a/htroot/IndexControl_p.java
+++ b/htroot/IndexControl_p.java
@@ -137,7 +137,7 @@ public class IndexControl_p {
             }
             if (delurl || delurlref) {
                 for (int i = 0; i < urlx.length; i++) {
-                    switchboard.wordIndex.loadedURL.remove(urlx[i]);
+                    switchboard.urlRemove(urlx[i]);
                 }
             }
             switchboard.wordIndex.deleteContainer(keyhash);
@@ -157,7 +157,7 @@ public class IndexControl_p {
             }
             if (delurl || delurlref) {
                 for (int i = 0; i < urlx.length; i++) {
-                    switchboard.wordIndex.loadedURL.remove(urlx[i]);
+                    switchboard.urlRemove(urlx[i]);
                 }
             }
             Set urlHashes = new HashSet();
@@ -190,7 +190,17 @@ public class IndexControl_p {
             } else {
                 urlstring = entry.comp().url().toNormalform();
                 prop.put("urlstring", "");
-                switchboard.wordIndex.loadedURL.remove(urlhash);
+                switchboard.urlRemove(urlhash);
+                prop.put("result", "Removed URL " + urlstring);
+            }
+        }
+
+        if (post.containsKey("urldelete")) {
+            urlhash = plasmaURL.urlHash(urlstring);
+            if ((urlhash == null) || (urlstring == null)) {
+                prop.put("result", "No input given; nothing deleted.");
+            } else {
+                switchboard.urlRemove(urlhash);
                 prop.put("result", "Removed URL " + urlstring);
             }
         }
diff --git a/source/de/anomic/plasma/plasmaCrawlProfile.java b/source/de/anomic/plasma/plasmaCrawlProfile.java
index a5b1f08ed..089b22d00 100644
--- a/source/de/anomic/plasma/plasmaCrawlProfile.java
+++ b/source/de/anomic/plasma/plasmaCrawlProfile.java
@@ -346,8 +346,8 @@ public class plasmaCrawlProfile {
             // an entry must have to be re-crawled
             String r = (String) mem.get(RECRAWL_IF_OLDER);
             if (r == null) return Long.MAX_VALUE; else try {
-                long l = Long.parseLong(r) * ((long) 60000);
-                if (l < 0) return Long.MAX_VALUE; else return l;
+                long l = Long.parseLong(r) * 60000L;
+                return (l < 0) ? Long.MAX_VALUE : l;
             } catch (NumberFormatException e) {
                 return 0;
             }
diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java
index 79aaf4c80..8122f8fad 100644
--- a/source/de/anomic/plasma/plasmaCrawlStacker.java
+++ b/source/de/anomic/plasma/plasmaCrawlStacker.java
@@ -379,8 +379,8 @@ public final class plasmaCrawlStacker {
         String dbocc = this.sb.urlExists(nexturlhash);
         indexURLEntry oldEntry = null;
         oldEntry = this.sb.wordIndex.loadedURL.load(nexturlhash, null);
-        boolean recrawl = (oldEntry != null) && (((System.currentTimeMillis() - oldEntry.loaddate().getTime()) / 60000) > profile.recrawlIfOlder());
-        // FIXME: this does not work correctly?
+        boolean recrawl = (oldEntry != null) && ((System.currentTimeMillis() - oldEntry.loaddate().getTime()) > profile.recrawlIfOlder());
+        // apply recrawl rule
         if ((dbocc != null) && (!(recrawl))) {
             reason = plasmaCrawlEURL.DOUBLE_REGISTERED + dbocc + ")";
             //this.log.logFine("URL '" + nexturlString + "' is double registered in '" + dbocc + "'. " + "Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms");
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index ab4e222de..38db5cdf8 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -1423,6 +1423,13 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
         return null;
     }

+    public void urlRemove(String hash) {
+        wordIndex.loadedURL.remove(hash);
+        noticeURL.remove(hash);
+        delegatedURL.remove(hash);
+        errorURL.remove(hash);
+    }
+
     public URL getURL(String urlhash) throws IOException {
         if (urlhash.equals(plasmaURL.dummyHash)) return null;
         plasmaCrawlEntry ne = noticeURL.get(urlhash);
diff --git a/source/de/anomic/server/serverMemory.java b/source/de/anomic/server/serverMemory.java
index 9e5a27576..a813e65b8 100644
--- a/source/de/anomic/server/serverMemory.java
+++ b/source/de/anomic/server/serverMemory.java
@@ -119,17 +119,15 @@ public class serverMemory {
      * @return whether enough memory could be freed (or is free) or not
      */
     public static boolean request(final long size, final boolean force) {
-        long avail;
+        long avail = available();
+        if (avail >= size) return true;
         if (log.isFine()) {
             String t = new Throwable("Stack trace").getStackTrace()[1].toString();
-            avail = available();
             log.logFine(t + " requested " + (size >>> 10) + " KB, got " + (avail >>> 10) + " KB");
-        } else {
-            avail = available();
-        }
-        if (avail >= size) return true;
+        }
         final long avg = getAverageGCFree();
         if (force || avg == 0 || avg + avail >= size) {
+            // this is only called if we expect that an allocation of this many bytes would cause the JVM to run the GC anyway
             final long freed = runGC(!force);
             avail = available();
             log.logInfo("performed " + ((force) ? "explicit" : "necessary") + " GC, freed " + (freed >>> 10)
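
Note on the AccessTracker_p changes: the page iterates access-tracker maps that other threads update while the page renders. Rather than synchronizing every access for a mere statistics view, the patch wraps each listing loop in try { ... } catch (ConcurrentModificationException e) {} and accepts an occasionally truncated listing. A minimal sketch of that trade-off, with hypothetical generic types (the real code uses raw TreeMap iterators):

    import java.util.ConcurrentModificationException;
    import java.util.Iterator;
    import java.util.Map;

    // Best-effort listing of a map that concurrent writers may change mid-iteration.
    class AccessListSketch {
        static int render(Map<Long, String> access, int maxCount) {
            int entCount = 0;
            try {
                Iterator<Map.Entry<Long, String>> ii = access.entrySet().iterator();
                while (entCount < maxCount && ii.hasNext()) {
                    Map.Entry<Long, String> entry = ii.next();
                    System.out.println(entry.getKey() + " " + entry.getValue());
                    entCount++;
                }
            } catch (ConcurrentModificationException e) {
                // a writer changed the map mid-iteration; keep the partial output
            }
            return entCount;
        }
    }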
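
Note on the recrawl fix in plasmaCrawlStacker.java: plasmaCrawlProfile.recrawlIfOlder() returns a threshold in milliseconds (the profile stores minutes, multiplied by 60000L), but the stacker previously divided the entry's age by 60000 before comparing, so it compared minutes against milliseconds and re-crawling practically never triggered. The patch compares milliseconds on both sides. A minimal, self-contained sketch of the corrected decision; the method names mirror the patch, but the class is hypothetical, not YaCy code:

    public class RecrawlRule {

        // Profile setting: minutes as a string, converted to a millisecond
        // threshold; Long.MAX_VALUE means "never re-crawl", 0 means "always".
        static long recrawlIfOlder(String minutes) {
            if (minutes == null) return Long.MAX_VALUE;
            try {
                long l = Long.parseLong(minutes) * 60000L; // minutes -> milliseconds
                return (l < 0) ? Long.MAX_VALUE : l;       // negative or overflowed: never
            } catch (NumberFormatException e) {
                return 0;                                  // unparseable: always re-crawl
            }
        }

        // Corrected check: age and threshold are both milliseconds. The old
        // code divided the age by 60000 here, comparing minutes to milliseconds.
        static boolean shouldRecrawl(long loadDateMillis, String profileMinutes) {
            long ageMillis = System.currentTimeMillis() - loadDateMillis;
            return ageMillis > recrawlIfOlder(profileMinutes);
        }
    }

For example, with a profile value of "60" (one hour), an entry loaded 90 minutes ago yields 5400000 > 3600000 and is re-crawled; under the old comparison, 90 > 3600000 was false.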
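
Note on the new plasmaSwitchboard.urlRemove(): IndexControl_p previously deleted a URL hash only from wordIndex.loadedURL, which could leave stale entries for the same hash in the notice, delegated, and error queues. The helper gives every caller one entry point that clears all four stores. A sketch of the pattern; the UrlStore interface and class names are invented for illustration, only the four store names come from the patch:

    interface UrlStore {
        void remove(String hash); // delete the entry for this URL hash, if present
    }

    class SwitchboardSketch {
        private final UrlStore[] stores; // loadedURL, noticeURL, delegatedURL, errorURL

        SwitchboardSketch(UrlStore loadedURL, UrlStore noticeURL,
                          UrlStore delegatedURL, UrlStore errorURL) {
            this.stores = new UrlStore[] { loadedURL, noticeURL, delegatedURL, errorURL };
        }

        // Single entry point: a caller such as IndexControl_p cannot forget
        // one of the queues, which is the inconsistency the patch fixes.
        public void urlRemove(String hash) {
            for (UrlStore s : stores) s.remove(hash);
        }
    }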
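
Note on serverMemory.request(): the old code computed available() on both branches and, with fine logging enabled, built a Throwable for the log line even when plenty of memory was free. The patch checks availability first and returns early, so the comparatively expensive stack-trace capture only happens on the slow path. A condensed sketch of the pattern; the memory and logging calls here are stand-ins, not the YaCy API:

    // Hypothetical stand-alone version of the fast-path/slow-path split.
    class MemoryRequestSketch {
        static boolean request(long size, boolean force) {
            long avail = Runtime.getRuntime().freeMemory(); // stand-in for available()
            if (avail >= size) return true; // fast path: no Throwable, no logging
            // slow path only: capturing a stack trace element is relatively costly
            String caller = new Throwable("Stack trace").getStackTrace()[1].toString();
            System.err.println(caller + " requested " + (size >>> 10) + " KB, got " + (avail >>> 10) + " KB");
            // a real implementation would now run a GC and re-check, as in the patch
            return false;
        }
    }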