several bugfixes

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3899 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 18 years ago
parent 465145cb6f
commit c7a614830a

@@ -72,7 +72,7 @@ public class AccessTracker_p {
                 prop.put("page_list_" + entCount + "_countHour", access.tailMap(new Long(System.currentTimeMillis() - 1000 * 60 * 60)).size());
                 entCount++;
             }
-        } catch (ConcurrentModificationException e) {} // we dont want to serialize this
+        } catch (ConcurrentModificationException e) {} // we dont want to synchronize this
         prop.put("page_list", entCount);
         prop.put("page_num", entCount);
     }
@@ -84,6 +84,7 @@ public class AccessTracker_p {
         if (host.length() > 0) {
             access = switchboard.accessTrack(host);
             if (access != null) {
+                try {
                 Iterator ii = access.entrySet().iterator();
                 while (ii.hasNext()) {
                     entry = (Map.Entry) ii.next();
@@ -91,13 +92,15 @@ public class AccessTracker_p {
                     prop.put("page_list_" + entCount + "_date", yacyCore.universalDateShortString(new Date(((Long) entry.getKey()).longValue())));
                     prop.put("page_list_" + entCount + "_path", (String) entry.getValue());
                     entCount++;
-                }
+                }} catch (ConcurrentModificationException e) {} // we dont want to synchronize this
             }
         } else {
             Iterator i = switchboard.accessHosts();
             while ((entCount < maxCount) && (i.hasNext())) {
                 host = (String) i.next();
                 access = switchboard.accessTrack(host);
+                try {
                 Iterator ii = access.entrySet().iterator();
                 while (ii.hasNext()) {
                     entry = (Map.Entry) ii.next();
@@ -105,7 +108,8 @@ public class AccessTracker_p {
                     prop.put("page_list_" + entCount + "_date", yacyCore.universalDateShortString(new Date(((Long) entry.getKey()).longValue())));
                     prop.put("page_list_" + entCount + "_path", (String) entry.getValue());
                     entCount++;
-                }
+                }} catch (ConcurrentModificationException e) {} // we dont want to synchronize this
             }
         }
         prop.put("page_list", entCount);
@@ -149,6 +153,7 @@ public class AccessTracker_p {
         TreeSet handles;
         int entCount = 0;
         Map.Entry entry;
+        try {
         while ((entCount < maxCount) && (i.hasNext())) {
             entry = (Map.Entry) i.next();
             host = (String) entry.getKey();
@@ -177,6 +182,7 @@ public class AccessTracker_p {
             // next
             entCount++;
         }
+        } catch (ConcurrentModificationException e) {} // we dont want to synchronize this
         prop.put("page_list", entCount);
         prop.put("page_num", entCount);
         prop.put("page_total", (page == 3) ? switchboard.localSearches.size() : switchboard.remoteSearches.size());

@@ -16,7 +16,7 @@
   You can define URLs as start points for Web page crawling and start crawling here. "Crawling" means that YaCy will download the given website, extract all links in it and then download the content behind these links. This is repeated as long as specified under "Crawling Depth".
 </p>
-<form action="WatchCrawler_p.html" method="post" enctype="multipart/form-data">
+<form action="WatchCrawler_p.html" method="get" enctype="multipart/form-data">
 <table border="0" cellpadding="5" cellspacing="1">
   <tr class="TableHeader">
     <td><strong>Attribut</strong></td>

@@ -48,7 +48,7 @@ public class CrawlStartExpert_p {
         prop.put("crawlingIfOlderUnitHourCheck", 0);
         prop.put("crawlingIfOlderUnitMinuteCheck", 0);
         if ((crawlingIfOlder == -1) || (crawlingIfOlder == Integer.MAX_VALUE)) {
-            prop.put("crawlingIfOlderNumber", -1);
+            prop.put("crawlingIfOlderNumber", 1);
             prop.put("crawlingIfOlderUnitYearCheck", 1);
         } else if (crawlingIfOlder >= 60*24*365) {
             prop.put("crawlingIfOlderNumber", Math.round((float)crawlingIfOlder / (float)(60*24*365)));

@@ -19,7 +19,7 @@
   This is repeated as long as specified under "Crawling Depth".
 </p>
-<form action="WatchCrawler_p.html" method="post" enctype="multipart/form-data">
+<form action="WatchCrawler_p.html" method="get" enctype="multipart/form-data">
 <input type="hidden" name="crawlingFilter" value=".*" />
 <input type="hidden" name="crawlingIfOlderCheck" value="off" />
 <input type="hidden" name="crawlingDomFilterCheck" value="off" />

@@ -137,7 +137,7 @@ public class IndexControl_p {
             }
             if (delurl || delurlref) {
                 for (int i = 0; i < urlx.length; i++) {
-                    switchboard.wordIndex.loadedURL.remove(urlx[i]);
+                    switchboard.urlRemove(urlx[i]);
                 }
             }
             switchboard.wordIndex.deleteContainer(keyhash);
@@ -157,7 +157,7 @@ public class IndexControl_p {
             }
             if (delurl || delurlref) {
                 for (int i = 0; i < urlx.length; i++) {
-                    switchboard.wordIndex.loadedURL.remove(urlx[i]);
+                    switchboard.urlRemove(urlx[i]);
                 }
             }
             Set urlHashes = new HashSet();
@@ -190,7 +190,17 @@ public class IndexControl_p {
             } else {
                 urlstring = entry.comp().url().toNormalform();
                 prop.put("urlstring", "");
-                switchboard.wordIndex.loadedURL.remove(urlhash);
+                switchboard.urlRemove(urlhash);
+                prop.put("result", "Removed URL " + urlstring);
+            }
+        }
+
+        if (post.containsKey("urldelete")) {
+            urlhash = plasmaURL.urlHash(urlstring);
+            if ((urlhash == null) || (urlstring == null)) {
+                prop.put("result", "No input given; nothing deleted.");
+            } else {
+                switchboard.urlRemove(urlhash);
                 prop.put("result", "Removed URL " + urlstring);
             }
         }
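Besides routing the existing delete paths through the new central switchboard.urlRemove() helper (see the plasmaSwitchboard hunk below), this hunk adds a separate "urldelete" action: the URL arrives in string form, is hashed with plasmaURL.urlHash(urlstring), and is then removed through the same helper.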

@@ -346,8 +346,8 @@ public class plasmaCrawlProfile {
         // an antry must have to be re-crawled
         String r = (String) mem.get(RECRAWL_IF_OLDER);
         if (r == null) return Long.MAX_VALUE; else try {
-            long l = Long.parseLong(r) * ((long) 60000);
-            if (l < 0) return Long.MAX_VALUE; else return l;
+            long l = Long.parseLong(r) * 60000L;
+            return (l < 0) ? Long.MAX_VALUE : l;
         } catch (NumberFormatException e) {
             return 0;
         }
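This rewrite is behavior-neutral: ((long) 60000) and 60000L are the same constant, and the ternary replaces the if/else. The important point for the next hunk is the unit: the stored RECRAWL_IF_OLDER value is minutes, so recrawlIfOlder() returns the limit converted to milliseconds.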

@@ -379,8 +379,8 @@ public final class plasmaCrawlStacker {
         String dbocc = this.sb.urlExists(nexturlhash);
         indexURLEntry oldEntry = null;
         oldEntry = this.sb.wordIndex.loadedURL.load(nexturlhash, null);
-        boolean recrawl = (oldEntry != null) && (((System.currentTimeMillis() - oldEntry.loaddate().getTime()) / 60000) > profile.recrawlIfOlder());
-        // FIXME: this does not work correctly?
+        boolean recrawl = (oldEntry != null) && ((System.currentTimeMillis() - oldEntry.loaddate().getTime()) > profile.recrawlIfOlder());
+        // apply recrawl rule
         if ((dbocc != null) && (!(recrawl))) {
             reason = plasmaCrawlEURL.DOUBLE_REGISTERED + dbocc + ")";
             //this.log.logFine("URL '" + nexturlString + "' is double registered in '" + dbocc + "'. " + "Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms");

@@ -1423,6 +1423,13 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
         return null;
     }
 
+    public void urlRemove(String hash) {
+        wordIndex.loadedURL.remove(hash);
+        noticeURL.remove(hash);
+        delegatedURL.remove(hash);
+        errorURL.remove(hash);
+    }
+
     public URL getURL(String urlhash) throws IOException {
         if (urlhash.equals(plasmaURL.dummyHash)) return null;
         plasmaCrawlEntry ne = noticeURL.get(urlhash);
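The new urlRemove() helper is the common thread of the IndexControl_p changes above: deleting a URL hash previously purged only wordIndex.loadedURL, which could leave stale entries for the same hash sitting in the noticeURL, delegatedURL and errorURL queues. Funnelling every deletion through one method removes the hash from all four stores at once.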

@@ -119,17 +119,15 @@ public class serverMemory {
      * @return whether enough memory could be freed (or is free) or not
      */
     public static boolean request(final long size, final boolean force) {
-        long avail;
+        long avail = available();
+        if (avail >= size) return true;
         if (log.isFine()) {
             String t = new Throwable("Stack trace").getStackTrace()[1].toString();
-            avail = available();
             log.logFine(t + " requested " + (size >>> 10) + " KB, got " + (avail >>> 10) + " KB");
-        } else {
-            avail = available();
-        }
-        if (avail >= size) return true;
+        }
         final long avg = getAverageGCFree();
         if (force || avg == 0 || avg + avail >= size) {
+            // this is only called if we expect that an allocation of <size> bytes would cause the jvm to call the GC anyway
             final long freed = runGC(!force);
             avail = available();
             log.logInfo("performed " + ((force) ? "explicit" : "necessary") + " GC, freed " + (freed >>> 10)
