From bead8a32aa48318aa6c47f15a55892c16426cdfd Mon Sep 17 00:00:00 2001 From: theli Date: Thu, 1 Sep 2005 07:52:46 +0000 Subject: [PATCH] *) IndexCreate_p.java: Crawler StartURLs will now also be added to the errorURL-DB if an error occurs on this url *) kelondroStack.java, plasmaSwitchboardQueue.java Adding method which returns a list of all entries in the queue. This list is used by IndexCreate_p.java instead of an iterator to display the indexing-list. Advantages: avoid concurrent modifications of the list while displaying it. Speedup because now we have to access only one sync function instead of multiple ones (one for each entry) *) IndexCreateIndexingQueue_p.java Using new list() function of plasmaSwitchboardQueue *) httpdFileHandler.java If a servlet returns the special value "LOCATION" the httpdFileHandler redirects the browser to the URL specified by the servlet. This can e.g. be used when a http get request is used instead of a post request, but a refresh should not be allowed. 
*) IndexCreateWWWLocalQueue_p.html Now it's possible to delete single entries of the local crawler queue git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@626 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/IndexCreateIndexingQueue_p.java | 31 ++++++++++--------- htroot/IndexCreateWWWLocalQueue_p.html | 2 ++ htroot/IndexCreateWWWLocalQueue_p.java | 6 ++++ htroot/IndexCreate_p.java | 3 ++ source/de/anomic/http/httpdFileHandler.java | 10 ++++-- source/de/anomic/kelondro/kelondroStack.java | 10 ++++++ .../anomic/plasma/plasmaSwitchboardQueue.java | 15 +++++++++ .../server/serverPortForwardingSch.java | 1 + 8 files changed, 62 insertions(+), 16 deletions(-) diff --git a/htroot/IndexCreateIndexingQueue_p.java b/htroot/IndexCreateIndexingQueue_p.java index 2c8dfc8e4..26cbeccfa 100644 --- a/htroot/IndexCreateIndexingQueue_p.java +++ b/htroot/IndexCreateIndexingQueue_p.java @@ -44,6 +44,7 @@ // if the shell's current path is HTROOT import java.text.SimpleDateFormat; +import java.util.ArrayList; import java.util.Date; import java.util.Locale; import java.io.IOException; @@ -83,7 +84,7 @@ public class IndexCreateIndexingQueue_p { yacySeed initiator; boolean dark; - int i; + int i=0; if (switchboard.sbQueue.size() == 0) { prop.put("indexing-queue", 0); //is empty @@ -92,20 +93,22 @@ public class IndexCreateIndexingQueue_p { prop.put("indexing-queue_num", switchboard.sbQueue.size());//num entries in queue dark = true; plasmaSwitchboardQueue.Entry pcentry; - for (i = 0; i < switchboard.sbQueue.size(); i++) try { - pcentry = (plasmaSwitchboardQueue.Entry) switchboard.sbQueue.get(i); - if (pcentry != null) { - initiator = yacyCore.seedDB.getConnected(pcentry.initiator()); - prop.put("indexing-queue_list_"+i+"_dark", ((dark) ? 1 : 0)); - prop.put("indexing-queue_list_"+i+"_initiator", ((initiator == null) ? 
"proxy" : initiator.getName())); - prop.put("indexing-queue_list_"+i+"_depth", pcentry.depth()); - prop.put("indexing-queue_list_"+i+"_modified", (pcentry.responseHeader() == null) ? "null" : daydate(pcentry.responseHeader().lastModified())); - prop.put("indexing-queue_list_"+i+"_anchor", pcentry.anchorName()); - prop.put("indexing-queue_list_"+i+"_url", pcentry.normalizedURLString()); - dark = !dark; + try { + ArrayList entryList = switchboard.sbQueue.list(0); + for (i = 0; i < entryList.size(); i++) { + pcentry = (plasmaSwitchboardQueue.Entry) entryList.get(i); + if (pcentry != null) { + initiator = yacyCore.seedDB.getConnected(pcentry.initiator()); + prop.put("indexing-queue_list_"+i+"_dark", ((dark) ? 1 : 0)); + prop.put("indexing-queue_list_"+i+"_initiator", ((initiator == null) ? "proxy" : initiator.getName())); + prop.put("indexing-queue_list_"+i+"_depth", pcentry.depth()); + prop.put("indexing-queue_list_"+i+"_modified", (pcentry.responseHeader() == null) ? "null" : daydate(pcentry.responseHeader().lastModified())); + prop.put("indexing-queue_list_"+i+"_anchor", (pcentry.anchorName()==null)?"":pcentry.anchorName()); + prop.put("indexing-queue_list_"+i+"_url", pcentry.normalizedURLString()); + dark = !dark; + } } - } catch (IOException e) { - } + } catch (IOException e) {} prop.put("indexing-queue_list", i); } diff --git a/htroot/IndexCreateWWWLocalQueue_p.html b/htroot/IndexCreateWWWLocalQueue_p.html index d666b8b85..4274b572d 100644 --- a/htroot/IndexCreateWWWLocalQueue_p.html +++ b/htroot/IndexCreateWWWLocalQueue_p.html @@ -29,6 +29,7 @@ There are #[num]# entries in the local crawler queue. Showing #[show-num]# most Modified Date Anchor Name URL +Delete #{list}# @@ -37,6 +38,7 @@ There are #[num]# entries in the local crawler queue. 
Showing #[show-num]# most #[modified]# #[anchor]# #[url]# +[Delete] #{/list}# diff --git a/htroot/IndexCreateWWWLocalQueue_p.java b/htroot/IndexCreateWWWLocalQueue_p.java index c0104e8ab..8f37ac047 100644 --- a/htroot/IndexCreateWWWLocalQueue_p.java +++ b/htroot/IndexCreateWWWLocalQueue_p.java @@ -76,6 +76,11 @@ public class IndexCreateWWWLocalQueue_p { prop.put("info", 3);//crawling queue cleared prop.put("info_numEntries", c); + } else if (post.containsKey("deleteEntry")) { + String urlHash = (String) post.get("deleteEntry"); + switchboard.urlPool.noticeURL.remove(urlHash); + prop.put("LOCATION",""); + return prop; } } @@ -101,6 +106,7 @@ public class IndexCreateWWWLocalQueue_p { prop.put("crawler-queue_list_"+i+"_modified", daydate(urle.loaddate()) ); prop.put("crawler-queue_list_"+i+"_anchor", urle.name()); prop.put("crawler-queue_list_"+i+"_url", urle.url()); + prop.put("crawler-queue_list_"+i+"_hash", urle.hash()); dark = !dark; } } diff --git a/htroot/IndexCreate_p.java b/htroot/IndexCreate_p.java index 4a6d5e1c2..656c9b2fd 100644 --- a/htroot/IndexCreate_p.java +++ b/htroot/IndexCreate_p.java @@ -186,6 +186,9 @@ public class IndexCreate_p { prop.put("error", 5); //Crawling failed prop.put("error_crawlingURL", ((String) post.get("crawlingURL"))); prop.put("error_reasonString", reasonString); + + switchboard.urlPool.errorURL.newEntry(crawlingStartURL, null, yacyCore.seedDB.mySeed.hash, yacyCore.seedDB.mySeed.hash, + crawlingStartURL.getHost(), reasonString, new bitfield(plasmaURL.urlFlagLength), false); } } catch (Exception e) { // mist diff --git a/source/de/anomic/http/httpdFileHandler.java b/source/de/anomic/http/httpdFileHandler.java index afb95f3ee..f0e77c1db 100644 --- a/source/de/anomic/http/httpdFileHandler.java +++ b/source/de/anomic/http/httpdFileHandler.java @@ -182,8 +182,6 @@ public final class httpdFileHandler extends httpdAbstractHandler implements http } catch (NoSuchAlgorithmException e) { serverLog.logWarning("HTTPDFileHandler", "Content-MD5 
support not availabel ..."); } - - serverLog.logConfig("HTTPDFileHandler", "File Handler Initialized"); } // private void textMessage(OutputStream out, int retcode, String body) throws IOException { @@ -436,6 +434,14 @@ public final class httpdFileHandler extends httpdAbstractHandler implements http headers.put(httpHeader.WWW_AUTHENTICATE,"Basic realm=\"" + tp.get("AUTHENTICATE", "") + "\""); httpd.sendRespondHeader(conProp,out,httpVersion,401,headers); return; + } else if (tp.containsKey("LOCATION")) { + String location = tp.get("LOCATION",""); + if (location.length() == 0) location = path; + + httpHeader headers = getDefaultHeaders(); + headers.put(httpHeader.LOCATION,location); + httpd.sendRespondHeader(conProp,out,httpVersion,307,headers); + return; } // add the application version, the uptime and the client name to every rewrite table tp.put("version", switchboard.getConfig("version", "")); diff --git a/source/de/anomic/kelondro/kelondroStack.java b/source/de/anomic/kelondro/kelondroStack.java index f6881f422..501c9e0a2 100644 --- a/source/de/anomic/kelondro/kelondroStack.java +++ b/source/de/anomic/kelondro/kelondroStack.java @@ -50,7 +50,9 @@ import java.io.File; import java.io.FileReader; import java.io.IOException; import java.io.RandomAccessFile; +import java.util.ArrayList; import java.util.Iterator; +import java.util.LinkedList; import java.util.StringTokenizer; public class kelondroStack extends kelondroRecords { @@ -200,6 +202,14 @@ public class kelondroStack extends kelondroRecords { return n.getValues(); } + public synchronized ArrayList botList(int dist) throws IOException { + ArrayList botList = new ArrayList(size()); + for (int i=dist; i < size(); i++) { + botList.add(bot(i)); + } + return botList; + } + private void unlinkNode(Node n) throws IOException { // join chaines over node Handle l = n.getOHHandle()[left]; diff --git a/source/de/anomic/plasma/plasmaSwitchboardQueue.java b/source/de/anomic/plasma/plasmaSwitchboardQueue.java index 
b6ceaa3e1..202bea467 100644 --- a/source/de/anomic/plasma/plasmaSwitchboardQueue.java +++ b/source/de/anomic/plasma/plasmaSwitchboardQueue.java @@ -45,7 +45,9 @@ import java.io.File; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; import java.util.Date; +import java.util.LinkedList; import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.http.httpHeader; @@ -110,6 +112,19 @@ public class plasmaSwitchboardQueue { return new Entry(sbQueueStack.bot(index)); } + public ArrayList list() throws IOException { + return list(0); + } + + public ArrayList list(int index) throws IOException { + if ((index < 0) || (index >= sbQueueStack.size())) throw new ArrayIndexOutOfBoundsException(); + ArrayList list = sbQueueStack.botList(index); + for (int i=0; i < list.size(); i++) { + list.set(i,new Entry((byte[][])list.get(i))); + } + return list; + } + public void close() { if (sbQueueStack != null) try { sbQueueStack.close(); diff --git a/source/de/anomic/server/serverPortForwardingSch.java b/source/de/anomic/server/serverPortForwardingSch.java index 5301476cb..a7ae1cab9 100644 --- a/source/de/anomic/server/serverPortForwardingSch.java +++ b/source/de/anomic/server/serverPortForwardingSch.java @@ -158,6 +158,7 @@ public class serverPortForwardingSch implements serverPortForwarding{ public synchronized boolean reconnect() throws IOException { if ((!this.isConnected()) && (!Thread.currentThread().isInterrupted())) { this.log.logFine("Trying to reconnect to port forwarding host."); + this.disconnect(); this.connect(); return this.isConnected(); }