*) IndexCreate_p.java:

Crawler start URLs are now also added to the errorURL-DB if an error occurs on this URL
*) kelondroStack.java, plasmaSwitchboardQueue.java
   Adding method which returns a list of all entries in the queue. This list is used by IndexCreate_p.java 
   instead of an iterator to display the indexing-list. 
   Advantages: avoid concurrent modifications of the list while displaying it. 
               Speedup because now we have to access only one sync function instead of multiple ones 
               (one for each entry)
*) IndexCreateIndexingQueue_p.java
   Using new list() function of plasmaSwitchboardQueue
*) httpdFileHandler.java
   If a servlet returns the special value "LOCATION", the httpdFileHandler redirects 
   the browser to the URL specified by the servlet. This can be used, e.g., when an HTTP GET request is
   used instead of a POST request, but a refresh should not be allowed.
*) IndexCreateWWWLocalQueue_p.html
   Now it's possible to delete single entries of the local crawler queue

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@626 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 20 years ago
parent 48aaf703cc
commit bead8a32aa

@ -44,6 +44,7 @@
// if the shell's current path is HTROOT
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Locale;
import java.io.IOException;
@ -83,7 +84,7 @@ public class IndexCreateIndexingQueue_p {
yacySeed initiator;
boolean dark;
int i;
int i=0;
if (switchboard.sbQueue.size() == 0) {
prop.put("indexing-queue", 0); //is empty
@ -92,20 +93,22 @@ public class IndexCreateIndexingQueue_p {
prop.put("indexing-queue_num", switchboard.sbQueue.size());//num entries in queue
dark = true;
plasmaSwitchboardQueue.Entry pcentry;
for (i = 0; i < switchboard.sbQueue.size(); i++) try {
pcentry = (plasmaSwitchboardQueue.Entry) switchboard.sbQueue.get(i);
if (pcentry != null) {
initiator = yacyCore.seedDB.getConnected(pcentry.initiator());
prop.put("indexing-queue_list_"+i+"_dark", ((dark) ? 1 : 0));
prop.put("indexing-queue_list_"+i+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
prop.put("indexing-queue_list_"+i+"_depth", pcentry.depth());
prop.put("indexing-queue_list_"+i+"_modified", (pcentry.responseHeader() == null) ? "null" : daydate(pcentry.responseHeader().lastModified()));
prop.put("indexing-queue_list_"+i+"_anchor", pcentry.anchorName());
prop.put("indexing-queue_list_"+i+"_url", pcentry.normalizedURLString());
dark = !dark;
try {
ArrayList entryList = switchboard.sbQueue.list(0);
for (i = 0; i < entryList.size(); i++) {
pcentry = (plasmaSwitchboardQueue.Entry) entryList.get(i);
if (pcentry != null) {
initiator = yacyCore.seedDB.getConnected(pcentry.initiator());
prop.put("indexing-queue_list_"+i+"_dark", ((dark) ? 1 : 0));
prop.put("indexing-queue_list_"+i+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
prop.put("indexing-queue_list_"+i+"_depth", pcentry.depth());
prop.put("indexing-queue_list_"+i+"_modified", (pcentry.responseHeader() == null) ? "null" : daydate(pcentry.responseHeader().lastModified()));
prop.put("indexing-queue_list_"+i+"_anchor", (pcentry.anchorName()==null)?"":pcentry.anchorName());
prop.put("indexing-queue_list_"+i+"_url", pcentry.normalizedURLString());
dark = !dark;
}
}
} catch (IOException e) {
}
} catch (IOException e) {}
prop.put("indexing-queue_list", i);
}

@ -29,6 +29,7 @@ There are #[num]# entries in the local crawler queue. Showing #[show-num]# most
<th class="small">Modified Date</th>
<th class="small">Anchor Name</th>
<th class="small">URL</th>
<th class="small">Delete</th>
</tr>
#{list}#
<tr class="TableCell#(dark)#Light::Dark#(/dark)#" class="small">
@ -37,6 +38,7 @@ There are #[num]# entries in the local crawler queue. Showing #[show-num]# most
<td width="80" class="small">#[modified]#</td>
<td width="180" class="small">#[anchor]#</td>
<td class="small"><a class="small" href="#[url]#">#[url]#</a></td>
<td width="10" class="small"><a class="small" href="IndexCreateWWWLocalQueue_p.html?deleteEntry=#[hash]#">[Delete]</a></td>
</tr>
#{/list}#
</table>

@ -76,6 +76,11 @@ public class IndexCreateWWWLocalQueue_p {
prop.put("info", 3);//crawling queue cleared
prop.put("info_numEntries", c);
} else if (post.containsKey("deleteEntry")) {
String urlHash = (String) post.get("deleteEntry");
switchboard.urlPool.noticeURL.remove(urlHash);
prop.put("LOCATION","");
return prop;
}
}
@ -101,6 +106,7 @@ public class IndexCreateWWWLocalQueue_p {
prop.put("crawler-queue_list_"+i+"_modified", daydate(urle.loaddate()) );
prop.put("crawler-queue_list_"+i+"_anchor", urle.name());
prop.put("crawler-queue_list_"+i+"_url", urle.url());
prop.put("crawler-queue_list_"+i+"_hash", urle.hash());
dark = !dark;
}
}

@ -186,6 +186,9 @@ public class IndexCreate_p {
prop.put("error", 5); //Crawling failed
prop.put("error_crawlingURL", ((String) post.get("crawlingURL")));
prop.put("error_reasonString", reasonString);
switchboard.urlPool.errorURL.newEntry(crawlingStartURL, null, yacyCore.seedDB.mySeed.hash, yacyCore.seedDB.mySeed.hash,
crawlingStartURL.getHost(), reasonString, new bitfield(plasmaURL.urlFlagLength), false);
}
} catch (Exception e) {
// mist

@ -182,8 +182,6 @@ public final class httpdFileHandler extends httpdAbstractHandler implements http
} catch (NoSuchAlgorithmException e) {
serverLog.logWarning("HTTPDFileHandler", "Content-MD5 support not availabel ...");
}
serverLog.logConfig("HTTPDFileHandler", "File Handler Initialized");
}
// private void textMessage(OutputStream out, int retcode, String body) throws IOException {
@ -436,6 +434,14 @@ public final class httpdFileHandler extends httpdAbstractHandler implements http
headers.put(httpHeader.WWW_AUTHENTICATE,"Basic realm=\"" + tp.get("AUTHENTICATE", "") + "\"");
httpd.sendRespondHeader(conProp,out,httpVersion,401,headers);
return;
} else if (tp.containsKey("LOCATION")) {
String location = tp.get("LOCATION","");
if (location.length() == 0) location = path;
httpHeader headers = getDefaultHeaders();
headers.put(httpHeader.LOCATION,location);
httpd.sendRespondHeader(conProp,out,httpVersion,307,headers);
return;
}
// add the application version, the uptime and the client name to every rewrite table
tp.put("version", switchboard.getConfig("version", ""));

@ -50,7 +50,9 @@ import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.StringTokenizer;
public class kelondroStack extends kelondroRecords {
@ -200,6 +202,14 @@ public class kelondroStack extends kelondroRecords {
return n.getValues();
}
/**
 * Collects the values returned by {@code bot(i)} for every position from
 * {@code dist} up to (but excluding) {@code size()} into a new list.
 * The whole traversal runs while holding this object's monitor.
 *
 * @param dist first position passed to {@code bot}; when it is not smaller
 *             than {@code size()} the returned list is empty
 * @return a freshly allocated list with one element per visited position
 *         (presumably the byte[][] row of each stack entry - verify against bot())
 * @throws IOException if reading an entry fails
 */
public synchronized ArrayList botList(int dist) throws IOException {
    final ArrayList entries = new ArrayList(size());
    int pos = dist;
    while (pos < size()) {
        entries.add(bot(pos));
        pos++;
    }
    return entries;
}
private void unlinkNode(Node n) throws IOException {
// join chaines over node
Handle l = n.getOHHandle()[left];

@ -45,7 +45,9 @@ import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Date;
import java.util.LinkedList;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.http.httpHeader;
@ -110,6 +112,19 @@ public class plasmaSwitchboardQueue {
return new Entry(sbQueueStack.bot(index));
}
/**
 * Convenience overload: lists the queue starting at position 0
 * by delegating to {@code list(int)}.
 *
 * @return the list produced by {@code list(0)}
 * @throws IOException if the underlying stack cannot be read
 */
public ArrayList list() throws IOException {
    final int firstIndex = 0;
    return list(firstIndex);
}
/**
 * Returns the queue entries from position {@code index} onwards, each wrapped
 * in an {@code Entry}.
 *
 * The raw rows are fetched with a single {@code botList} call on the underlying
 * stack and then converted in place.
 *
 * @param index position of the first entry to return; must satisfy
 *              {@code 0 <= index <= size}. Passing {@code index == size}
 *              (in particular {@code 0} on an empty queue) yields an empty list.
 * @return a list of {@code Entry} objects, possibly empty
 * @throws ArrayIndexOutOfBoundsException if {@code index} is negative or greater
 *         than the current queue size
 * @throws IOException if the underlying stack cannot be read
 */
public ArrayList list(int index) throws IOException {
    // index == size is deliberately accepted: listing an empty queue (or one that was
    // drained after the caller last looked at size()) must give an empty list, not an
    // unchecked exception that callers catching only IOException would let escape.
    final int queueSize = sbQueueStack.size();
    if ((index < 0) || (index > queueSize)) {
        throw new ArrayIndexOutOfBoundsException("index " + index + " out of range, queue size is " + queueSize);
    }
    ArrayList list = sbQueueStack.botList(index);
    // botList delivers raw byte[][] rows; replace each one by its Entry wrapper
    for (int i = 0; i < list.size(); i++) {
        list.set(i, new Entry((byte[][]) list.get(i)));
    }
    return list;
}
public void close() {
if (sbQueueStack != null) try {
sbQueueStack.close();

@ -158,6 +158,7 @@ public class serverPortForwardingSch implements serverPortForwarding{
public synchronized boolean reconnect() throws IOException {
if ((!this.isConnected()) && (!Thread.currentThread().isInterrupted())) {
this.log.logFine("Trying to reconnect to port forwarding host.");
this.disconnect();
this.connect();
return this.isConnected();
}

Loading…
Cancel
Save