some patches to get the torrent parser working

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6551 6c8d7289-2bf4-0310-a012-ef5d649a1542
15 years ago · dff4f95c78
parent 75dfe4098c
commit dff4f95c78
6 changed files with 66 additions and 19 deletions
--- a/defaults/yacy.init
+++ b/defaults/yacy.init
@@ -127,7 +127,7 @@ update.onlySignedFiles = 1
 # a peer can be re-started periodically
 # restart.process can be either 'off' (no automatic restart) or 'time' (time- rule-based, see below)
 restart.process = off
-# the restart.cycle is the number of hours that must pass bevore a restart is done
+# the restart.cycle is the number of hours that must pass before a restart is done
 restart.cycle = 20
 # the restart.hour is a pattern that must match with the hour string (two-digit, 24h)
 # when the restart should be performed
--- a/source/de/anomic/crawler/retrieval/Response.java
+++ b/source/de/anomic/crawler/retrieval/Response.java
@@ -654,7 +654,7 @@ public class Response {

        // check profile
        if (!profile().indexText() && !profile().indexMedia()) {
-            return "indexing not allowed - indexText and indexMedia not set (for crawler = " + profile.name()+ ")";
+            return "indexing not allowed - indexText and indexMedia not set (for crawler = " + profile.name() + ")";
        }

        // -CGI access in request
@@ -670,17 +670,19 @@ public class Response {
        // -ranges in request
        // we checked that in shallStoreCache

-        // check if pictures can be indexed
+        // check if document can be indexed
        if (responseHeader != null) {
            final String mimeType = responseHeader.mime();
            String parserError = TextParser.supportsMime(mimeType);
-            if (parserError != null) { return "Media_Content, parser error: " + parserError; }
+            if (parserError != null && TextParser.supportsExtension(url()) != null)  return "no parser available: " + parserError;
        }
+        /*
        if (Classification.isMediaExtension(url().getFileExtension()) &&
           !Classification.isImageExtension((url().getFileExtension()))) {
            return "Media_Content_(forbidden)";
        }
-
+         */
+        
        // -if-modified-since in request
        // if the page is fresh at the very moment we can index it
        // -> this does not apply for the crawler
--- a/source/de/anomic/search/Switchboard.java
+++ b/source/de/anomic/search/Switchboard.java
@@ -1200,7 +1200,7 @@ public final class Switchboard extends serverSwitch {
            if (log.isFine()) log.logFine("deQueue: not indexed any word in URL " + response.url() + "; cause: " + noIndexReason);
            addURLtoErrorDB(response.url(), (referrerURL == null) ? "" : referrerURL.hash(), response.initiator(), response.name(), noIndexReason);
            // finish this entry
-            return "not indexed any word in URL " + response.url() + "; cause: " + noIndexReason;
+            return "not allowed: " + noIndexReason;
        }

        // put document into the concurrent processing queue
--- a/source/net/yacy/ai/example/testorder.java
+++ b/source/net/yacy/ai/example/testorder.java
@@ -0,0 +1,36 @@
+package net.yacy.ai.example;
+
+import java.util.Random;
+import java.util.concurrent.PriorityBlockingQueue;
+
+public class testorder implements Comparable<testorder> {
+
+    public int x;
+    public testorder(int x) {
+        this.x = x;
+    }
+    public String toString() {
+        return Integer.toString(this.x);
+    }
+
+    public int compareTo(testorder o) {
+        if (this.x > o.x) return 1;
+        if (this.x < o.x) return -1;
+        return 0;
+    }
+    
+    public static void main(String[] args) {
+        PriorityBlockingQueue<testorder> q = new PriorityBlockingQueue<testorder>();
+        Random r = new Random();
+        for (int i = 0; i < 10; i++) {
+            q.add(new testorder(r.nextInt(20)));
+        }
+        while (!q.isEmpty())
+            try {
+                System.out.println(q.take().toString());
+            } catch (InterruptedException e) {
+
+                e.printStackTrace();
+            }
+    }
+}
--- a/source/net/yacy/document/Document.java
+++ b/source/net/yacy/document/Document.java
@@ -391,13 +391,10 @@ dc_rights
                        else if (Classification.isAudioExtension(ext)) audiolinks.put(url, entry.getValue());
                        else if (Classification.isVideoExtension(ext)) videolinks.put(url, entry.getValue());
                        else if (Classification.isApplicationExtension(ext)) applinks.put(url, entry.getValue());
-                    } else {
-                        hyperlinks.put(url, entry.getValue());
                    }
-                } else {
-                    // a path to a directory
-                    hyperlinks.put(url, entry.getValue());
                }
+                // in any case we consider this as a link and let the parser decide if that link can be followed
+                hyperlinks.put(url, entry.getValue());
            }
        }
        
--- a/source/net/yacy/document/parser/torrentParser.java
+++ b/source/net/yacy/document/parser/torrentParser.java
@@ -87,16 +87,28 @@ public class torrentParser extends AbstractParser implements Idiom {
        if (bo == null) throw new ParserException("BDecoder.parse returned null", location);
        if (bo.getType() != BType.dictionary) throw new ParserException("BDecoder object is not a dictionary", location);
        Map<String, BObject> map = bo.getMap();
-        String comment = map.get("comment").getString();
+        BObject commento = map.get("comment");
+        String comment = (commento == null) ? "" : commento.getString();
        //Date creation = new Date(map.get("creation date").getInteger());
-        Map<String, BObject> info = map.get("info").getMap();
-        List<BObject> filelist = info.get("files").getList();
-        StringBuilder filenames = new StringBuilder(40 * filelist.size());
-        for (BObject fo: filelist) {
-            List<BObject> l = fo.getMap().get("path").getList(); // one file may have several names
-            for (BObject fl: l) filenames.append(fl.toString()).append(" ");
+        BObject infoo = map.get("info");
+        StringBuilder filenames = new StringBuilder();
+        String name = "";
+        if (infoo != null) {
+            Map<String, BObject> info = infoo.getMap();
+            BObject fileso = info.get("files");
+            if (fileso != null) {
+                List<BObject> filelist = fileso.getList();
+                for (BObject fo: filelist) {
+                    BObject patho = fo.getMap().get("path");
+                    if (patho != null) {
+                        List<BObject> l = patho.getList(); // one file may have several names
+                        for (BObject fl: l) filenames.append(fl.toString()).append(" ");
+                    }
+                }
+            }
+            BObject nameo = info.get("name");
+            if (nameo != null) name = nameo.getString();
        }
-        String name = info.get("name").getString();
        try {
            return new Document(
                    location,