From b769cce433e58b77ba47e133575f476750d1af74 Mon Sep 17 00:00:00 2001
From: orbiter <orbiter@6c8d7289-2bf4-0310-a012-ef5d649a1542>
Date: Tue, 30 Nov 2010 16:13:55 +0000
Subject: [PATCH] - added a catch-all parser for all documents that cannot be
 parsed: they will contribute only their document url to the search index
 - enhanced the pdf and torrent parser: better document titles -
 enhanced the ftp client: longer time-out - fixed bugs in json for search
 results - enhanced yacyinteractive.html: added a file type navigator and a
 download-script generator for search result files

Please have a look at yacyinteractive.html: this will become the hacker-download tool for 27c3!

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7355 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 htroot/Network.html                           |   3 +-
 htroot/js/yacyinteractive.js                  | 114 ++++++++++++++++--
 htroot/yacyinteractive.html                   |   8 +-
 htroot/yacysearchtrailer.java                 |  10 +-
 .../anomic/crawler/retrieval/FTPLoader.java   |   4 -
 .../net/yacy/cora/protocol/ftp/FTPClient.java |  18 +--
 source/net/yacy/document/Condenser.java       |   2 +-
 source/net/yacy/document/Document.java        |   4 +-
 source/net/yacy/document/TextParser.java      |  11 +-
 .../yacy/document/parser/genericParser.java   |  60 +++++++++
 .../parser/images/genericImageParser.java     |   2 +-
 .../net/yacy/document/parser/pdfParser.java   |   1 +
 .../yacy/document/parser/torrentParser.java   |   7 +-
 13 files changed, 191 insertions(+), 53 deletions(-)
 create mode 100644 source/net/yacy/document/parser/genericParser.java
diff --git a/htroot/Network.html b/htroot/Network.html
index e21ccca66..8a7495225 100644
--- a/htroot/Network.html
+++ b/htroot/Network.html
@@ -49,8 +49,7 @@
   <body id="Network">
 
 <div id="api">
-<script type="text/javascript" src="/js/sorttable.js">
-</script>
+<script type="text/javascript" src="/js/sorttable.js"></script>
 <a href="Network.xml" id="apilink"><img src="/env/grafics/api.png" width="60" height="40" alt="API"/></a>
 <script type="text/javascript">
 //<![CDATA[
diff --git a/htroot/js/yacyinteractive.js b/htroot/js/yacyinteractive.js
index a7771a41f..070dede3b 100644
--- a/htroot/js/yacyinteractive.js
+++ b/htroot/js/yacyinteractive.js
@@ -5,6 +5,7 @@ function xmlhttpPost() {
 
 function search(query) {
 //    var xmlHttpReq = false;
+    start = new Date();
     var self = this;
     if (window.XMLHttpRequest) { // Mozilla/Safari
         self.xmlHttpReq = new XMLHttpRequest(); 
@@ -12,11 +13,12 @@ function search(query) {
     else if (window.ActiveXObject) { // IE
         self.xmlHttpReq = new ActiveXObject("Microsoft.XMLHTTP");
     }
-    self.xmlHttpReq.open('GET', "yacysearch.json?verify=false&resource=local&maximumRecords=100&nav=none&query=" + query, true);
+    self.xmlHttpReq.open('GET', "yacysearch.json?verify=false&resource=local&maximumRecords=1000&nav=all&query=" + query, true);
     self.xmlHttpReq.setRequestHeader('Content-Type', 'application/x-www-form-urlencoded');
     self.xmlHttpReq.onreadystatechange = function() {
         if (self.xmlHttpReq.readyState == 4) {
-            updatepage(self.xmlHttpReq.responseText);
+            stop = new Date();
+            updatepage(query, self.xmlHttpReq.responseText, stop.getTime() - start.getTime());
         }
     }
     self.xmlHttpReq.send(null);
@@ -28,44 +30,130 @@ function navget(list, name) {
   }
 }
 
-function updatepage(str) {
+var searchresult;
+
+function makeDownloadScript() {
+  script = "<div style=\"float:left\"><pre>";
+  for (var i = 0; i < searchresult.length; i++) {
+        var item = searchresult[i];
+        script += "curl -OL \"" + item.link + "\"\n";
+  }
+  script += "</pre></div>";
+  document.getElementById("downloadscript").innerHTML = script;
+  document.getElementById("downloadbutton").innerHTML = "<input id=\"downloadbutton\" type=\"button\" value=\"hide the download script\" onClick=\"hideDownloadScript();\"/></form>";
+}
+
+function hideDownloadScript() {
+  document.getElementById("downloadscript").innerHTML = "";
+  var dlb = document.getElementById("downloadbutton");
+  if (dlb) dlb.innerHTML = "<input type=\"button\" value=\"create a download script\" onClick=\"makeDownloadScript();\"/></form>";
+}
+
+function updatepage(query, str, time) {
   var raw = document.getElementById("raw");
   if (raw != null) raw.innerHTML = str;
   var rsp = eval("("+str+")");
   var firstChannel = rsp.channels[0];
+  searchresult = firstChannel.items;
   var totalResults = firstChannel.totalResults.replace(/[,.]/,"");
 //  var startIndex = firstChannel.startIndex;
 //  var itemsPerPage = firstChannel.itemsPerPage;
   var navigation = firstChannel.navigation;
   var topics = navget(navigation, "topics");
   
-  var html = "<span id=\"resCounter\" style=\"display: inline;\">total results = " + totalResults;
+  // analyse the search result
+  var filetypes = {};
+  for (var i = 0; i < firstChannel.items.length; i++) {
+    item = firstChannel.items[i];
+    if (item.link && item.link.length > 4) {
+      ext = item.link.substring(item.link.length - 4);
+      if (ext.charAt(0) == ".") {
+        ext = ext.substring(1).toLowerCase();
+        var count = filetypes[ext];
+        if (count) filetypes[ext]++; else filetypes[ext] = 1;
+      }
+    }
+  }
+  for (var key in filetypes) {
+    if (query.indexOf("filetype:" + key) >= 0) delete filetypes[key];
+  }
+
+  // show statistics
+  var html = "<span id=\"resCounter\" style=\"display: inline;\">";
+  if (firstChannel.items.length > 0) {
+      html += "<form><div style=\"float:left\">" + firstChannel.items.length + " results from a total of " + totalResults + " docs in index; search time: " + time + " milliseconds. </div>";
+      html += "<div id=\"downloadbutton\" style=\"float:left\"></div></form>";
+  } else {
+      if (query == "") {
+         html += "please enter some search words";
+      } else {
+         html += "no results";
+      }
+  }
+  html += "<br>";
+
+  // add extension navigation
+  var extnav = "";
+  for (var key in filetypes) {
+      if (filetypes[key] > 0)  { extnav += "<a style=\"text-decoration:underline\" href=\"/yacyinteractive.html?query=" + query + "+filetype:"+ key + "\">" + key + "</a>(" + filetypes[key] + ")&nbsp;&nbsp;";}
+  }
+  if (extnav.length > 0) {
+	  html += "apply a <b>filter</b> by filetype:&nbsp;&nbsp;&nbsp;&nbsp;" + extnav;
+  }
+
+  // add topic navigation  
   if (topics && topics.length > 0) {
     var topwords = "";
     for (var i = 0; i < topics.elements.length; i++) {
-        topwords += "<a href=\"yacyinteractive.html?query=" + firstChannel.searchTerms + "+" + topics.elements[i].name + "\">" + topics.elements[i].name + "</a> ";
+        topwords += "<a href=\"/yacyinteractive.html?query=" + query + "+" + topics.elements[i].name + "\">" + topics.elements[i].name + "</a> ";
         if (i > 10) break;
     }
     html += "&nbsp;&nbsp;&nbsp;topwords: " + topwords;
   }
-  html += "</span><br>";
+  html += "<br><div id=\"downloadscript\"></div></span><br>";
   
-  if (totalResults > 0) {
+  // display result
+  if (firstChannel.items.length > 0) {
     var item;
-    html += "<table class=\"sortable\" border=\"0\" cellpadding=\"2\" cellspacing=\"1\" width=\"99%\">";
+    html += "<table class=\"sortable\" id=\"sortable\" border=\"0\" cellpadding=\"2\" cellspacing=\"1\" width=\"99%\">";
     html += "<tr class=\"TableHeader\" valign=\"bottom\">";
-    html += "<td>Name</td>";
-    html += "<td width=\"60\">Size</td>";
+    html += "<td width=\"40\">Protocol</td>";
+    html += "<td width=\"60\">Host</td>";
+    html += "<td width=\"60\">Path</td>";
+    html += "<td width=\"60\">Name</td>";
+    html += "<td width=\"50\">Size</td>";
     //html += "<td>Description</td>";
-    html += "<td width=\"180\">Date</td></tr>";
+    html += "<td width=\"50\">Date</td></tr>";
     for (var i = 0; i < firstChannel.items.length; i++) {
         item = firstChannel.items[i];
-        html += "<tr class=\"TableCellLight\"><td align=\"left\"><a href=\"" + item.link + "\">" + item.title + "</a></td>";
+        p = item.link.indexOf("//");
+	    protocol = "";
+        host = "";
+        path = item.link;
+        if (p > 0) {
+        	q = item.link.indexOf("/", p + 2);
+            protocol = item.link.substring(0, p - 1);
+            host = item.link.substring(p + 2, q);
+            path = item.link.substring(q + 1);
+        }
+        html += "<tr class=\"TableCellLight\">";
+        html += "<td align=\"left\">" + protocol + "</td>";
+        html += "<td align=\"left\"><a href=\"" + protocol + "://" + host + "/" + "\">" + host + "</a></td>";
+        html += "<td align=\"left\"><a href=\"" + item.link + "\">" + path + "</a></td>";
+        title = item.title;
+        if (title == "") title = path;
+        html += "<td align=\"left\"><a href=\"" + item.link + "\">" + title + "</a></td>";
         html += "<td align=\"right\">" + item.sizename + "</td>";
         //html += "<td>" + item.description + "</td>";
-        html += "<td align=\"right\">" + item.pubDate + "</td></tr>";
+        pd = item.pubDate;
+        if (pd.substring(pd.length - 6) == " +0000") pd = pd.substring(0, pd.length - 6);
+        if (pd.substring(pd.length - 9) == " 00:00:00") pd = pd.substring(0, pd.length - 9);
+        if (pd.substring(pd.length - 5) == " 2010") pd = pd.substring(0, pd.length - 5);
+        html += "<td align=\"right\">" + pd + "</td>";
+        html += "</tr>";
     }
     html += "</table>";
   }
   document.getElementById("searchresults").innerHTML = html;
+  hideDownloadScript();
 }
\ No newline at end of file
diff --git a/htroot/yacyinteractive.html b/htroot/yacyinteractive.html
index f003dc5b0..fa54b8088 100644
--- a/htroot/yacyinteractive.html
+++ b/htroot/yacyinteractive.html
@@ -35,12 +35,16 @@ To see a list of all APIs, please visit the <a href="http://www.yacy-websuche.de
   <div class="yacylogo">
     <a href="#[promoteSearchPageGreeting.homepage]#" class="yacylogo"><img src="#[promoteSearchPageGreeting.smallImage]#" alt="yacysearch"/></a>
   </div>
-  <fieldset class="yacys"><input name="query" type="text" value="#[query]#" size="50" maxlength="80" /></fieldset>
+  <fieldset class="yacys"><input id="query" name="query" type="text" value="#[query]#" size="50" maxlength="80" /></fieldset>
 
 <!--<pre>Raw JSON String: <div id="raw"></div></pre>-->
 </form>
 <div id="searchresults"></div>
-
+<script type="text/javascript">
+//<![CDATA[
+document.getElementById("query").focus();
+//]]>
+</script>
 #%env/templates/footer.template%#
 </body>
 </html>
\ No newline at end of file
diff --git a/htroot/yacysearchtrailer.java b/htroot/yacysearchtrailer.java
index 499138b27..a4f6d4860 100644
--- a/htroot/yacysearchtrailer.java
+++ b/htroot/yacysearchtrailer.java
@@ -73,7 +73,7 @@ public class yacysearchtrailer {
             while (i < 10 && navigatorIterator.hasNext()) {
                 name = navigatorIterator.next();
                 count = namespaceNavigator.get(name);
-                prop.put("nav-namespace_element_" + i + "_name", name);
+                prop.putJSON("nav-namespace_element_" + i + "_name", name);
                 prop.put("nav-namespace_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + "inurl:" + name, theQuery.urlMask.toString(), theQuery.navigators) + "\">" + name + " (" + count + ")</a>");
                 prop.putJSON("nav-namespace_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + "inurl:" + name, theQuery.urlMask.toString(), theQuery.navigators));
                 prop.put("nav-namespace_element_" + i + "_count", count);
@@ -97,7 +97,7 @@ public class yacysearchtrailer {
             while (i < 20 && navigatorIterator.hasNext()) {
                 name = navigatorIterator.next();
                 count = hostNavigator.get(name);
-                prop.put("nav-domains_element_" + i + "_name", name);
+                prop.putJSON("nav-domains_element_" + i + "_name", name);
                 prop.put("nav-domains_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + "site:" + name, theQuery.urlMask.toString(), theQuery.navigators) + "\">" + name + " (" + count + ")</a>");
                 prop.putJSON("nav-domains_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + "site:" + name, theQuery.urlMask.toString(), theQuery.navigators));
                 prop.put("nav-domains_element_" + i + "_count", count);
@@ -120,10 +120,10 @@ public class yacysearchtrailer {
             int i = 0;
             String anav;
             while (i < 20 && navigatorIterator.hasNext()) {
-                name = navigatorIterator.next();
+                name = navigatorIterator.next().trim();
                 count = authorNavigator.get(name);
                 anav = (name.indexOf(' ') < 0) ? "author:" + name : "author:'" + name.replace(" ", "+") + "'";
-                prop.put("nav-authors_element_" + i + "_name", name);
+                prop.putJSON("nav-authors_element_" + i + "_name", name);
                 prop.put("nav-authors_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + anav, theQuery.urlMask.toString(), theQuery.navigators) + "\">" + name + " (" + count + ")</a>");
                 prop.putJSON("nav-authors_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + anav, theQuery.urlMask.toString(), theQuery.navigators));
                 prop.put("nav-authors_element_" + i + "_count", count);
@@ -149,7 +149,7 @@ public class yacysearchtrailer {
                 count = topicNavigator.get(name);
                 if (/*(theQuery == null) ||*/ (theQuery.queryString == null)) break;
                 if (name != null) {
-                    prop.putHTML("nav-topics_element_" + i + "_name", name);
+                    prop.putJSON("nav-topics_element_" + i + "_name", name);
                     prop.put("nav-topics_element_" + i + "_url",
                             "<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + name, theQuery.urlMask.toString(), theQuery.navigators) + "\">" + name + "</a>");
                             //+"<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.queryStringForUrl() + "+-" + name, theQuery.urlMask.toString(), theQuery.navigators) + "\">-</a>")*/;
diff --git a/source/de/anomic/crawler/retrieval/FTPLoader.java b/source/de/anomic/crawler/retrieval/FTPLoader.java
index 6d25d5c16..1eda7c0a3 100644
--- a/source/de/anomic/crawler/retrieval/FTPLoader.java
+++ b/source/de/anomic/crawler/retrieval/FTPLoader.java
@@ -95,7 +95,6 @@ public class FTPLoader {
 
         // create new ftp client
         final FTPClient ftpClient = new FTPClient();
-        ftpClient.setDataTimeoutByMaxFilesize(maxFileSize);
         
         // get a connection
         if (openConnection(ftpClient, entryUrl)) {
@@ -250,9 +249,6 @@ public class FTPLoader {
                     url.toNormalform(true, true).getBytes());
             return response;
         }
-
-        // timeout for download
-        ftpClient.setDataTimeoutByMaxFilesize(size);
         
         // download the remote file
         byte[] b = ftpClient.get(path);
diff --git a/source/net/yacy/cora/protocol/ftp/FTPClient.java b/source/net/yacy/cora/protocol/ftp/FTPClient.java
index 17bf656df..ca8916f9b 100644
--- a/source/net/yacy/cora/protocol/ftp/FTPClient.java
+++ b/source/net/yacy/cora/protocol/ftp/FTPClient.java
@@ -86,7 +86,7 @@ public class FTPClient {
     private Socket ControlSocket = null;
 
     // socket timeout
-    private static final int ControlSocketTimeout = 1000;
+    private static final int ControlSocketTimeout = 10000;
 
     // data socket timeout
     private int DataSocketTimeout = 0; // in seconds (default infinite)
@@ -2450,22 +2450,6 @@ public class FTPClient {
         return ControlSocketTimeout;
     }
 
-    /**
-     * set timeout for data connections calculated for a minimum data rate
-     * 
-     * @param maxFilesize
-     * @return timeout in seconds
-     */
-    public void setDataTimeoutByMaxFilesize(final int maxFilesize) {
-        int timeout = 1;
-        if (DataSocketRate > 0) {
-            // calculate by minDataRate and MaxFTPFileSize
-            timeout = maxFilesize / DataSocketRate;
-        }
-
-        setDataSocketTimeout(timeout);
-    }
-
     /**
      * after this time the data connection is closed
      * 
diff --git a/source/net/yacy/document/Condenser.java b/source/net/yacy/document/Condenser.java
index 67c25c4c6..cbd2dfe0d 100644
--- a/source/net/yacy/document/Condenser.java
+++ b/source/net/yacy/document/Condenser.java
@@ -132,7 +132,7 @@ public final class Condenser {
         
         Map.Entry<MultiProtocolURI, String> entry;
         if (indexText) {
-            createCondensement(document.getText(), meaningLib);        
+            createCondensement(document.getText(), meaningLib);
             // the phrase counter:
             // phrase   0 are words taken from the URL
             // phrase   1 is the MainTitle
diff --git a/source/net/yacy/document/Document.java b/source/net/yacy/document/Document.java
index 6d1b2ca5d..081d4e94d 100644
--- a/source/net/yacy/document/Document.java
+++ b/source/net/yacy/document/Document.java
@@ -232,7 +232,7 @@ dc_rights
 
     public InputStream getText() {
         try {
-            if (this.text == null) return null;
+            if (this.text == null) return new ByteArrayInputStream("".getBytes());
 
             if (this.text instanceof File) {
                 this.textStream = new BufferedInputStream(new FileInputStream((File)this.text));
@@ -245,7 +245,7 @@ dc_rights
         } catch (final Exception e) {
             Log.logException(e);
         }
-        return null; 
+        return new ByteArrayInputStream("".getBytes());
     }
     
     public byte[] getTextBytes() {
diff --git a/source/net/yacy/document/TextParser.java b/source/net/yacy/document/TextParser.java
index 98e5dede5..f63c72681 100644
--- a/source/net/yacy/document/TextParser.java
+++ b/source/net/yacy/document/TextParser.java
@@ -38,6 +38,7 @@ import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.parser.bzipParser;
 import net.yacy.document.parser.csvParser;
 import net.yacy.document.parser.docParser;
+import net.yacy.document.parser.genericParser;
 import net.yacy.document.parser.gzipParser;
 import net.yacy.document.parser.htmlParser;
 import net.yacy.document.parser.odtParser;
@@ -64,6 +65,7 @@ public final class TextParser {
     private static final Log log = new Log("PARSER");
     private static final Object v = new Object();
 
+    private static final Parser genericIdiom = new genericParser();
     private static final Map<String, Parser> mime2parser = new ConcurrentHashMap<String, Parser>();
     private static final Map<String, Parser> ext2parser = new ConcurrentHashMap<String, Parser>();
     private static final Map<String, String> ext2mime = new ConcurrentHashMap<String, String>();
@@ -196,11 +198,13 @@ public final class TextParser {
         
         // in case that we know more parsers we first transform the content into a byte[] and use that as base
         // for a number of different parse attempts.
+        byte[] b = null;
         try {
-            return parseSource(location, mimeType, idioms, charset, FileUtils.read(sourceStream, (int) contentLength));
+            b = FileUtils.read(sourceStream, (int) contentLength);
         } catch (IOException e) {
             throw new Parser.Failure(e.getMessage(), location);
         }
+        return parseSource(location, mimeType, idioms, charset, b);
     }
 
     private static Document[] parseSource(
@@ -325,8 +329,9 @@ public final class TextParser {
         idiom = mime2parser.get(mimeType2);
         if (idiom != null && !idioms.contains(idiom)) idioms.add(idiom);
         
-        // finall check if we found any parser
-        if (idioms.isEmpty()) throw new Parser.Failure("no parser found for extension '" + ext + "' and mime type '" + mimeType1 + "'", url);
+        // always add the generic parser
+        idioms.add(genericIdiom);
+        //if (idioms.isEmpty()) throw new Parser.Failure("no parser found for extension '" + ext + "' and mime type '" + mimeType1 + "'", url);
         
         return idioms;
     }
diff --git a/source/net/yacy/document/parser/genericParser.java b/source/net/yacy/document/parser/genericParser.java
new file mode 100644
index 000000000..eb0259603
--- /dev/null
+++ b/source/net/yacy/document/parser/genericParser.java
@@ -0,0 +1,60 @@
+/**
+ *  genericParser
+ *  Copyright 2010 by Michael Peter Christen, mc@yacy.net, Frankfurt a. M., Germany
+ *  First released 30.11.2010 at http://yacy.net
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *  
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program in the file lgpl21.txt
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.document.parser;
+
+import java.io.InputStream;
+
+import net.yacy.cora.document.MultiProtocolURI;
+import net.yacy.document.AbstractParser;
+import net.yacy.document.Document;
+import net.yacy.document.Parser;
+
+/**
+ * this parser can parse just anything because it uses only the uri/file/path information
+ */
+public class genericParser extends AbstractParser implements Parser {
+
+    /** Creates the parser under the name "Generic Parser"; it declares no supported extensions or mime types. */
+    public genericParser() {
+        super("Generic Parser");
+    }
+
+    /**
+     * Describes a resource by its location only; the content stream is never read.
+     *
+     * @param location the resource location; its file name is used as title, its host is passed along
+     * @param mimeType handed to the resulting document unchanged
+     * @param charset handed to the resulting document unchanged
+     * @param source1 the content stream; ignored by this parser
+     * @return an array holding exactly one document
+     */
+    public Document[] parse(MultiProtocolURI location, String mimeType, String charset, InputStream source1) throws Parser.Failure, InterruptedException {
+        final Document urlOnly = new Document(
+                location, mimeType, charset,
+                null, null,
+                location.getFileName(), // title
+                "", // author
+                location.getHost(),
+                null, null, "", null, null, null,
+                false);
+        return new Document[]{urlOnly};
+    }
+}
diff --git a/source/net/yacy/document/parser/images/genericImageParser.java b/source/net/yacy/document/parser/images/genericImageParser.java
index 484211e9b..9561acf5c 100644
--- a/source/net/yacy/document/parser/images/genericImageParser.java
+++ b/source/net/yacy/document/parser/images/genericImageParser.java
@@ -185,7 +185,7 @@ public class genericImageParser extends AbstractParser implements Parser {
         String infoString = ii.info.toString();
         images.put(ii.location, new ImageEntry(location, "", ii.width, ii.height, -1));
         
-        if (title == null) title = location.toNormalform(true, true);
+        if (title == null || title.length() == 0) title = location.getFileName();
         
         return new Document[]{new Document(
              location,
diff --git a/source/net/yacy/document/parser/pdfParser.java b/source/net/yacy/document/parser/pdfParser.java
index a813d6fee..eb69e3f1a 100644
--- a/source/net/yacy/document/parser/pdfParser.java
+++ b/source/net/yacy/document/parser/pdfParser.java
@@ -118,6 +118,7 @@ public class pdfParser extends AbstractParser implements Parser {
             // info.getModificationDate();
         }
         
+        if (docTitle == null || docTitle.length() == 0) docTitle = location.getFileName();
         CharBuffer writer = null;
         try {
             // create a writer for output
diff --git a/source/net/yacy/document/parser/torrentParser.java b/source/net/yacy/document/parser/torrentParser.java
index 810b73e20..4c7babdd6 100644
--- a/source/net/yacy/document/parser/torrentParser.java
+++ b/source/net/yacy/document/parser/torrentParser.java
@@ -67,7 +67,7 @@ public class torrentParser extends AbstractParser implements Parser {
         //Date creation = new Date(map.get("creation date").getInteger());
         BObject infoo = map.get("info");
         StringBuilder filenames = new StringBuilder();
-        String name = "";
+        String title = "";
         if (infoo != null) {
             Map<String, BObject> info = infoo.getMap();
             BObject fileso = info.get("files");
@@ -82,8 +82,9 @@ public class torrentParser extends AbstractParser implements Parser {
                 }
             }
             BObject nameo = info.get("name");
-            if (nameo != null) name = new String(nameo.getString());
+            if (nameo != null) title = new String(nameo.getString());
         }
+        if (title == null || title.length() == 0) title = location.getFileName();
         try {
             return new Document[]{new Document(
                     location,
@@ -91,7 +92,7 @@ public class torrentParser extends AbstractParser implements Parser {
                     charset,
                     null,
                     null,
-                    name, // title
+                    title, // title
                     comment, // author 
                     location.getHost(),
                     null,