added option to distinguish between text crawl and media crawl

- for each crawl start, there are now separate flags for text indexing and media indexing (see the sketch below)
- the old localIndexing flag is superfluous and has been removed
- added new default crawl profiles: snippetText and snippetMedia replace the single snippet profile
- if an image search is done, only the media links are crawled for the snippets
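
A minimal sketch of how the two new flags replace localIndexing, condensed from the plasmaCrawlProfile and plasmaSwitchboardQueue hunks below (names as in the diff; surrounding class code omitted):

    // per-profile flags, read from the profile's string map "mem"
    public boolean indexText() {
        String r = (String) mem.get("indexText");
        return (r != null) && r.equals("true");
    }
    public boolean indexMedia() {
        String r = (String) mem.get("indexMedia");
        return (r != null) && r.equals("true");
    }

    // indexing is allowed if at least one flag is set; this replaces the
    // old profile().localIndexing() check in plasmaSwitchboardQueue:
    if ((!profile().indexText()) && (!profile().indexMedia())) {
        return "Indexing_Not_Allowed";
    }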


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3100 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 19 years ago
parent 6866bcd0e0
commit 61798f0ae6

@ -151,7 +151,7 @@ public class Bookmarks {
plasmaParserDocument document = null;
if (urlentry != null) {
indexURLEntry.Components comp = urlentry.comp();
document = switchboard.snippetCache.retrieveDocument(comp.url(), true, 5000);
document = switchboard.snippetCache.retrieveDocument(comp.url(), true, 5000, true);
prop.put("mode_edit", 0); // create mode
prop.put("mode_url", comp.url().toNormalform());
prop.put("mode_title", comp.descr());

@ -106,7 +106,8 @@
</tr>
<tr valign="top" class="TableCellDark">
<td>Do Local Indexing:</td>
<td><input type="checkbox" name="localIndexing" #(localIndexingChecked)#::checked="checked"#(/localIndexingChecked)# /></td>
<td>index text:<input type="checkbox" name="indexText" #(indexingTextChecked)#::checked="checked"#(/indexingTextChecked)# />&nbsp;&nbsp;&nbsp;
index media:<input type="checkbox" name="indexMedia" #(indexingMediaChecked)#::checked="checked"#(/indexingMediaChecked)# /></td>
<td>
This enables indexing of the web pages the crawler will download. This should be switched on by default, unless you want to crawl only to fill the
<a href="CacheAdmin_p.html">Proxy Cache</a> without indexing.

@ -141,7 +141,8 @@ public class IndexCreate_p {
prop.put("crawlingDomMaxPages", (crawlingDomMaxPages == -1) ? 10000 : crawlingDomMaxPages);
prop.put("crawlingQChecked", env.getConfig("crawlingQ", "").equals("true") ? 1 : 0);
prop.put("storeHTCacheChecked", env.getConfig("storeHTCache", "").equals("true") ? 1 : 0);
prop.put("localIndexingChecked", env.getConfig("localIndexing", "").equals("true") ? 1 : 0);
prop.put("indexingTextChecked", env.getConfig("indexText", "").equals("true") ? 1 : 0);
prop.put("indexingMediaChecked", env.getConfig("indexMedia", "").equals("true") ? 1 : 0);
prop.put("crawlOrderChecked", env.getConfig("crawlOrder", "").equals("true") ? 1 : 0);
long busySleep = Integer.parseInt(env.getConfig("62_remotetriggeredcrawl_busysleep", "100"));
if (busySleep < 100) {

@ -15,7 +15,7 @@
If you click on it while browsing, the currently viewed website will be inserted into the YaCy crawling queue for indexing.
</p>
<p>
<a class="BookmarkLink" href="javascript:w = window.open('http://#[host]#:#[port]#/QuickCrawlLink_p.html?localIndexing=on&amp;crawlingQ=on&amp;xdstopw=on&amp;title='+escape(document.title)+'&amp;url='+escape(location.href),'_blank','height=150,width=500,resizable=yes,scrollbar=no,directory=no,menubar=no,location=no');w.focus();">Crawl with YaCy</a>
<a class="BookmarkLink" href="javascript:w = window.open('http://#[host]#:#[port]#/QuickCrawlLink_p.html?indexText=on&indexMedia=on&amp;crawlingQ=on&amp;xdstopw=on&amp;title='+escape(document.title)+'&amp;url='+escape(location.href),'_blank','height=150,width=500,resizable=yes,scrollbar=no,directory=no,menubar=no,location=no');w.focus();">Crawl with YaCy</a>
</p>
::<!-- 1 -->

@ -66,7 +66,7 @@ public class QuickCrawlLink_p {
/**
* Example Javascript to call this servlet:
* <code>javascript:w = window.open('http://user:pwd@localhost:8080/QuickCrawlLink_p.html?localIndexing=on&crawlingQ=on&xdstopw=on&title=' + escape(document.title) + '&url=' + location.href,'_blank','height=150,width=500,resizable=yes,scrollbar=no,directory=no,menubar=no,location=no'); w.focus();</code>
* <code>javascript:w = window.open('http://user:pwd@localhost:8080/QuickCrawlLink_p.html?indexText=on&indexMedia=on&crawlingQ=on&xdstopw=on&title=' + escape(document.title) + '&url=' + location.href,'_blank','height=150,width=500,resizable=yes,scrollbar=no,directory=no,menubar=no,location=no'); w.focus();</code>
* @param header
* @param post
* @param env
@ -114,8 +114,9 @@ public class QuickCrawlLink_p {
String crawlingFilter = post.get("crawlingFilter", ".*");
int CrawlingDepth = Integer.parseInt(post.get("crawlingDepth", "0"));
boolean crawlDynamic = post.get("crawlingQ", "").equals("on");
boolean indexText = post.get("indexText", "on").equals("on");
boolean indexMedia = post.get("indexMedia", "on").equals("on");
boolean storeHTCache = post.get("storeHTCache", "").equals("on");
boolean localIndexing = post.get("localIndexing", "").equals("on");
boolean remoteIndexing = post.get("crawlOrder", "").equals("on");
boolean xsstopw = post.get("xsstopw", "").equals("on");
boolean xdstopw = post.get("xdstopw", "").equals("on");
@ -166,13 +167,14 @@ public class QuickCrawlLink_p {
60 * 24 * 30, // recrawlIfOlder (minutes); here: one month
-1, // domFilterDepth, if negative: no auto-filter
-1, // domMaxPages, if negative: no count restriction
crawlDynamic,
crawlDynamic,
indexText,
indexMedia,
storeHTCache,
true,
localIndexing,
remoteIndexing,
xsstopw,
xdstopw,
true,
remoteIndexing,
xsstopw,
xdstopw,
xpstopw
);
} catch (Exception e) {

@ -171,7 +171,7 @@ public class ViewFile {
if (resource == null) {
plasmaHTCache.Entry entry = null;
try {
entry = sb.snippetCache.loadResourceFromWeb(url, 5000, false);
entry = sb.snippetCache.loadResourceFromWeb(url, 5000, false, true);
} catch (plasmaCrawlerException e) {
prop.put("error", 4);
prop.put("error_errorText", e.getMessage());

@ -74,7 +74,7 @@ public class ViewImage {
int timeout = post.getInt("timeout", 5000);
// getting the image as stream
Object[] resource = sb.snippetCache.getResource(url, true, timeout);
Object[] resource = sb.snippetCache.getResource(url, true, timeout, false);
if (resource == null) return null;
InputStream imgStream = (InputStream) resource[0];
if (imgStream == null) return null;

@ -118,7 +118,8 @@
<td><strong>Max Page Per Domain</strong></td>
<td><strong>Accept '?' URLs</strong></td>
<td><strong>Fill Proxy Cache</strong></td>
<td><strong>Local Indexing</strong></td>
<td><strong>Local Text Indexing</strong></td>
<td><strong>Local Media Indexing</strong></td>
<td><strong>Remote Indexing</strong></td>
<td></td>
</tr>
@ -134,7 +135,8 @@
<td>#[crawlingDomMaxPages]#</td>
<td>#(withQuery)#no::yes#(/withQuery)#</td>
<td>#(storeCache)#no::yes#(/storeCache)#</td>
<td>#(localIndexing)#no::yes#(/localIndexing)#</td>
<td>#(indexText)#no::yes#(/indexText)#</td>
<td>#(indexMedia)#no::yes#(/indexMedia)#</td>
<td>#(remoteIndexing)#no::yes#(/remoteIndexing)#</td>
<td>#(deleteButton)#::<form action="WatchCrawler_p.html" method="get" enctype="multipart/form-data"><input type="hidden" name="handle" value="#[handle]#" /><input type="submit" name="deleteprofile" value="Delete" /></form>#(/deleteButton)#</td>
</tr>
@ -147,6 +149,7 @@
<tbody>
<tr class="TableHeader">
<th>Queue</th>
<th>Profile</th>
<th>Initiator</th>
<th>Depth</th>
<th>Modified Date</th>

@ -102,10 +102,10 @@ public class WatchCrawler_p {
boolean crawlingQ = post.get("crawlingQ", "off").equals("on");
env.setConfig("crawlingQ", (crawlingQ) ? "true" : "false");
boolean indexText = post.get("indexText", "on").equals("on");
boolean indexText = post.get("indexText", "off").equals("on");
env.setConfig("indexText", (indexText) ? "true" : "false");
boolean indexMedia = post.get("indexMedia", "on").equals("on");
boolean indexMedia = post.get("indexMedia", "off").equals("on");
env.setConfig("indexMedia", (indexMedia) ? "true" : "false");
boolean storeHTCache = post.get("storeHTCache", "off").equals("on");
@ -181,7 +181,8 @@ public class WatchCrawler_p {
if (crawlOrder) {
Map m = new HashMap(pe.map()); // must be cloned
m.remove("specificDepth");
m.remove("localIndexing");
m.remove("indexText");
m.remove("indexMedia");
m.remove("remoteIndexing");
m.remove("xsstopw");
m.remove("xpstopw");
@ -328,7 +329,8 @@ public class WatchCrawler_p {
prop.put("crawlProfiles_"+count+"_remoteIndexing", ((profile.remoteIndexing()) ? 1 : 0));
prop.put("crawlProfiles_"+count+"_deleteButton", (((profile.name().equals("remote")) ||
(profile.name().equals("proxy")) ||
(profile.name().equals("snippet"))) ? 0 : 1));
(profile.name().equals("snippetText")) ||
(profile.name().equals("snippetMedia")) ? 0 : 1)));
prop.put("crawlProfiles_"+count+"_deleteButton_handle", profile.handle());
dark = !dark;

@ -145,6 +145,7 @@ function updateTable(indexingqueue, tablename){
dark=false;
for(i=0;i<entries.length;i++){
profile=getValue(getFirstChild(entries[i], "profile"));
initiator=getValue(getFirstChild(entries[i], "initiator"));
depth=getValue(getFirstChild(entries[i], "depth"));
modified=getValue(getFirstChild(entries[i], "modified"));
@ -160,7 +161,7 @@ function updateTable(indexingqueue, tablename){
deletebutton=createLinkCol("IndexCreateIndexingQueue_p.html?deleteEntry="+hash, DELETE_STRING);
else
deletebutton=createCol("");
row=createIndexingRow(tablename, initiator, depth, modified, anchor, url, size, deletebutton);
row=createIndexingRow(tablename, profile, initiator, depth, modified, anchor, url, size, deletebutton);
//create row
if(inProcess){
@ -175,10 +176,11 @@ function updateTable(indexingqueue, tablename){
}
}
function createIndexingRow(queue, initiator, depth, modified, anchor, url, size, deletebutton){
function createIndexingRow(queue, profile, initiator, depth, modified, anchor, url, size, deletebutton){
row=document.createElement("tr");
row.setAttribute("height", 10);
row.appendChild(createCol(queue));
row.appendChild(createCol(profile));
row.appendChild(createCol(initiator));
row.appendChild(createCol(depth));
row.appendChild(createCol(modified));

@ -115,6 +115,7 @@ public class queues_p {
totalSize += entrySize;
if ((pcentry != null)&&(pcentry.url() != null)) {
initiator = yacyCore.seedDB.getConnected(pcentry.initiator());
prop.put("list-indexing_"+i+"_profile", pcentry.profile().name());
prop.putNoHTML("list-indexing_"+i+"_initiator", ((initiator == null) ? "proxy" : wikiCode.replaceHTML(initiator.getName())));
prop.put("list-indexing_"+i+"_depth", pcentry.depth());
prop.put("list-indexing_"+i+"_modified", pcentry.getModificationDate());
@ -144,7 +145,7 @@ public class queues_p {
CrawlWorker theWorker = (CrawlWorker)threadList[i];
plasmaCrawlLoaderMessage theMsg = theWorker.theMsg;
if (theMsg == null) continue;
prop.put("list-loader_"+count+"_profile", theMsg.profile.name());
initiator = yacyCore.seedDB.getConnected(theMsg.initiator);
prop.putNoHTML("list-loader_"+count+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
prop.put("list-loader_"+count+"_depth", theMsg.depth );
@ -183,6 +184,7 @@ public class queues_p {
urle = crawlerList[i];
if ((urle != null) && (urle.url() != null)) {
initiator = yacyCore.seedDB.getConnected(urle.initiator());
prop.put(tableName + "_" + showNum + "_profile", urle.profileHandle());
prop.put(tableName + "_" + showNum + "_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
prop.put(tableName + "_" + showNum + "_depth", urle.depth());
prop.put(tableName + "_" + showNum + "_modified", daydate(urle.loaddate()));

@ -4,6 +4,7 @@
<max>#[indexingMax]#</max>
#{list-indexing}#
<entry>
<profile>#[profile]#</profile>
<initiator>#[initiator]#</initiator>
<depth>#[depth]#</depth>
<modified>#[modified]#</modified>
@ -20,6 +21,7 @@
<max>#[loaderMax]#</max>
#{list-loader}#
<entry>
<profile>#[profile]#</profile>
<initiator>#[initiator]#</initiator>
<depth>#[depth]#</depth>
<url>#[url]#</url>
@ -30,6 +32,7 @@
<size>#[localCrawlSize]#</size>
#{list-local}#
<entry>
<profile>#[profile]#</profile>
<initiator>#[initiator]#</initiator>
<depth>#[depth]#</depth>
<modified>#[modified]#</modified>
@ -44,6 +47,7 @@
<size>#[remoteCrawlSize]#</size>
#{list-remote}#
<entry>
<profile>#[profile]#</profile>
<initiator>#[initiator]#</initiator>
<depth>#[depth]#</depth>
<modified>#[modified]#</modified>

@ -26,6 +26,7 @@ public class snippet {
// getting url
String urlString = post.get("url", "");
URL url = new URL(urlString);
prop.put("urlHash",plasmaURL.urlHash(url));
// if 'remove' is set to true, then RWI references to URLs that do not have the snippet are removed
boolean remove = post.get("remove", "false").equals("true");
@ -33,11 +34,15 @@ public class snippet {
// boolean line_end_with_punctuation
boolean pre = post.get("pre", "false").equals("true");
// type of media
String media = post.get("media", "text");
String querystring = post.get("search", "").trim();
if ((querystring.length() > 2) && (querystring.charAt(0) == '"') && (querystring.charAt(querystring.length() - 1) == '"')) {
querystring = querystring.substring(1, querystring.length() - 1).trim();
}
final TreeSet query = plasmaSearchQuery.cleanQuery(querystring);
Set queryHashes = plasmaCondenser.words2hashes(query);
// filter out stopwords
final TreeSet filtered = kelondroMSetTools.joinConstructive(query, plasmaSwitchboard.stopwords);
@ -46,36 +51,39 @@ public class snippet {
}
// find snippet
Set queryHashes = plasmaCondenser.words2hashes(query);
plasmaSnippetCache.TextSnippet snippet = switchboard.snippetCache.retrieveTextSnippet(url, queryHashes, true, pre, 260, 10000);
prop.put("status",snippet.getSource());
if (snippet.getSource() < 11) {
//prop.put("text", (snippet.exists()) ? snippet.getLineMarked(queryHashes) : "unknown");
prop.put("text", (snippet.exists()) ? "<![CDATA["+snippet.getLineMarked(queryHashes)+"]]>" : "unknown");
if (media.equals("text")) {
// attach text snippet
plasmaSnippetCache.TextSnippet snippet = switchboard.snippetCache.retrieveTextSnippet(url, queryHashes, true, pre, 260, 10000);
prop.put("status",snippet.getSource());
if (snippet.getSource() < 11) {
//prop.put("text", (snippet.exists()) ? snippet.getLineMarked(queryHashes) : "unknown");
prop.put("text", (snippet.exists()) ? "<![CDATA["+snippet.getLineMarked(queryHashes)+"]]>" : "unknown");
} else {
String error = snippet.getError();
if ((remove) && (error.equals("no matching snippet found"))) {
serverLog.logInfo("snippet-fetch", "no snippet found, remove words '" + querystring + "' for url = " + url.toNormalform());
switchboard.wordIndex.removeReferences(query, plasmaURL.urlHash(url));
}
prop.put("text", error);
}
prop.put("link", 0);
prop.put("links", 0);
} else {
String error = snippet.getError();
if ((remove) && (error.equals("no matching snippet found"))) {
serverLog.logInfo("snippet-fetch", "no snippet found, remove words '" + querystring + "' for url = " + url.toNormalform());
switchboard.wordIndex.removeReferences(query, plasmaURL.urlHash(url));
// attach media information
ArrayList mediaSnippets = switchboard.snippetCache.retrieveMediaSnippets(url, queryHashes, true, 1000);
plasmaSnippetCache.MediaSnippet ms;
for (int i = 0; i < mediaSnippets.size(); i++) {
ms = (plasmaSnippetCache.MediaSnippet) mediaSnippets.get(i);
prop.put("link_" + i + "_type", ms.type);
prop.put("link_" + i + "_href", ms.href);
prop.put("link_" + i + "_name", ms.name);
prop.put("link_" + i + "_attr", ms.attr);
}
prop.put("text", error);
}
prop.put("urlHash",plasmaURL.urlHash(url));
// attach link information
ArrayList mediaSnippets = switchboard.snippetCache.retrieveMediaSnippets(url, queryHashes, true, 1000);
plasmaSnippetCache.MediaSnippet ms;
for (int i = 0; i < mediaSnippets.size(); i++) {
ms = (plasmaSnippetCache.MediaSnippet) mediaSnippets.get(i);
prop.put("link_" + i + "_type", ms.type);
prop.put("link_" + i + "_href", ms.href);
prop.put("link_" + i + "_name", ms.name);
prop.put("link_" + i + "_attr", ms.attr);
System.out.println("DEBUG: " + mediaSnippets.size() + " ENTRIES IN MEDIA SNIPPET LINKS for url " + urlString);
prop.put("text", "");
prop.put("link", mediaSnippets.size());
prop.put("links", mediaSnippets.size());
}
System.out.println("DEBUG: " + mediaSnippets.size() + " ENTRIES IN MEDIA SNIPPET LINKS for url " + urlString);
prop.put("link", mediaSnippets.size());
prop.put("links", mediaSnippets.size());
// return rewrite properties
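
A hedged client sketch for exercising the changed servlet: the /xml/snippet.xml path and the local peer address are assumptions, while the url, search, and media parameter names come from the post.get(...) calls above. Passing media=text returns a text snippet; any other value takes the new media-links branch.

    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.URL;
    import java.net.URLEncoder;

    public class SnippetFetchExample {
        public static void main(String[] args) throws Exception {
            String peer = "http://localhost:8080";           // assumed local peer
            String target = "http://example.org/page.html";  // page to snippet
            String request = peer + "/xml/snippet.xml"       // assumed servlet path
                + "?url=" + URLEncoder.encode(target, "UTF-8")
                + "&search=" + URLEncoder.encode("yacy", "UTF-8")
                + "&media=text";                             // or e.g. "image"
            BufferedReader in = new BufferedReader(
                    new InputStreamReader(new URL(request).openStream()));
            String line;
            while ((line = in.readLine()) != null) System.out.println(line);
            in.close();
        }
    }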

@ -222,7 +222,7 @@ public class yacysearch {
if (urlentry != null) {
indexURLEntry.Components comp = urlentry.comp();
plasmaParserDocument document;
document = sb.snippetCache.retrieveDocument(comp.url(), true, 5000);
document = sb.snippetCache.retrieveDocument(comp.url(), true, 5000, true);
if (document != null) {
// create a news message
HashMap map = new HashMap();

@ -114,97 +114,113 @@ public final class plasmaCondenser {
//public int RESULT_NUMB_TEXT_BYTES = -1;
public int RESULT_NUMB_WORDS = -1;
public int RESULT_DIFF_WORDS = -1;
public int RESULT_SIMI_WORDS = -1;
public int RESULT_NUMB_SENTENCES = -1;
public int RESULT_DIFF_SENTENCES = -1;
public int RESULT_SIMI_SENTENCES = -1;
public kelondroBitfield RESULT_FLAGS = new kelondroBitfield(4);
public plasmaCondenser(plasmaParserDocument document, boolean addMedia) throws UnsupportedEncodingException {
public plasmaCondenser(plasmaParserDocument document, boolean indexText, boolean indexMedia) throws UnsupportedEncodingException {
// if indexMedia == true, then all the media links are also parsed and added to the words
// added media words are flagged with the appropriate media flag
this(document.getText(), document.getCharset());
this.wordminsize = 3;
this.wordcut = 2;
this.words = new TreeMap();
this.sentences = new HashMap();
kelondroBitfield wflags = (kelondroBitfield) RESULT_FLAGS.clone(); // the template for the word flags, only from position 0..19
// construct flag set for document
if (document.getImages().size() > 0) RESULT_FLAGS.set(flag_cat_hasimage, true);
if (document.getAudiolinks().size() > 0) RESULT_FLAGS.set(flag_cat_hasaudio, true);
if (document.getVideolinks().size() > 0) RESULT_FLAGS.set(flag_cat_hasvideo, true);
if (document.getApplinks().size() > 0) RESULT_FLAGS.set(flag_cat_hasapp, true);
System.out.println("DEBUG: condensing " + document.getMainLongTitle() + ", indexText=" + Boolean.toString(indexText) + ", indexMedia=" + Boolean.toString(indexMedia));
// the phrase counter:
// phrase 0 are words taken from the URL
// phrase 1 is the MainLongTitle
// phrase 2 is the MainShortTitle
// phrase 3 is the Document Abstract
// phrase 4 is the Document Author
// phrase 5 are the tags specified in document
// phrase 10 and above are the section headlines/titles (88 possible)
// phrase 98 is taken from the embedded anchor/hyperlinks description
// phrase 99 is taken from the media Link url and anchor description
// phrase 100 and above are lines from the text
Map.Entry entry;
if (indexText) {
createCondensement(document.getText(), document.getCharset());
kelondroBitfield wflags = (kelondroBitfield) RESULT_FLAGS.clone(); // the template for the word flags, only from position 0..19
// construct flag set for document
if (document.getImages().size() > 0) RESULT_FLAGS.set(flag_cat_hasimage, true);
if (document.getAudiolinks().size() > 0) RESULT_FLAGS.set(flag_cat_hasaudio, true);
if (document.getVideolinks().size() > 0) RESULT_FLAGS.set(flag_cat_hasvideo, true);
if (document.getApplinks().size() > 0) RESULT_FLAGS.set(flag_cat_hasapp, true);
// the phrase counter:
// phrase 0 are words taken from the URL
// phrase 1 is the MainLongTitle
// phrase 2 is the MainShortTitle
// phrase 3 is the Document Abstract
// phrase 4 is the Document Author
// phrase 5 are the tags specified in document
// phrase 10 and above are the section headlines/titles (88 possible)
// phrase 98 is taken from the embedded anchor/hyperlinks description
// phrase 99 is taken from the media Link url and anchor description
// phrase 100 and above are lines from the text
insertTextToWords(document.getMainLongTitle(), 1, indexRWIEntryNew.flag_app_descr, wflags);
insertTextToWords(document.getMainShortTitle(), 2, indexRWIEntryNew.flag_app_descr, wflags);
insertTextToWords(document.getAbstract(), 3, indexRWIEntryNew.flag_app_descr, wflags);
// missing: author!
// missing: tags!
String[] titles = document.getSectionTitles();
for (int i = 0; i < titles.length; i++) {
insertTextToWords(titles[i], i + 10, indexRWIEntryNew.flag_app_emphasized, wflags);
}
insertTextToWords(document.getMainLongTitle(), 1, indexRWIEntryNew.flag_app_descr, wflags);
insertTextToWords(document.getMainShortTitle(), 2, indexRWIEntryNew.flag_app_descr, wflags);
insertTextToWords(document.getAbstract(), 3, indexRWIEntryNew.flag_app_descr, wflags);
// missing: author!
// missing: tags!
String[] titles = document.getSectionTitles();
for (int i = 0; i < titles.length; i++) {
insertTextToWords(titles[i], i + 10, indexRWIEntryNew.flag_app_emphasized, wflags);
}
// anchors
Iterator i = document.getAnchors().entrySet().iterator();
Map.Entry entry;
while (i.hasNext()) {
entry = (Map.Entry) i.next();
insertTextToWords((String) entry.getKey(), 98, indexRWIEntryNew.flag_app_url, wflags);
insertTextToWords((String) entry.getValue(), 98, indexRWIEntryNew.flag_app_url, wflags);
// anchors
Iterator i = document.getAnchors().entrySet().iterator();
while (i.hasNext()) {
entry = (Map.Entry) i.next();
insertTextToWords((String) entry.getKey(), 98, indexRWIEntryNew.flag_app_url, wflags);
insertTextToWords((String) entry.getValue(), 98, indexRWIEntryNew.flag_app_url, wflags);
}
} else {
this.RESULT_NUMB_WORDS = 0;
this.RESULT_DIFF_WORDS = 0;
this.RESULT_NUMB_SENTENCES = 0;
this.RESULT_DIFF_SENTENCES = 0;
}
// audio
i = document.getAudiolinks().entrySet().iterator();
while (i.hasNext()) {
entry = (Map.Entry) i.next();
insertTextToWords((String) entry.getKey(), 99, flag_cat_hasaudio, wflags);
insertTextToWords((String) entry.getValue(), 99, flag_cat_hasaudio, wflags);
}
// video
i = document.getVideolinks().entrySet().iterator();
while (i.hasNext()) {
entry = (Map.Entry) i.next();
insertTextToWords((String) entry.getKey(), 99, flag_cat_hasvideo, wflags);
insertTextToWords((String) entry.getValue(), 99, flag_cat_hasvideo, wflags);
}
// applications
i = document.getApplinks().entrySet().iterator();
while (i.hasNext()) {
entry = (Map.Entry) i.next();
insertTextToWords((String) entry.getKey(), 99, flag_cat_hasapp, wflags);
insertTextToWords((String) entry.getValue(), 99, flag_cat_hasapp, wflags);
}
// images
i = document.getImages().iterator();
htmlFilterImageEntry ientry;
while (i.hasNext()) {
ientry = (htmlFilterImageEntry) i.next();
insertTextToWords((String) ientry.url().toNormalform(), 99, flag_cat_hasimage, wflags);
insertTextToWords((String) ientry.alt(), 99, flag_cat_hasimage, wflags);
}
if (indexMedia) {
kelondroBitfield wflags = (kelondroBitfield) RESULT_FLAGS.clone(); // the template for the word flags, only from position 0..19
// audio
Iterator i = document.getAudiolinks().entrySet().iterator();
while (i.hasNext()) {
entry = (Map.Entry) i.next();
insertTextToWords((String) entry.getKey(), 99, flag_cat_hasaudio, wflags);
insertTextToWords((String) entry.getValue(), 99, flag_cat_hasaudio, wflags);
}
// video
i = document.getVideolinks().entrySet().iterator();
while (i.hasNext()) {
entry = (Map.Entry) i.next();
insertTextToWords((String) entry.getKey(), 99, flag_cat_hasvideo, wflags);
insertTextToWords((String) entry.getValue(), 99, flag_cat_hasvideo, wflags);
}
// applications
i = document.getApplinks().entrySet().iterator();
while (i.hasNext()) {
entry = (Map.Entry) i.next();
insertTextToWords((String) entry.getKey(), 99, flag_cat_hasapp, wflags);
insertTextToWords((String) entry.getValue(), 99, flag_cat_hasapp, wflags);
}
// images
i = document.getImages().iterator();
htmlFilterImageEntry ientry;
while (i.hasNext()) {
ientry = (htmlFilterImageEntry) i.next();
insertTextToWords((String) ientry.url().toNormalform(), 99, flag_cat_hasimage, wflags);
insertTextToWords((String) ientry.alt(), 99, flag_cat_hasimage, wflags);
}
// finally check all words for missing flag entry
i = words.entrySet().iterator();
wordStatProp wprop;
while (i.hasNext()) {
entry = (Map.Entry) i.next();
wprop = (wordStatProp) entry.getValue();
if (wprop.flags == null) {
wprop.flags = (kelondroBitfield) wflags.clone();
words.put(entry.getKey(), wprop);
// finally check all words for missing flag entry
i = words.entrySet().iterator();
wordStatProp wprop;
while (i.hasNext()) {
entry = (Map.Entry) i.next();
wprop = (wordStatProp) entry.getValue();
if (wprop.flags == null) {
wprop.flags = (kelondroBitfield) wflags.clone();
words.put(entry.getKey(), wprop);
}
}
}
}
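
The call sites simply pass the profile flags through; condensed from the plasmaSwitchboard hunks further down:

    // per-profile condensing in the indexing pipeline:
    plasmaCondenser condenser = new plasmaCondenser(document,
            entry.profile().indexText(),    // condense the document text
            entry.profile().indexMedia());  // condense the media link words

    // unconditional condensing of both kinds (word-set extraction):
    Set words = new plasmaCondenser(document, true, true).words().keySet();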
@ -229,6 +245,8 @@ public final class plasmaCondenser {
wprop.flags.set(flagpos, true);
words.put(word, wprop);
pip++;
this.RESULT_NUMB_WORDS++;
this.RESULT_DIFF_WORDS++;
}
}
@ -282,6 +300,10 @@ public final class plasmaCondenser {
return words;
}
public Map sentences() {
return sentences;
}
public static class wordStatProp {
// object carries statistics for words and sentences
@ -534,14 +556,12 @@ public final class plasmaCondenser {
//this.RESULT_NUMB_TEXT_BYTES = wordenum.count();
this.RESULT_NUMB_WORDS = allwordcounter;
this.RESULT_DIFF_WORDS = wordHandleCount;
this.RESULT_SIMI_WORDS = words.size();
this.RESULT_NUMB_SENTENCES = allsentencecounter;
this.RESULT_DIFF_SENTENCES = sentenceHandleCount;
this.RESULT_SIMI_SENTENCES = sentences.size();
}
public void print() {
String[] s = sentences();
String[] s = sentenceReconstruction();
// printout a reconstruction of the text
for (int i = 0; i < s.length; i++) {
@ -549,7 +569,7 @@ public final class plasmaCondenser {
}
}
public String[] sentences() {
private String[] sentenceReconstruction() {
// we reconstruct the word hashtable
// and order the entries by the number of the sentence
// this structure is only needed to reconstruct the text
@ -613,49 +633,6 @@ public final class plasmaCondenser {
return orderedSentences;
}
/*
public void writeMapToFile(File out) throws IOException {
Map.Entry entry;
String k;
String word;
Iterator it;
wordStatProp wsp;
Object[] orderedSentences = makeOrderedSentences();
// we reconstruct the word hashtable
// and sort the entries by the number of occurrences
// this structure is needed to print out a sorted list of words
TreeMap sortedWords = new TreeMap(); //kelondroNaturalOrder.naturalOrder
it = words.entrySet().iterator(); // enumerates the keys in ascending order
while (it.hasNext()) {
entry = (Map.Entry) it.next();
word = (String) entry.getKey();
wsp = (wordStatProp) entry.getValue();
sortedWords.put(intString(wsp.count, numlength) + intString(wsp.posInText, numlength), word);
}
// start writing of words and sentences
FileWriter writer = new FileWriter(out);
writer.write("\r\n");
it = sortedWords.entrySet().iterator(); // enumerates the keys in descending order
while (it.hasNext()) {
entry = (Map.Entry) it.next();
k = (String) entry.getKey();
writer.write("#W " + k.substring(numlength) + " " + k.substring(0, numlength) + " " + ((String) entry.getValue()) + "\r\n");
}
for (int i = 0; i < orderedSentences.length; i++) {
if (orderedSentences[i] != null) {
writer.write("#S " + intString(i, numlength) + " ");
for (int j = 0; j < ((String[]) orderedSentences[i]).length; j++) {
writer.write(((String[]) orderedSentences[i])[j] + " ");
}
writer.write("\r\n");
}
}
writer.close();
}
*/
public final static boolean invisible(char c) {
// TODO: Bugfix for UTF-8: does this work for non ISO-8859-1 chars?
if ((c < ' ') || (c > 'z')) return true;

@ -186,14 +186,18 @@ public class plasmaCrawlProfile {
int generalDepth, int specificDepth,
int recrawlIfOlder /*minutes*/, int domFilterDepth, int domMaxPages,
boolean crawlingQ,
boolean indexText, boolean indexMedia,
boolean storeHTCache, boolean storeTXCache,
boolean localIndexing, boolean remoteIndexing,
boolean remoteIndexing,
boolean xsstopw, boolean xdstopw, boolean xpstopw) {
entry ne = new entry(name, startURL, generalFilter, specificFilter,
generalDepth, specificDepth,
recrawlIfOlder, domFilterDepth, domMaxPages,
crawlingQ, storeHTCache, storeTXCache, localIndexing, remoteIndexing,
crawlingQ,
indexText, indexMedia,
storeHTCache, storeTXCache,
remoteIndexing,
xsstopw, xdstopw, xpstopw);
try {
profileTable.set(ne.handle(), ne.map());
@ -254,8 +258,9 @@ public class plasmaCrawlProfile {
int generalDepth, int specificDepth,
int recrawlIfOlder /*minutes*/, int domFilterDepth, int domMaxPages,
boolean crawlingQ,
boolean indexText, boolean indexMedia,
boolean storeHTCache, boolean storeTXCache,
boolean localIndexing, boolean remoteIndexing,
boolean remoteIndexing,
boolean xsstopw, boolean xdstopw, boolean xpstopw) {
String handle = kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(Long.toString(System.currentTimeMillis()))).substring(0, crawlProfileHandleLength);
mem = new HashMap();
@ -270,9 +275,10 @@ public class plasmaCrawlProfile {
mem.put("domFilterDepth", Integer.toString(domFilterDepth));
mem.put("domMaxPages", Integer.toString(domMaxPages));
mem.put("crawlingQ", (crawlingQ) ? "true" : "false"); // crawling of urls with '?'
mem.put("indexText", (indexText) ? "true" : "false");
mem.put("indexMedia", (indexMedia) ? "true" : "false");
mem.put("storeHTCache", (storeHTCache) ? "true" : "false");
mem.put("storeTXCache", (storeTXCache) ? "true" : "false");
mem.put("localIndexing", (localIndexing) ? "true" : "false");
mem.put("remoteIndexing", (remoteIndexing) ? "true" : "false");
mem.put("xsstopw", (xsstopw) ? "true" : "false"); // exclude static stop-words
mem.put("xdstopw", (xdstopw) ? "true" : "false"); // exclude dynamic stop-word
@ -376,6 +382,14 @@ public class plasmaCrawlProfile {
String r = (String) mem.get("crawlingQ");
if (r == null) return false; else return (r.equals("true"));
}
public boolean indexText() {
String r = (String) mem.get("indexText");
if (r == null) return false; else return (r.equals("true"));
}
public boolean indexMedia() {
String r = (String) mem.get("indexMedia");
if (r == null) return false; else return (r.equals("true"));
}
public boolean storeHTCache() {
String r = (String) mem.get("storeHTCache");
if (r == null) return false; else return (r.equals("true"));
@ -384,10 +398,6 @@ public class plasmaCrawlProfile {
String r = (String) mem.get("storeTXCache");
if (r == null) return false; else return (r.equals("true"));
}
public boolean localIndexing() {
String r = (String) mem.get("localIndexing");
if (r == null) return false; else return (r.equals("true"));
}
public boolean remoteIndexing() {
String r = (String) mem.get("remoteIndexing");
if (r == null) return false; else return (r.equals("true"));

@ -61,7 +61,7 @@ public final class plasmaSearchImages {
long start = System.currentTimeMillis();
this.images = new TreeSet();
if (maxTime > 10) {
Object[] resource = sc.getResource(url, true, (int) maxTime);
Object[] resource = sc.getResource(url, true, (int) maxTime, false);
InputStream res = (InputStream) resource[0];
Long resLength = (Long) resource[1];
if (res != null) {

@ -256,7 +256,7 @@ public class plasmaSnippetCache {
// if not found try to download it
// download resource using the crawler and keep resource in memory if possible
plasmaHTCache.Entry entry = loadResourceFromWeb(url, timeout, true);
plasmaHTCache.Entry entry = loadResourceFromWeb(url, timeout, true, true);
// getting resource metadata (e.g. the http headers for http resources)
if (entry != null) {
@ -341,7 +341,7 @@ public class plasmaSnippetCache {
* @param fetchOnline specifies if the resource should be loaded from the web if it is not available in the cache
* @return the parsed document as {@link plasmaParserDocument}
*/
public plasmaParserDocument retrieveDocument(URL url, boolean fetchOnline, int timeout) {
public plasmaParserDocument retrieveDocument(URL url, boolean fetchOnline, int timeout, boolean forText) {
// load resource
long resContentLength = 0;
@ -357,7 +357,7 @@ public class plasmaSnippetCache {
// if not found try to download it
// download resource using the crawler and keep resource in memory if possible
plasmaHTCache.Entry entry = loadResourceFromWeb(url, timeout, true);
plasmaHTCache.Entry entry = loadResourceFromWeb(url, timeout, true, forText);
// getting resource metadata (e.g. the http headers for http resources)
if (entry != null) {
@ -593,7 +593,7 @@ public class plasmaSnippetCache {
return new ArrayList();
}
plasmaParserDocument document = retrieveDocument(url, fetchOnline, timeout);
plasmaParserDocument document = retrieveDocument(url, fetchOnline, timeout, false);
ArrayList a = new ArrayList();
if (document != null) {
a.addAll(computeMediaSnippets(document, queryhashes, "audio"));
@ -783,7 +783,7 @@ public class plasmaSnippetCache {
* <tr><td>[1]</td><td>the content-length as {@link Integer}</td></tr>
* </table>
*/
public Object[] getResource(URL url, boolean fetchOnline, int socketTimeout) {
public Object[] getResource(URL url, boolean fetchOnline, int socketTimeout, boolean forText) {
// load the url as resource from the web
try {
long contentLength = -1;
@ -796,7 +796,7 @@ public class plasmaSnippetCache {
// if the content is not available in cache try to download it from web
// try to download the resource using a crawler
plasmaHTCache.Entry entry = loadResourceFromWeb(url, (socketTimeout < 0) ? -1 : socketTimeout, true);
plasmaHTCache.Entry entry = loadResourceFromWeb(url, (socketTimeout < 0) ? -1 : socketTimeout, true, forText);
// read resource body (if it is there)
byte[] resourceArray = entry.cacheArray();
@ -821,7 +821,8 @@ public class plasmaSnippetCache {
public plasmaHTCache.Entry loadResourceFromWeb(
URL url,
int socketTimeout,
boolean keepInMemory
boolean keepInMemory,
boolean forText
) throws plasmaCrawlerException {
plasmaHTCache.Entry result = this.sb.cacheLoader.loadSync(
@ -830,7 +831,7 @@ public class plasmaSnippetCache {
null, // referer
yacyCore.seedDB.mySeed.hash, // initiator
0, // depth
sb.defaultSnippetProfile, // crawl profile
(forText) ? sb.defaultTextSnippetProfile : sb.defaultMediaSnippetProfile, // crawl profile
socketTimeout,
keepInMemory
);
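
The added forText flag only decides which snippet profile drives the on-demand fetch; the call sites in this commit choose it per use case:

    // forText == true  -> sb.defaultTextSnippetProfile  (text snippets: ViewFile, yacysearch)
    // forText == false -> sb.defaultMediaSnippetProfile (media fetches: ViewImage, media snippets)
    entry = sb.snippetCache.loadResourceFromWeb(url, 5000, false, true);         // text use case
    Object[] resource = sb.snippetCache.getResource(url, true, timeout, false);  // media use case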

@ -221,7 +221,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
public plasmaCrawlProfile profiles;
public plasmaCrawlProfile.entry defaultProxyProfile;
public plasmaCrawlProfile.entry defaultRemoteProfile;
public plasmaCrawlProfile.entry defaultSnippetProfile;
public plasmaCrawlProfile.entry defaultTextSnippetProfile;
public plasmaCrawlProfile.entry defaultMediaSnippetProfile;
public boolean rankingOn;
public plasmaRankingDistribution rankingOwnDistribution;
public plasmaRankingDistribution rankingOtherDistribution;
@ -777,7 +778,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
private void initProfiles() {
this.defaultProxyProfile = null;
this.defaultRemoteProfile = null;
this.defaultSnippetProfile = null;
this.defaultTextSnippetProfile = null;
this.defaultMediaSnippetProfile = null;
Iterator i = this.profiles.profiles(true);
plasmaCrawlProfile.entry profile;
String name;
@ -786,24 +788,30 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
name = profile.name();
if (name.equals("proxy")) this.defaultProxyProfile = profile;
if (name.equals("remote")) this.defaultRemoteProfile = profile;
if (name.equals("snippet")) this.defaultSnippetProfile = profile;
if (name.equals("snippetText")) this.defaultTextSnippetProfile = profile;
if (name.equals("snippetMedia")) this.defaultMediaSnippetProfile = profile;
}
if (this.defaultProxyProfile == null) {
// generate new default entry for proxy crawling
this.defaultProxyProfile = this.profiles.newEntry("proxy", "", ".*", ".*",
Integer.parseInt(getConfig("proxyPrefetchDepth", "0")),
Integer.parseInt(getConfig("proxyPrefetchDepth", "0")),
60 * 24, -1, -1, false, true, true, true, getConfigBool("proxyCrawlOrder", false), true, true, true);
60 * 24, -1, -1, false, true, true, true, true, getConfigBool("proxyCrawlOrder", false), true, true, true);
}
if (this.defaultRemoteProfile == null) {
// generate new default entry for remote crawling
defaultRemoteProfile = this.profiles.newEntry("remote", "", ".*", ".*", 0, 0,
-1, -1, -1, true, false, true, true, false, true, true, false);
-1, -1, -1, true, true, true, false, true, false, true, true, false);
}
if (this.defaultSnippetProfile == null) {
if (this.defaultTextSnippetProfile == null) {
// generate new default entry for snippet fetch and optional crawling
defaultSnippetProfile = this.profiles.newEntry("snippet", "", ".*", ".*", 0, 0,
60 * 24 * 30, -1, -1, true, true, true, true, false, true, true, false);
defaultTextSnippetProfile = this.profiles.newEntry("snippetText", "", ".*", ".*", 0, 0,
60 * 24 * 30, -1, -1, true, true, true, true, true, false, true, true, false);
}
if (this.defaultMediaSnippetProfile == null) {
// generate new default entry for snippet fetch and optional crawling
defaultMediaSnippetProfile = this.profiles.newEntry("snippetMedia", "", ".*", ".*", 0, 0,
60 * 24 * 30, -1, -1, true, false, true, true, true, false, true, true, false);
}
}
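
The long positional boolean lists above are easy to misread; annotated against the newEntry signature from the plasmaCrawlProfile hunk above, the snippetText call reads as follows (a reading aid only, not a change):

    defaultTextSnippetProfile = this.profiles.newEntry(
            "snippetText", "", ".*", ".*", 0, 0,
            60 * 24 * 30,   // recrawlIfOlder (minutes): one month
            -1,             // domFilterDepth: no auto-filter
            -1,             // domMaxPages: no count restriction
            true,           // crawlingQ
            true,           // indexText
            true,           // indexMedia
            true,           // storeHTCache
            true,           // storeTXCache
            false,          // remoteIndexing
            true,           // xsstopw
            true,           // xdstopw
            false);         // xpstopw

The snippetMedia profile is identical except that indexText is false.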
@ -830,7 +838,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
entry = (plasmaCrawlProfile.entry) iter.next();
if (!((entry.name().equals("proxy")) ||
(entry.name().equals("remote")) ||
(entry.name().equals("snippet")))) {
(entry.name().equals("snippetText")) ||
(entry.name().equals("snippetMedia")))) {
iter.remove();
hasDoneSomething = true;
}
@ -1575,7 +1584,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
checkInterruption();
log.logFine("Condensing for '" + entry.normalizedURLString() + "'");
plasmaCondenser condenser = new plasmaCondenser(document, true);
plasmaCondenser condenser = new plasmaCondenser(document, entry.profile().indexText(), entry.profile().indexMedia());
// generate citation reference
Integer[] ioLinks = generateCitationReference(entry.urlHash(), docDate, document, condenser); // [outlinksSame, outlinksOther]
@ -1632,7 +1641,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
(processCase == PROCESSCASE_5_LOCAL_CRAWLING) ||
(processCase == PROCESSCASE_6_GLOBAL_CRAWLING)
) &&
(entry.profile().localIndexing())
((entry.profile().indexText()) || (entry.profile().indexMedia()))
) {
String urlHash = newEntry.hash();
@ -1673,7 +1682,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
HashMap urlCache = new HashMap(1);
urlCache.put(newEntry.hash(),newEntry);
ArrayList tmpContainers = new ArrayList(condenser.RESULT_SIMI_WORDS);
ArrayList tmpContainers = new ArrayList(condenser.words().size());
String language = plasmaURL.language(entry.url());
char doctype = plasmaURL.docType(document.getMimeType());
@ -1695,8 +1704,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
urlLength, urlComps,
wordStat.count,
document.getMainLongTitle().length(),
condenser.RESULT_SIMI_WORDS,
condenser.RESULT_SIMI_SENTENCES,
condenser.words().size(),
condenser.sentences().size(),
wordStat.posInText,
wordStat.posInPhrase,
wordStat.numOfPhrase,
@ -1715,7 +1724,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
tmpContainers.add(wordIdxContainer);
}
//System.out.println("DEBUG: plasmaSearch.addPageIndex: added " + condenser.getWords().size() + " words, flushed " + c + " entries");
words = condenser.RESULT_SIMI_WORDS;
words = condenser.words().size();
// transfering the index to the storage peer
indexContainer[] indexData = (indexContainer[]) tmpContainers.toArray(new indexContainer[tmpContainers.size()]);
@ -1875,7 +1884,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
kelondroBase64Order.enhancedCoder.encodeLongSmart(0, 2) + // count of links to other documents
kelondroBase64Order.enhancedCoder.encodeLongSmart(document.getTextLength(), 3) + // length of plain text in bytes
kelondroBase64Order.enhancedCoder.encodeLongSmart(condenser.RESULT_NUMB_WORDS, 3) + // count of all appearing words
kelondroBase64Order.enhancedCoder.encodeLongSmart(condenser.RESULT_SIMI_WORDS, 3) + // count of all unique words
kelondroBase64Order.enhancedCoder.encodeLongSmart(condenser.words().size(), 3) + // count of all unique words
kelondroBase64Order.enhancedCoder.encodeLongSmart(0, 1); // Flags (update, popularity, attention, vote)
//crl.append(head); crl.append ('|'); crl.append(cpl); crl.append((char) 13); crl.append((char) 10);
@ -2249,7 +2258,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
InputStream resourceContent = null;
try {
// get the resource content
Object[] resource = snippetCache.getResource(comp.url(), fetchOnline, 10000);
Object[] resource = snippetCache.getResource(comp.url(), fetchOnline, 10000, true);
resourceContent = (InputStream) resource[0];
Long resourceContentLength = (Long) resource[1];
@ -2259,7 +2268,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// get the word set
Set words = null;
try {
words = new plasmaCondenser(document, true).words().keySet();
words = new plasmaCondenser(document, true, true).words().keySet();
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}

@ -365,7 +365,7 @@ public class plasmaSwitchboardQueue {
}
// check profile
if (!profile().localIndexing()) {
if ((!profile().indexText()) && (!profile().indexMedia())) {
return "Indexing_Not_Allowed";
}
@ -420,7 +420,7 @@ public class plasmaSwitchboardQueue {
}
// check profile
if (!profile().localIndexing()) { return "Indexing_Not_Allowed"; }
if ((!profile().indexText()) && (!profile().indexMedia())) { return "Indexing_Not_Allowed"; }
final String nURL = normalizedURLString();
// -CGI access in request

@ -269,8 +269,8 @@ public final class plasmaWordIndex implements indexRI {
ientry = new indexRWIEntryNew(urlHash,
urlLength, urlComps, (document == null) ? urlLength : document.getMainLongTitle().length(),
wprop.count,
condenser.RESULT_SIMI_WORDS,
condenser.RESULT_SIMI_SENTENCES,
condenser.words().size(),
condenser.sentences().size(),
wprop.posInText,
wprop.posInPhrase,
wprop.numOfPhrase,

@ -72,7 +72,7 @@ public class CrawlService extends AbstractService {
* Function to crawl a single link with depth <code>0</code>
*/
public Document crawlSingleUrl(String crawlingURL) throws AxisFault {
return this.crawling(crawlingURL, "CRAWLING-ROOT", new Integer(0), ".*", Boolean.TRUE, Boolean.TRUE, Boolean.TRUE, Boolean.FALSE, null, Boolean.TRUE);
return this.crawling(crawlingURL, "CRAWLING-ROOT", new Integer(0), ".*", Boolean.TRUE, Boolean.TRUE, Boolean.TRUE, Boolean.TRUE, Boolean.FALSE, null, Boolean.TRUE);
}
public Document crawling(
@ -80,7 +80,8 @@ public class CrawlService extends AbstractService {
String crawljobTitel,
Integer crawlingDepth,
String crawlingFilter,
Boolean localIndexing,
Boolean indexText,
Boolean indexMedia,
Boolean crawlingQ,
Boolean storeHTCache,
Boolean crawlOrder,
@ -100,8 +101,10 @@ public class CrawlService extends AbstractService {
args.put("crawlingFilter",crawlingFilter);
if (crawlingDepth != null && crawlingDepth.intValue() > 0)
args.put("crawlingDepth",crawlingDepth.toString());
if (localIndexing != null)
args.put("localIndexinglingQ",localIndexing.booleanValue()?"on":"off");
if (indexText != null)
args.put("indexText",indexText.booleanValue()?"on":"off");
if (indexMedia != null)
args.put("indexMedia",indexMedia.booleanValue()?"on":"off");
if (crawlingQ != null)
args.put("crawlingQ",crawlingQ.booleanValue()?"on":"off");
if (storeHTCache != null)

@ -53,13 +53,15 @@ public class urlRedirectord implements serverHandler {
// domMaxPages, if negative: no count restriction
-1,
// crawlDynamic
false,
false,
// indexText
true,
// indexMedia
true,
// storeHTCache
false,
// storeTxCache
true,
//localIndexing
true,
// remoteIndexing
false,
// xsstopw

@ -430,7 +430,8 @@ crawlingDepth=2
crawlingIfOlder=525600
crawlingDomFilterDepth=-1
crawlingDomMaxPages=-1
localIndexing=true
indexText=true
indexMedia=true
# Filter for crawling; may be used to restrict a crawl to a specific domain
# URLs are only indexed and further crawled if they match this filter
