diff --git a/.classpath b/.classpath index 0dd721534..3d14a80c5 100644 --- a/.classpath +++ b/.classpath @@ -93,13 +93,13 @@ - - - - - - - + + + + + + + diff --git a/build.xml b/build.xml index 006c458f9..a6a7a5e04 100644 --- a/build.xml +++ b/build.xml @@ -165,9 +165,9 @@ - - - + + + @@ -182,10 +182,10 @@ - - - - + + + + diff --git a/defaults/httpd.mime b/defaults/httpd.mime index 9b763ccb5..813cae3cc 100644 --- a/defaults/httpd.mime +++ b/defaults/httpd.mime @@ -23,8 +23,9 @@ csv = text/csv db = application/octet-stream dll = application/octet-stream doc = application/msword -docx = application/msword +docx = application/vnd.openxmlformats-officedocument.wordprocessingml.document dot = application/msword +dotx = application/vnd.openxmlformats-officedocument.wordprocessingml.template dvi = application/x-dvi eps = application/postscript exe = application/octet-stream @@ -82,9 +83,11 @@ phtml = application/x-httpd-php pl = text/plain png = image/png pot = application/mspowerpoint +potx = application/vnd.openxmlformats-officedocument.presentationml.template pps = application/mspowerpoint +ppsx = application/vnd.openxmlformats-officedocument.presentationml.slideshow ppt = application/mspowerpoint -pptx = application/mspowerpoint +pptx = application/vnd.openxmlformats-officedocument.presentationml.presentation ppz = application/mspowerpoint ps = application/postscript py = text/html @@ -128,7 +131,8 @@ wmv = video/x-ms-wmv xhtml = text/xhtml+xml xla = application/msexcel xls = application/msexcel -xlsx = application/msexcel +xlsx = application/vnd.openxmlformats-officedocument.spreadsheetml.sheet +xltx = application/vnd.openxmlformats-officedocument.spreadsheetml.template xpi = application/x-xpinstall xsl = text/xml xml = text/xml diff --git a/defaults/solr.collection.schema b/defaults/solr.collection.schema index 06f7b4ccb..71ab7c6cb 100644 --- a/defaults/solr.collection.schema +++ b/defaults/solr.collection.schema @@ -178,13 +178,13 @@ linksnofollowcount_i inboundlinkscount_i ## number of outgoing inbound (to same domain) links with nofollow tag, int -inboundlinksnofollowcount_i +#inboundlinksnofollowcount_i ## number of outgoing outbound (to other domain) links, including outboundlinksnofollowcount_i, int outboundlinkscount_i ## number of outgoing outbound (to other domain) links with nofollow tag, int -outboundlinksnofollowcount_i +#outboundlinksnofollowcount_i ## number of images, int imagescount_i diff --git a/htroot/AugmentedBrowsing_p.html b/htroot/AugmentedBrowsing_p.html index 32efd617b..cf6a45c7a 100644 --- a/htroot/AugmentedBrowsing_p.html +++ b/htroot/AugmentedBrowsing_p.html @@ -54,10 +54,9 @@ Define URL substitution rules which allow navigating in proxy environment. Possible values: all, domainlist. Default: domainlist.

- + - - +
diff --git a/htroot/BlacklistCleaner_p.html b/htroot/BlacklistCleaner_p.html index fad4bbd08..112a44dbb 100644 --- a/htroot/BlacklistCleaner_p.html +++ b/htroot/BlacklistCleaner_p.html @@ -21,7 +21,7 @@ Allow regular expressions in host part of blacklist entries.

- + ::

The blacklist-cleaner only works for the following blacklist-engines up to now:

    #{engines}# @@ -58,7 +58,7 @@
    #{/entries}#
     
    - +
    ::#(/disabled)# diff --git a/htroot/BlacklistImpExp_p.html b/htroot/BlacklistImpExp_p.html index 9c0d19357..48ac2558d 100644 --- a/htroot/BlacklistImpExp_p.html +++ b/htroot/BlacklistImpExp_p.html @@ -23,7 +23,7 @@ #{/otherHosts}# - + @@ -33,7 +33,7 @@ URL: - + @@ -45,7 +45,7 @@ - + @@ -57,7 +57,7 @@ - + ::#(/disabled)# @@ -76,7 +76,7 @@ #{/blackListNames}# - + @@ -92,7 +92,7 @@ - + diff --git a/htroot/BlacklistTest_p.html b/htroot/BlacklistTest_p.html index 61f15aba2..d9788c608 100644 --- a/htroot/BlacklistTest_p.html +++ b/htroot/BlacklistTest_p.html @@ -15,7 +15,7 @@
    Test list: - + #(testlist)#::

    diff --git a/htroot/Blacklist_p.html b/htroot/Blacklist_p.html index aab112a26..2f7b6691f 100644 --- a/htroot/Blacklist_p.html +++ b/htroot/Blacklist_p.html @@ -42,7 +42,7 @@ #{/blackLists}# - +
    :: #(/disabled)# @@ -60,7 +60,7 @@ #(/error)#
    - +


    @@ -75,7 +75,7 @@
    - +

    The right '*', after the '/', can be replaced by a regular expression.

      @@ -119,7 +119,7 @@ #{/blackListsMove}#   - + @@ -129,18 +129,18 @@

      Show entries: #{subListOffset}# - + #{/subListOffset}#
        Entries per page: - #{subListSize}# #{/subListSize}#   - +

      @@ -155,7 +155,7 @@ #{/editList}#

      - + #(/edit)# @@ -167,8 +167,8 @@
      - - + +
      @@ -183,23 +183,11 @@ #{/currentActiveFor}# - +
      ::#(/disabled)# - #%env/templates/footer.template%# diff --git a/htroot/Blog.html b/htroot/Blog.html index 39af1537a..baae0ff90 100644 --- a/htroot/Blog.html +++ b/htroot/Blog.html @@ -77,9 +77,9 @@ - - - + + + :: @@ -121,9 +121,9 @@ - - - + + + :: @@ -138,10 +138,10 @@
      - +
      - +
      :: @@ -157,7 +157,7 @@
      - +
      #(/mode)# diff --git a/htroot/BlogComments.html b/htroot/BlogComments.html index c78933870..42b8d1e4d 100644 --- a/htroot/BlogComments.html +++ b/htroot/BlogComments.html @@ -70,9 +70,9 @@ - - - + + + #(/allow)# @@ -109,9 +109,9 @@ - - - + + + diff --git a/htroot/Bookmarks.java b/htroot/Bookmarks.java index a5a4b3342..905787de4 100644 --- a/htroot/Bookmarks.java +++ b/htroot/Bookmarks.java @@ -76,6 +76,9 @@ public class Bookmarks { final static boolean TAGS = false; final static boolean FOLDERS = true; + final static float TAGCLOUD_FONTSIZE_MIN = 0.75f; // min font-size in em + final static float TAGCLOUD_FONTSIZE_MAX = 2.0f; // max font-size in em + public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { int max_count = 10; @@ -455,7 +458,7 @@ public class Bookmarks { } } else { // font-size is pseudo-rounded to 2 decimals - prop.put("display_"+id+"_"+count+"_size", Math.round((1.1f+Math.log(tag.size())/4f)*100.0f)/100.0f); + prop.put("display_"+id+"_"+count+"_size", Math.min(TAGCLOUD_FONTSIZE_MAX, Math.round((TAGCLOUD_FONTSIZE_MIN + Math.log(tag.size())/4f)*100.0f)/100.0f)); } count++; } diff --git a/htroot/ConfigPortal.html b/htroot/ConfigPortal.html index 760d9ad17..26d183beb 100644 --- a/htroot/ConfigPortal.html +++ b/htroot/ConfigPortal.html @@ -78,7 +78,7 @@
      Default maximum number of results per page
      -
      +
      max = 100 (with CACHEONLY=5000)
      Default index.html Page (by forwarder)
      diff --git a/htroot/ConfigPortal.java b/htroot/ConfigPortal.java index 2205e2fa9..003404d0d 100644 --- a/htroot/ConfigPortal.java +++ b/htroot/ConfigPortal.java @@ -171,12 +171,6 @@ public class ConfigPortal { prop.put(SwitchboardConstants.REMOTESEARCH_RESULT_STORE, sb.getConfigBool(SwitchboardConstants.REMOTESEARCH_RESULT_STORE, true) ? 1 : 0); - prop.put("search.navigation.hosts", sb.getConfig("search.navigation", "").indexOf("hosts",0) >= 0 ? 1 : 0); - prop.put("search.navigation.authors", sb.getConfig("search.navigation", "").indexOf("authors",0) >= 0 ? 1 : 0); - prop.put("search.navigation.collections", sb.getConfig("search.navigation", "").indexOf("collections",0) >= 0 ? 1 : 0); - prop.put("search.navigation.namespace", sb.getConfig("search.navigation", "").indexOf("namespace",0) >= 0 ? 1 : 0); - prop.put("search.navigation.topics", sb.getConfig("search.navigation", "").indexOf("topics",0) >= 0 ? 1 : 0); - prop.put("search.verify.nocache", sb.getConfig("search.verify", "").equals("nocache") ? 1 : 0); prop.put("search.verify.iffresh", sb.getConfig("search.verify", "").equals("iffresh") ? 1 : 0); prop.put("search.verify.ifexist", sb.getConfig("search.verify", "").equals("ifexist") ? 1 : 0); diff --git a/htroot/ConfigUpdate_p.html b/htroot/ConfigUpdate_p.html index 2ad4a8b3b..6c12e6318 100644 --- a/htroot/ConfigUpdate_p.html +++ b/htroot/ConfigUpdate_p.html @@ -28,8 +28,8 @@ #{/availreleases}# -    -    +    +   


      Downloaded Releases

      @@ -43,14 +43,14 @@ #{/downloadedreleases}# #(downloadsAvailable)#::#(/downloadsAvailable)# #(deployenabled)#::no automated installation on development environments:: -    -    +    +    #(/deployenabled)#


      Automatic Update

       check for new releases, download if available and restart with downloaded release
      -   
      +   
      #(autoUpdate)#::

      Download of release #[downloadedRelease]# finished. Restart Initiated.
      ::
      No more recent release found.
      :: @@ -99,7 +99,7 @@ only accept signed files
      -
      +
      #(configCommit)# ::
      Accepted Changes.
      #(/configCommit)#
      diff --git a/htroot/ContentAnalysis_p.html b/htroot/ContentAnalysis_p.html index 8296157f7..b69fa680f 100644 --- a/htroot/ContentAnalysis_p.html +++ b/htroot/ContentAnalysis_p.html @@ -28,8 +28,8 @@
      - - + +
      diff --git a/htroot/ContentIntegrationPHPBB3_p.html b/htroot/ContentIntegrationPHPBB3_p.html index dfe74937c..c22651f80 100644 --- a/htroot/ContentIntegrationPHPBB3_p.html +++ b/htroot/ContentIntegrationPHPBB3_p.html @@ -69,7 +69,7 @@
      Import a database dump,
      -
      +
       
      diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java index 7b5d705ba..4f440a0ac 100644 --- a/htroot/Crawler_p.java +++ b/htroot/Crawler_p.java @@ -377,7 +377,7 @@ public class Crawler_p { try { scraper = sb.loader.loadDocument(sitelistURL, CacheStrategy.IFFRESH, BlacklistType.CRAWLER, agent); // get links and generate filter - for (DigestURL u: scraper.getAnchors()) { + for (DigestURL u: scraper.getHyperlinks().keySet()) { newRootURLs.add(u); } } catch (final IOException e) { diff --git a/htroot/DictionaryLoader_p.html b/htroot/DictionaryLoader_p.html index 364c56a65..95446429b 100644 --- a/htroot/DictionaryLoader_p.html +++ b/htroot/DictionaryLoader_p.html @@ -34,11 +34,11 @@
      #(geon0Status)#
      not loaded
      ::
      loaded
      ::deactivated#(/geon0Status)#
      Action
      #(geon0Status)# - :: - - :: - - + :: + + :: + + #(/geon0Status)#
      #(geon0ActionLoaded)#::
      Result
      loaded and activated dictionary file
      :: @@ -68,11 +68,11 @@
      #(geon1Status)#
      not loaded
      ::
      loaded
      ::deactivated#(/geon1Status)#
      Action
      #(geon1Status)# - :: - - :: - - + :: + + :: + + #(/geon1Status)#
      #(geon1ActionLoaded)#::
      Result
      loaded and activated dictionary file
      :: @@ -102,11 +102,11 @@
      #(geon2Status)#
      not loaded
      ::
      loaded
      ::deactivated#(/geon2Status)#
      Action
      #(geon2Status)# - :: - - :: - - + :: + + :: + + #(/geon2Status)#
      #(geon2ActionLoaded)#::
      Result
      loaded and activated dictionary file
      :: @@ -138,11 +138,11 @@
      #(geo1Status)#
      not loaded
      ::
      loaded
      ::deactivated#(/geo1Status)#
      Action
      #(geo1Status)# - :: - - :: - - + :: + + :: + + #(/geo1Status)#
      #(geo1ActionLoaded)#::
      Result
      loaded and activated dictionary file
      :: @@ -181,11 +181,11 @@
      #(drw0Status)#
      not loaded
      ::
      loaded
      ::deactivated#(/drw0Status)#
      Action
      #(drw0Status)# - :: - - :: - - + :: + + :: + + #(/drw0Status)#
      #(drw0ActionLoaded)#::
      Result
      loaded and activated dictionary file
      :: @@ -219,7 +219,7 @@
      #(syn0Status)#
      Deactivated
      ::
      Activated
      #(/syn0Status)#
      Action
      -
      #(syn0Status)#::#(/syn0Status)#
      +
      #(syn0Status)#::#(/syn0Status)#

      Moby Lexicon - English Thesaurus from http://icon.shef.ac.uk/Moby/

      @@ -229,7 +229,7 @@
      #(syn1Status)#
      Deactivated
      ::
      Activated
      #(/syn1Status)#
      Action
      -
      #(syn1Status)#::#(/syn1Status)#
      +
      #(syn1Status)#::#(/syn1Status)#
      diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java index 9830a02ab..baf170002 100644 --- a/htroot/IndexControlRWIs_p.java +++ b/htroot/IndexControlRWIs_p.java @@ -659,8 +659,6 @@ public class IndexControlRWIs_p { sb.index, sb.getRanking(), "",//userAgent - false, - false, 0.0d, 0.0d, 0.0d, new String[0]); final SearchEvent theSearch = SearchEventCache.getEvent(query, sb.peers, sb.tables, null, false, sb.loader, Integer.MAX_VALUE, Long.MAX_VALUE); @@ -671,8 +669,7 @@ public class IndexControlRWIs_p { } else { prop.put("searchresult", 3); prop.put("searchresult_allurl", theSearch.local_rwi_available.get()); - prop - .put("searchresult_description", theSearch.flagCount()[WordReferenceRow.flag_app_dc_description]); + prop.put("searchresult_description", theSearch.flagCount()[WordReferenceRow.flag_app_dc_description]); prop.put("searchresult_title", theSearch.flagCount()[WordReferenceRow.flag_app_dc_title]); prop.put("searchresult_creator", theSearch.flagCount()[WordReferenceRow.flag_app_dc_creator]); prop.put("searchresult_subject", theSearch.flagCount()[WordReferenceRow.flag_app_dc_subject]); diff --git a/htroot/IndexCreateParserErrors_p.html b/htroot/IndexCreateParserErrors_p.html index b34d1cf7d..e334f99dc 100644 --- a/htroot/IndexCreateParserErrors_p.html +++ b/htroot/IndexCreateParserErrors_p.html @@ -19,9 +19,9 @@

      Showing latest #[num]# entries.

      -   +   #(/only-latest)# - +

      There are #[num]# entries in the rejected-queue:

      diff --git a/htroot/IndexFederated_p.html b/htroot/IndexFederated_p.html index 336383e20..95dc1e1f7 100644 --- a/htroot/IndexFederated_p.html +++ b/htroot/IndexFederated_p.html @@ -74,7 +74,7 @@
      write-enabled (if unchecked, the remote server(s) will only be used as search peers)
      -
      +
      @@ -95,7 +95,7 @@
      use webgraph search index (rich information in second Solr core)
      -
      +
      @@ -111,7 +111,7 @@
      support peer-to-peer index transmission (DHT RWI index)
      -
      +
      diff --git a/htroot/IndexImportMediawiki_p.html b/htroot/IndexImportMediawiki_p.html index 36d796df2..73f278e20 100644 --- a/htroot/IndexImportMediawiki_p.html +++ b/htroot/IndexImportMediawiki_p.html @@ -24,8 +24,13 @@
      Dumps must be stored in the local file system in XML format and may be compressed in gz or bz2.
      - - +
      + + +
      + +
      +

      diff --git a/htroot/IndexImportOAIPMHList_p.html b/htroot/IndexImportOAIPMHList_p.html index 2da1a4cd6..e58cb70b5 100644 --- a/htroot/IndexImportOAIPMHList_p.html +++ b/htroot/IndexImportOAIPMHList_p.html @@ -25,7 +25,7 @@

      - +

      @@ -41,7 +41,7 @@ #{/table}#

      - +

      #(/source)# diff --git a/htroot/IndexImportOAIPMH_p.html b/htroot/IndexImportOAIPMH_p.html index 757645fe6..671d3acdd 100644 --- a/htroot/IndexImportOAIPMH_p.html +++ b/htroot/IndexImportOAIPMH_p.html @@ -17,7 +17,7 @@ Single request import This will submit only a single request as given here to a OAI-PMH server and imports records into the index
      - + #(import-one)#::

      Source:
      #[source]#
      @@ -35,8 +35,8 @@ Import all Records from a server Import all records that follow according to resumption elements into index
      - - #(optiongetlist)#::or #(/optiongetlist)# + + #(optiongetlist)#::or #(/optiongetlist)# #(status)#::

      Import started!

      ::

      Bad input data: #[message]#

      #(/status)# diff --git a/htroot/IndexSchema_p.html b/htroot/IndexSchema_p.html index cc0a2ae7b..b47c920de 100644 --- a/htroot/IndexSchema_p.html +++ b/htroot/IndexSchema_p.html @@ -54,8 +54,8 @@
      - - + +


      @@ -66,7 +66,7 @@ To physically remove them from the index you need to reindex the documents. Here you can reindex all documents with inactive fields.

      - +

      You may monitor progress (or stop the job) under IndexReIndexMonitor_p.html

      diff --git a/htroot/Load_MediawikiWiki.html b/htroot/Load_MediawikiWiki.html index 50a309958..00b0a8a63 100644 --- a/htroot/Load_MediawikiWiki.html +++ b/htroot/Load_MediawikiWiki.html @@ -30,10 +30,10 @@ - - - - + + + + @@ -51,7 +51,7 @@
       
      -
      +
      diff --git a/htroot/Load_PHPBB3.html b/htroot/Load_PHPBB3.html index 297f61e6f..cf61e5878 100644 --- a/htroot/Load_PHPBB3.html +++ b/htroot/Load_PHPBB3.html @@ -35,7 +35,7 @@
       
      -
      +
      diff --git a/htroot/Load_RSS_p.html b/htroot/Load_RSS_p.html index 79249373b..2352ab9aa 100644 --- a/htroot/Load_RSS_p.html +++ b/htroot/Load_RSS_p.html @@ -32,7 +32,7 @@
      URL of the RSS feed
      Preview
      -
      +
      Indexing
      #(showload)#Available after successful loading of rss feed in preview::
      @@ -57,7 +57,7 @@
      collection
      - + #(/showload)#
      #(showerrmsg)#::
      #[msgtxt]#
      #(/showerrmsg)# @@ -95,8 +95,8 @@

      - - + +

      #(/showscheduledfeeds)# @@ -122,9 +122,9 @@

      - - - + + +

      #(/shownewfeeds)# @@ -168,7 +168,7 @@

      - +

      #(/showitems)# diff --git a/htroot/Messages_p.html b/htroot/Messages_p.html index ada0b2bd6..e4a53f9bf 100644 --- a/htroot/Messages_p.html +++ b/htroot/Messages_p.html @@ -17,7 +17,7 @@ - + diff --git a/htroot/Network.html b/htroot/Network.html index 10ddee8c1..3b7ea9201 100644 --- a/htroot/Network.html +++ b/htroot/Network.html @@ -102,7 +102,7 @@ document.getElementById("apilink").setAttribute("href", "Network.xml?" + window. : - + @@ -354,7 +354,7 @@ document.getElementById("apilink").setAttribute("href", "Network.xml?" + window. - +
       
      diff --git a/htroot/PerformanceMemory_p.html b/htroot/PerformanceMemory_p.html index 43f150785..1f655361b 100644 --- a/htroot/PerformanceMemory_p.html +++ b/htroot/PerformanceMemory_p.html @@ -158,6 +158,7 @@ + + Flushes #{ObjectList}# @@ -206,13 +207,13 @@ #[objectMissCacheWriteUnique]# #[objectMissCacheWriteDouble]# #[objectMissCacheDeletes]# - + #[objectMissCacheFlushes]# #{/ObjectList}# Total Mem: #[objectHitCacheTotalMem]# MB (hit), #[objectMissCacheTotalMem]# MB (miss); Stop Grow when less than #[objectCacheStopGrow]# MB available left; Start Shrink when less than #[objectCacheStartShrink]# MB availabe left - + -->

      Other Caching Structures:

      @@ -225,32 +226,6 @@ Insert Delete - DNSCache/Hit #[namecacheHit.size]# diff --git a/htroot/PerformanceMemory_p.java b/htroot/PerformanceMemory_p.java index 7558fd2f5..50535d2ea 100644 --- a/htroot/PerformanceMemory_p.java +++ b/htroot/PerformanceMemory_p.java @@ -36,7 +36,6 @@ import org.apache.solr.search.SolrCache; import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.util.ConcurrentLog; -import net.yacy.kelondro.index.Cache; import net.yacy.kelondro.index.RAMIndex; import net.yacy.kelondro.table.Table; import net.yacy.kelondro.util.Formatter; @@ -183,67 +182,51 @@ public class PerformanceMemory_p { prop.put("indexcache", c); prop.putNum("indexcacheTotalMem", totalhitmem / (1024d * 1024d)); - // write object cache table - i = Cache.filenames(); - c = 0; - long missmem, totalmissmem = 0; - totalhitmem = 0; - Map mapy; - while (i.hasNext()) { - filename = i.next(); - mapy = Cache.memoryStats(filename); - prop.put("ObjectList_" + c + "_objectCachePath", ((p = filename.indexOf("DATA",0)) < 0) ? filename : filename.substring(p)); - - // hit cache - hitmem = Long.parseLong(mapy.get(Cache.StatKeys.objectHitMem)); - totalhitmem += hitmem; - prop.put("ObjectList_" + c + "_objectHitChunkSize", mapy.get(Cache.StatKeys.objectHitChunkSize)); - prop.putNum("ObjectList_" + c + "_objectHitCacheCount", mapy.get(Cache.StatKeys.objectHitCacheCount)); - prop.put("ObjectList_" + c + "_objectHitCacheMem", Formatter.bytesToString(hitmem)); - prop.putNum("ObjectList_" + c + "_objectHitCacheReadHit", mapy.get(Cache.StatKeys.objectHitCacheReadHit)); - prop.putNum("ObjectList_" + c + "_objectHitCacheReadMiss", mapy.get(Cache.StatKeys.objectHitCacheReadMiss)); - prop.putNum("ObjectList_" + c + "_objectHitCacheWriteUnique", mapy.get(Cache.StatKeys.objectHitCacheWriteUnique)); - prop.putNum("ObjectList_" + c + "_objectHitCacheWriteDouble", mapy.get(Cache.StatKeys.objectHitCacheWriteDouble)); - prop.putNum("ObjectList_" + c + "_objectHitCacheDeletes", mapy.get(Cache.StatKeys.objectHitCacheDeletes)); - prop.putNum("ObjectList_" + c + "_objectHitCacheFlushes", mapy.get(Cache.StatKeys.objectHitCacheFlushes)); - - // miss cache - missmem = Long.parseLong(mapy.get(Cache.StatKeys.objectMissMem)); - totalmissmem += missmem; - prop.put("ObjectList_" + c + "_objectMissChunkSize", mapy.get(Cache.StatKeys.objectMissChunkSize)); - prop.putNum("ObjectList_" + c + "_objectMissCacheCount", mapy.get(Cache.StatKeys.objectMissCacheCount)); - prop.putHTML("ObjectList_" + c + "_objectMissCacheMem", Formatter.bytesToString(missmem)); - prop.putNum("ObjectList_" + c + "_objectMissCacheReadHit", mapy.get(Cache.StatKeys.objectMissCacheReadHit)); - prop.putNum("ObjectList_" + c + "_objectMissCacheReadMiss", mapy.get(Cache.StatKeys.objectMissCacheReadMiss)); - prop.putNum("ObjectList_" + c + "_objectMissCacheWriteUnique", mapy.get(Cache.StatKeys.objectMissCacheWriteUnique)); - prop.putNum("ObjectList_" + c + "_objectMissCacheWriteDouble", mapy.get(Cache.StatKeys.objectMissCacheWriteDouble)); - prop.putNum("ObjectList_" + c + "_objectMissCacheDeletes", mapy.get(Cache.StatKeys.objectMissCacheDeletes)); - //prop.put("ObjectList_" + c + "_objectMissCacheFlushes", mapy.get(Cache.StatKeys.objectMissCacheFlushes)); - - c++; - } - prop.put("ObjectList", c); - prop.putNum("objectCacheStopGrow", Cache.getMemStopGrow() / (1024d * 1024d)); - prop.putNum("objectCacheStartShrink", Cache.getMemStartShrink() / (1024d * 1024d)); - prop.putNum("objectHitCacheTotalMem", totalhitmem / (1024d * 1024d)); - prop.putNum("objectMissCacheTotalMem", totalmissmem / (1024d * 1024d)); - - // other caching structures -// final CachedSolrConnector solr = (CachedSolrConnector) Switchboard.getSwitchboard().index.fulltext().getDefaultConnector(); -// prop.putNum("solrcacheHit.size", solr.nameCacheHitSize()); -// prop.putNum("solrcacheHit.Hit", solr.hitCache_Hit); -// prop.putNum("solrcacheHit.Miss", solr.hitCache_Miss); -// prop.putNum("solrcacheHit.Insert", solr.hitCache_Insert); -// -// prop.putNum("solrcacheMiss.size", solr.nameCacheMissSize()); -// prop.putNum("solrcacheMiss.Hit", solr.missCache_Hit); -// prop.putNum("solrcacheMiss.Miss", solr.missCache_Miss); -// prop.putNum("solrcacheMiss.Insert", solr.missCache_Insert); -// -// prop.putNum("solrcacheDocument.size", solr.nameCacheDocumentSize()); -// prop.putNum("solrcacheDocument.Hit", solr.documentCache_Hit); -// prop.putNum("solrcacheDocument.Miss", solr.documentCache_Miss); -// prop.putNum("solrcacheDocument.Insert", solr.documentCache_Insert); +// this cache table wasn't used for years +// // write object cache table +// i = Cache.filenames(); +// c = 0; +// long missmem, totalmissmem = 0; +// totalhitmem = 0; +// Map mapy; +// while (i.hasNext()) { +// filename = i.next(); +// mapy = Cache.memoryStats(filename); +// prop.put("ObjectList_" + c + "_objectCachePath", ((p = filename.indexOf("DATA",0)) < 0) ? filename : filename.substring(p)); +// +// // hit cache +// hitmem = Long.parseLong(mapy.get(Cache.StatKeys.objectHitMem)); +// totalhitmem += hitmem; +// prop.put("ObjectList_" + c + "_objectHitChunkSize", mapy.get(Cache.StatKeys.objectHitChunkSize)); +// prop.putNum("ObjectList_" + c + "_objectHitCacheCount", mapy.get(Cache.StatKeys.objectHitCacheCount)); +// prop.put("ObjectList_" + c + "_objectHitCacheMem", Formatter.bytesToString(hitmem)); +// prop.putNum("ObjectList_" + c + "_objectHitCacheReadHit", mapy.get(Cache.StatKeys.objectHitCacheReadHit)); +// prop.putNum("ObjectList_" + c + "_objectHitCacheReadMiss", mapy.get(Cache.StatKeys.objectHitCacheReadMiss)); +// prop.putNum("ObjectList_" + c + "_objectHitCacheWriteUnique", mapy.get(Cache.StatKeys.objectHitCacheWriteUnique)); +// prop.putNum("ObjectList_" + c + "_objectHitCacheWriteDouble", mapy.get(Cache.StatKeys.objectHitCacheWriteDouble)); +// prop.putNum("ObjectList_" + c + "_objectHitCacheDeletes", mapy.get(Cache.StatKeys.objectHitCacheDeletes)); +// prop.putNum("ObjectList_" + c + "_objectHitCacheFlushes", mapy.get(Cache.StatKeys.objectHitCacheFlushes)); +// +// // miss cache +// missmem = Long.parseLong(mapy.get(Cache.StatKeys.objectMissMem)); +// totalmissmem += missmem; +// prop.put("ObjectList_" + c + "_objectMissChunkSize", mapy.get(Cache.StatKeys.objectMissChunkSize)); +// prop.putNum("ObjectList_" + c + "_objectMissCacheCount", mapy.get(Cache.StatKeys.objectMissCacheCount)); +// prop.putHTML("ObjectList_" + c + "_objectMissCacheMem", Formatter.bytesToString(missmem)); +// prop.putNum("ObjectList_" + c + "_objectMissCacheReadHit", mapy.get(Cache.StatKeys.objectMissCacheReadHit)); +// prop.putNum("ObjectList_" + c + "_objectMissCacheReadMiss", mapy.get(Cache.StatKeys.objectMissCacheReadMiss)); +// prop.putNum("ObjectList_" + c + "_objectMissCacheWriteUnique", mapy.get(Cache.StatKeys.objectMissCacheWriteUnique)); +// prop.putNum("ObjectList_" + c + "_objectMissCacheWriteDouble", mapy.get(Cache.StatKeys.objectMissCacheWriteDouble)); +// prop.putNum("ObjectList_" + c + "_objectMissCacheDeletes", mapy.get(Cache.StatKeys.objectMissCacheDeletes)); +// //prop.put("ObjectList_" + c + "_objectMissCacheFlushes", mapy.get(Cache.StatKeys.objectMissCacheFlushes)); +// +// c++; +// } +// prop.put("ObjectList", c); +// prop.putNum("objectCacheStopGrow", Cache.getMemStopGrow() / (1024d * 1024d)); +// prop.putNum("objectCacheStartShrink", Cache.getMemStartShrink() / (1024d * 1024d)); +// prop.putNum("objectHitCacheTotalMem", totalhitmem / (1024d * 1024d)); +// prop.putNum("objectMissCacheTotalMem", totalmissmem / (1024d * 1024d)); prop.putNum("namecacheHit.size", Domains.nameCacheHitSize()); prop.putNum("namecacheHit.Hit", Domains.cacheHit_Hit); diff --git a/htroot/PerformanceQueues_p.html b/htroot/PerformanceQueues_p.html index 379ca9f2a..d438097fe 100644 --- a/htroot/PerformanceQueues_p.html +++ b/htroot/PerformanceQueues_p.html @@ -60,8 +60,8 @@ #{/table}# - - + + Changes take effect immediately
      @@ -121,7 +121,7 @@ - + Changes take effect immediately @@ -145,7 +145,7 @@ #{/pool}# - + Changes take effect immediately diff --git a/htroot/Performance_p.html b/htroot/Performance_p.html index 68980fa3f..498ae8421 100644 --- a/htroot/Performance_p.html +++ b/htroot/Performance_p.html @@ -28,7 +28,7 @@
      MByte  -   +   #(setStartupCommit)#::
      Accepted change. This will take effect after restart of YaCy.
      restart now
      ::
      #(/setStartupCommit)#
      @@ -47,7 +47,7 @@
      disable DHT-in below MiB free space
       
      -
      +
      @@ -79,7 +79,7 @@ - + Changes take effect immediately diff --git a/htroot/ProxyIndexingMonitor_p.html b/htroot/ProxyIndexingMonitor_p.html index e44062e41..ba6b6d8f4 100644 --- a/htroot/ProxyIndexingMonitor_p.html +++ b/htroot/ProxyIndexingMonitor_p.html @@ -95,7 +95,7 @@   - + diff --git a/htroot/RankingRWI_p.html b/htroot/RankingRWI_p.html index b2a631b82..4dd1fe3b4 100644 --- a/htroot/RankingRWI_p.html +++ b/htroot/RankingRWI_p.html @@ -60,8 +60,8 @@
      - - + +
      diff --git a/htroot/RankingSolr_p.html b/htroot/RankingSolr_p.html index 8ac304321..169a35d54 100644 --- a/htroot/RankingSolr_p.html +++ b/htroot/RankingSolr_p.html @@ -33,8 +33,8 @@
      - - + +

      #(boosthint)#::You can boost with vocabularies, use the occurrence counters #[vocabulariesvoccount]# and #[vocabulariesvoclogcount]#.#(/boosthint)# @@ -54,8 +54,8 @@
      - - + +

      #(boosthint)#:: @@ -91,8 +91,8 @@
      - - + +
      @@ -113,8 +113,8 @@ #{/boosts}#
      - - + +
      diff --git a/htroot/RegexTest.html b/htroot/RegexTest.html index 55bf350a7..a04bd34dd 100644 --- a/htroot/RegexTest.html +++ b/htroot/RegexTest.html @@ -30,7 +30,7 @@ #(match)#
      no match
      ::
      match
      ::
      error in expression: #[error]#
      #(/match)#
      - +
      diff --git a/htroot/ServerScannerList.html b/htroot/ServerScannerList.html index d84acb0f0..e23cc7fc3 100644 --- a/htroot/ServerScannerList.html +++ b/htroot/ServerScannerList.html @@ -49,7 +49,7 @@ #(edit)#::

      - +

      #(/edit)# diff --git a/htroot/Settings_Crawler.inc b/htroot/Settings_Crawler.inc index 112fa9f0a..c451e046b 100644 --- a/htroot/Settings_Crawler.inc +++ b/htroot/Settings_Crawler.inc @@ -44,7 +44,7 @@   - + Changes will take effect immediately. diff --git a/htroot/Settings_MessageForwarding.inc b/htroot/Settings_MessageForwarding.inc index 958f71aec..c0634d39c 100644 --- a/htroot/Settings_MessageForwarding.inc +++ b/htroot/Settings_MessageForwarding.inc @@ -24,7 +24,7 @@ - Changes will take effect immediately. + Changes will take effect immediately. diff --git a/htroot/Settings_Proxy.inc b/htroot/Settings_Proxy.inc index 84f469752..365cfa8ea 100644 --- a/htroot/Settings_Proxy.inc +++ b/htroot/Settings_Proxy.inc @@ -51,7 +51,7 @@ - Changes will take effect immediately. + Changes will take effect immediately. diff --git a/htroot/Settings_Seed.inc b/htroot/Settings_Seed.inc index 1c23fde62..aef5d71ae 100644 --- a/htroot/Settings_Seed.inc +++ b/htroot/Settings_Seed.inc @@ -19,7 +19,7 @@ #{/seedUploadMethods}# - + Here you can specify which upload method should be used. Select 'none' to deactivate uploading. @@ -34,7 +34,7 @@ - + diff --git a/htroot/Settings_Seed_UploadFile.inc b/htroot/Settings_Seed_UploadFile.inc index 6350408b1..5079fdf9a 100644 --- a/htroot/Settings_Seed_UploadFile.inc +++ b/htroot/Settings_Seed_UploadFile.inc @@ -5,11 +5,11 @@ - + - +
      : Here you can specify the path within the filesystem where the seed-list file should be stored.
      diff --git a/htroot/Settings_Seed_UploadFtp.inc b/htroot/Settings_Seed_UploadFtp.inc index 0d01201a7..3261f7cef 100644 --- a/htroot/Settings_Seed_UploadFtp.inc +++ b/htroot/Settings_Seed_UploadFtp.inc @@ -31,7 +31,7 @@ The password - + diff --git a/htroot/Settings_Seed_UploadScp.inc b/htroot/Settings_Seed_UploadScp.inc index c0945f817..fd0cea50f 100644 --- a/htroot/Settings_Seed_UploadScp.inc +++ b/htroot/Settings_Seed_UploadScp.inc @@ -31,7 +31,7 @@ The password - + diff --git a/htroot/Settings_ServerAccess.inc b/htroot/Settings_ServerAccess.inc index a002ba2d5..a9476e8bb 100644 --- a/htroot/Settings_ServerAccess.inc +++ b/htroot/Settings_ServerAccess.inc @@ -55,7 +55,7 @@ for the preconfigured value 'localpeer', the URL is: http://localpeer/. - + diff --git a/htroot/Status.html b/htroot/Status.html index 713d0500f..1bba59a75 100644 --- a/htroot/Status.html +++ b/htroot/Status.html @@ -127,11 +127,7 @@
      Latest public version is v#[latestVersion]#. You can download a more recent version of YaCy. Click here to install this update and restart YaCy:
      - - - diff --git a/htroot/Steering.html b/htroot/Steering.html index dec27e8a6..34d7f595c 100644 --- a/htroot/Steering.html +++ b/htroot/Steering.html @@ -171,7 +171,7 @@ XDtoU7vQ/wIAAP//AwBb7ktEXQ4nqQAAAABJRU5ErkJggg==" width="128" height="64" alt="K

      Application will terminate after working off all scheduled tasks.

      Please send us feed-back!

      We don't track YaCy users, YaCy does not send 'home-pings', we do not even know how many people use YaCy as their private search engine.
      - Therefore we like to ask you: do you like YaCy? Will you use it again... if not, why? Is is possible that we change a bit to suit your needs?

      + Therefore we like to ask you: do you like YaCy? Will you use it again... if not, why? Is it possible that we change a bit to suit your needs?

      Please send us feed-back about your experience with an
      anonymous message
      or a
      diff --git a/htroot/Surftips.html b/htroot/Surftips.html index 1f62a70ae..0d4470bfa 100644 --- a/htroot/Surftips.html +++ b/htroot/Surftips.html @@ -55,11 +55,11 @@

      - - diff --git a/htroot/Tables_p.html b/htroot/Tables_p.html index 9efb3e585..163ebeed2 100644 --- a/htroot/Tables_p.html +++ b/htroot/Tables_p.html @@ -60,7 +60,7 @@ document.write("
      URL:
      - - #(moar)#::#(/moar)# + + #(moar)#::#(/moar)#
      #(moar)#::
      Search in Document:
      - +
      #(/moar)# @@ -124,7 +124,7 @@ function updatepage(str) { - + diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java index 3f231b2ae..a139c3e85 100644 --- a/htroot/ViewFile.java +++ b/htroot/ViewFile.java @@ -325,6 +325,24 @@ public class ViewFile { prop.put("viewMode", VIEW_MODE_AS_LINKLIST); boolean dark = true; int i = 0; + + if (document.getEmaillinks() != null) { + Iterator emailit = document.getEmaillinks().iterator(); + while (emailit.hasNext()) { + AnchorURL eentry = emailit.next(); + prop.put("viewMode_links_" + i + "_nr", i); + prop.put("viewMode_links_" + i + "_dark", dark ? "1" : "0"); + prop.put("viewMode_links_" + i + "_type", "email"); + prop.put("viewMode_links_" + i + "_text", (eentry.getTextProperty().isEmpty()) ? " " : eentry.getTextProperty()); + prop.put("viewMode_links_" + i + "_url", "#"); + prop.put("viewMode_links_" + i + "_link", eentry.toNormalform(true)); + prop.put("viewMode_links_" + i + "_rel", ""); + prop.put("viewMode_links_" + i + "_name", eentry.getNameProperty()); + dark = !dark; + i++; + } + } + i += putMediaInfo(prop, wordArray, i, document.getVideolinks(), "video", (i % 2 == 0)); i += putMediaInfo(prop, wordArray, i, document.getAudiolinks(), "audio", (i % 2 == 0)); dark = (i % 2 == 0); diff --git a/htroot/ViewImage.java b/htroot/ViewImage.java index c6c7f0bf0..ba75dc62f 100644 --- a/htroot/ViewImage.java +++ b/htroot/ViewImage.java @@ -30,6 +30,7 @@ import java.awt.Rectangle; import java.awt.image.BufferedImage; import java.awt.image.Raster; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.util.Iterator; @@ -49,6 +50,7 @@ import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.storage.ConcurrentARC; import net.yacy.cora.util.ConcurrentLog; import net.yacy.data.URLLicense; +import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.MemoryControl; import net.yacy.kelondro.workflow.WorkflowProcessor; import net.yacy.peers.graphics.EncodedImage; @@ -60,8 +62,11 @@ import net.yacy.server.serverSwitch; public class ViewImage { - private static Map iconcache = new ConcurrentARC(1000, - Math.max(10, Math.min(32, WorkflowProcessor.availableCPU * 2))); + private static Map iconcache = new ConcurrentARC(1000, + Math.max(10, Math.min(32, WorkflowProcessor.availableCPU * 2))); + + private static String defaulticon = "htroot/env/grafics/dfltfvcn.ico"; + private static byte[] defaulticonb = null; /** * Try parsing image from post "url" parameter or from "code" parameter. @@ -132,7 +137,7 @@ public class ViewImage { ImageInputStream imageInStream = null; InputStream inStream = null; try { - String urlExt = MultiProtocolURL.getFileExtension(url.getFileName()); + String urlExt = MultiProtocolURL.getFileExtension(url.getFileName()); if (ext != null && ext.equalsIgnoreCase(urlExt) && isBrowserRendered(urlExt)) { return openInputStream(post, sb.loader, auth, url); } @@ -149,10 +154,23 @@ public class ViewImage { // read image encodedImage = parseAndScale(post, auth, urlString, ext, imageInStream); } catch(Exception e) { - /* Exceptions are not propagated here : many error causes are possible, network errors, - * incorrect or unsupported format, bad ImageIO plugin... - * Instead return an empty EncodedImage. Caller is responsible for handling this correctly (500 status code response) */ - encodedImage = new EncodedImage(new byte[0], ext, post.getBoolean("isStatic")); + /* Exceptions are not propagated here : many error causes are possible, network errors, + * incorrect or unsupported format, bad ImageIO plugin... + * Instead return an empty EncodedImage. Caller is responsible for handling this correctly (500 status code response) */ + + if ("favicon.ico".equalsIgnoreCase(url.getFileName())) { // but on missing favicon just present a default (occures frequently by call from searchitem.html) + // currently yacysearchitem assigns "hosturl/favicon.ico" (to look for the filename should not much interfere with other situatios) + if (defaulticonb == null) { // load the default icon once + try { + defaulticonb = FileUtils.read(new File(sb.getAppPath(), defaulticon)); + } catch (final IOException initicon) { + defaulticonb = new byte[0]; + } + } + encodedImage = new EncodedImage(defaulticonb, ext, post.getBoolean("isStatic")); + } else { + encodedImage = new EncodedImage(new byte[0], ext, post.getBoolean("isStatic")); + } } finally { /* * imageInStream.close() method doesn't close source input diff --git a/htroot/Vocabulary_p.html b/htroot/Vocabulary_p.html index a956a7499..491569879 100644 --- a/htroot/Vocabulary_p.html +++ b/htroot/Vocabulary_p.html @@ -94,7 +94,7 @@ To see a list of all APIs, please visit the
      +
      @@ -144,7 +144,7 @@ To see a list of all APIs, please visit the
      #{charset}##{/charset}# -
      +
      @@ -198,7 +198,7 @@ To see a list of all APIs, please visit the
      - + #(/edit)# diff --git a/htroot/WatchWebStructure_p.html b/htroot/WatchWebStructure_p.html index 3014a750b..74a4d7924 100644 --- a/htroot/WatchWebStructure_p.html +++ b/htroot/WatchWebStructure_p.html @@ -106,7 +106,7 @@ To see a list of all APIs, please visit the
      Dot-end
      Color
      -
      +
      diff --git a/htroot/compare_yacy.html b/htroot/compare_yacy.html index 4cd62a09b..058280796 100644 --- a/htroot/compare_yacy.html +++ b/htroot/compare_yacy.html @@ -19,7 +19,7 @@
      Websearch Comparison
      -

      +

      Left Search Engine - - + + + diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index dd050b86f..0c831c5dd 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -251,8 +251,6 @@ public final class search { indexSegment, rankingProfile, header.get(HeaderFramework.USER_AGENT, ""), - false, - false, 0.0d, 0.0d, 0.0d, @@ -317,8 +315,6 @@ public final class search { sb.index, rankingProfile, header.get(HeaderFramework.USER_AGENT, ""), - false, - false, 0.0d, 0.0d, 0.0d, diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 0aea12e23..caf980487 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -669,10 +669,6 @@ public class yacysearch { indexSegment, ranking, header.get(HeaderFramework.USER_AGENT, ""), - sb.getConfigBool(SwitchboardConstants.SEARCH_VERIFY_DELETE, false) - && sb.getConfigBool(SwitchboardConstants.NETWORK_SEARCHVERIFY, false) - && sb.peers.mySeed().getFlagAcceptRemoteIndex(), - false, lat, lon, rad, sb.getConfigArray("search.navigation", "")); EventTracker.delete(EventTracker.EClass.SEARCH); diff --git a/htroot/yacysearchitem.html b/htroot/yacysearchitem.html index 0a69ff421..ad4013e97 100644 --- a/htroot/yacysearchitem.html +++ b/htroot/yacysearchitem.html @@ -1,9 +1,7 @@ #(content)#::

      - - - + #[title]#

      #(heuristic)#:: diff --git a/lib/common-image-3.2.jar b/lib/common-image-3.2.1.jar similarity index 79% rename from lib/common-image-3.2.jar rename to lib/common-image-3.2.1.jar index 385c55bd5..63f9e8d8a 100644 Binary files a/lib/common-image-3.2.jar and b/lib/common-image-3.2.1.jar differ diff --git a/lib/common-io-3.2.jar b/lib/common-io-3.2.1.jar similarity index 86% rename from lib/common-io-3.2.jar rename to lib/common-io-3.2.1.jar index 950515aed..5c30bbf89 100644 Binary files a/lib/common-io-3.2.jar and b/lib/common-io-3.2.1.jar differ diff --git a/lib/common-lang-3.2.jar b/lib/common-lang-3.2.1.jar similarity index 87% rename from lib/common-lang-3.2.jar rename to lib/common-lang-3.2.1.jar index a0f9503bb..76ff63170 100644 Binary files a/lib/common-lang-3.2.jar and b/lib/common-lang-3.2.1.jar differ diff --git a/lib/imageio-bmp-3.2.jar b/lib/imageio-bmp-3.2.1.jar old mode 100755 new mode 100644 similarity index 85% rename from lib/imageio-bmp-3.2.jar rename to lib/imageio-bmp-3.2.1.jar index 1946caefe..a856b1e11 Binary files a/lib/imageio-bmp-3.2.jar and b/lib/imageio-bmp-3.2.1.jar differ diff --git a/lib/imageio-core-3.2.jar b/lib/imageio-core-3.2.1.jar similarity index 85% rename from lib/imageio-core-3.2.jar rename to lib/imageio-core-3.2.1.jar index ce9f2c2dd..60842d5ce 100644 Binary files a/lib/imageio-core-3.2.jar and b/lib/imageio-core-3.2.1.jar differ diff --git a/lib/imageio-metadata-3.2.jar b/lib/imageio-metadata-3.2.1.jar similarity index 85% rename from lib/imageio-metadata-3.2.jar rename to lib/imageio-metadata-3.2.1.jar index 302e298a1..5c788527e 100644 Binary files a/lib/imageio-metadata-3.2.jar and b/lib/imageio-metadata-3.2.1.jar differ diff --git a/lib/imageio-tiff-3.2.jar b/lib/imageio-tiff-3.2.1.jar similarity index 87% rename from lib/imageio-tiff-3.2.jar rename to lib/imageio-tiff-3.2.1.jar index 722cbc8b5..8abdd857b 100644 Binary files a/lib/imageio-tiff-3.2.jar and b/lib/imageio-tiff-3.2.1.jar differ diff --git a/locales/de.lng b/locales/de.lng index ff30a7cf5..915f1f027 100644 --- a/locales/de.lng +++ b/locales/de.lng @@ -3304,7 +3304,7 @@ Just a moment, please!==Einen Moment bitte! Application will terminate after working off all scheduled tasks.==YaCy Proxy wird beendet, nachdem alle ausstehenden Aufgaben abgearbeitet wurden. Please send us feed-back!==Bitte senden Sie uns Feedback! We don't track YaCy users, YaCy does not send 'home-pings', we do not even know how many people use YaCy as their private search engine.==Wir tracken YaCy Benutzer nicht. YaCy sendet keine 'Pings nach Hause'. Wir wissen noch nicht einmal wie viele Menschen YaCy als ihre private Suchmaschine verwenden. -Therefore we like to ask you: do you like YaCy? Will you use it again... if not, why? Is is possible that we change a bit to suit your needs?==Darum wollen wir Sie fragen: Mögen Sie YaCy? Werden sie es wieder verwenden ... wenn nicht ... Warum? Ist es möglich dass wir uns verändern, um uns Ihren Anforderungen anzupassen? +Therefore we like to ask you: do you like YaCy? Will you use it again... if not, why? Is it possible that we change a bit to suit your needs?==Darum wollen wir Sie fragen: Mögen Sie YaCy? Werden sie es wieder verwenden ... wenn nicht ... Warum? Ist es möglich dass wir uns verändern, um uns Ihren Anforderungen anzupassen? Please send us feed-back about your experience with an==Bitte senden Sie uns Ihr Feedback mit einer >anonymous message<==>anonymen Nachricht< or a<==oder einem< diff --git a/locales/fr.lng b/locales/fr.lng index 5fce4f69d..c21cf4019 100644 --- a/locales/fr.lng +++ b/locales/fr.lng @@ -1809,7 +1809,7 @@ See you soon!==À bientôt! Application will terminate after working off all scheduled tasks.==L'application se terminera après avoir achevé toutes les tâches planifiées. Please send us feed-back!==Envoyez-nous vos commentaires! We don't track YaCy users, YaCy does not send 'home-pings', we do not even know how many people use YaCy as their private search engine.==Nous ne traçons pas les utilisateurs de YaCy, YaCy n'envoie pas de "home-pings", nous ne savons même pas combien de gens utilisent YaCy comme moteur de recherche. -Therefore we like to ask you: do you like YaCy? Will you use it again... if not, why? Is is possible that we change a bit to suit your needs?==C'est pourquoi nous aimerions beaucoup que vous nous disiez ce que vous pensez de YaCy. Appréciez-vous YaCy? Allez-vous l'utiliser à nouveau... si non, pourquoi? Nous pouvons peut-être modifier YaCy pour qu'il réponde à vos besoins. +Therefore we like to ask you: do you like YaCy? Will you use it again... if not, why? Is it possible that we change a bit to suit your needs?==C'est pourquoi nous aimerions beaucoup que vous nous disiez ce que vous pensez de YaCy. Appréciez-vous YaCy? Allez-vous l'utiliser à nouveau... si non, pourquoi? Nous pouvons peut-être modifier YaCy pour qu'il réponde à vos besoins. Please send us feed-back about your experience with an==Vous pouvez nous faire parvenir vos commentaires avec un anonymous message==message anonyme or a
      ==ou un
      diff --git a/locales/ru.lng b/locales/ru.lng index a9cbdaeb1..c183e1ddd 100644 --- a/locales/ru.lng +++ b/locales/ru.lng @@ -3481,7 +3481,7 @@ Just a moment, please!==Пожалуйста, подождите! Application will terminate after working off all scheduled tasks.==Приложение завершает свою работу. Please send us feed-back!==Пожалуйста, отправляйте нам отзывы! We don't track YaCy users, YaCy does not send 'home-pings', we do not even know how many people use YaCy as their private search engine.==Мы не следим за пользователями YaCy. Мы даже не знаем как много людей пользуются поиском YaCy для личных целей. -Therefore we like to ask you: do you like YaCy? Will you use it again... if not, why? Is is possible that we change a bit to suit your needs?==Спросите себя: за что вам нравится YaCy? Вы будете использовать его снова... или нет... почему? +Therefore we like to ask you: do you like YaCy? Will you use it again... if not, why? Is it possible that we change a bit to suit your needs?==Спросите себя: за что вам нравится YaCy? Вы будете использовать его снова... или нет... почему? Please send us feed-back about your experience with an==Пожалуйста, отправьте нам отзыв о вашем опыте использования >anonymous message<==>анонимно< or a<==или< diff --git a/nbproject/project.xml b/nbproject/project.xml index 8913367df..464f57cf5 100644 --- a/nbproject/project.xml +++ b/nbproject/project.xml @@ -83,7 +83,7 @@ source htroot - lib/J7Zip-modified.jar;lib/apache-mime4j-0.6.jar;lib/bcmail-jdk15-1.46.jar;lib/bcprov-jdk15-1.46.jar;lib/chardet.jar;lib/common-image-3.2.jar;lib/common-io-3.2.jar;lib/common-lang-3.2.jar;lib/commons-codec-1.10.jar;lib/commons-compress-1.10.jar;lib/commons-fileupload-1.3.1.jar;lib/commons-io-2.4.jar;lib/commons-jxpath-1.3.jar;lib/commons-lang-2.6.jar;lib/commons-logging-1.2.jar;lib/fontbox-1.8.10.jar;lib/guava-18.0.jar;lib/htmllexer.jar;lib/httpclient-4.5.1.jar;lib/httpcore-4.4.4.jar;lib/httpmime-4.5.1.jar;lib/icu4j-56_1.jar;lib/imageio-core-3.2.jar;lib/imageio-metadata-3.2.jar;lib/imageio-tiff-3.2.jar;lib/jakarta-oro-2.0.8.jar;lib/jaudiotagger-2.0.4-20111207.115108-15.jar;lib/javax.servlet-api-3.1.0.jar;lib/jcifs-1.3.17.jar;lib/jcl-over-slf4j-1.7.13.jar;lib/jempbox-1.8.10.jar;lib/jetty-client-9.2.14.v20151106.jar;lib/jetty-continuation-9.2.14.v20151106.jar;lib/jetty-deploy-9.2.14.v20151106.jar;lib/jetty-http-9.2.14.v20151106.jar;lib/jetty-io-9.2.14.v20151106.jar;lib/jetty-jmx-9.2.14.v20151106.jar;lib/jetty-proxy-9.2.14.v20151106.jar;lib/jetty-security-9.2.14.v20151106.jar;lib/jetty-server-9.2.14.v20151106.jar;lib/jetty-servlet-9.2.14.v20151106.jar;lib/jetty-servlets-9.2.14.v20151106.jar;lib/jetty-util-9.2.14.v20151106.jar;lib/jetty-webapp-9.2.14.v20151106.jar;lib/jetty-xml-9.2.14.v20151106.jar;lib/jsch-0.1.53.jar;lib/json-simple-1.1.1.jar;lib/jsoup-1.8.3.jar;lib/log4j-over-slf4j-1.7.13.jar;lib/lucene-analyzers-common-5.3.1.jar;lib/lucene-analyzers-phonetic-5.3.1.jar;lib/lucene-backward-codecs-5.3.1.jar;lib/lucene-classification-5.3.1.jar;lib/lucene-codecs-5.3.1.jar;lib/lucene-core-5.3.1.jar;lib/lucene-facet-5.3.1.jar;lib/lucene-grouping-5.3.1.jar;lib/lucene-highlighter-5.3.1.jar;lib/lucene-join-5.3.1.jar;lib/lucene-memory-5.3.1.jar;lib/lucene-misc-5.3.1.jar;lib/lucene-queries-5.3.1.jar;lib/lucene-queryparser-5.3.1.jar;lib/lucene-spatial-5.3.1.jar;lib/lucene-suggest-5.3.1.jar;lib/metadata-extractor-2.8.1.jar;lib/noggit-0.6.jar;lib/org.restlet.jar;lib/pdfbox-1.8.10.jar;lib/poi-3.13-20150929.jar;lib/poi-scratchpad-3.13-20150929.jar;lib/slf4j-api-1.7.13.jar;lib/slf4j-jdk14-1.7.13.jar;lib/solr-core-5.3.1.jar;lib/solr-solrj-5.3.1.jar;lib/spatial4j-0.4.1.jar;lib/stax2-api-3.1.4.jar;lib/webcat-0.1-swf.jar;lib/weupnp-0.1.3.jar;lib/woodstox-core-asl-4.4.1.jar;lib/xercesImpl.jar;lib/xml-apis.jar;lib/xmpcore-5.1.2.jar;lib/zookeeper-3.4.6.jar + lib/J7Zip-modified.jar;lib/apache-mime4j-0.6.jar;lib/bcmail-jdk15-1.46.jar;lib/bcprov-jdk15-1.46.jar;lib/chardet.jar;lib/common-image-3.2.1.jar;lib/common-io-3.2.1.jar;lib/common-lang-3.2.1.jar;lib/commons-codec-1.10.jar;lib/commons-compress-1.10.jar;lib/commons-fileupload-1.3.1.jar;lib/commons-io-2.4.jar;lib/commons-jxpath-1.3.jar;lib/commons-lang-2.6.jar;lib/commons-logging-1.2.jar;lib/fontbox-1.8.10.jar;lib/guava-18.0.jar;lib/htmllexer.jar;lib/httpclient-4.5.1.jar;lib/httpcore-4.4.4.jar;lib/httpmime-4.5.1.jar;lib/icu4j-56_1.jar;lib/imageio-bmp-3.2.1.jar;lib/imageio-core-3.2.1.jar;lib/imageio-metadata-3.2.1.jar;lib/imageio-tiff-3.2.1.jar;lib/jakarta-oro-2.0.8.jar;lib/jaudiotagger-2.0.4-20111207.115108-15.jar;lib/javax.servlet-api-3.1.0.jar;lib/jcifs-1.3.17.jar;lib/jcl-over-slf4j-1.7.13.jar;lib/jempbox-1.8.10.jar;lib/jetty-client-9.2.14.v20151106.jar;lib/jetty-continuation-9.2.14.v20151106.jar;lib/jetty-deploy-9.2.14.v20151106.jar;lib/jetty-http-9.2.14.v20151106.jar;lib/jetty-io-9.2.14.v20151106.jar;lib/jetty-jmx-9.2.14.v20151106.jar;lib/jetty-proxy-9.2.14.v20151106.jar;lib/jetty-security-9.2.14.v20151106.jar;lib/jetty-server-9.2.14.v20151106.jar;lib/jetty-servlet-9.2.14.v20151106.jar;lib/jetty-servlets-9.2.14.v20151106.jar;lib/jetty-util-9.2.14.v20151106.jar;lib/jetty-webapp-9.2.14.v20151106.jar;lib/jetty-xml-9.2.14.v20151106.jar;lib/jsch-0.1.53.jar;lib/json-simple-1.1.1.jar;lib/jsonic-1.2.0.jar;lib/jsoup-1.8.3.jar;lib/langdetect.jar;lib/log4j-over-slf4j-1.7.13.jar;lib/lucene-analyzers-common-5.3.1.jar;lib/lucene-analyzers-phonetic-5.3.1.jar;lib/lucene-backward-codecs-5.3.1.jar;lib/lucene-classification-5.3.1.jar;lib/lucene-codecs-5.3.1.jar;lib/lucene-core-5.3.1.jar;lib/lucene-facet-5.3.1.jar;lib/lucene-grouping-5.3.1.jar;lib/lucene-highlighter-5.3.1.jar;lib/lucene-join-5.3.1.jar;lib/lucene-memory-5.3.1.jar;lib/lucene-misc-5.3.1.jar;lib/lucene-queries-5.3.1.jar;lib/lucene-queryparser-5.3.1.jar;lib/lucene-spatial-5.3.1.jar;lib/lucene-suggest-5.3.1.jar;lib/metadata-extractor-2.8.1.jar;lib/noggit-0.6.jar;lib/org.restlet.jar;lib/pdfbox-1.8.10.jar;lib/poi-3.13-20150929.jar;lib/poi-scratchpad-3.13-20150929.jar;lib/slf4j-api-1.7.13.jar;lib/slf4j-jdk14-1.7.13.jar;lib/solr-core-5.3.1.jar;lib/solr-solrj-5.3.1.jar;lib/spatial4j-0.4.1.jar;lib/stax2-api-3.1.4.jar;lib/webcat-0.1-swf.jar;lib/weupnp-0.1.3.jar;lib/woodstox-core-asl-4.4.1.jar;lib/xercesImpl.jar;lib/xml-apis.jar;lib/xmpcore-5.1.2.jar;lib/zookeeper-3.4.6.jar lib/yacycore.jar 1.7 diff --git a/pom.xml b/pom.xml index 776e920a3..47c6447a7 100644 --- a/pom.xml +++ b/pom.xml @@ -213,7 +213,7 @@ org.codehaus.mojo templating-maven-plugin - 1.0-alpha-3 + 1.0.0 filter-src @@ -340,11 +340,6 @@ test - - javax.activation - activation - 1.1.1 - org.apache.james apache-mime4j @@ -433,12 +428,12 @@ com.twelvemonkeys.imageio imageio-bmp - 3.2 + 3.2.1 com.twelvemonkeys.imageio imageio-tiff - 3.2 + 3.2.1 org diff --git a/source/net/yacy/cora/document/id/AnchorURL.java b/source/net/yacy/cora/document/id/AnchorURL.java index 50da8269c..c39a37fe8 100644 --- a/source/net/yacy/cora/document/id/AnchorURL.java +++ b/source/net/yacy/cora/document/id/AnchorURL.java @@ -102,8 +102,8 @@ public class AnchorURL extends DigestURL { isHTTPS(relPath) || isFTP(relPath) || isFile(relPath) || - isSMB(relPath)/*|| - relPath.contains(":") && patternMail.matcher(relPath.toLowerCase()).find()*/) { + isSMB(relPath) || + relPath.startsWith("mailto:")) { return new AnchorURL(relPath); } return new AnchorURL(baseURL, relPath); diff --git a/source/net/yacy/cora/document/id/MultiProtocolURL.java b/source/net/yacy/cora/document/id/MultiProtocolURL.java index a35888020..24907b8d3 100644 --- a/source/net/yacy/cora/document/id/MultiProtocolURL.java +++ b/source/net/yacy/cora/document/id/MultiProtocolURL.java @@ -202,9 +202,9 @@ public class MultiProtocolURL implements Serializable, Comparable 7 && url.substring(0,7).equalsIgnoreCase("mailto:")) { p = 6; } else { url = "http://" + url; @@ -258,7 +258,7 @@ public class MultiProtocolURL implements Serializable, Comparable. - */ - -package net.yacy.cora.federate.solr.connector; - -import java.io.IOException; -import java.util.Collection; -import java.util.LinkedHashMap; - -import net.yacy.cora.sorting.ReversibleScoreMap; -import net.yacy.cora.storage.ARC; -import net.yacy.cora.storage.ConcurrentARC; -import net.yacy.kelondro.data.word.Word; -import net.yacy.kelondro.util.MemoryControl; -import net.yacy.search.schema.CollectionSchema; - -import org.apache.solr.client.solrj.response.QueryResponse; -import org.apache.solr.client.solrj.util.ClientUtils; -import org.apache.solr.common.SolrDocument; -import org.apache.solr.common.SolrDocumentList; -import org.apache.solr.common.SolrException; -import org.apache.solr.common.SolrInputDocument; -import org.apache.solr.common.params.ModifiableSolrParams; - -public class CachedSolrConnector extends AbstractSolrConnector implements SolrConnector { - - private final static Object EXIST = new Object(); - - private SolrConnector solr; - private final ARC documentCache; - public final ARC hitCache, missCache; - public long documentCache_Hit = 0, documentCache_Miss = 0, documentCache_Insert = 0; // for statistics only; do not write - public long hitCache_Hit = 0, hitCache_Miss = 0, hitCache_Insert = 0; // for statistics only; do not write - public long missCache_Hit = 0, missCache_Miss = 0, missCache_Insert = 0; // for statistics only; do not write - - private static final String idQuery(String id) { - return CollectionSchema.id.getSolrFieldName() + ":\"" + id + "\""; - } - - public CachedSolrConnector(SolrConnector c, int hitCacheMax, int missCacheMax, int docCacheMax) { - this.solr = c; - int partitions = Runtime.getRuntime().availableProcessors() * 2; - this.documentCache = new ConcurrentARC(docCacheMax, partitions); - this.hitCache = new ConcurrentARC(hitCacheMax, partitions); - this.missCache = new ConcurrentARC(missCacheMax, partitions); - } - - @Override - public int bufferSize() { - return solr.bufferSize(); - } - - @Override - public void clearCaches() { - this.hitCache.clear(); - this.missCache.clear(); - this.documentCache.clear(); - if (this.solr != null) this.solr.commit(true); - } - - @Override - public boolean isClosed() { - return this.solr == null || this.solr.isClosed(); - } - - @Override - protected void finalize() throws Throwable { - this.close(); - } - - @Override - public synchronized void close() { - this.clearCaches(); - if (this.solr != null) this.solr.close(); - this.solr = null; - } - - /** - * delete everything in the solr index - * @throws IOException - */ - @Override - public void clear() throws IOException { - this.clearCaches(); - if (this.solr != null) this.solr.clear(); - } - - /** - * delete an entry from solr - * @param id the url hash of the entry - * @throws IOException - */ - @Override - public void deleteById(final String id) throws IOException { - String q = idQuery(id); - this.documentCache.remove(q); - this.hitCache.remove(q); - this.missCache.put(q, EXIST); - this.missCache_Insert++; - if (this.solr != null) this.solr.deleteByQuery(q); - } - - /** - * delete a set of entries from solr; entries are identified by their url hash - * @param ids a list of url hashes - * @throws IOException - */ - @Override - public void deleteByIds(final Collection ids) throws IOException { - for (String id: ids) { - String q = idQuery(id); - this.documentCache.remove(q); - this.hitCache.remove(q); - this.missCache.put(q, EXIST); - this.missCache_Insert++; - } - if (this.solr != null) this.solr.deleteByIds(ids); - } - - @Override - public void deleteByQuery(final String querystring) throws IOException { - this.clearCaches(); - this.solr.deleteByQuery(querystring); - } - - @Override - public SolrDocument getDocumentById(final String id, final String ... fields) throws IOException { - assert id.length() == Word.commonHashLength : "wrong id: " + id; - String q = idQuery(id); - SolrDocument doc = fields.length == 0 ? this.documentCache.get(q) : null; - if (doc != null) { - this.documentCache_Hit++; - return doc; - } - documentCache_Miss++; - if (this.missCache.containsKey(q)) { - this.missCache_Hit++; - return null; - } - this.missCache_Miss++; - if (solr != null && ((doc = solr.getDocumentById(id, fields)) != null)) { - addToCache(doc, fields.length == 0); - return doc; - } - // check if there is a autocommit problem - if (this.hitCache.containsKey(q)) { - // the document should be there, therefore make a commit and check again - if (solr != null && ((doc = solr.getDocumentById(id, fields)) != null)) { - addToCache(doc, fields.length == 0); - } - } - this.missCache.put(q, EXIST); - this.missCache_Insert++; - return null; - } - - /** - * add a Solr document - * @param solrdoc - * @throws IOException - */ - @Override - public void add(final SolrInputDocument solrdoc) throws IOException { - String id = (String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName()); - assert id != null; - if (id == null) return; - String q = idQuery(id); - SolrDocument doc = ClientUtils.toSolrDocument(solrdoc); - addToCache(doc, true); - this.documentCache.put(q, doc); - this.documentCache_Insert++; - if (this.solr != null) this.solr.add(solrdoc); - } - - @Override - public void add(final Collection solrdocs) throws IOException, SolrException { - for (SolrInputDocument solrdoc: solrdocs) { - String id = (String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName()); - assert id != null; - if (id == null) continue; - String q = idQuery(id); - SolrDocument doc = ClientUtils.toSolrDocument(solrdoc); - addToCache(doc, true); - this.documentCache.put(q, doc); - this.documentCache_Insert++; - } - if (this.solr != null) this.solr.add(solrdocs); - } - - /** - * get a query result from solr - * to get all results set the query String to "*:*" - * @param querystring - * @throws IOException - */ - @Override - public SolrDocumentList getDocumentListByQuery(final String querystring, final String sort, final int offset, final int count, final String ... fields) throws IOException { - if (offset == 0 && count == 1 && querystring.startsWith("id:") && - ((querystring.length() == 17 && querystring.charAt(3) == '"' && querystring.charAt(16) == '"') || - querystring.length() == 15)) { - final SolrDocumentList list = new SolrDocumentList(); - SolrDocument doc = getDocumentById(querystring.charAt(3) == '"' ? querystring.substring(4, querystring.length() - 1) : querystring.substring(3), fields); - list.add(doc); - // no addToCache(list) here because that was already handlet in get(); - return list; - } - if (this.solr != null) { - SolrDocumentList list = this.solr.getDocumentListByQuery(querystring, sort, offset, count, fields); - addToCache(list, fields.length == 0); - return list; - } - - // combine both lists - SolrDocumentList list; - list = this.solr.getDocumentListByQuery(querystring, sort, offset, count, fields); - - // add caching - addToCache(list, fields.length == 0); - return list; - } - - @Override - public QueryResponse getResponseByParams(ModifiableSolrParams query) throws IOException, SolrException { - QueryResponse list = this.solr.getResponseByParams(query); - return list; - } - - @Override - public SolrDocumentList getDocumentListByParams(ModifiableSolrParams params) throws IOException, SolrException { - SolrDocumentList sdl = this.solr.getDocumentListByParams(params); - return sdl; - } - - @Override - public long getCountByQuery(final String querystring) throws IOException { - return this.solr.getCountByQuery(querystring); - } - - @Override - public LinkedHashMap> getFacets(final String query, final int maxresults, final String ... fields) throws IOException { - return this.solr.getFacets(query, maxresults, fields); - } - - private void addToCache(SolrDocumentList list, boolean doccache) { - if (MemoryControl.shortStatus()) clearCaches(); - for (final SolrDocument solrdoc: list) { - addToCache(solrdoc, doccache); - } - } - - private void addToCache(SolrDocument doc, boolean doccach) { - String id = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName()); - String q = idQuery(id); - this.missCache.remove(q); - this.hitCache.put(q, EXIST); - this.hitCache_Insert++; - if (doccach) { - this.documentCache.put(q, doc); - this.documentCache_Insert++; - } - } - - @Override - public long getSize() { - long s = this.solr.getSize(); - return Math.max(this.documentCache.size(), Math.max(this.hitCache.size(), s)); // this might be incorrect if there are other requests than "id:.." in the cache - } - - public int nameCacheHitSize() { - return this.hitCache.size(); - } - - public int nameCacheMissSize() { - return this.missCache.size(); - } - - public int nameCacheDocumentSize() { - return this.documentCache.size(); - } - - @Override - public void commit(boolean softCommit) { - this.solr.commit(softCommit); - } - - @Override - public void optimize(int maxSegments) { - this.solr.optimize(maxSegments); - } - - @Override - public int getSegmentCount() { - return this.solr.getSegmentCount(); - } - -} \ No newline at end of file diff --git a/source/net/yacy/cora/protocol/HeaderFramework.java b/source/net/yacy/cora/protocol/HeaderFramework.java index d4f56850a..2aef01f6e 100644 --- a/source/net/yacy/cora/protocol/HeaderFramework.java +++ b/source/net/yacy/cora/protocol/HeaderFramework.java @@ -413,13 +413,24 @@ public class HeaderFramework extends TreeMap implements Map 0) { + return tmpstr.substring(0, pos).trim(); + } else { + return tmpstr; + } } /* @@ -429,7 +440,7 @@ public class HeaderFramework extends TreeMap implements Map implements Map metadata = meta.loadMetadata(); + final String urls = document.dc_identifier(); + bmk_entry.put(YMarkEntry.BOOKMARK.URL.key(), urls); + if (!this.worktables.has(YMarkTables.TABLES.BOOKMARKS.tablename(bmk_user), YMarkUtil.getBookmarkId(urls))) { + bmk_entry.put(YMarkEntry.BOOKMARK.PUBLIC.key(), "false"); + bmk_entry.put(YMarkEntry.BOOKMARK.TITLE.key(), metadata.get(YMarkMetadata.METADATA.TITLE)); + bmk_entry.put(YMarkEntry.BOOKMARK.DESC.key(), metadata.get(YMarkMetadata.METADATA.DESCRIPTION)); + } + final String fs = YMarkUtil.cleanFoldersString(foldersString); + if (fs.isEmpty()) + bmk_entry.put(YMarkEntry.BOOKMARK.FOLDERS.key(), YMarkEntry.BOOKMARK.FOLDERS.deflt()); + else + bmk_entry.put(YMarkEntry.BOOKMARK.FOLDERS.key(), fs); + final StringBuilder strb = new StringBuilder(); + if (autotag) { + final String autotags = YMarkAutoTagger.autoTag(document, 3, this.worktables.bookmarks.getTags(bmk_user)); + strb.append(autotags); + } + if (!tagsString.isEmpty()) { + strb.append(YMarkUtil.TAGS_SEPARATOR); + strb.append(tagsString); + } + bmk_entry.put(YMarkEntry.BOOKMARK.TAGS.key(), YMarkUtil.cleanTagsString(strb.toString())); + this.worktables.bookmarks.addBookmark(bmk_user, bmk_entry, true, true); + } + } + public boolean hasBookmark(final String bmk_user, final String urlhash) { final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user); try { diff --git a/source/net/yacy/document/Document.java b/source/net/yacy/document/Document.java index 54dcf21c3..265cbf45a 100644 --- a/source/net/yacy/document/Document.java +++ b/source/net/yacy/document/Document.java @@ -83,9 +83,9 @@ public class Document { // the anchors and images - Maps are URL-to-EntityDescription mappings. // The EntityDescription appear either as visible text in anchors or as alternative // text in image tags. - private LinkedHashMap audiolinks, videolinks, applinks, hyperlinks; + private LinkedHashMap audiolinks, videolinks, applinks, hyperlinks; // TODO: check if redundant value (set to key.getNameProperty()) is needed private LinkedHashMap inboundlinks, outboundlinks; - private Map emaillinks; + private Set emaillinks; // mailto: links private MultiProtocolURL favicon; private boolean resorted; private final Set languages; @@ -430,6 +430,11 @@ dc_rights return sentences; } + /** + * All anchor links of the document + * (this includes mailto links) + * @return all links embedded as anchors (clickeable entities) + */ public Collection getAnchors() { // returns all links embedded as anchors (clickeable entities) // this is a url(String)/text(String) map @@ -445,6 +450,11 @@ dc_rights // the next three methods provide a calculated view on the getAnchors/getImages: + /** + * List of links to resources (pages, images, files, media ...) + * (Hyperlinks do not include mailto: links) + * @return a subset of the getAnchor-set: only links to other hyperrefs + */ public Map getHyperlinks() { // this is a subset of the getAnchor-set: only links to other hyperrefs if (!this.resorted) resortLinks(); @@ -473,7 +483,10 @@ dc_rights return this.applinks; } - public Map getEmaillinks() { + /** + * @return mailto links + */ + public Set getEmaillinks() { // this is part of the getAnchor-set: only links to email addresses if (!this.resorted) resortLinks(); return this.emaillinks; @@ -491,6 +504,9 @@ dc_rights return this.lat; } + /** + * sorts all links (anchors) into individual collections + */ private void resortLinks() { if (this.resorted) return; synchronized (this) { @@ -506,13 +522,21 @@ dc_rights this.videolinks = new LinkedHashMap(); this.audiolinks = new LinkedHashMap(); this.applinks = new LinkedHashMap(); - this.emaillinks = new LinkedHashMap(); + this.emaillinks = new LinkedHashSet(); final Map collectedImages = new HashMap(); // this is a set that is collected now and joined later to the imagelinks for (final Map.Entry entry: this.images.entrySet()) { if (entry.getKey() != null && entry.getKey().getHost() != null && entry.getKey().getHost().equals(thishost)) this.inboundlinks.put(entry.getKey(), "image"); else this.outboundlinks.put(entry.getKey(), "image"); } for (final AnchorURL url: this.anchors) { if (url == null) continue; + u = url.toNormalform(true); + final String name = url.getNameProperty(); + // check mailto scheme first (not suppose to get into in/outboundlinks or hyperlinks -> crawler can't process) + if (url.getProtocol().equals("mailto")) { + this.emaillinks.add(url); + continue; + } + final boolean noindex = url.getRelProperty().toLowerCase().indexOf("noindex",0) >= 0; final boolean nofollow = url.getRelProperty().toLowerCase().indexOf("nofollow",0) >= 0; if ((thishost == null && url.getHost() == null) || @@ -523,31 +547,24 @@ dc_rights } else { this.outboundlinks.put(url, "anchor" + (noindex ? " noindex" : "") + (nofollow ? " nofollow" : "")); } - u = url.toNormalform(true); - final String name = url.getNameProperty(); - if (u.startsWith("mailto:")) { - this.emaillinks.put(u.substring(7), name); - } else { - extpos = u.lastIndexOf('.'); - if (extpos > 0) { - if (((qpos = u.indexOf('?')) >= 0) && (qpos > extpos)) { - ext = u.substring(extpos + 1, qpos).toLowerCase(); - } else { - ext = u.substring(extpos + 1).toLowerCase(); - } - if (Classification.isMediaExtension(ext)) { - // this is not a normal anchor, its a media link - if (Classification.isImageExtension(ext)) { - collectedImages.put(url, new ImageEntry(url, name, -1, -1, -1)); - } - else if (Classification.isAudioExtension(ext)) this.audiolinks.put(url, name); - else if (Classification.isVideoExtension(ext)) this.videolinks.put(url, name); - else if (Classification.isApplicationExtension(ext)) this.applinks.put(url, name); - } + extpos = u.lastIndexOf('.'); + if (extpos > 0) { + if (((qpos = u.indexOf('?')) >= 0) && (qpos > extpos)) { + ext = u.substring(extpos + 1, qpos).toLowerCase(); + } else { + ext = u.substring(extpos + 1).toLowerCase(); + } + if (Classification.isMediaExtension(ext)) { + // this is not a normal anchor, its a media link + if (Classification.isImageExtension(ext)) { // TODO: guess on a-tag href extension (may not be correct) + collectedImages.put(url, new ImageEntry(url, name, -1, -1, -1)); + } else if (Classification.isAudioExtension(ext)) this.audiolinks.put(url, name); + else if (Classification.isVideoExtension(ext)) this.videolinks.put(url, name); + else if (Classification.isApplicationExtension(ext)) this.applinks.put(url, name); } - // in any case we consider this as a link and let the parser decide if that link can be followed - this.hyperlinks.put(url, name); } + // in any case we consider this as a link and let the parser decide if that link can be followed + this.hyperlinks.put(url, name); } // add image links that we collected from the anchors to the image map @@ -761,7 +778,7 @@ dc_rights return this.crawldepth; } - public void writeXML(final Writer os, final Date date) throws IOException { + public void writeXML(final Writer os) throws IOException { os.write("\n"); final String title = dc_title(); if (title != null && title.length() > 0) os.write("\n"); @@ -779,7 +796,7 @@ dc_rights } final String language = dc_language(); if (language != null && language.length() > 0) os.write("" + dc_language() + "\n"); - os.write("" + ISO8601Formatter.FORMATTER.format(date) + "\n"); + os.write("" + ISO8601Formatter.FORMATTER.format(getLastModified()) + "\n"); if (this.lon != 0.0 && this.lat != 0.0) os.write("" + this.lon +"" + this.lat + "\n"); os.write("\n"); } @@ -789,7 +806,7 @@ dc_rights final ByteArrayOutputStream baos = new ByteArrayOutputStream(); try { final Writer osw = new OutputStreamWriter(baos, "UTF-8"); - writeXML(osw, this.lastModified); + writeXML(osw); osw.close(); return UTF8.String(baos.toByteArray()); } catch (final UnsupportedEncodingException e1) { diff --git a/source/net/yacy/document/content/DCEntry.java b/source/net/yacy/document/content/DCEntry.java index 153900b4e..cc90196dc 100644 --- a/source/net/yacy/document/content/DCEntry.java +++ b/source/net/yacy/document/content/DCEntry.java @@ -353,7 +353,7 @@ public class DCEntry extends MultiMapSolrParams { public void writeXML(OutputStreamWriter os) throws IOException { Document doc = document(); if (doc != null) { - doc.writeXML(os, this.getDate()); + doc.writeXML(os); } } } diff --git a/source/net/yacy/document/importer/MediawikiImporter.java b/source/net/yacy/document/importer/MediawikiImporter.java index ba349c2bb..3a0b0c81c 100644 --- a/source/net/yacy/document/importer/MediawikiImporter.java +++ b/source/net/yacy/document/importer/MediawikiImporter.java @@ -523,7 +523,7 @@ public class MediawikiImporter extends Thread implements Importer { } } public void writeXML(final OutputStreamWriter os) throws IOException { - this.document.writeXML(os, new Date()); + this.document.writeXML(os); } } @@ -710,7 +710,7 @@ public class MediawikiImporter extends Thread implements Importer { this.osw.write("\n" + SurrogateReader.SURROGATES_MAIN_ELEMENT_OPEN + "\n"); } ConcurrentLog.info("WIKITRANSLATION", "[CONSUME] Title: " + record.title); - record.document.writeXML(this.osw, new Date()); + record.document.writeXML(this.osw); this.rc++; if (this.rc >= 10000) { this.osw.write("\n"); diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java index 6bd73f704..30310e017 100644 --- a/source/net/yacy/document/parser/html/ContentScraper.java +++ b/source/net/yacy/document/parser/html/ContentScraper.java @@ -475,7 +475,7 @@ public class ContentScraper extends AbstractScraper implements Scraper { final String type = tag.opts.getProperty("type", EMPTY_STRING); final String hreflang = tag.opts.getProperty("hreflang", EMPTY_STRING); - if (rel.equalsIgnoreCase("shortcut icon")) { + if (rel.equalsIgnoreCase("shortcut icon") || rel.equalsIgnoreCase("icon")) { // html5 -> rel="icon") final ImageEntry ie = new ImageEntry(newLink, linktitle, -1, -1, -1); this.images.add(ie); this.favicon = newLink; diff --git a/source/net/yacy/document/parser/ooxmlParser.java b/source/net/yacy/document/parser/ooxmlParser.java index 596b54bdb..c119f642a 100644 --- a/source/net/yacy/document/parser/ooxmlParser.java +++ b/source/net/yacy/document/parser/ooxmlParser.java @@ -178,10 +178,10 @@ public class ooxmlParser extends AbstractParser implements Parser { docKeywords, singleList(docLongTitle), docAuthor, - "", + null, null, descriptions, - 0.0f, 0.0f, + 0.0d, 0.0d, contentBytes, null, null, diff --git a/source/net/yacy/http/servlets/UrlProxyServlet.java b/source/net/yacy/http/servlets/UrlProxyServlet.java index 034c02232..85e00aa62 100644 --- a/source/net/yacy/http/servlets/UrlProxyServlet.java +++ b/source/net/yacy/http/servlets/UrlProxyServlet.java @@ -8,7 +8,7 @@ import java.net.MalformedURLException; import java.net.URL; import java.net.URLDecoder; import java.util.HashMap; -import java.util.StringTokenizer; +import java.util.regex.PatternSyntaxException; import javax.servlet.Servlet; import javax.servlet.ServletConfig; import javax.servlet.ServletException; @@ -122,22 +122,18 @@ public class UrlProxyServlet extends ProxyServlet implements Servlet { } // 2 - get target url URL proxyurl = null; - String strARGS = request.getQueryString(); - if (strARGS == null) { + final String strUrl = request.getParameter("url"); + if (strUrl == null) { response.sendError(HttpServletResponse.SC_NOT_FOUND,"url parameter missing"); return; } - if (strARGS.startsWith("url=")) { - final String strUrl = strARGS.substring(4); // strip "url=" - - try { - proxyurl = new URL(strUrl); - } catch (final MalformedURLException e) { - proxyurl = new URL(URLDecoder.decode(strUrl, UTF8.charset.name())); - - } + try { + proxyurl = new URL(strUrl); + } catch (final MalformedURLException e) { + proxyurl = new URL(URLDecoder.decode(strUrl, UTF8.charset.name())); } + if (proxyurl == null) { response.sendError(HttpServletResponse.SC_NOT_FOUND,"url parameter missing"); return; @@ -334,16 +330,17 @@ public class UrlProxyServlet extends ProxyServlet implements Servlet { private boolean proxyippatternmatch(final String key) { // the cfgippattern is a comma-separated list of patterns // each pattern may contain one wildcard-character '*' which matches anything - final String cfgippattern = Switchboard.getSwitchboard().getConfig("proxyURL.access", "*"); - if (cfgippattern.equals("*")) { + final String[] cfgippattern = Switchboard.getSwitchboard().getConfigArray("proxyURL.access", "*"); + if (cfgippattern[0].equals("*")) { return true; } - final StringTokenizer st = new StringTokenizer(cfgippattern, ","); - String pattern; - while (st.hasMoreTokens()) { - pattern = st.nextToken(); - if (key.matches(pattern)) { - return true; + for (String pattern : cfgippattern) { + try { + if (key.matches(pattern)) { + return true; + } + } catch (PatternSyntaxException ex) { + this._log.warn("wrong ip pattern in url proxy config", ex.getMessage() ); } } return false; diff --git a/source/net/yacy/http/servlets/YaCyDefaultServlet.java b/source/net/yacy/http/servlets/YaCyDefaultServlet.java index 1606b6650..6fdbdf645 100644 --- a/source/net/yacy/http/servlets/YaCyDefaultServlet.java +++ b/source/net/yacy/http/servlets/YaCyDefaultServlet.java @@ -794,9 +794,9 @@ public class YaCyDefaultServlet extends HttpServlet { long now = System.currentTimeMillis(); response.setDateHeader(HeaderFramework.LAST_MODIFIED, now); if (target.endsWith(".css")) { - response.setDateHeader(HeaderFramework.EXPIRES, now + 4000); // expires in 4 seconds (which is still too often) + response.setDateHeader(HeaderFramework.EXPIRES, now + 3600000); // expires in 1 hour (which is still often, others use 1 week, month or year) } else if (target.endsWith(".png")) { - response.setDateHeader(HeaderFramework.EXPIRES, now + 1000); // expires in 1 seconds (reduce heavy image creation load) + response.setDateHeader(HeaderFramework.EXPIRES, now + 60000); // expires in 1 minute (reduce heavy image creation load) } else { response.setDateHeader(HeaderFramework.EXPIRES, now); // expires now } @@ -858,8 +858,8 @@ public class YaCyDefaultServlet extends HttpServlet { result.close(); return; } - if (yp.isStatic()) { - response.setDateHeader(HeaderFramework.EXPIRES, now + 600000); // expires in ten minutes + if (yp.isStatic()) { // static image never expires + response.setDateHeader(HeaderFramework.EXPIRES, now + 3600000); // expires in 1 hour } } else if (tmp instanceof Image) { final Image i = (Image) tmp; diff --git a/source/net/yacy/http/servlets/YaCyProxyServlet.java b/source/net/yacy/http/servlets/YaCyProxyServlet.java index fed2e3da5..690debcfe 100644 --- a/source/net/yacy/http/servlets/YaCyProxyServlet.java +++ b/source/net/yacy/http/servlets/YaCyProxyServlet.java @@ -172,7 +172,7 @@ public class YaCyProxyServlet extends ProxyServlet implements Servlet { response.setContentType(mimeType); response.setStatus(httpStatus); - if ((mimeType != null) && (mimeType.startsWith("text/html") || mimeType.startsWith("text"))) { + if ((mimeType != null) && (mimeType.startsWith("text"))) { final StringWriter buffer = new StringWriter(); if (proxyResponseHeader.containsKey(HeaderFramework.TRANSFER_ENCODING) && proxyResponseHeader.get(HeaderFramework.TRANSFER_ENCODING).contains("chunked")) { diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java index b4f572967..efc5cdb4b 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java @@ -171,9 +171,21 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable tags=ListManager.string2set(BookmarkHelper.cleanTagsString(post.get("bookmarkFolder","/crawlStart"))); final Set tags=ListManager.string2set(BookmarkHelper.cleanTagsString("/crawlStart")); tags.add("crawlStart"); final Set keywords = scraper.dc_subject(); @@ -3238,8 +3237,10 @@ public final class Switchboard extends serverSwitch { if (kk.length() > 0) tags.add(kk); } } - String tagStr = tags.toString(); - if (tagStr.length() > 2 && tagStr.startsWith("[") && tagStr.endsWith("]")) tagStr = tagStr.substring(1, tagStr.length() - 2); + + // TODO: what to do with the result ? + //String tagStr = tags.toString(); + //if (tagStr.length() > 2 && tagStr.startsWith("[") && tagStr.endsWith("]")) tagStr = tagStr.substring(1, tagStr.length() - 2); // we will create always a bookmark to use this to track crawled hosts final BookmarksDB.Bookmark bookmark = this.bookmarksDB.createorgetBookmark(url.toNormalform(true), "admin"); @@ -3254,7 +3255,7 @@ public final class Switchboard extends serverSwitch { // do the same for ymarks // TODO: could a non admin user add crawls? try { - this.tables.bookmarks.createBookmark(this.loader, url, profile.getAgent(), YMarkTables.USER_ADMIN, true, "crawlStart", "/Crawl Start"); + this.tables.bookmarks.createBookmark(scraper, YMarkTables.USER_ADMIN, true, "crawlStart", "/Crawl Start"); } catch (final IOException e) { ConcurrentLog.logException(e); } catch (final Failure e) { diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java index e61b875bd..0e0797145 100644 --- a/source/net/yacy/search/query/QueryParams.java +++ b/source/net/yacy/search/query/QueryParams.java @@ -141,7 +141,6 @@ public final class QueryParams { public int transmitcount; // number of results that had been shown to the user public long searchtime, urlretrievaltime, snippetcomputationtime; // time to perform the search, to get all the urls, and to compute the snippets public final String userAgent; - protected boolean filterfailurls, filterscannerfail; protected double lat, lon, radius; public LinkedHashSet facetfields; private SolrQuery cachedQuery; @@ -173,8 +172,6 @@ public final class QueryParams { final Segment indexSegment, final RankingProfile ranking, final String userAgent, - final boolean filterfailurls, - final boolean filterscannerfail, final double lat, final double lon, final double radius, @@ -241,8 +238,6 @@ public final class QueryParams { this.indexSegment = indexSegment; this.userAgent = userAgent; this.transmitcount = 0; - this.filterfailurls = filterfailurls; - this.filterscannerfail = filterscannerfail; // we normalize here the location and radius because that should cause a better caching // and as surplus it will increase privacy this.lat = Math.floor(lat * this.kmNormal) / this.kmNormal; diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index a93b112fa..1f98798c9 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -58,7 +58,6 @@ import net.yacy.cora.federate.yacy.Distribution; import net.yacy.cora.lod.vocabulary.Tagging; import net.yacy.cora.order.Base64Order; import net.yacy.cora.protocol.Domains; -import net.yacy.cora.protocol.Scanner; import net.yacy.cora.sorting.ClusteredScoreMap; import net.yacy.cora.sorting.ConcurrentScoreMap; import net.yacy.cora.sorting.ReversibleScoreMap; @@ -1176,7 +1175,6 @@ public final class SearchEvent { } // check modifier constraint (language) - // TODO: : page.language() never null but defaults to "en" (may cause false drop of result) if (this.query.modifier.language != null && !this.query.modifier.language.equals(page.language())) { if (log.isFine()) log.fine("dropped RWI: language constraint = " + this.query.modifier.language); if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet(); @@ -1266,14 +1264,6 @@ public final class SearchEvent { continue; } } - - // check Scanner - if (this.query.filterscannerfail && !Scanner.acceptURL(page.url())) { - if (log.isFine()) log.fine("dropped RWI: url not accepted by scanner"); - if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet(); - continue; - } - // check vocabulary terms (metatags) {only available in Solr index as vocabulary_xxyyzzz_sxt field} // TODO: vocabulary is only valid and available in local Solr index (consider to auto-switch to Searchdom.LOCAL) diff --git a/source/net/yacy/search/query/SearchEventCache.java b/source/net/yacy/search/query/SearchEventCache.java index bdae26b28..5fb5ff671 100644 --- a/source/net/yacy/search/query/SearchEventCache.java +++ b/source/net/yacy/search/query/SearchEventCache.java @@ -170,7 +170,8 @@ public class SearchEventCache { // start a new event Switchboard sb = Switchboard.getSwitchboard(); - final boolean delete = sb == null || Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.SEARCH_VERIFY_DELETE, true); + final boolean delete = sb == null || Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.SEARCH_VERIFY_DELETE, true) + || (sb.getConfigBool(SwitchboardConstants.NETWORK_SEARCHVERIFY, false) && sb.peers.mySeed().getFlagAcceptRemoteIndex()); final boolean addToLocalIdx = sb == null || Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.REMOTESEARCH_RESULT_STORE, true); event = new SearchEvent(query, peers, workTables, preselectedPeerHashes, generateAbstracts, loader, remote_maxcount, remote_maxtime, delete, addToLocalIdx); MemoryControl.request(100 * 1024 * 1024, false); // this may trigger a short memory status which causes a reducing of cache space of other threads diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java index 047aee7a0..7094a1a51 100644 --- a/source/net/yacy/search/schema/CollectionConfiguration.java +++ b/source/net/yacy/search/schema/CollectionConfiguration.java @@ -965,7 +965,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri Boolean canonical_equal_sku = canonical == null ? null : canonical.toNormalform(true).equals(url); if (webgraph != null && (!containsCanonical || (canonical_equal_sku != null && (canonical_equal_sku.booleanValue())))) { // a document with canonical tag should not get a webgraph relation, because that belongs to the canonical document - List edges = webgraph.getEdges(subgraph, digestURL, responseHeader, collections, crawldepth, processTypes, document.getAnchors(), sourceName); + List edges = webgraph.getEdges(subgraph, digestURL, responseHeader, collections, crawldepth, processTypes, document.getHyperlinks().keySet(), sourceName); // this also enriched the subgraph doc.webgraphDocuments.addAll(edges); } else { @@ -976,7 +976,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri contains(CollectionSchema.outboundlinks_protocol_sxt) || contains(CollectionSchema.outboundlinks_urlstub_sxt) || contains(CollectionSchema.outboundlinks_anchortext_txt)) { - for (final AnchorURL target_url: document.getAnchors()) { + for (final AnchorURL target_url: document.getHyperlinks().keySet()) { enrichSubgraph(subgraph, digestURL, target_url); } } diff --git a/source/net/yacy/server/http/HTTPDProxyHandler.java b/source/net/yacy/server/http/HTTPDProxyHandler.java index fe46a700e..b97b6be2f 100644 --- a/source/net/yacy/server/http/HTTPDProxyHandler.java +++ b/source/net/yacy/server/http/HTTPDProxyHandler.java @@ -1187,9 +1187,6 @@ public final class HTTPDProxyHandler { if (conProp.containsKey(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_HEADER)) { final HeaderFramework proxyRespondHeader = (HeaderFramework) conProp.get(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_HEADER); mime = proxyRespondHeader.mime(); - if (mime.indexOf(';') != -1) { - mime = mime.substring(0,mime.indexOf(';')); - } } logMessage.append(mime); diff --git a/test/net/yacy/cora/document/id/MultiProtocolURLTest.java b/test/net/yacy/cora/document/id/MultiProtocolURLTest.java index 94d0d9118..b3c3abf14 100644 --- a/test/net/yacy/cora/document/id/MultiProtocolURLTest.java +++ b/test/net/yacy/cora/document/id/MultiProtocolURLTest.java @@ -144,6 +144,28 @@ public class MultiProtocolURLTest { } } + /** + * Test getProtocol() + */ + @Test + public void testGetProtocol() throws MalformedURLException { + Map testurls = new HashMap(); + // ( 1. parameter = urlstring to test, 2. parameter = expected protocol) + testurls.put("http://host.com", "http"); + testurls.put("HTTPS://host.com", "https"); + testurls.put("Ftp://host.com", "ftp"); + testurls.put("SMB://host.com", "smb"); + testurls.put("/file.com", "file"); + testurls.put("file://host.com/file.com", "file"); + testurls.put("MailTo:Abc@host.com", "mailto"); + + for (String txt : testurls.keySet()) { + MultiProtocolURL url = new MultiProtocolURL(txt); + assertEquals("test " + txt, url.getProtocol(), testurls.get(txt)); + + } + } + /** * Test of toNormalform method, of class MultiProtocolURL. */ diff --git a/test/net/yacy/document/parser/htmlParserTest.java b/test/net/yacy/document/parser/htmlParserTest.java index 2d7ad5b73..d18a38c99 100644 --- a/test/net/yacy/document/parser/htmlParserTest.java +++ b/test/net/yacy/document/parser/htmlParserTest.java @@ -93,7 +93,7 @@ public class htmlParserTest extends TestCase { // test link with inline html in text // expectation to deliver pure text as it is possibly indexed in outboundlinks_anchortext_txt/inboundlinks_anchortext_txt final AnchorURL url = new AnchorURL("http://localhost/"); - final String mimetype = "text/html"; + final String charset = "UTF-8"; final String testhtml = "" + "testtext" // "testtext" + " Start" // "Start" @@ -101,7 +101,7 @@ public class htmlParserTest extends TestCase { + "
      \"image" // + img width 550 (+html5 figure) + ""; - ContentScraper scraper = parseToScraper(url, mimetype, new VocabularyScraper(), 0, testhtml, 10); + ContentScraper scraper = parseToScraper(url, charset, new VocabularyScraper(), 0, testhtml, 10); List anchorlist = scraper.getAnchors(); String linktxt = anchorlist.get(0).getTextProperty(); @@ -126,7 +126,7 @@ public class htmlParserTest extends TestCase { @Test public void testParseToScraper_TagTest() throws Exception { final AnchorURL url = new AnchorURL("http://localhost/"); - final String mimetype = "text/html"; + final String charset = "UTF-8"; final String textSource = "test text"; final String testhtml = "" + "" @@ -134,7 +134,7 @@ public class htmlParserTest extends TestCase { + "

      " + textSource + "

      " + ""; - ContentScraper scraper = parseToScraper(url, mimetype, new VocabularyScraper(), 0, testhtml, 10); + ContentScraper scraper = parseToScraper(url, charset, new VocabularyScraper(), 0, testhtml, 10); String txt = scraper.getText(); System.out.println("ScraperTagTest: [" + textSource + "] = [" + txt + "]");