From af39a76bf6098e2a5fe2a2276a991074a965851d Mon Sep 17 00:00:00 2001
From: reger
Date: Sat, 29 Oct 2016 04:19:46 +0200
Subject: [PATCH] Reduce number of default max. search navigator lines (from
 10000) to 100 + make it configurable

---
 defaults/yacy.init                            | 2567 +++++++++--------
 htroot/ConfigSearchPage_p.html                |    2 +
 htroot/ConfigSearchPage_p.java                |    8 +
 source/net/yacy/search/Switchboard.java       |    4 +
 .../net/yacy/search/SwitchboardConstants.java |    2 +
 source/net/yacy/search/query/QueryParams.java |    2 +-
 .../kelondro/rwi/ReferenceContainerTest.java  |    2 +-
 7 files changed, 1303 insertions(+), 1284 deletions(-)

diff --git a/defaults/yacy.init b/defaults/yacy.init
index 9af84efaf..d95c70be1 100644
--- a/defaults/yacy.init
+++ b/defaults/yacy.init
@@ -1,1282 +1,1285 @@
-###
-### YaCy Init File
-###
-# These properties will be loaded upon installation.
-# They are used only once for set-up.
-# If you make changes to this file and want them to take effect,
-# you must delete the yacy.conf file in DATA/SETTINGS
-
-# ----------------------------------------------------------------------------
-# port number where the server should bind to
-port = 8090
-
-# optional ssl port (https port) the server should bind to
-port.ssl = 8443
-
-# prefix for new default peer names
-peernameprefix=_anon
-
-# use UPnP [true/false]
-upnp.enabled = true
-# remote host on UPnP device (for more than one connection)
-upnp.remoteHost =
-
-#sometimes you may want yacy to bind to another port than the one reachable from outside.
-#then set bindPort to the port yacy should bind on, and port to the port visible from outside
-#to run yacy on port 8090, reachable from port 80, set bindPort=8090, port=80 and use
-#iptables -t nat -A PREROUTING -p tcp -s 192.168.24.0/16 --dport 80 -j DNAT --to 192.168.24.1:8090
-#(of course you need to customize the ips)
-bindPort =
-
-# SSL support:
-#
-# For a German manual see http://yacy-websuche.de/wiki/index.php/De:Interface%C3%9CberHTTPS
-#
-# English speaking users read below:
-#
-# With this you can access your peer using https://localhost:8443
-#
-# There are two possibilities to specify which certificate should
-# be used by YaCy.
-#
-# 1) Create a new certificate:
-#
-# *) For testing purposes, you can create a keystore with a self-signed certificate,
-# using the following command:
-# C:\> keytool -keystore mySrvKeystore -genkey -keyalg RSA -alias mycert
-#
-# *) Then configure the keyStoreXXXX properties accordingly, e.g.
-# keyStore = c:/yacy/DATA/SETTINGS/mySrvKeystore
-# keyStorePassword = mypwd
-#
-# 2) Import an existing certificate:
-#
-# Alternatively you can import an existing certificate in pkcs12 format into
-# the keystore.
-#
-# This can be done by setting the pkcs12XXX properties accordingly, e.g.
-# pkcs12ImportFile = c:/temp/keystore.pkcs12
-# pkcs12ImportPwd = test
-#
-# If the property keyStore is not specified, then a new keystore file
-# DATA/SETTINGS/myPeerKeystore will be created.
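[Editor's note: a minimal sketch, not part of the patch, of what the keyStore/keyStorePassword settings below amount to — loading the named keystore with the JDK's standard API; the file name and password are the defaults from this file:]

    import java.io.FileInputStream;
    import java.security.KeyStore;

    // Sketch: load the keystore named by keyStore/keyStorePassword.
    public class KeystoreSketch {
        public static void main(String[] args) throws Exception {
            KeyStore ks = KeyStore.getInstance(KeyStore.getDefaultType());
            try (FileInputStream in = new FileInputStream("defaults/freeworldKeystore")) {
                ks.load(in, "freeworld".toCharArray());
            }
            System.out.println("entries in store: " + ks.size());
        }
    }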
-
-keyStore=defaults/freeworldKeystore
-keyStorePassword=freeworld
-pkcs12ImportFile =
-pkcs12ImportPwd =
-
-# the keyStore is only used if server.https is set to true
-# if server.https=true, then the YaCy web interface is available at
-# https://localhost:<port.ssl>/ and at http://localhost:<port>/
-server.https=false
-
-# property that collects the names of all servlets that had been used so far
-# that is used to track if the user has already done some configuration steps
-# if the user missed configuration steps that should be done, then a help system
-# is possible which guides the user based on the list of servlets that had been used
-# the list distinguishes called and submitted servlets
-server.servlets.called =
-server.servlets.submitted =
-
-# server tracking: maximum time a track entry is held in the internal cache
-# value is in milliseconds, default is one hour
-server.maxTrackingTime = 3600000
-
-# maximum number of tracks per host
-server.maxTrackingCount = 1000
-
-# maximum number of hosts that are tracked
-server.maxTrackingHostCount = 100
-
-# maximum file sizes: since some users experience problems with too large files
-# the file size of database files can be limited. Larger files can be used to get a
-# better IO performance and to use less RAM; however, if the size must be limited
-# because of limitations of the file system, the maximum size can be set here
-filesize.max.win = 2147483647
-filesize.max.other = 8589934591
-
-# Network Definition
-# There can be separate YaCy networks, and managed sub-groups of the general network.
-# The essentials of the network definition are attached in separate property files.
-# The property here can also be a url where the definition can be loaded.
-# In case of privately managed networks, this configuration must be changed BEFORE it is released
-# to the members of the separated network peers.
-network.unit.definition = defaults/yacy.network.freeworld.unit
-#network.unit.definition = defaults/yacy.network.webportal.unit
-#network.unit.definition = defaults/yacy.network.intranet.unit
-
-# distinguish intranet/internet IPs:
-# if this setting is set to true, then URL-Hashes are always created with the 'intranet' flag, even if the
-# url is in the internet. This can be done to enhance the crawling speed dramatically since a DNS-lookup
-# to check if a host is in the internet or an intranet can be omitted.
-# This option is only valid if the network.unit.domain property is set to 'any'
-network.unit.domain.nocheck = false
-
-# in addition to non-dht networks a client may have its own agent name
-# this option is only used if the value is non-empty and network.unit.dht = false
-# that means it is not usable in YaCy p2p-configurations, only in private portal configurations
-network.unit.tenant.agent =
-
-# Update process properties
-# The update server location is given in the network.unit.definition,
-# but the settings for update processing and cycles are individual.
-# the update process can be either 'manual' (no automatic lookup for new versions),
-# 'guided' (automatic lookup, but user is asked before update is performed),
-# or 'auto' (whenever an update is available, the update is loaded and installed)
-update.process = manual
-# the cycle value applies only if the process is automatic or guided. The value means hours.
-# There is currently a fixed minimum number of hours of 24 hours for updates
-update.cycle = 168
-# a version number blacklist can restrict automatic or guided updates to a specific
-# range of version numbers. The restriction is done with a blacklist (standard regexpr)
-# It is recommended to set this list to low developer version numbers
-update.blacklist =
-# an update can also be restricted with a concept property, which can decide if an
-# update is only valid if it either is a main release or any release including new development releases
-# Valid keywords are 'main' and 'any'
-update.concept = any
-# the following values are set automatically:
-# the time when the last lookup to the network update server(s) was done
-update.time.lookup = 0
-# the time when the last release was downloaded
-update.time.download = 0
-# the deploy time when the last update was done; milliseconds since epoch
-update.time.deploy = 0
-# delete old downloaded files after this amount of days to free disk space
-# the latest release is always kept
-update.deleteOld = 30
-# only install sign files
-update.onlySignedFiles = 1
-
-# restart-option
-# a peer can be re-started periodically
-# restart.process can be either 'off' (no automatic restart) or 'time' (time-rule-based, see below)
-restart.process = off
-# the restart.cycle is the number of hours that must pass before a restart is done
-restart.cycle = 20
-# the restart.hour is a pattern that must match with the hour string (two-digit, 24h)
-# when the restart should be performed
-restart.hour = 03
-# the following values are set automatically
-restart.time = 0
-
-# clusters within a network:
-# every network can have an unlimited number of clusters. Clusters may be also completely
-# sealed and have no connection to other peers. When a cluster does not use the
-# p2p protocol and the bootstrapping mechanism to contact other peers, we call them
-# Robinson peers. They can appear in different 'visibilities':
-# - privatepeer: no connection and no data exchange to any other peer
-# - privatecluster: connections only to self-defined addresses (other peers in same mode)
-# - publiccluster: like privatecluster, but visible and searchable by public p2p nodes
-# - publicpeer: a single peer without cluster connection, but visible for p2p nodes
-# all public robinson peers should use a peer tag string to be searchable if in the
-# search request these tags appear
-cluster.mode=publicpeer
-cluster.peers.yacydomain=localpeer.yacy
-cluster.peers.ipport=localhost:8090
-
-# bootstrapLoadTimeout
-# this is the time-out for loading of the seedlist files during bootstrapping
-# If the time-out is too short, there is the danger that the peer stays in virgin mode
-bootstrapLoadTimeout = 20000
-
-# time-out of client control socket in milliseconds
-# since this applies only to the client-proxy connection,
-# it can be rather short
-# milliseconds
-clientTimeout = 10000
-
-# maximal number of httpd sessions
-# a client may open several connections at once, and the httpdMaxBusySessions value sets
-# a limit on the number of concurrent connections
-httpdMaxBusySessions = 200
-
-# default root path for the file server
-# may be overridden by the htdocs parameter
-# users shall be encouraged to use the htdocs path for individual content,
-# not this path defined here
-htRootPath = htroot
-
-# the htroot path
-# root path for the httpd file server
-htDefaultPath=htroot
-
-# individual htroot folder
-# every user may publicize her/his own web pages
-# these pages shall be placed in the path defined here
-# the htdocs path shares its content with the htroot path
-htDocsPath = DATA/HTDOCS
-
-# the default files (typically index.html), if no file name is given
-# The complete path to this file is created by combination with the rootPath
-# you can set a list of defaults, separated by comma
-# the first one is preferred
-defaultFiles = index.html,index.htm,default.html,search.html,console.html,control.html,welcome.html,wiki.html,forum.html,blog.html,email.html,content.html,monitor.html,share.html,dir.html,readme.txt
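[Editor's note: the defaultFiles lookup above amounts to picking the first listed file that exists in the requested directory; a minimal illustrative sketch, not YaCy's actual implementation:]

    import java.io.File;

    // Sketch: resolve the first existing default file from the comma-separated list.
    public class DefaultFileSketch {
        static File resolveDefault(File dir, String defaultFiles) {
            for (String name : defaultFiles.split(",")) {
                File f = new File(dir, name.trim());
                if (f.isFile()) return f;   // the first one is preferred
            }
            return null; // no default file present
        }
    }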
-
-# locale-options: YaCy supports localization.
-# Web pages for special languages are located in the htLocalePath
-# The htLocaleLang defines a list of language options as <dir>/<description>
-# the <dir> must exist as sub-path to htLocalePath
-# the htLocaleSelection selects from the given locales, value=one-of-<dir>
-locale.source=locales
-locale.translated_html=DATA/LOCALE/htroot
-locale.language=default
-
-# virtual host for httpdFileServlet access
-# for example http://<fileHost>/ shall access the file servlet and
-# return the defaultFile at rootPath
-# either way, http://<fileHost>/ denotes the same as http://localhost:<port>/
-# for the preconfigured value 'localpeer', the URL is:
-# http://localpeer/
-fileHost = localpeer
-
-# specify the path to the MIME matching file table
-mimeTable = defaults/httpd.mime
-
-# specify the path to the sessionid name file
-sessionidNamesFile = defaults/sessionid.names
-
-# a path to the file cache, used for the internal proxy and as crawl buffer
-# This will be used if the server is addressed as a proxy
-proxyCache = DATA/HTCACHE
-
-# the maximum disc cache size for files in Cache in megabytes
-# default: 4 Gigabyte
-proxyCacheSize = 4096
-
-# you can use the proxy with fresh/stale rules or in an always-fresh mode
-proxyAlwaysFresh = false
-
-# a path to the surrogate input directory
-surrogates.in = DATA/SURROGATES/in
-
-# a path to the surrogate output directory
-surrogates.out = DATA/SURROGATES/out
-
-# a path to the dictionaries directory
-# this directory also contains subdirectories for input sources, the did-you-mean function and other
-dictionaries = DATA/DICTIONARIES
-
-# a path to the classification directory
-# each subdirectory is the name of a context (which becomes a navigator) with '.txt' files
-# containing texts to teach a bayesian filter. One of the files must be named 'negative.txt'.
-# The text files can be created with the Export functionality using the option "Only Text".
-classification = DATA/CLASSIFICATION
-
-# storage place for new releases
-releases = DATA/RELEASE
-
-# the following mime-types are a blacklist for indexing:
-# parser.mime.deny: specifies mime-types that shall not be indexed
-parser.mime.deny=
-parser.extensions.deny=
-parser.enableAudioTags=false
-
-# experimental single-page parser for pdf files: split one pdf into individual pages;
-# the key is the property name in the post arguments that gets a page number assigned,
-# page numbers start with 1
-parser.pdf.individualpages=false
-parser.pdf.individualpages.key=page
-
-# Promotion Strings
-# These strings appear in the Web Mask of the YACY search client
-# Set these Strings to customize your peer and give any message to
-# other peer users
-promoteSearchPageGreeting = Web Search by the People, for the People
-# if the following property is set to true, the network name is used as greeting
-promoteSearchPageGreeting.useNetworkName = false
-# the following attributes can be used to define a custom image, alternative text and home page on the search page
-promoteSearchPageGreeting.homepage = http://yacy.net
-promoteSearchPageGreeting.imageAlt = YaCy project web site
-promoteSearchPageGreeting.largeImage = /env/grafics/YaCyLogo_120ppi.png
-promoteSearchPageGreeting.smallImage = /env/grafics/YaCyLogo_60ppi.png
-
-# the path to the public reverse word index for text files (web pages)
-# the primary path is relative to the data root, the secondary path is an absolute path
-# when the secondary path should be equal to the primary, it must be declared empty
-indexPrimaryPath=DATA/INDEX
-
-# the path to index archive dumps
-indexArchivePath=DATA/ARCHIVE
-
-# the path to the LISTS files. Most lists are used to filter web content
-listsPath=DATA/LISTS
-
-# path to additional databases, like messages, blog data and bookmarks
-workPath=DATA/WORK
-
-# the path to the SKINS files.
-skinPath=DATA/SKINS
-
-# the yellow-list; URL's elements
-# (the core of an URL; like 'yahoo' in 'de.yahoo.com')
-# appearing in this list will not get a manipulated user agent string
-proxyYellowList=yacy.yellow
-
-# the black-list; URLs appearing in this list will not be loaded;
-# instead always a 404 is returned
-# all these files will be placed in the listsPath
-BlackLists.Shared=url.default.black
-BlackLists.DefaultList=url.default.black
-
-#these are not needed as default. they just keep the values from being deleted ...
-proxy.BlackLists=url.default.black
-crawler.BlackLists=url.default.black
-dht.BlackLists=url.default.black
-search.BlackLists=url.default.black
-surftips.BlackLists=url.default.black
-news.BlackLists=url.default.black
-
-# the blue-list;
-# no search result is locally presented that has any word of the bluelist
-# in the search words, the URL or the URL's description
-plasmaBlueList=yacy.blue
-
-# this proxy may in turn again access another proxy
-# if you wish to do that, specify it here
-# if you want to switch on the proxy use, set remoteProxyUse=true
-# remoteProxyNoProxy is a no-proxy pattern list for the remote proxy
-remoteProxyUse=false
-remoteProxyUse4SSL=true
-
-remoteProxyHost=192.168.2.2
-remoteProxyPort=4239
-remoteProxyUser=
-remoteProxyPwd=
-
-remoteProxyNoProxy=10\..*,127\..*,172\.(1[6-9]|2[0-9]|3[0-1])\..*,169\.254\..*,192\.168\..*,localhost,0:0:0:0:0:0:0:1
-
-# the proxy may filter the content of transferred web pages
-# the bluelist removes specific keywords from web pages
-proxyBlueList=yacy.blue
-
-# security settings
-# we provide proxy and server security through a 2-stage security gate:
-# 1st stage: firewall-like access control through an ip filter for clients
-# 2nd stage: password settings for proxy, server and server administrators
-# by default, these settings are weak to simplify set-up and testing
-# every user/administrator shall be encouraged to change these settings
-# you can change them also online during run-time on
-# http://localhost:8090/
-
-# proxyClient: client-ip's that may connect the proxy for proxy service
-# if several ip's are allowed then they must be separated by a ','
-# regular expressions may be used
-#proxyClient=192.168.0.4
-proxyClient=localhost,127\.0\.0\.1,192\.168\..*,10\..*,0:0:0:0:0:0:0:1.*
-
-# YaCyHop: allow public usage of proxy for yacy-protocol
-# this enables usage of the internal http proxy for everyone,
-# if the file path starts with /yacy/
-# This is used to enable anonymization of yacy protocol requests
-# Instead of asking a remote peer directly, a peer in between is asked
-# to prevent that the asked peer knows which peer asks.
-YaCyHop=true
-
-# serverClient: client-ip's that may connect to the web server,
-# thus are allowed to use the search service
-# if you set this to another value, search requests from others
-# are blocked, but you will also be blocked from using others'
-# search services.
-serverClient=*
-
-# use_proxyAccounts: set to true to restrict proxy-access to some identified users.
-#use User_p.html to create some Users.
-use_proxyAccounts=true
-
-# adminAccount: a user:password - pair for administration of
-# settings through the web interface
-# should be set to a secret. By default it is without a password
-# but you are encouraged to set it to another value on the page
-# http://localhost:8090/ConfigAccounts_p.html
-#adminAccount=admin:mysecretpassword
-adminAccount=
-adminAccountBase64MD5=
-adminAccountUserName=admin
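[Editor's note: on the proxyClient/serverClient filters above — each comma-separated entry is a regular expression a client address must match. A minimal illustrative sketch, not YaCy's actual code:]

    // Sketch: test a client address against a comma-separated list of regex patterns,
    // as used by settings like proxyClient and serverClient.
    public class ClientFilterSketch {
        static boolean allowed(String clientHost, String patternList) {
            if ("*".equals(patternList)) return true;       // wildcard: everyone
            for (String pattern : patternList.split(",")) {
                if (clientHost.matches(pattern.trim())) return true;
            }
            return false;
        }

        public static void main(String[] args) {
            String filter = "localhost,127\\.0\\.0\\.1,192\\.168\\..*";
            System.out.println(allowed("192.168.1.23", filter)); // true
            System.out.println(allowed("10.0.0.5", filter));     // false
        }
    }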
-
-# special access handling for users from localhost:
-# access from localhost may be granted with administration authority
-# if this flag is set. It is set to true by default to make usage of YaCy easy
-# if you use YaCy on a headless server, you should set this to false
-# or configure this on http://localhost:8090/ConfigAccounts_p.html
-# during the first 10 minutes of operation of YaCy;
-# if the admin account password is still empty after 10 minutes a random
-# password is generated and access is then ONLY from localhost, which will cause
-# inaccessibility for installations on headless servers.
-adminAccountForLocalhost=true
-
-# adminAccountAllPages: if set to false, then all pages without the extension "_p" are
-# accessible without authorization. Some servlets may individually decide to use or request
-# administration rights. If adminAccountAllPages is set to true, then administration
-# rights are needed to access all pages without any exception. Setting adminAccountAllPages
-# to true therefore closes the YaCy web pages for everyone.
-adminAccountAllPages=false
-
-# adminRealm: an internal name (like a group name) for the login setting of the admin frontend
-# ATTENTION: changing this name will invalidate all current password hashes
-# - with DIGEST authentication mode, this realm name is part of the generated password hashes
-#   (RFC 2617 standard and recommendation). If you want to share password configuration
-#   with additional machines they have to belong to the same realm
-# - authentication defaults to BASIC
-# - and can be configured in defaults/web.xml , tag <realm-name>
-#adminRealm=YaCy-AdminUI
-adminRealm=The YaCy access is limited to administrators. If you don't know the password, you can change it using /bin/passwd.sh
-
-# if you are running a principal peer, you must update the following variables
-# The upload method that should be used to upload the seed-list file to
-# a public accessible webserver where it can be loaded by other peers.
-#
-# You can set the seedUploadMethod-Property to
-# - None
-# - Ftp
-# - File
-# - Scp (only if you have installed the optional addon)
-#
-seedUploadMethod=none
-
-# This is the most common method to upload the seed-list
-#
-# This is an ftp account with all relevant information.
-# The update is only made if there had been changes in between.
-seedFTPServer=
-seedFTPAccount=
-seedFTPPassword=
-seedFTPPath=
-
-# alternatively to an FTP account, a peer can also become a principal peer
-# if the seed-list can be generated as a file and that file is also accessible from
-# the internet. In this case, omit any ftp settings and set this path here.
-# if this path stays empty, an ftp account is considered
-# however, you must always set a seedURL because it is used to check if the
-# file is actually accessible from the internet
-seedFilePath=
-
-# Settings needed to upload the seed-list file via scp
-#
-# Please note that this upload method can only be used if you have installed
-# this optional upload method.
-seedScpServer=
-seedScpServerPort=
-seedScpAccount=
-seedScpPassword=
-seedScpPath=
-
-# every peer periodically scans for other peers. you can set the time
-# of the period here (minutes)
-peerCycle=2
-
-# debug flags
-debug.search.local.dht.off=false
-debug.search.local.solr.off=false
-debug.search.remote.dht.off=false
-debug.search.remote.dht.testlocal=false
-debug.search.remote.solr.off=false
-debug.search.remote.solr.testlocal=false
-
-#staticIP if you have a static IP, you can use this setting
-staticIP=
-
-# each time YaCy starts up, it can trigger the local browser to show the
-# status page. This is active by default, to make it easier for first-time
-# users to understand what this application does. You can disable the browser
-# pop-up here or set a different start page, like the search page
-browserPopUpTrigger=true
-browserPopUpPage=index.html
-
-# a forward page can be given for the index.html page
-# when a user accesses the index.html page, he/she is forwarded to the page
-# as given by indexForward. This is by default not defined which means 'no forward'
-indexForward =
-
-# defines if the YaCy icon appears in the system tray on supported platforms
-tray.icon.enabled=true
-tray.icon.force=false
-tray.icon.label=YaCy
-tray.menu.enabled=true
-
-# index sharing attributes: by default, sharing is on.
-# If you want to use YaCy only for local indexing (robinson mode),
-# you may switch this off
-allowDistributeIndex=true
-allowDistributeIndexWhileCrawling=false
-allowDistributeIndexWhileIndexing=true
-allowReceiveIndex=true
-allowReceiveIndex.search=true
-indexReceiveBlockBlacklist=true
-
-# the frequency is the number of links per minute that the peer allows
-# _every_ other peer to send to this peer
-defaultWordReceiveFrequency=100
-defaultLinkReceiveFrequency=30
-# the default may be overridden for each peer individually, these
-# settings are only available through the online interface
-
-# prefetch parameters
-# the prefetch depth assigns a specific depth to the prefetch mechanism
-# prefetch of 0 means no prefetch; a prefetch of 1 means to prefetch all
-# embedded URLs, but since embedded image links are loaded by the browser
-# this means that only embedded anchors are prefetched additionally
-# a prefetch of 2 would result in loading of all images and anchor pages
-# of all embedded anchors. Be careful with this value, since even a prefetch
-# of 2 would result in hundreds of prefetched URLs for each single proxy fill.
-proxyPrefetchDepth=0
-proxyStoreHTCache=true
-proxyIndexingRemote=false
-proxyIndexingLocalText=true
-proxyIndexingLocalMedia=true
-
-# proxy usage only for .yacy-Domains for autoconfig
-proxyYacyOnly=false
-
-# enable proxy via url (/proxy.html?url=http://yacy.net)
-proxyURL=false
-proxyURL.access=127.0.0.1,0:0:0:0:0:0:0:1
-# which urls to rewrite to /proxy.html?url=x (values: all, domainlist)
-proxyURL.rewriteURLs=domainlist
-proxyURL.useforresults=false
-
-# Autocrawl configuration
-autocrawl=false
-autocrawl.index.text=true
-autocrawl.index.media=true
-autocrawl.ratio=50
-autocrawl.rows=100
-autocrawl.days=1
-autocrawl.query=*:*
-autocrawl.deep.depth=3
-autocrawl.shallow.depth=1
-
-# From the 'IndexCreate' menu point you can also define a crawling start point.
-# The crawling works the same way as the prefetch, but it is possible to
-# assign a different crawling depth.
-# Be careful with this number. Consider a branching factor of average 20;
-# a prefetch-depth of 8 would index 25,600,000,000 pages, maybe the whole WWW.
-crawlingDepth=3
-crawlingDirectDocByURL=true
-crawlingIfOlder=-1
-crawlingDomFilterDepth=-1
-crawlingDomMaxPages=-1
-indexText=true
-indexMedia=true
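[Editor's note: the 25,600,000,000 figure quoted above is simply 20^8 — the pages at depth 8 alone. A tiny sketch (not from the patch) of that frontier-size estimate:]

    // Sketch: estimate crawl volume for branching factor b = 20 up to depth 8.
    public class CrawlVolumeSketch {
        public static void main(String[] args) {
            long b = 20, pages = 1, total = 0;
            for (int depth = 1; depth <= 8; depth++) {
                pages *= b;       // pages discovered at this depth
                total += pages;   // cumulative number of pages
            }
            System.out.println("at depth 8: " + pages + ", cumulative: " + total);
        }
    }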
-
-# Filter for crawling; may be used to restrict a crawl to a specific domain
-# URLs are only indexed and further crawled if they match this filter
-crawlingFilter=.*
-crawlingQ=true
-followFrames=true
-obeyHtmlRobotsNoindex=true
-obeyHtmlRobotsNofollow=false
-storeHTCache=true
-storeTXCache=true
-
-# peers may initiate remote crawling tasks.
-# every peer may allow or disallow to be used as crawling-peer;
-# you can also set a maximum crawl depth that can be requested or accepted
-# order=parameters for requester; response=parameters for responder
-# these values apply only for senior-senior - communication
-# The delay value is the number of seconds between two separate orders
-# crawlOrder: default value for remote crawl starts
-# crawlResponse: set to true if a peer should retrieve remote crawl urls from other peers
-crawlOrder=true
-crawlOrderDepth=0
-crawlResponse=false
-crawlResponseDepth=0
-
-# indexing-exclusion - rules
-# These rules are important to reduce the number of words that are indexed
-# We distinguish three different sets of stop-words:
-# static - excludes all words given in the file yacy.stopwords from indexing,
-# dynamic - excludes all words from indexing which are listed by statistic rules,
-# parental - excludes all words from indexing which had been indexed in the parent web page.
-xsstopw=true
-xdstopw=true
-xpstopw=true
-
-# Topwords filtering
-# If set to true, all stopwords (stopwords.yacy) are filtered from the topwords
-# Change to false if requesting hits from peers with modified stopwords-file and using the unchanged client-version
-filterOutStopwordsFromTopwords=true
-
-# crawling steering: must-match/must-not-match
-crawlingIPMustMatch=.*
-crawlingIPMustNotMatch=
-# the default country codes are all codes for countries in Europe
-crawlingCountryMustMatch=AD,AL,AT,BA,BE,BG,BY,CH,CY,CZ,DE,DK,EE,ES,FI,FO,FR,GG,GI,GR,HR,HU,IE,IM,IS,IT,JE,LI,LT,LU,LV,MC,MD,MK,MT,NL,NO,PL,PT,RO,RU,SE,SI,SJ,SK,SM,TR,UA,UK,VA,YU
-
-# collections for index data separation
-# these collections can be used to produce search tenants.
-# The collection is used in the site-parameter in the GSA interface.
-# Collections are assigned during crawl-time and defined in the crawl start.
-# The YaCyScheme field collection_sxt must be switched on to use this field.
-collection=user
-
-# performance-settings
-# delay-times for permanent loops (milliseconds)
-# the idlesleep is the pause that a process sleeps if the last call to the
-# process job was without execution of anything;
-# the busysleep is the pause after a full job execution
-# the prereq-value is a memory pre-requisite: that many bytes must
-# be available/free in the heap; otherwise the loop is not executed
-# and another idlesleep is performed
-20_dhtdistribution_idlesleep=30000
-20_dhtdistribution_busysleep=15000
-20_dhtdistribution_memprereq=12582912
-20_dhtdistribution_loadprereq=2.0
-30_peerping_idlesleep=30000
-30_peerping_busysleep=30000
-30_peerping_memprereq=2097152
-30_peerping_loadprereq=4.0
-40_peerseedcycle_idlesleep=1800000
-40_peerseedcycle_busysleep=1200000
-40_peerseedcycle_memprereq=4194304
-40_peerseedcycle_loadprereq=2.0
-50_localcrawl_idlesleep=2000
-50_localcrawl_busysleep=10
-50_localcrawl_memprereq=25165824
-50_localcrawl_loadprereq=6.0
-50_localcrawl_isPaused=false
-55_autocrawl_idlesleep=10000
-55_autocrawl_busysleep=10000
-55_autocrawl_memprereq=25165824
-55_autocrawl_loadprereq=6.0
-60_remotecrawlloader_idlesleep=4000
-60_remotecrawlloader_busysleep=800
-60_remotecrawlloader_memprereq=12582912
-60_remotecrawlloader_loadprereq=8.0
-60_remotecrawlloader_isPaused=false
-62_remotetriggeredcrawl_idlesleep=2000
-62_remotetriggeredcrawl_busysleep=200
-62_remotetriggeredcrawl_memprereq=12582912
-62_remotetriggeredcrawl_loadprereq=8.0
-62_remotetriggeredcrawl_isPaused=false
-70_surrogates_idlesleep=10000
-70_surrogates_busysleep=0
-70_surrogates_memprereq=12582912
-70_surrogates_loadprereq=8.0
-720_ccimport_idlesleep=100
-720_ccimport_busysleep=1000
-720_ccimport_memprereq=1048576
-720_ccimport_loadprereq=8.0
-730_ccfilter_idlesleep=100
-730_ccfilter_busysleep=1000
-730_ccfilter_memprereq=1048576
-730_ccfilter_loadprereq=8.0
-
-85_scheduler_idlesleep=60000
-85_scheduler_busysleep=60000
-85_scheduler_memprereq=1048576
-85_scheduler_loadprereq=4.0
-90_cleanup_idlesleep=300000
-90_cleanup_busysleep=300000
-90_cleanup_memprereq=0
-90_cleanup_loadprereq=16.0
-
-reindexSolr_idlesleep=1000
-reindexSolr_busysleep=1
-reindexSolr_memprereq=10485760
-reindexSolr_loadprereq=9.0
-
-# additional attributes:
-# performanceIO is a percent-value. a value of 10 means that 10% of the busysleep time
-# is used to flush the RAM cache, which is the major part of the IO in YaCy
-performanceProfile=defaults/yacy.init
-performanceSpeed=100
-performanceIO=10
-
-# cleanup-process:
-# properties for tasks that are performed during cleanup
-cleanup.deletionProcessedNews = true
-cleanup.deletionPublishedNews = true
-cleanup.failedSearchURLtimeout = 86400000
-
-
-# default memory settings for startup of yacy
-# is valid in unix/shell and windows environments but
-# not for first startup of YaCy
-
-# -Xmx and -Xms maximum/init Java heap size
-# if a high performance for large search indexes is wanted, then setting the values to equal numbers is recommended
-# if YaCy shall be nice in not-only-yacy environments, then the Xms value may be lower
-javastart_Xmx=Xmx600m
-javastart_Xms=Xms90m
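[Editor's note: for orientation, the idlesleep/busysleep/memprereq triple above implies a loop of roughly this shape — a minimal sketch under stated assumptions, with the YaCy-specific load check and job wiring omitted:]

    // Sketch: the idlesleep/busysleep/memprereq contract of a background loop.
    public class BusyLoopSketch implements Runnable {
        final long idleSleep, busySleep, memPrereq;
        final java.util.function.BooleanSupplier job; // returns true if it did any work

        BusyLoopSketch(long idle, long busy, long mem, java.util.function.BooleanSupplier job) {
            this.idleSleep = idle; this.busySleep = busy; this.memPrereq = mem; this.job = job;
        }

        public void run() {
            try {
                while (!Thread.currentThread().isInterrupted()) {
                    boolean didWork = false;
                    // run the job only if enough heap is free (the memprereq condition)
                    if (Runtime.getRuntime().freeMemory() >= memPrereq) didWork = job.getAsBoolean();
                    Thread.sleep(didWork ? busySleep : idleSleep);
                }
            } catch (InterruptedException e) { /* loop terminated */ }
        }
    }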
-
-# YaCy is able to use RAM copies of database tables. This needs a lot of RAM.
-# To switch on copying of file tables into RAM, there must be enough memory
-# The memory that is available at startup time is used to switch the feature on
-# The tableCachingLimit is the amount of free RAM at startup time to switch on the feature
-tableCachingLimit=419430400
-
-# some java versions may be limited to a specific array size
-# of 134217727 entries. To prevent that tables of that size are generated,
-# set this property to false
-# If you want to have better performance and switch ramcopy on, try also to
-# set this property to true
-# this value is automatically set to true, if more than two gigabyte is available
-exceed134217727=false
-
-# priority of the yacy-process
-# is valid in unix/shell and windows environments but
-# not for first startup of YaCy
-# UNIX: corresponds to the nice-level
-# WIN: -20=realtime;-15=high;-10=above;0=normal;10=below;20=low
-javastart_priority=10
-
-# performance properties for the word index cache
-# wordCacheMaxLow/High is the number of word indexes that shall be held in the
-# ram cache during indexing. If you want to increase indexing speed, increase this
-# value i.e. up to one million, but increase also the memory limit to a minimum of 2GB
-wordCacheMaxCount = 50000
-
-# Specifies if yacy can be used as a transparent http proxy.
-#
-# Please note that you also have to reconfigure your firewall
-# before you can use yacy as transparent proxy. On linux this
-# can be done like this:
-# iptables -t nat -A PREROUTING -p tcp -s 192.168.0.0/16 \
-#   --dport 80 -j DNAT --to 192.168.0.1:8090
-#
-# With the iptables filter listed above all http traffic that
-# comes from your private network (in this case 192.168.0.0)
-# and goes to any webserver listening on port 80 will be forwarded
-# by the firewall to yacy running on port 8090 (192.168.0.1:8090)
-isTransparentProxy=false
-
-# Specifies the timeout the proxy should use
-proxy.clientTimeout = 60000
-
-# Specifies if the proxy should send the via header according to RFC
-proxy.sendViaHeader=true
-
-# Specifies if the proxy should send the X-Forwarded-For header
-proxy.sendXForwardedForHeader=true
-
-# Enable cookie monitoring
-proxy.monitorCookies=false
-
-# msgForwarding: Specifies if yacy should forward received messages via
-# email to the configured email address
-msgForwardingEnabled=false
-msgForwardingCmd=/usr/sbin/sendmail
-msgForwardingTo=root@localhost
-
-#crawlPause: delay time after specific functions before crawling is resumed
-crawlPause.proxy=10
-crawlPause.localsearch=50
-crawlPause.remotesearch=10
-
-# Some configuration values for the crawler
-crawler.clientTimeout=30000
-
-# http crawler specific settings; size in bytes
-crawler.http.accept=text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
-crawler.http.acceptEncoding=gzip
-crawler.http.acceptLanguage=en-us,en;q=0.5
-crawler.http.acceptCharset=ISO-8859-1,utf-8;q=0.7,*;q=0.7
-crawler.http.maxFileSize=10485760
-crawler.http.FollowRedirects=true
-crawler.http.RecordRedirects=false
-
-# ftp crawler specific settings; size in bytes
-crawler.ftp.maxFileSize=10485760
-
-# smb crawler specific settings: maximum size
-crawler.smb.maxFileSize=100000000
-
-# file crawler specific settings: maximum size
-crawler.file.maxFileSize=100000000
-
-# maximum number of crawler threads
-crawler.MaxActiveThreads = 200
-
-# maximum number of same hosts in crawler threads
-crawler.MaxSameHostInQueue = 20
-
-# default latency is the start value of the average of remote server response time
-crawler.defaultAverageLatency = 500
-
-# the latency factor is a factor that is applied to the average remote server latency.
-# The result is the minimum remote server access delay time
-crawler.latencyFactor = 0.5
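[Editor's note: in other words, the minimum delay between two accesses to the same host is the measured average latency scaled by crawler.latencyFactor — a small worked sketch, illustrative rather than YaCy's exact code:]

    // Sketch: minimum host access delay derived from the two settings above.
    public class LatencySketch {
        public static void main(String[] args) {
            double latencyFactor = 0.5;      // crawler.latencyFactor
            long averageLatencyMs = 500;     // crawler.defaultAverageLatency (start value)
            long minDelayMs = (long) (latencyFactor * averageLatencyMs);
            System.out.println("minimum access delay: " + minDelayMs + " ms"); // 250 ms
        }
    }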
-
-# The onDemandLimit is the maximum number of crawl queues that are concurrently opened
-# at the same time. If the number of hosts exceeds this number, onDemand queues are
-# opened each time a queue is accessed, which creates high IO load. On the other
-# hand, having too many entries in onDemandLimit may exceed the maximum number of file
-# pointers. You can increase this number in /proc/sys/fs/file-max and adapt it to the number
-# defined here
-crawler.onDemandLimit = 1000
-
-# maximum size of indexing queue
-indexer.slots = 100
-
-# maximum size of stacker queue
-stacker.slots = 2000
-
-# search options: show advanced options on main search page
-search.options = true
-
-# search domains. If set to false then that search is not available
-search.text = true
-search.image = true
-search.audio = false
-search.video = false
-search.app = false
-
-# number of search results per page displayed by default
-search.items = 10
-
-# target for search results; this is the href target attribute inside every search result link
-# possible values:
-# "_blank" (new window), "_self" (same window), "_parent" (the parent frame of a frameset),
-# "_top" (top of all frames), "searchresult" (a default custom page name for search results)
-# a special pattern can be given for exceptions to the default target according to urls
-search.target = _self
-search.target.special = _self
-search.target.special.pattern =
-
-# search result lines may show additional information for each search hit
-# these information pieces may be switched on or off
-search.result.show.date = true
-search.result.show.size = false
-search.result.show.metadata = false
-search.result.show.parser = false
-search.result.show.citation = true
-search.result.show.pictures = false
-search.result.show.cache = true
-search.result.show.proxy = false
-search.result.show.hostbrowser = true
-search.result.show.vocabulary = false
-search.result.show.vocabulary.omit =
-search.result.show.snapshots = false
-
-
-# search navigators: comma-separated list of default values for search navigation.
-# can be temporarily different if the search string is given with different navigation values
-# assigning no value(s) means that no navigation is shown
-search.navigation=location,hosts,authors,namespace,topics,filetype,protocol,language
-#search.navigation=location,hosts,authors,namespace,topics,filetype,protocol,language,collections,date
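[Editor's note: this search.navigation block is what the patch is about — the Java side (ConfigSearchPage_p, Switchboard, QueryParams; not shown in this excerpt) makes the maximum number of lines per navigator configurable and lowers the default from 10000 to 100. A hedged sketch of the read side; the property key "search.navigation.maxcount" and the helper are assumptions for illustration, not taken from this diff:]

    // Hypothetical sketch: read a configurable navigator line limit, default 100.
    // The real key name lives in SwitchboardConstants in the full patch.
    public class NavigatorLimitSketch {
        static int navigatorMaxCount(java.util.Properties config) {
            try {
                return Integer.parseInt(config.getProperty("search.navigation.maxcount", "100"));
            } catch (NumberFormatException e) {
                return 100; // fall back to the new default
            }
        }
    }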
-
-# search result verification and snippet fetch caching rules
-# each search result can be verified by loading the link from the web
-# this can be enhanced using a cache. In some cases it may be appropriate
-# to not verify the link at all and not compute a snippet
-# the possible cases are:
-# nocache: no use of web cache, load all snippets online
-# iffresh: use the cache if the cache exists and is fresh, otherwise load online
-# ifexist: use the cache if the cache exists or load online
-# cacheonly: never go online, use all content from cache. If no cache entry exists,
-# consider content nevertheless as available and show result without snippet
-# false: no link verification and no snippet generation:
-# all search results are valid without verification
-search.verify = ifexist
-
-search.excludehosts=
-search.excludehosth=
-
-# in case that a link verification fails then the corresponding index reference can be
-# deleted to clean up the index. If this property is set then failed index verification in
-# the cases of nocache, iffresh and ifexist causes an index deletion
-search.verify.delete = true
-
-# remote search details
-remotesearch.maxcount = 10
-remotesearch.maxtime = 3000
-remotesearch.result.store=true
-# Maximum size allowed (in bytes) for a remote document result to be stored to local index. Defaults to -1, which means no limit.
-remotesearch.result.store.maxsize=-1
-remotesearch.maxload.rwi=8.0
-remotesearch.maxload.solr=4.0
-
-# specifies if yacy should set its own referer if no referer URL
-# was set by the client.
-useYacyReferer = false
-
-# specifies if the http post body should be transferred
-# using content-encoding gzip during index transfer
-# a) indexDistribution: which is done periodically if you have enabled
-#    Index Distribution via IndexControl_p.html
-# b) indexTransfer: which can be used to transfer the whole index of a peer
-#    this can be started via IndexTransfer_p.html
-# c) indexControl: which can be triggered manually via IndexControl_p.html to
-#    transfer a chosen subset of the peer index
-indexDistribution.gzipBody = true
-indexTransfer.gzipBody = true
-indexControl.gzipBody = true
-
-# defining timeouts for index- transfer/distribution/control
-indexControl.timeout = 60000
-indexDistribution.timeout = 60000
-indexTransfer.timeout = 120000
-
-# defining max. allowed amount of open files during index- transfer/distribution
-indexDistribution.maxOpenFiles = 800
-indexTransfer.maxOpenFiles = 800
-
-# sizes for index distribution
-indexDistribution.minChunkSize = 10
-indexDistribution.maxChunkSize = 1000
-indexDistribution.startChunkSize = 200
-indexDistribution.maxChunkFails = 1
-
-# limit the number of references per term & blob to the youngest of this value
-# a value of <= 0 disables this feature (no limit)
-# a value of e.g. 100000 can improve stability and reduce load while searching very popular words
-index.maxReferences = 0
-
-# Search sequence settings
-# collection:
-#  time = time to get a RWI out of RAM cache, assortments and WORDS files
-#  count = maximum number of RWI-entries that shall be collected
-#
-# join:
-#  time = time to perform the join between all collected RWIs
-#  count = maximum number of entries that shall be joined
-#
-# presort:
-#  time = time to do a sort of the joined URL-records
-#  count = maximum number of entries that shall be pre-sorted
-#
-# urlfetch:
-#  time = time to fetch the real URLs from the LURL database
-#  count = maximum number of urls that shall be fetched
-#
-# postsort:
-#  time = time for final sort of URLs
-#  count = maximum number of URLs that shall be retrieved during sort
-#
-# filter:
-#  time = time to filter out unwanted urls (like redundant urls)
-#  count = maximum number of urls that shall be filtered
-#
-# snippetfetch:
-#  time = time to fetch snippets for selected URLs
-#  count = maximum number of snippets to be fetched
-#
-# all values are percent
-# time-percent is the percent of total search time
-# count-percent is the percent of total wanted urls in result
-# we distinguish local and remote search times
-searchProcessLocalTime_c = 44
-searchProcessLocalCount_c = 10000000
-searchProcessLocalTime_j = 8
-searchProcessLocalCount_j = 1000000
-searchProcessLocalTime_r = 8
-searchProcessLocalCount_r = 100000
-searchProcessLocalTime_u = 20
-searchProcessLocalCount_u = 10000
-searchProcessLocalTime_o = 10
-searchProcessLocalCount_o = 100
-searchProcessLocalTime_f = 5
-searchProcessLocalCount_f = 100
-searchProcessLocalTime_s = 5
-searchProcessLocalCount_s = 30
-
-searchProcessRemoteTime_c = 44
-searchProcessRemoteCount_c = 1000000
-searchProcessRemoteTime_j = 8
-searchProcessRemoteCount_j = 1000000
-searchProcessRemoteTime_r = 8
-searchProcessRemoteCount_r = 1000
-searchProcessRemoteTime_u = 20
-searchProcessRemoteCount_u = 1000
-searchProcessRemoteTime_o = 10
-searchProcessRemoteCount_o = 1000
-searchProcessRemoteTime_f = 5
-searchProcessRemoteCount_f = 100
-searchProcessRemoteTime_s = 5
-searchProcessRemoteCount_s = 10
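[Editor's note: since the _Time values above are percentages of the total search time, the per-stage budget is a simple proportion; a minimal illustrative sketch:]

    // Sketch: turn the percent-based stage settings into absolute time budgets.
    public class StageBudgetSketch {
        public static void main(String[] args) {
            long totalSearchTimeMs = 3000;   // e.g. remotesearch.maxtime
            int timePercent_c = 44;          // searchProcessLocalTime_c
            long collectionBudgetMs = totalSearchTimeMs * timePercent_c / 100;
            System.out.println("collection stage budget: " + collectionBudgetMs + " ms"); // 1320 ms
        }
    }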
-
-# timeouts for snippet fetching in ms
-# timeout_text is for text-snippets, timeout_media for media, e.g. images
-timeout_text = 10000
-timeout_media = 15000
-
-# a list of domain name patterns that should not be cached by the httpc dns cache
-httpc.nameCacheNoCachingPatterns = .*.ath.cx,.*.blogdns.*,.*.boldlygoingnowhere.org,.*.dnsalias.*,.*.dnsdojo.*,.*.dvrdns.org,.*.dyn-o-saur.com,.*.dynalias.*,.*.dyndns.*,.*.ftpaccess.cc,.*.game-host.org,.*.game-server.cc,.*.getmyip.com,.*.gotdns.*,.*.ham-radio-op.net,.*.hobby-site.com,.*.homedns.org,.*.homeftp.*,.*.homeip.net,.*.homelinux.*,.*.homeunix.*,.*.is-a-chef.*,.*.is-a-geek.*,.*.kicks-ass.*,.*.merseine.nu,.*.mine.nu,.*.myphotos.cc,.*.podzone.*,.*.scrapping.cc,.*.selfip.*,.*.servebbs.*,.*.serveftp.*,.*.servegame.org,.*.shacknet.nu
-
-#externalRedirectors
-#squid Redirector compatible
-externalRedirector=
-
-# the Yacy Version this config was created with
-Version=
-# old version value (keep to allow conversion of .conf, until next main release > 1.83)
-svnRevision=0
-
-currentSkin=pdbootstrap
-
-# flag to show if pages shall be usable for non-admin users
-# this can be applied to the Surftips.html and yacysearch.html page
-publicSurftips = true
-publicSearchpage = true
-
-# flag to show if the top navigation bar shall be shown to all users
-# if this is disabled, then the user must navigate manually from the search page
-# to /Status.html to get the main menu bar back
-publicTopmenu = true
-
-# Wiki access rights
-# the built-in wiki system allows by default only that the administrator is allowed to make changes
-# this can be changed. There are three options:
-# admin - only the admin has write right
-# all - everybody has write right
-# user - the admin and every user registered in the user db has write right
-WikiAccess = admin
-
-# Search Profiles
-# we will support different search profiles
-# If this profile setting is empty, a hard-coded profile is used to initialise the values
-search.ranking.rwi.profile =
-# The boost fields contain all fields which shall be searched together with a boost. non-mentioned fields are not searched.
-# Boost queries are added to all queries; functions evaluate a value which is either added or multiplied with the ranking.
-# The field boostfunctionmode can be either 'add' or 'multiply' to describe the mode.
-# All boost methods > 0 must have names to be able to select this name with a query, with the syntax /name
-# The boostfields setting is of special importance as these are the fields used to query for search terms
-search.ranking.solr.collection.boostname.tmpa.0=Default Profile
-search.ranking.solr.collection.boostfields.tmpa.0=url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,host_s^6.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^3.0,keywords^2.0,description_txt^1.5,author^1.0
-search.ranking.solr.collection.filterquery.tmpa.0=
-search.ranking.solr.collection.boostquery.tmpa.0=crawldepth_i:0^0.8\ncrawldepth_i:1^0.4
-search.ranking.solr.collection.boostfunction.tmpb.0=
-search.ranking.solr.collection.boostname.tmpa.1=Date Profile: sort by date in descending order for a '/date' usage
-search.ranking.solr.collection.boostfields.tmpa.1=url_paths_sxt^0.1,title^0.1,text_t^0.1
-search.ranking.solr.collection.filterquery.tmpa.1=
-search.ranking.solr.collection.boostquery.tmpa.1=
-search.ranking.solr.collection.boostfunction.tmpb.1=recip(ms(NOW,last_modified),3.16e-11,1,1)
-search.ranking.solr.collection.boostname.tmpa.2=Intranet Profile: when a search is done on a single domain only, i.e. if a site:-operator is used
-search.ranking.solr.collection.boostfields.tmpa.2=url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^3.0,h3_txt^2.0,keywords^2.0,description_txt^1.5,author^1.0
-search.ranking.solr.collection.filterquery.tmpa.2=
-search.ranking.solr.collection.boostquery.tmpa.2=fuzzy_signature_unique_b:true^10.0
-search.ranking.solr.collection.boostfunction.tmpb.2=
-search.ranking.solr.collection.boostname.tmpa.3=_unused3
-search.ranking.solr.collection.boostfields.tmpa.3=text_t^1.0
-search.ranking.solr.collection.filterquery.tmpa.3=
-search.ranking.solr.collection.boostquery.tmpa.3=crawldepth_i:0^0.8\ncrawldepth_i:1^0.4
-search.ranking.solr.collection.boostfunction.tmpb.3=
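[Editor's note: the Date Profile's boostfunction above uses Solr's recip(x,m,a,b) = a/(m*x+b) with x = document age in milliseconds; since 3.16e-11 is roughly 1/(one year in ms), a one-year-old document scores about half of a brand-new one. A quick check, not part of the patch:]

    // Sketch: evaluate recip(ms(NOW,last_modified), 3.16e-11, 1, 1) = 1 / (3.16e-11 * ageMs + 1).
    public class RecipBoostSketch {
        public static void main(String[] args) {
            double m = 3.16e-11;
            double oneYearMs = 365.25 * 24 * 60 * 60 * 1000; // ~3.156e10
            System.out.println(1.0 / (m * 0 + 1));           // fresh doc: 1.0
            System.out.println(1.0 / (m * oneYearMs + 1));   // ~0.5 after one year
        }
    }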
-
-# the following values are used to identify duplicate content
-search.ranking.solr.doubledetection.minlength=3
-search.ranking.solr.doubledetection.quantrate=0.5f
-
-# Another attribute for double content is a 'greedy' ignoring of an http url if it is also present as https, and vice versa.
-# The same may be true for documents with a leading 'www.' subdomain and without it.
-# The following attributes will cause that https is preferred over http and with-www is preferred over without-www
-search.ranking.uniqueheuristic.preferhttps = false
-search.ranking.uniqueheuristic.preferwwwprefix = true
-
-#optional external thumbnail program.
-#the program must accept the invocation PROGRAM http://url /path/to/filename
-thumbnailProgram =
-
-# settings for the peer's local robots.txt
-# the following restrictions are possible (comma-separated):
-# - all : entire domain is disallowed
-# - blog : the blog-pages
-# - bookmarks : the bookmark-page
-# - dirs : all directories in htroot (standard setting, as there is no usable information in them)
-# - fileshare : all files in the peer's file share (DATA/HTDOCS/share)
-# - homepage : all files on the peer's home page (DATA/HTDOCS/www)
-# - locked : all servlets ending on '_p.*' (standard setting, as robots would need a password to access them anyways)
-# - news : the news-page
-# - network : the network-pages
-# - status : peer's status page
-# - surftips : the surftips-page
-# - wiki : the wiki-page
-httpd.robots.txt = locked,dirs,bookmarks,network,news,status,profile
-
-# class to use for parsing wikicode
-wikiParser.class = de.anomic.data.wikiCode
-
-# settings for automatic deletion of old entries in passive and potential seed-db
-# time means max time (in days) a peer may not have been seen before it is deleted
-routing.deleteOldSeeds.permission = true
-routing.deleteOldSeeds.time = 30
-
-# options to remember the default search engines when using the search compare features
-compare_yacy.left = YaCy
-compare_yacy.right = startpage.com
-
-# minimum free disk space for crawling (MiB)
-disk.free = 3000
-# minimum for DHT
-disk.free.hardlimit = 1000
-
-# ResourceObserver settings
-# We apply the naming of control circuit states to resource observer limit values (steady-state value, over/undershot)
-# under/overshot states in the system are supposed to be regulated to match the steady-state value
-
-# autoregulation of resource states
-# ATTENTION: be aware that using the autoregulate-option causes that the search index data is DELETED as soon as threshold-values are reached!
-# the autoregulate function starts working if resources reach over/undershot values and the auto-regulation tries to regulate to the steady-state value
-resource.disk.free.autoregulate=true
-resource.disk.used.autoregulate=false
-
-# the target steady-state of minimum disk space left (MB)
-resource.disk.free.min.steadystate=4096
-
-# the undershot below the steady-state of minimum disk free as absolute size (MB)
-resource.disk.free.min.undershot=2048
-
-# the target steady-state of maximum disk space for YaCy (MB)
-resource.disk.used.max.steadystate=2097152
-
-# the overshot above the steady-state of disk space for YaCy (absolute) (MB)
-resource.disk.used.max.overshot=4194304
-
-# minimum memory to accept dht-in (MiB)
-memory.acceptDHTabove = 50
-memory.disabledDHT = false
-
-# whether to use the standard memory strategy - or try the generation memory strategy
-memory.standardStrategy = true
-
-# content integration settings
-content.phpbb3.urlstub = http://<host>/
-content.phpbb3.dbtype = mysql
-content.phpbb3.dbhost = localhost
-content.phpbb3.dbport = 3306
-content.phpbb3.dbname = forum
-content.phpbb3.tableprefix = phpbb_
-content.phpbb3.dbuser = notroot
-content.phpbb3.dbpw = joshua
-content.phpbb3.ppf = 1000
-content.phpbb3.dumpfile =
-
-# search engine teaser: an about box in search results
-# this is only shown, if the about.body is filled
-about.headline=Please support YaCy!
-about.body=If you run a YaCy server, feel free to replace our donation plea with your own support message; use the Portal Configuration servlet.
-
-donation.iframesource=http://yacy.net/include/donate.html
-donation.iframetarget=env/donate.html
-
-# search heuristics
-heuristic.site = false
-heuristic.searchresults = false
-heuristic.searchresults.crawlglobal = false
-heuristic.opensearch = false
-
-# colours for generic design
-# white
-color_background = #FFFFFF
-
-# dark blue/grey
-color_text = #18294A
-
-# success/green
-color_legend = #5cb85c
-
-# brand/blue
-color_tableheader = #84B3DE
-
-# dark/light grey (for tables)
-color_tableitem = #dddddd
-color_tableitem2 = #eeeeee
-
-# light red
-color_tablebottom = #F2DEDE
-
-color_borderline = #888888
-color_signbad = #990000
-color_signgood = #009900
-color_signother = #000099
-
-# dark blue
-color_searchheadline = #2145ca
-
-# green / success/3*2
-color_searchurl = #1c65ba
-color_searchurlhover = #1c65ba
-
-
-# federated index storage and federated search functionality
-# federated search means that other search engines may be used together with the built-in indexing.
-# each federated search may be able to be used as remote indexing service and/or as remote search service.
-# a typical use case for a federated search is a concurrent search from opensearch sources.
-# a typical use case for a remote indexing service is a remote solr index. YaCy supports remote solr indexes.
-
-# solr indexes can be filled if enabled is set to true
-# the remote index scheme is the same as produced by the SolrCell; see http://wiki.apache.org/solr/ExtractingRequestHandler
-# because this default scheme is used, the default example scheme can be used as the solr configuration
-# to use this, do the following:
-# - set federated.service.solr.indexing.enabled = true
-# - download solr from http://www.apache.org/dyn/closer.cgi/lucene/solr/
-# - extract the solr (3.1) package, 'cd example' and start solr with 'java -jar start.jar'
-# - start yacy and then start a crawler. The crawler will fill both the YaCy and solr indexes.
-# - to check what's in solr after indexing, open http://localhost:8983/solr/admin/
-federated.service.solr.indexing.enabled = false
-federated.service.solr.indexing.url = http://127.0.0.1:8983/solr
-federated.service.solr.indexing.sharding = MODULO_HOST_MD5
-# with the lazy attribute, fields containing "" or 0 are not added and not written
-federated.service.solr.indexing.lazy = true
-federated.service.solr.indexing.timeout = 60000
-federated.service.solr.indexing.writeEnabled = true
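[Editor's note: MODULO_HOST_MD5 above assigns a document to a Solr shard by hashing its host name; a minimal sketch of that scheme, illustrative and using only the JDK:]

    import java.math.BigInteger;
    import java.security.MessageDigest;

    // Sketch: pick a shard index from the MD5 of the host name,
    // as the MODULO_HOST_MD5 sharding setting suggests.
    public class ShardingSketch {
        static int shard(String host, int shardCount) throws Exception {
            byte[] md5 = MessageDigest.getInstance("MD5").digest(host.getBytes("UTF-8"));
            return new BigInteger(1, md5).mod(BigInteger.valueOf(shardCount)).intValue();
        }

        public static void main(String[] args) throws Exception {
            System.out.println(shard("yacy.net", 4)); // stable shard in [0,3]
        }
    }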
-core.service.fulltext = true -core.service.rwi.tmp = true -core.service.citation.tmp = true -core.service.webgraph.tmp = false - -# Augmentation settings -parserAugmentation = false -parserAugmentation.RDFa = false - -# Content control settings -contentcontrol.enabled = false -contentcontrol.bookmarklist = contentcontrol -contentcontrol.mandatoryfilterlist = yacy -contentcontrol.smwimport.enabled = false -contentcontrol.smwimport.baseurl = -contentcontrol.smwimport.purgelistoninit = true -contentcontrol.smwimport.targetlist = contentcontrol -contentcontrol.smwimport.defaultcategory = yacy - -# host browser settings -browser.autoload = false -browser.load4everyone = false - -# greedy learning: fast information acquisition heuristic for new peers -# to make greedy learning work, it must be enabled in the network definition -# the user may switch it off at any time, but if the automatic learning limit is reached -# then the active flag is set to false automatically and this will switch to that state -# automatically by the cleanup process each time if the user switches it on again. -# While the switch in on, it will cause that the user-submitted search will be done along -# with some heuristics like: loading linked documents and adding a twitter search. -# When the learning mode is finished, the user may switch on individual heuristics by himself. -greedylearning.active = true - -# postprocessing steering -postprocessing.maximum_load = 2.5 -postprocessing.minimum_ram = 536870912 -postprocessing.partialUpdate = true - -# Custom user agents for 'allip' networks: -# This user agent is only available if the network is set to 'allip' (which is a non-limited domain 'network' -# without p2p options). Changing this will NOT change the default YaCy user agent, it will only provide an -# agent which is available at crawl start within 'allip'. The userAgent.name is the identifier for the -# robots.txt file which YaCy always obeys for the given name or a wildcard for robot types. -# If any part of this custom user agent name or string includes the phrase 'yacy', it will be IGNORED -# to prevent fraud, DoS or bad behavior in the name of YaCy. -# To use this user agent option, you must define completely different names and strings -# and remove the given example here, which will be ignored by default. -crawler.userAgent.name = yacybot -crawler.userAgent.string = yacybot ($$SYSTEM$$) http://yacy.net/bot.html -crawler.userAgent.minimumdelta = 500 -crawler.userAgent.clienttimeout = 10000 - -# experiments with timeout requests -timeoutrequests = true - -# interface decorations -decoration.audio = false -decoration.grafics.linkstructure = true -decoration.hostanalysis = false -decoration.simpleheadernavbar = navbar-default - +### +### YaCy Init File +### +# These properties will be loaded upon installation. +# They are used only once for set-up. +# If you make changes to this file and want these to make any effect, +# you must delete the yacy.conf file in DATA/SETTINGS + +# ---------------------------------------------------------------------------- +# port number where the server should bind to +port = 8090 + +# optinal ssl port (https port) the server should bind to +port.ssl = 8443 + +# prefix for new default peer names +peernameprefix=_anon + +# use UPnP [true/false] +upnp.enabled = true +# remote host on UPnP device (for more than one connection) +upnp.remoteHost = + +#sometimes you may want yacy to bind to another port, than the one reachable from outside. 
+#then set bindPort to the port yacy should bind on, and port to the port, visible from outside +#to run yacy on port 8090, reachable from port 80, set bindPort=8090, port=80 and use +#iptables -t nat -A PREROUTING -p tcp -s 192.168.24.0/16 --dport 80 -j DNAT --to 192.168.24.1:8090 +#(of course you need to customize the ips) +bindPort = + +# SSL support: +# +# For a German manual see http://yacy-websuche.de/wiki/index.php/De:Interface%C3%9CberHTTPS +# +# English speaking user read below: +# +# With this you can access your peer using https://localhost:8443 +# +# There are two possibilities to specify which certificate should +# be used by YaCy. +# +# 1) Create a new certificate: +# +# *) For testing purposes, you can create a keystore with a self-signed certificate, +# using the following command: +# C:\> keytool -keystore mySrvKeystore -genkey -keyalg RSA -alias mycert +# +# *) Then configure the keyStoreXXXX properties accordingly, e.g. +# keyStore = c:/yacy/DATA/SETTINGS/mySrvKeystore +# keyStorePassword = mypwd +# +# 2) Import an existing certificate: +# +# Alternatively you can import an existing certificate in pkcs12 format into +# the keystore. +# +# This can be done by setting the pkcs12XXX properties accordingly, e.g. +# pkcs12ImportFile = c:/temp/keystore.pkcs12 +# pkcs12ImportPwd = test +# +# If the property keyStore is not specified, then a new keystore file +# DATA/SETTINGS/myPeerKeystore will be created. + +keyStore=defaults/freeworldKeystore +keyStorePassword=freeworld +pkcs12ImportFile = +pkcs12ImportPwd = + +# the keyStore is only used, if server.https is set to true +# if server.https=true, then the YaCy web interface is available at +# https://localhost:/ and at http://localhost:/ +server.https=false + +# property that collects the names of all servlets that had been used so far +# that is used to track if the user has already done some configuration steps +# if the used missed configuration steps that should be done, then a help system +# is possible which leads the used based on the list of servlets that had been used +# the list distinguishes called and submitted servlets +server.servlets.called = +server.servlets.submitted = + +# server tracking: maximum time a track entry is hold in the internal cache +# value is in milliseconds, default is one hour +server.maxTrackingTime = 3600000 + +# maximum number of tracks per host +server.maxTrackingCount = 1000 + +# maximum number of hosts that are tracked +server.maxTrackingHostCount = 100 + +# maximum file sizes: since some users experience problems with too large files +# the file size of database files can be limited. Larger files can be used to get a +# better IO performance and to use less RAM; however, if the size must be limited +# because of limitations of the file system, the maximum size can be set here +filesize.max.win = 2147483647 +filesize.max.other = 8589934591 + +# Network Definition +# There can be separate YaCy networks, and managed sub-groups of the general network. +# The essentials of the network definition are attached in separate property files. +# The property here can also be a url where the definition can be loaded. +# In case of privately managed networks, this configuration must be changed BEFORE it is released +# to the members of the separated network peers. 
+# Network Definition
+# There can be separate YaCy networks, and managed sub-groups of the general network.
+# The essentials of the network definition are attached in separate property files.
+# The property here can also be a url where the definition can be loaded.
+# In case of privately managed networks, this configuration must be changed BEFORE it is released
+# to the members of the separated network peers.
+network.unit.definition = defaults/yacy.network.freeworld.unit
+#network.unit.definition = defaults/yacy.network.webportal.unit
+#network.unit.definition = defaults/yacy.network.intranet.unit
+
+# distinguish intranet/internet IPs:
+# if this setting is set to true, then only URL-Hashes with 'intranet'-Flag are created, even if the
+# url is in the internet. This can be done to enhance the crawling speed dramatically since a DNS-lookup
+# to check if a host is in the internet or an intranet can be omitted.
+# This option is only valid if the network.unit.domain property is set to 'any'
+network.unit.domain.nocheck = false
+
+# in addition to non-dht networks a client may have its own agent name
+# this option is only used if the value is non-empty and network.unit.dht = false
+# that means it is not usable in YaCy p2p-configurations, only in private portal configurations
+network.unit.tenant.agent =
+
+# Update process properties
+# The update server location is given in the network.unit.definition,
+# but the settings for update processing and cycles are individual.
+# the update process can be either 'manual' (no automatic lookup for new versions),
+# 'guided' (automatic lookup, but user is asked before update is performed),
+# or 'auto' (whenever an update is available, the update is loaded and installed)
+update.process = manual
+# the cycle value applies only if the process is automatic or guided. The value means hours.
+# There is currently a fixed minimum number of hours of 24 hours for updates
+update.cycle = 168
+# a version number blacklist can restrict automatic or guided updates to a specific
+# range of version numbers. The restriction is done with a blacklist (standard regexpr)
+# It is recommended to set this list to low developer version numbers
+update.blacklist =
+# an update can also be restricted with a concept property, which can decide if an
+# update is only valid if it either is a main release or any release including new development releases
+# Valid keywords are 'main' and 'any'
+update.concept = any
+# the following values are set automatically:
+# the lookup time when the last time a lookup to the network update server(s) was done
+update.time.lookup = 0
+# the download time when the last time a release was downloaded
+update.time.download = 0
+# the deploy time when the last update was done; milliseconds since epoch
+update.time.deploy = 0
+# delete old downloaded files after this amount of days to free disk space
+# the latest release is always kept
+update.deleteOld = 30
+# only install signed files
+update.onlySignedFiles = 1
+
+# restart-option
+# a peer can be re-started periodically
+# restart.process can be either 'off' (no automatic restart) or 'time' (time-rule-based, see below)
+restart.process = off
+# the restart.cycle is the number of hours that must pass before a restart is done
+restart.cycle = 20
+# the restart.hour is a pattern that must match with the hour string (two-digit, 24h)
+# when the restart should be performed
+restart.hour = 03
+# the following values are set automatically
+restart.time = 0
+
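+# Example (hypothetical values): restart every night at around 03:00 -
+#restart.process = time
+#restart.hour = 03
+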
+# clusters within a network:
+# every network can have an unlimited number of clusters. Clusters may also be completely
+# sealed and have no connection to other peers. When a cluster does not use the
+# p2p protocol and the bootstrapping mechanism to contact other peers, we call them
+# Robinson peers. They can appear in different 'visibilities':
+# - privatepeer: no connection and no data exchange to any other peer
+# - privatecluster: connections only to self-defined addresses (other peers in same mode)
+# - publiccluster: like privatecluster, but visible and searchable by public p2p nodes
+# - publicpeer: a single peer without cluster connection, but visible for p2p nodes
+# all public robinson peers should use a peer tag string to be searchable if in the
+# search request these tags appear
+cluster.mode=publicpeer
+cluster.peers.yacydomain=localpeer.yacy
+cluster.peers.ipport=localhost:8090
+
+# bootstrapLoadTimeout
+# this is the time-out for loading of the seedlist files during bootstrapping
+# If the time-out is too short, there is the danger that the peer stays in virgin mode
+bootstrapLoadTimeout = 20000
+
+# time-out of client control socket in milliseconds
+# since this applies only to the client-proxy connection,
+# it can be rather short
+# milliseconds
+clientTimeout = 10000
+
+# maximum number of httpd sessions
+# a client may open several connections at once, and the httpdMaxBusySessions value sets
+# a limit on the number of concurrent connections
+httpdMaxBusySessions = 200
+
+# default root path for the file server
+# may be overridden by the htdocs parameter
+# users shall be encouraged to use the htdocs path for individual content,
+# not this path defined here
+htRootPath = htroot
+
+# the htroot path
+# root path for the httpd file server
+htDefaultPath=htroot
+
+# individual htroot folder
+# every user may publish her/his own web pages
+# these pages shall be placed in the path defined here
+# the htdocs path shares its content with the htroot path
+htDocsPath = DATA/HTDOCS
+
+# the default files (typically index.html), if no file name is given
+# The complete path to this file is created by combination with the rootPath
+# you can set a list of defaults, separated by comma
+# the first one is preferred
+defaultFiles = index.html,index.htm,default.html,search.html,console.html,control.html,welcome.html,wiki.html,forum.html,blog.html,email.html,content.html,monitor.html,share.html,dir.html,readme.txt
+
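+# Example (hypothetical): prefer a custom portal page and fall back to index.html -
+#defaultFiles = portal.html,index.html
+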
+# locale-options: YaCy supports localization.
+# Web pages for special languages are located in the htLocalePath
+# The htLocaleLang defines a list of language options as <lang>/<description>
+# the <lang> must exist as sub-path to htLocalePath
+# the htLocaleSelection selects from the given locales, value=one-of-<lang>
+locale.source=locales
+locale.translated_html=DATA/LOCALE/htroot
+locale.language=default
+
+# virtual host for httpdFileServlet access
+# for example http://<fileHost>/ shall access the file servlet and
+# return the defaultFile at rootPath
+# either way, http://<fileHost>/ denotes the same as http://localhost:<port>/
+# for the preconfigured value 'localpeer', the URL is:
+# http://localpeer/
+fileHost = localpeer
+
+# specify the path to the MIME matching file table
+mimeTable = defaults/httpd.mime
+
+# specify the path to the sessionid name file
+sessionidNamesFile = defaults/sessionid.names
+
+# a path to the file cache, used for the internal proxy and as crawl buffer
+# This will be used if the server is addressed as a proxy
+proxyCache = DATA/HTCACHE
+
+# the maximum disc cache size for files in Cache in megabytes
+# default: 4 Gigabyte
+proxyCacheSize = 4096
+
+# you can use the proxy with fresh/stale rules or in an always-fresh mode
+proxyAlwaysFresh = false
+
+# a path to the surrogate input directory
+surrogates.in = DATA/SURROGATES/in
+
+# a path to the surrogate output directory
+surrogates.out = DATA/SURROGATES/out
+
+# a path to the dictionaries directory
+# this directory also contains subdirectories for input sources, the did-you-mean function and others
+dictionaries = DATA/DICTIONARIES
+
+# a path to the classification directory
+# each subdirectory is the name of a context (which becomes a navigator) with '.txt' files
+# containing texts to teach a bayesian filter. One of the files must be named 'negative.txt'.
+# The text files can be created with the Export functionality using the option "Only Text".
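+# A hypothetical context 'recipes' would then look like this (any file name
+# besides the mandatory negative.txt may be used):
+#   DATA/CLASSIFICATION/recipes/negative.txt
+#   DATA/CLASSIFICATION/recipes/recipes.txt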
+classification = DATA/CLASSIFICATION
+
+# storage place for new releases
+releases = DATA/RELEASE
+
+# the following mime-types are a blacklist for indexing:
+# parser.mime.deny: specifies mime-types that shall not be indexed
+parser.mime.deny=
+parser.extensions.deny=
+parser.enableAudioTags=false
+
+# experimental single-page parser for pdf files: split one pdf into individual pages;
+# the key is the property name in the post arguments that gets a page number assigned,
+# page numbers start with 1
+parser.pdf.individualpages=false
+parser.pdf.individualpages.key=page
+
+# Promotion Strings
+# These strings appear in the Web Mask of the YaCy search client
+# Set these Strings to customize your peer and give any message to
+# other peer users
+promoteSearchPageGreeting = Web Search by the People, for the People
+# if the following property is set to true, the network name is used as greeting
+promoteSearchPageGreeting.useNetworkName = false
+# the following attributes can be used to define a custom image, alternative text and home page on the search page
+promoteSearchPageGreeting.homepage = http://yacy.net
+promoteSearchPageGreeting.imageAlt = YaCy project web site
+promoteSearchPageGreeting.largeImage = /env/grafics/YaCyLogo_120ppi.png
+promoteSearchPageGreeting.smallImage = /env/grafics/YaCyLogo_60ppi.png
+
+# the path to the public reverse word index for text files (web pages)
+# the primary path is relative to the data root, the secondary path is an absolute path
+# when the secondary path should be equal to the primary, it must be declared empty
+indexPrimaryPath=DATA/INDEX
+
+# the path to index archive dumps
+indexArchivePath=DATA/ARCHIVE
+
+# the path to the LISTS files. Most lists are used to filter web content
+listsPath=DATA/LISTS
+
+# path to additional databases, like messages, blog data and bookmarks
+workPath=DATA/WORK
+
+# the path to the SKINS files.
+skinPath=DATA/SKINS
+
+# the yellow-list; URL elements
+# (the core of an URL; like 'yahoo' in 'de.yahoo.com')
+# appearing in this list will not get a manipulated user agent string
+proxyYellowList=yacy.yellow
+
+# the black-list; URLs appearing in this list will not be loaded;
+# instead always a 404 is returned
+# all these files will be placed in the listsPath
+BlackLists.Shared=url.default.black
+BlackLists.DefaultList=url.default.black
+
+# these are not needed by default; they just keep the values from being deleted ...
+proxy.BlackLists=url.default.black
+crawler.BlackLists=url.default.black
+dht.BlackLists=url.default.black
+search.BlackLists=url.default.black
+surftips.BlackLists=url.default.black
+news.BlackLists=url.default.black
+
+# the blue-list;
+# no search result is locally presented that has any word of the bluelist
+# in the search words, the URL or the URL's description
+plasmaBlueList=yacy.blue
+
+# this proxy may in turn again access another proxy
+# if you wish to do that, specify it here
+# if you want to switch on the proxy use, set remoteProxyUse=true
+# remoteProxyNoProxy is a no-proxy pattern list for the remote proxy
+remoteProxyUse=false
+remoteProxyUse4SSL=true
+
+remoteProxyHost=192.168.2.2
+remoteProxyPort=4239
+remoteProxyUser=
+remoteProxyPwd=
+
+remoteProxyNoProxy=10\..*,127\..*,172\.(1[6-9]|2[0-9]|3[0-1])\..*,169\.254\..*,192\.168\..*,localhost,0:0:0:0:0:0:0:1
+
+# the proxy may filter the content of transferred web pages
+# the bluelist removes specific keywords from web pages
+proxyBlueList=yacy.blue
+
+# security settings
+# we provide proxy and server security through a 2-stage security gate:
+# 1st stage: firewall-like access control through an IP filter for clients
+# 2nd stage: password settings for proxy, server and server administrators
+# by default, these settings are weak to simplify set-up and testing
+# every user/administrator shall be encouraged to change these settings
+# you can also change them online at run-time on
+# http://localhost:8090/
+
+# proxyClient: client IPs that may connect to the proxy for proxy service
+# if several IPs are allowed then they must be separated by a ','
+# regular expressions may be used
+#proxyClient=192.168.0.4
+proxyClient=localhost,127\.0\.0\.1,192\.168\..*,10\..*,0:0:0:0:0:0:0:1.*
+
+# YaCyHop: allow public usage of proxy for yacy-protocol
+# this enables usage of the internal http proxy for everyone,
+# if the file path starts with /yacy/
+# This is used to enable anonymization of yacy protocol requests
+# Instead of asking a remote peer directly, a peer in between is asked
+# to prevent that the asked peer knows which peer asks.
+YaCyHop=true
+
+# serverClient: client IPs that may connect to the web server,
+# thus are allowed to use the search service
+# if you set this to another value, search requests from others
+# are blocked, but you will also be blocked from using others'
+# search services.
+serverClient=*
+
+# use_proxyAccounts: set to true to restrict proxy-access to some identified users.
+# use User_p.html to create some users.
+use_proxyAccounts=true
+
+# adminAccount: a user:password - pair for administration of
+# settings through the web interface
+# should be set to a secret. By default it is without a password
+# but you are encouraged to set it to another value on the page
+# http://localhost:8090/ConfigAccounts_p.html
+#adminAccount=admin:mysecretpassword
+adminAccount=
+adminAccountBase64MD5=
+adminAccountUserName=admin
+
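+# Example (hypothetical invocation; assuming the bundled script mentioned under
+# adminRealm below accepts the new password as argument):
+#   ./bin/passwd.sh mysecretpassword
+# afterwards adminAccountBase64MD5 holds a hash instead of a clear-text password
+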
+# special access handling for users from localhost:
+# access from localhost may be granted with administration authority
+# if this flag is set. It is set to true by default to make usage of YaCy easy
+# if you use YaCy on a headless server, you should set this to false
+# or configure this on http://localhost:8090/ConfigAccounts_p.html
+# during the first 10 minutes of operation of YaCy;
+# if the admin account password is still empty after 10 minutes a random
+# password is generated and access is then ONLY possible from localhost, which will cause
+# inaccessibility for installations on headless servers.
+adminAccountForLocalhost=true
+
+# adminAccountAllPages: if set to false, then all pages without the extension "_p" are
+# accessible without authorization. Some servlets may individually decide to use or request
+# administration rights. If adminAccountAllPages is set to true, then administration
+# rights are needed to access all pages without any exception. Setting adminAccountAllPages
+# to true therefore closes the YaCy web pages for everyone.
+adminAccountAllPages=false
+
+# adminRealm: an internal name (like a group name) for the login setting of the admin frontend
+# ATTENTION: changing this name will invalidate all current password hashes
+# - With DIGEST authentication mode, this realm name is part of the generated password hashes
+# (RFC 2617 standard and recommendation). If you want to share password configuration
+# with additional machines, they have to belong to the same realm
+# - authentication defaults to BASIC
+# - and can be configured in defaults/web.xml (<auth-method> tag)
+#adminRealm=YaCy-AdminUI
+adminRealm=The YaCy access is limited to administrators. If you don't know the password, you can change it using /bin/passwd.sh
+
+# if you are running a principal peer, you must update the following variables
+# The upload method that should be used to upload the seed-list file to
+# a publicly accessible webserver where it can be loaded by other peers.
+#
+# You can set the seedUploadMethod-Property to
+# - None
+# - Ftp
+# - File
+# - Scp (only if you have installed the optional addon)
+#
+seedUploadMethod=none
+
+# This is the most common method to upload the seed-list
+#
+# This is an ftp account with all relevant information.
+# The update is only made if there had been changes in between.
+seedFTPServer=
+seedFTPAccount=
+seedFTPPassword=
+seedFTPPath=
+
+# alternatively to an FTP account, a peer can also become a principal peer
+# if the seed-list can be generated as a file and that file is also accessible from
+# the internet. In this case, omit any ftp settings and set this path here.
+# if this path stays empty, an ftp account is considered
+# however, you must always set a seedURL because it is used to check if the
+# file is actually accessible from the internet
+seedFilePath=
+
+# Settings needed to upload the seed-list file via scp
+#
+# Please note that this upload method can only be used if you have installed
+# this optional upload method.
+seedScpServer=
+seedScpServerPort=
+seedScpAccount=
+seedScpPassword=
+seedScpPath=
+
+# every peer periodically scans for other peers. You can set the time
+# of the period here (minutes)
+peerCycle=2
+
+# debug flags
+debug.search.local.dht.off=false
+debug.search.local.solr.off=false
+debug.search.remote.dht.off=false
+debug.search.remote.dht.testlocal=false
+debug.search.remote.solr.off=false
+debug.search.remote.solr.testlocal=false
+
+# staticIP: if you have a static IP, you can use this setting
+staticIP=
+
+# each time YaCy starts up, it can trigger the local browser to show the
+# status page.
This is active by default, to make it easier for first-time +# users to understand what this application does. You can disable browser +# pop-up here or set a different start page, like the search page +browserPopUpTrigger=true +browserPopUpPage=index.html + +# a forward page can be given for the index.html page +# when a user accesses the index.html page, he/she is forwarded to the page +# as given by indexForward. This is by default not defined which means 'no forward' +indexForward = + +# defines if the YaCy icon appears in the system tray on supported platforms +tray.icon.enabled=true +tray.icon.force=false +tray.icon.label=YaCy +tray.menu.enabled=true + +# index sharing attributes: by default, sharing is on. +# If you want to use YaCy only for local indexing (robinson mode), +# you may switch this off +allowDistributeIndex=true +allowDistributeIndexWhileCrawling=false +allowDistributeIndexWhileIndexing=true +allowReceiveIndex=true +allowReceiveIndex.search=true +indexReceiveBlockBlacklist=true + +# the frequency is the number of links per minute, that the peer allowes +# _every_ other peer to send to this peer +defaultWordReceiveFrequency=100 +defaultLinkReceiveFrequency=30 +# the default may be overridden for each peer individually, these +# settings are only available through the online interface + +# prefetch parameters +# the prefetch depth assigns a specific depth to the prefetch mechanism +# prefetch of 0 means no prefetch; a prefetch of 1 means to prefetch all +# embedded URLs, but since embedded image links are loaded by the browser +# this means that only embedded anchors are prefetched additionally +# a prefetch of 2 would result in loading of all images and anchor pages +# of all embedded anchors. Be careful with this value, since even a prefetch +# of 2 would result in hundreds of prefetched URLs for each single proxy fill. +proxyPrefetchDepth=0 +proxyStoreHTCache=true +proxyIndexingRemote=false +proxyIndexingLocalText=true +proxyIndexingLocalMedia=true + +# proxy usage only for .yacy-Domains for autoconfig +proxyYacyOnly=false + +# enable proxy via url (/proxy.html?url=http://yacy.net) +proxyURL=false +proxyURL.access=127.0.0.1,0:0:0:0:0:0:0:1 +# which urls to rewrite to /proxy.html?url=x (values: all, domainlist) +proxyURL.rewriteURLs=domainlist +proxyURL.useforresults=false + +# Autocrawl configuration +autocrawl=false +autocrawl.index.text=true +autocrawl.index.media=true +autocrawl.ratio=50 +autocrawl.rows=100 +autocrawl.days=1 +autocrawl.query=*:* +autocrawl.deep.depth=3 +autocrawl.shallow.depth=1 + +# From the 'IndexCreate' menu point you can also define a crawling start point. +# The crawling works the same way as the prefetch, but it is possible to +# assign a different crawling depth. +# Be careful with this number. Consider a branching factor of average 20; +# A prefetch-depth of 8 would index 25.600.000.000 pages, maybe the whole WWW. +crawlingDepth=3 +crawlingDirectDocByURL=true +crawlingIfOlder=-1 +crawlingDomFilterDepth=-1 +crawlingDomMaxPages=-1 +indexText=true +indexMedia=true + +# Filter for crawling; may be used to restrict a crawl to a specific domain +# URLs are only indexed and further crawled if they match this filter +crawlingFilter=.* +crawlingQ=true +followFrames=true +obeyHtmlRobotsNoindex=true +obeyHtmlRobotsNofollow=false +storeHTCache=true +storeTXCache=true + +# peers may initiate remote crawling tasks. 
+# every peer may allow or disallow being used as a crawling peer;
+# you can also set a maximum crawl depth that can be requested or accepted
+# order=parameters for requester; response=parameters for responder
+# these values apply only for senior-senior communication
+# The delay value is the number of seconds between two separate orders
+# crawlOrder: default value for remote crawl starts
+# crawlResponse: set to true if a peer should retrieve remote crawl urls from other peers
+crawlOrder=true
+crawlOrderDepth=0
+crawlResponse=false
+crawlResponseDepth=0
+
+# indexing-exclusion rules
+# These rules are important to reduce the number of words that are indexed
+# We distinguish three different sets of stop-words:
+# static - excludes all words given in the file yacy.stopwords from indexing,
+# dynamic - excludes all words from indexing which are listed by statistic rules,
+# parental - excludes all words from indexing which had been indexed in the parent web page.
+xsstopw=true
+xdstopw=true
+xpstopw=true
+
+# Topwords filtering
+# If set to true, all stopwords (stopwords.yacy) are filtered from the topwords
+# Change to false if requesting hits from peers with a modified stopwords-file and using the unchanged client-version
+filterOutStopwordsFromTopwords=true
+
+# crawling steering: must-match/must-not-match
+crawlingIPMustMatch=.*
+crawlingIPMustNotMatch=
+# the default country codes are all codes for countries in Europe
+crawlingCountryMustMatch=AD,AL,AT,BA,BE,BG,BY,CH,CY,CZ,DE,DK,EE,ES,FI,FO,FR,GG,GI,GR,HR,HU,IE,IM,IS,IT,JE,LI,LT,LU,LV,MC,MD,MK,MT,NL,NO,PL,PT,RO,RU,SE,SI,SJ,SK,SM,TR,UA,UK,VA,YU
+
+# collections for index data separation
+# these collections can be used to produce search tenants.
+# The collection is used in the site-parameter in the GSA interface.
+# Collections are assigned during crawl-time and defined in the crawl start.
+# The YaCyScheme field collection_sxt must be switched on to use this field.
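+# Example (hypothetical): a crawl started with the collection 'wiki' can later be
+# queried as a separate search tenant via the GSA interface with the parameter site=wiki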
+collection=user
+
+# performance-settings
+# delay-times for permanent loops (milliseconds)
+# the idlesleep is the pause that a process sleeps if the last call to the
+# process job was without execution of anything;
+# the busysleep is the pause after a full job execution
+# the prereq-value is a memory pre-requisite: that many bytes must
+# be available/free in the heap; otherwise the loop is not executed
+# and another idlesleep is performed
+20_dhtdistribution_idlesleep=30000
+20_dhtdistribution_busysleep=15000
+20_dhtdistribution_memprereq=12582912
+20_dhtdistribution_loadprereq=2.0
+30_peerping_idlesleep=30000
+30_peerping_busysleep=30000
+30_peerping_memprereq=2097152
+30_peerping_loadprereq=4.0
+40_peerseedcycle_idlesleep=1800000
+40_peerseedcycle_busysleep=1200000
+40_peerseedcycle_memprereq=4194304
+40_peerseedcycle_loadprereq=2.0
+50_localcrawl_idlesleep=2000
+50_localcrawl_busysleep=10
+50_localcrawl_memprereq=25165824
+50_localcrawl_loadprereq=6.0
+50_localcrawl_isPaused=false
+55_autocrawl_idlesleep=10000
+55_autocrawl_busysleep=10000
+55_autocrawl_memprereq=25165824
+55_autocrawl_loadprereq=6.0
+60_remotecrawlloader_idlesleep=4000
+60_remotecrawlloader_busysleep=800
+60_remotecrawlloader_memprereq=12582912
+60_remotecrawlloader_loadprereq=8.0
+60_remotecrawlloader_isPaused=false
+62_remotetriggeredcrawl_idlesleep=2000
+62_remotetriggeredcrawl_busysleep=200
+62_remotetriggeredcrawl_memprereq=12582912
+62_remotetriggeredcrawl_loadprereq=8.0
+62_remotetriggeredcrawl_isPaused=false
+70_surrogates_idlesleep=10000
+70_surrogates_busysleep=0
+70_surrogates_memprereq=12582912
+70_surrogates_loadprereq=8.0
+720_ccimport_idlesleep=100
+720_ccimport_busysleep=1000
+720_ccimport_memprereq=1048576
+720_ccimport_loadprereq=8.0
+730_ccfilter_idlesleep=100
+730_ccfilter_busysleep=1000
+730_ccfilter_memprereq=1048576
+730_ccfilter_loadprereq=8.0
+
+85_scheduler_idlesleep=60000
+85_scheduler_busysleep=60000
+85_scheduler_memprereq=1048576
+85_scheduler_loadprereq=4.0
+90_cleanup_idlesleep=300000
+90_cleanup_busysleep=300000
+90_cleanup_memprereq=0
+90_cleanup_loadprereq=16.0
+
+reindexSolr_idlesleep=1000
+reindexSolr_busysleep=1
+reindexSolr_memprereq=10485760
+reindexSolr_loadprereq=9.0
+
+# additional attributes:
+# performanceIO is a percent-value. A value of 10 means that 10% of the busysleep time
+# is used to flush the RAM cache, which is the major part of the IO in YaCy
+performanceProfile=defaults/yacy.init
+performanceSpeed=100
+performanceIO=10
+
+# cleanup-process:
+# properties for tasks that are performed during cleanup
+cleanup.deletionProcessedNews = true
+cleanup.deletionPublishedNews = true
+cleanup.failedSearchURLtimeout = 86400000
+
+
+# default memory settings for startup of yacy
+# is valid in unix/shell and windows environments but
+# not for first startup of YaCy
+
+# -Xmx and -Xms maximum/init Java heap size
+# if a high performance for large search indexes is wanted, then setting the values to an equal number is recommended
+# if YaCy shall be nice in not-only-yacy environments, then the Xms value may be lower
+javastart_Xmx=Xmx600m
+javastart_Xms=Xms90m
+
+# YaCy is able to use RAM copies of database tables. This needs a lot of RAM.
+# To switch on copying of file tables int RAM, there must be enough memory +# The memory that is available at startup time is used to switch the feature on +# The tableCachingLimit is the amount of free RAM at startup time to switch on the feature +tableCachingLimit=419430400 + +# some java versions may be limited to a specific array size +# of 134217727 entries. To prevent that tables of that size are generated, +# set this property to false +# If you want to have better performance and switch ramcopy on, try also to +# set this property to true +# this value is automatically set to true, if more than two gigabyte is available +exceed134217727=false + +# priority of the yacy-process +# is valid in unix/shell and windows environments but +# not for first startup of YaCy +# UNIX: corresponds to the nice-level +# WIN: -20=realtime;-15=high;-10=above;0=normal;10=below;20=low +javastart_priority=10 + +# performance properties for the word index cache +# wordCacheMaxLow/High is the number of word indexes that shall be held in the +# ram cache during indexing. If you want to increase indexing speed, increase this +# value i.e. up to one million, but increase also the memory limit to a minimum of 2GB +wordCacheMaxCount = 50000 + +# Specifies if yacy can be used as transparent http proxy. +# +# Please note that you also have to reconfigure your firewall +# before you can use yacy as transparent proxy. On linux this +# can be done like this: +# iptables -t nat -A PREROUTING -p tcp -s 192.168.0.0/16 \ +# --dport 80 -j DNAT --to 192.168.0.1:8090 +# +# With this iptables filter listed above all http traffic that +# comes from your private network (in this case 192.168.0.0) +# and goes to any webserver listening on port 80 will be forwarded +# by the firewall to yacy running on port 8090 (192.168.0.1:8090) +isTransparentProxy=false + +# Specifies the timeout the proxy sould use +proxy.clientTimeout = 60000 + +# Specifies if the proxy should send the via header according to RFC +proxy.sendViaHeader=true + +# Specifies if the proxy should send the X-Forwarded-For header +proxy.sendXForwardedForHeader=true + +# Enable cookie monitoring +proxy.monitorCookies=false + +# msgForwarding: Specifies if yacy should forward received messages via +# email to the configured email address +msgForwardingEnabled=false +msgForwardingCmd=/usr/sbin/sendmail +msgForwardingTo=root@localhost + +#crawlPause: delay time after specific functions before crawling is resumed +crawlPause.proxy=10 +crawlPause.localsearch=50 +crawlPause.remotesearch=10 + +# Some configuration values for the crawler +crawler.clientTimeout=30000 + +# http crawler specific settings; size in bytes +crawler.http.accept=text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8 +crawler.http.acceptEncoding=gzip +crawler.http.acceptLanguage=en-us,en;q=0.5 +crawler.http.acceptCharset=ISO-8859-1,utf-8;q=0.7,*;q=0.7 +crawler.http.maxFileSize=10485760 +crawler.http.FollowRedirects=true +crawler.http.RecordRedirects=false + +# ftp crawler specific settings; size in bytes +crawler.ftp.maxFileSize=10485760 + +# smb crawler specific settings: maximum size +crawler.smb.maxFileSize=100000000 + +# smb crawler specific settings: maximum size +crawler.file.maxFileSize=100000000 + +# maximum number of crawler threads +crawler.MaxActiveThreads = 200 + +# maximum number of same hosts in crawler threads +crawler.MaxSameHostInQueue = 20 + +# default latency is the start value of the average of remote server response time +crawler.defaultAverageLatency = 500 + +# the 
latency factor is a factor that is applied to the average remote server latency.
+# The result is the minimum remote server access delay time
+crawler.latencyFactor = 0.5
+
+# The onDemandLimit is the maximum number of crawl queues that are concurrently opened
+# at the same time. If the number of hosts exceeds this number, onDemand queues are opened
+# which are opened each time a queue is accessed which creates high IO load. On the other
+# hand, having too many entries in onDemandLimit may exceed the maximum number of file
+# pointers. You can increase this number in /proc/sys/fs/file-max and adapt it to the number
+# defined here
+crawler.onDemandLimit = 1000
+
+# maximum size of indexing queue
+indexer.slots = 100
+
+# maximum size of stacker queue
+stacker.slots = 2000
+
+# search options: show advanced options on main search page
+search.options = true
+
+# search domains. If set to false then that search is not available
+search.text = true
+search.image = true
+search.audio = false
+search.video = false
+search.app = false
+
+# number of search results per page displayed by default
+search.items = 10
+
+# target for search results; this is the href target attribute inside every search result link
+# possible values:
+# "_blank" (new window), "_self" (same window), "_parent" (the parent frame of a frameset),
+# "_top" (top of all frames), "searchresult" (a default custom page name for search results)
+# a special pattern can be given for exceptions to the default target according to urls
+search.target = _self
+search.target.special = _self
+search.target.special.pattern =
+
+# search result lines may show additional information for each search hit
+# these information pieces may be switched on or off
+search.result.show.date = true
+search.result.show.size = false
+search.result.show.metadata = false
+search.result.show.parser = false
+search.result.show.citation = true
+search.result.show.pictures = false
+search.result.show.cache = true
+search.result.show.proxy = false
+search.result.show.hostbrowser = true
+search.result.show.vocabulary = false
+search.result.show.vocabulary.omit =
+search.result.show.snapshots = false
+
+
+# search navigators: comma-separated list of default values for search navigation.
+# can be temporarily different if the search string is given with different navigation values
+# assigning no value(s) means that no navigation is shown
+search.navigation=location,hosts,authors,namespace,topics,filetype,protocol,language
+#search.navigation=location,hosts,authors,namespace,topics,filetype,protocol,language,collections,date
+
+# max number of items displayed in search navigators
+search.navigation.maxcount=100
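+# Example (hypothetical): show at most 20 lines per navigator -
+#search.navigation.maxcount=20
+# note that the ConfigSearchPage_p servlet only accepts values greater than 5
+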
+# search result verification and snippet fetch caching rules
+# each search result can be verified by loading the link from the web
+# this can be enhanced using a cache. In some cases it may be appropriate
+# to not verify the link at all and not compute a snippet
+# the possible cases are:
+# nocache: no use of web cache, load all snippets online
+# iffresh: use the cache if the cache exists and is fresh, otherwise load online
+# ifexist: use the cache if the cache exists, otherwise load online
+# cacheonly: never go online, use all content from cache. If no cache entry exists,
+# consider content nevertheless as available and show result without snippet
+# false: no link verification and no snippet generation:
+# all search results are valid without verification
+search.verify = ifexist
+
+search.excludehosts=
+search.excludehosth=
+
+# in case that a link verification fails then the corresponding index reference can be
+# deleted to clean up the index. If this property is set then failed index verification in
+# the cases of nocache, iffresh and ifexist causes an index deletion
+search.verify.delete = true
+
+# remote search details
+remotesearch.maxcount = 10
+remotesearch.maxtime = 3000
+remotesearch.result.store=true
+# Maximum size allowed (in bytes) for a remote document result to be stored to local index. Defaults to -1, which means no limit.
+remotesearch.result.store.maxsize=-1
+remotesearch.maxload.rwi=8.0
+remotesearch.maxload.solr=4.0
+
+# specifies if yacy should set its own referer if no referer URL
+# was set by the client.
+useYacyReferer = false
+
+# specifies if the http post body should be transferred
+# using content-encoding gzip during index transfer
+# a) indexDistribution: which is done periodically if you have enabled
+# Index Distribution via IndexControl_p.html
+# b) indexTransfer: which can be used to transfer the whole index of a peer
+# this can be started via IndexTransfer_p.html
+# c) indexControl: which can be triggered manually via IndexControl_p.html to
+# transfer a chosen subset of the peer index
+indexDistribution.gzipBody = true
+indexTransfer.gzipBody = true
+indexControl.gzipBody = true
+
+# defining timeouts for index- transfer/distribution/control
+indexControl.timeout = 60000
+indexDistribution.timeout = 60000
+indexTransfer.timeout = 120000
+
+# defining max. allowed amount of open files during index- transfer/distribution
+indexDistribution.maxOpenFiles = 800
+indexTransfer.maxOpenFiles = 800
+
+# sizes for index distribution
+indexDistribution.minChunkSize = 10
+indexDistribution.maxChunkSize = 1000
+indexDistribution.startChunkSize = 200
+indexDistribution.maxChunkFails = 1
+
+# limit the number of references per term & blob to the youngest of this value
+# a value of <= 0 disables this feature (no limit)
+# a value of e.g. 100000 can improve stability and reduce load while searching very popular words
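+# Example (hypothetical): keep only the 100000 youngest references per term -
+#index.maxReferences = 100000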
+index.maxReferences = 0
+
+# Search sequence settings
+# collection:
+# time = time to get a RWI out of RAM cache, assortments and WORDS files
+# count = maximum number of RWI-entries that shall be collected
+#
+# join:
+# time = time to perform the join between all collected RWIs
+# count = maximum number of entries that shall be joined
+#
+# presort:
+# time = time to do a sort of the joined URL-records
+# count = maximum number of entries that shall be pre-sorted
+#
+# urlfetch:
+# time = time to fetch the real URLs from the LURL database
+# count = maximum number of urls that shall be fetched
+#
+# postsort:
+# time = time for final sort of URLs
+# count = maximum number of URLs that shall be retrieved during sort
+#
+# filter:
+# time = time to filter out unwanted urls (like redundant urls)
+# count = maximum number of urls that shall be filtered
+#
+# snippetfetch:
+# time = time to fetch snippets for selected URLs
+# count = maximum number of snippets to be fetched
+#
+# all values are percent
+# time-percent is the percent of total search time
+# count-percent is the percent of total wanted urls in result
+# we distinguish local and remote search times
+searchProcessLocalTime_c = 44
+searchProcessLocalCount_c = 10000000
+searchProcessLocalTime_j = 8
+searchProcessLocalCount_j = 1000000
+searchProcessLocalTime_r = 8
+searchProcessLocalCount_r = 100000
+searchProcessLocalTime_u = 20
+searchProcessLocalCount_u = 10000
+searchProcessLocalTime_o = 10
+searchProcessLocalCount_o = 100
+searchProcessLocalTime_f = 5
+searchProcessLocalCount_f = 100
+searchProcessLocalTime_s = 5
+searchProcessLocalCount_s = 30
+
+searchProcessRemoteTime_c = 44
+searchProcessRemoteCount_c = 1000000
+searchProcessRemoteTime_j = 8
+searchProcessRemoteCount_j = 1000000
+searchProcessRemoteTime_r = 8
+searchProcessRemoteCount_r = 1000
+searchProcessRemoteTime_u = 20
+searchProcessRemoteCount_u = 1000
+searchProcessRemoteTime_o = 10
+searchProcessRemoteCount_o = 1000
+searchProcessRemoteTime_f = 5
+searchProcessRemoteCount_f = 100
+searchProcessRemoteTime_s = 5
+searchProcessRemoteCount_s = 10
+
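+# Reading example (an interpretation of the defaults above): searchProcessLocalTime_c = 44
+# grants 44% of the total local search time to the RWI collection phase
+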
+# timeouts for snippet fetching in ms
+# timeout_text is for text-snippets, timeout_media for media, e.g. images
+timeout_text = 10000
+timeout_media = 15000
+
+# a list of domain name patterns that should not be cached by the httpc dns cache
+httpc.nameCacheNoCachingPatterns = .*.ath.cx,.*.blogdns.*,.*.boldlygoingnowhere.org,.*.dnsalias.*,.*.dnsdojo.*,.*.dvrdns.org,.*.dyn-o-saur.com,.*.dynalias.*,.*.dyndns.*,.*.ftpaccess.cc,.*.game-host.org,.*.game-server.cc,.*.getmyip.com,.*.gotdns.*,.*.ham-radio-op.net,.*.hobby-site.com,.*.homedns.org,.*.homeftp.*,.*.homeip.net,.*.homelinux.*,.*.homeunix.*,.*.is-a-chef.*,.*.is-a-geek.*,.*.kicks-ass.*,.*.merseine.nu,.*.mine.nu,.*.myphotos.cc,.*.podzone.*,.*.scrapping.cc,.*.selfip.*,.*.servebbs.*,.*.serveftp.*,.*.servegame.org,.*.shacknet.nu
+
+#externalRedirectors
+#squid Redirector compatible
+externalRedirector=
+
+# the Yacy Version this config was created with
+Version=
+# old version value (kept to allow conversion of .conf, until next main release > 1.83)
+svnRevision=0
+
+currentSkin=pdbootstrap
+
+# flag to show if pages shall be usable for non-admin users
+# this can be applied to the Surftips.html and yacysearch.html page
+publicSurftips = true
+publicSearchpage = true
+
+# flag to show if the top navigation bar shall be shown to all users
+# if this is disabled, then the user must navigate manually from the search page
+# to /Status.html to get the main menu bar back
+publicTopmenu = true
+
+# Wiki access rights
+# by default, the built-in wiki system allows only the administrator to make changes
+# this can be changed. There are three options:
+# admin - only the admin has write right
+# all - everybody has write right
+# user - the admin and every user registered in the user db has write right
+WikiAccess = admin
+
+# Search Profiles
+# we will support different search profiles
+# If this profile setting is empty, a hard-coded profile is used to initialise the values
+search.ranking.rwi.profile =
+# The boost fields contain all fields which shall be searched together with a boost. Non-mentioned fields are not searched.
+
+# Boost queries are added to all queries; functions evaluate a value which is either added or multiplied with the ranking.
+# The field boostfunctionmode can be either 'add' or 'multiply' to describe the mode.
+# All boost methods > 0 must have names to be able to select this name with a query, with the syntax /name
+# The boostfields setting is of special importance as these are the fields used to query for search terms
+search.ranking.solr.collection.boostname.tmpa.0=Default Profile
+search.ranking.solr.collection.boostfields.tmpa.0=url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,host_s^6.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^3.0,keywords^2.0,description_txt^1.5,author^1.0
+search.ranking.solr.collection.filterquery.tmpa.0=
+search.ranking.solr.collection.boostquery.tmpa.0=crawldepth_i:0^0.8\ncrawldepth_i:1^0.4
+search.ranking.solr.collection.boostfunction.tmpb.0=
+search.ranking.solr.collection.boostname.tmpa.1=Date Profile: sort by date in descending order for a '/date' usage
+search.ranking.solr.collection.boostfields.tmpa.1=url_paths_sxt^0.1,title^0.1,text_t^0.1
+search.ranking.solr.collection.filterquery.tmpa.1=
+search.ranking.solr.collection.boostquery.tmpa.1=
+search.ranking.solr.collection.boostfunction.tmpb.1=recip(ms(NOW,last_modified),3.16e-11,1,1)
+search.ranking.solr.collection.boostname.tmpa.2=Intranet Profile: when a search is done on a single domain only, i.e.
if a site:-operator is used +search.ranking.solr.collection.boostfields.tmpa.2=url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^3.0,h3_txt^2.0,keywords^2.0,description_txt^1.5,author^1.0 +search.ranking.solr.collection.filterquery.tmpa.2= +search.ranking.solr.collection.boostquery.tmpa.2=fuzzy_signature_unique_b:true^10.0 +search.ranking.solr.collection.boostfunction.tmpb.2= +search.ranking.solr.collection.boostname.tmpa.3=_unused3 +search.ranking.solr.collection.boostfields.tmpa.3=text_t^1.0 +search.ranking.solr.collection.filterquery.tmpa.3= +search.ranking.solr.collection.boostquery.tmpa.3=crawldepth_i:0^0.8\ncrawldepth_i:1^0.4 +search.ranking.solr.collection.boostfunction.tmpb.3= + +# the following values are used to identify duplicate content +search.ranking.solr.doubledetection.minlength=3 +search.ranking.solr.doubledetection.quantrate=0.5f + +# Another attribute for double content is a 'greedy' ignoring of a http url is present for each https and vice versa +# The same may be true for documents with leading 'www.' subdomain and without. +# The following attributes will cause that https is preferred over http and with-www is preferred over without-www +search.ranking.uniqueheuristic.preferhttps = false +search.ranking.uniqueheuristic.preferwwwprefix = true + +#optional extern thumbnail program. +#the program must accept the invocation PROGRAM http://url /path/to/filename +thumbnailProgram = + +# settings for the peer's local robots.txt +# the following restrictions are possible (comma-separated): +# - all : entire domain is disallowed +# - blog : the blog-pages +# - bookmarks : the bookmark-page +# - dirs : all directories in htroot (standard setting, as there is no usable information in) +# - fileshare : all files in the peer's file share (DATA/HTDOCS/share) +# - homepage : all files on the peer's home page (DATA/HTDOCS/www) +# - locked : all servlets ending on '_p.*' (standard setting, as robots would need a password to access them anyways) +# - news : the news-page +# - network : the network-pages +# - status : peer's status page +# - surftips : the surftips-page +# - wiki : the wiki-page +httpd.robots.txt = locked,dirs,bookmarks,network,news,status,profile + +# class to use for parsing wikicode +wikiParser.class = de.anomic.data.wikiCode + +# settings for automatic deletion of old entries in passive and potential seed-db +# time means max time (in days) a peer may not have been seen before it is deleted +routing.deleteOldSeeds.permission = true +routing.deleteOldSeeds.time = 30 + +# options to remember the default search engines when using the search compare features +compare_yacy.left = YaCy +compare_yacy.right = startpage.com + +# minimum free disk space for crawling (MiB) +disk.free = 3000 +# minimum for DHT +disk.free.hardlimit = 1000 + +# ResourceObserver settings +# We apply the naming of control circuit states to resources observer limit values (steady-state value, over/undershot) +# under/overshot states in the system are supposed to be regulated to match the steady-state value + +# autoregulation of resource states +# ATTENTION: be aware that using the autoregulate-option causes that the search index data is DELETED as soon as threshold-values are reached! 
+# the autoregulate function starts working if resources reach over/undershot values and the auto-regulation tries to regulate to the steadystate value
+resource.disk.free.autoregulate=true
+resource.disk.used.autoregulate=false
+
+# the target steady-state of minimum disk space left (MB)
+resource.disk.free.min.steadystate=4096
+
+# the undershot below the steady-state of minimum disk free as absolute size (MB)
+resource.disk.free.min.undershot=2048
+
+# the target steady-state of maximum disk space for YaCy (MB)
+resource.disk.used.max.steadystate=2097152
+
+# the overshot above the steady-state of disk space for YaCy (absolute) (MB)
+resource.disk.used.max.overshot=4194304
+
+# minimum memory to accept dht-in (MiB)
+memory.acceptDHTabove = 50
+memory.disabledDHT = false
+
+# whether to use the standard memory strategy - or try the generation memory strategy
+memory.standardStrategy = true
+
+# content integration settings
+content.phpbb3.urlstub = http:///
+content.phpbb3.dbtype = mysql
+content.phpbb3.dbhost = localhost
+content.phpbb3.dbport = 3306
+content.phpbb3.dbname = forum
+content.phpbb3.tableprefix = phpbb_
+content.phpbb3.dbuser = notroot
+content.phpbb3.dbpw = joshua
+content.phpbb3.ppf = 1000
+content.phpbb3.dumpfile =
+
+# search engine teaser: an about box in search results
+# this is only shown if the about.body is filled
+about.headline=Please support YaCy!
+about.body=
If you run a YaCy server, feel free to replace our donation plea with your own support message; use the Portal Configuration servlet.
+ +donation.iframesource=http://yacy.net/include/donate.html +donation.iframetarget=env/donate.html + +# search heuristics +heuristic.site = false +heuristic.searchresults = false +heuristic.searchresults.crawlglobal = false +heuristic.opensearch = false + +# colours for generic design +# white +color_background = #FFFFFF + +# dark blue/grey +color_text = #18294A + +# success/green +color_legend = #5cb85c + +# brand/blue +color_tableheader = #84B3DE + +# dark/light grey (for tables) +color_tableitem = #dddddd +color_tableitem2 = #eeeeee + +# light red +color_tablebottom = #F2DEDE + +color_borderline = #888888 +color_signbad = #990000 +color_signgood = #009900 +color_signother = #000099 + +# dark blue +color_searchheadline = #2145ca + +# green / success/3*2 +color_searchurl = #1c65ba +color_searchurlhover = #1c65ba + + +# federated index storage and federated search functionality +# federated search means that other search engines may be used together with the built-in indexing. +# each federated search may be able to be used as remote indexing service and/or as remote search service. +# a typical use case for a federated search is a concurrent search from opensearch sources. +# a typical use case for a remote indexing service is a remote solr index. YaCy supports remote solr indexes. + +# solr indexes can be filled if enabled is set to true +# the remote index scheme is the same as produced by the SolrCell; see http://wiki.apache.org/solr/ExtractingRequestHandler +# because this default scheme is used the default example scheme can be used as solr configuration +# to use this, do the following: +# - set federated.service.solr.indexing.enabled = true +# - download solr from http://www.apache.org/dyn/closer.cgi/lucene/solr/ +# - extract the solr (3.1) package, 'cd example' and start solr with 'java -jar start.jar' +# - start yacy and then start a crawler. The crawler will fill both, YaCy and solr indexes. +# - to check whats in solr after indexing, open http://localhost:8983/solr/admin/ +federated.service.solr.indexing.enabled = false +federated.service.solr.indexing.url = http://127.0.0.1:8983/solr +federated.service.solr.indexing.sharding = MODULO_HOST_MD5 +# the lazy attribute causes that fields containing "" or 0 are not added and not written +federated.service.solr.indexing.lazy = true +federated.service.solr.indexing.timeout = 60000 +federated.service.solr.indexing.writeEnabled = true + +# temporary definition of backend services to use. +# After the migration a rwi+solr combination is used, the solr contains the content of the previously used metadata-db. +# To get a handle for a migration, these values are defined as temporary, if the migration starts the values are renamed +# and defined with different default values. +# The citation service is used for ranking; this is a reverse linking index. It should be on before and after the migration. +# It can be switched off if only a remote solr index is used. 
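+# A hypothetical sketch of a setup that relies on a remote Solr only (an
+# assumption for illustration, not a tested recommendation):
+#core.service.fulltext = false
+#core.service.citation.tmp = false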
+core.service.fulltext = true +core.service.rwi.tmp = true +core.service.citation.tmp = true +core.service.webgraph.tmp = false + +# Augmentation settings +parserAugmentation = false +parserAugmentation.RDFa = false + +# Content control settings +contentcontrol.enabled = false +contentcontrol.bookmarklist = contentcontrol +contentcontrol.mandatoryfilterlist = yacy +contentcontrol.smwimport.enabled = false +contentcontrol.smwimport.baseurl = +contentcontrol.smwimport.purgelistoninit = true +contentcontrol.smwimport.targetlist = contentcontrol +contentcontrol.smwimport.defaultcategory = yacy + +# host browser settings +browser.autoload = false +browser.load4everyone = false + +# greedy learning: fast information acquisition heuristic for new peers +# to make greedy learning work, it must be enabled in the network definition +# the user may switch it off at any time, but if the automatic learning limit is reached +# then the active flag is set to false automatically and this will switch to that state +# automatically by the cleanup process each time if the user switches it on again. +# While the switch in on, it will cause that the user-submitted search will be done along +# with some heuristics like: loading linked documents and adding a twitter search. +# When the learning mode is finished, the user may switch on individual heuristics by himself. +greedylearning.active = true + +# postprocessing steering +postprocessing.maximum_load = 2.5 +postprocessing.minimum_ram = 536870912 +postprocessing.partialUpdate = true + +# Custom user agents for 'allip' networks: +# This user agent is only available if the network is set to 'allip' (which is a non-limited domain 'network' +# without p2p options). Changing this will NOT change the default YaCy user agent, it will only provide an +# agent which is available at crawl start within 'allip'. The userAgent.name is the identifier for the +# robots.txt file which YaCy always obeys for the given name or a wildcard for robot types. +# If any part of this custom user agent name or string includes the phrase 'yacy', it will be IGNORED +# to prevent fraud, DoS or bad behavior in the name of YaCy. +# To use this user agent option, you must define completely different names and strings +# and remove the given example here, which will be ignored by default. 
+crawler.userAgent.name = yacybot +crawler.userAgent.string = yacybot ($$SYSTEM$$) http://yacy.net/bot.html +crawler.userAgent.minimumdelta = 500 +crawler.userAgent.clienttimeout = 10000 + +# experiments with timeout requests +timeoutrequests = true + +# interface decorations +decoration.audio = false +decoration.grafics.linkstructure = true +decoration.hostanalysis = false +decoration.simpleheadernavbar = navbar-default + diff --git a/htroot/ConfigSearchPage_p.html b/htroot/ConfigSearchPage_p.html index 366f370b1..0d55289ca 100644 --- a/htroot/ConfigSearchPage_p.html +++ b/htroot/ConfigSearchPage_p.html @@ -158,6 +158,8 @@ + + diff --git a/htroot/ConfigSearchPage_p.java b/htroot/ConfigSearchPage_p.java index 33e75ac32..e54173cca 100644 --- a/htroot/ConfigSearchPage_p.java +++ b/htroot/ConfigSearchPage_p.java @@ -37,6 +37,7 @@ import net.yacy.cora.util.ConcurrentLog; import net.yacy.data.WorkTables; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; +import net.yacy.search.query.QueryParams; import net.yacy.server.serverObjects; import net.yacy.server.serverSwitch; @@ -93,6 +94,12 @@ public class ConfigSearchPage_p { if (post.getBoolean("search.navigation.date")) nav += "date,"; if (nav.endsWith(",")) nav = nav.substring(0, nav.length() - 1); sb.setConfig("search.navigation", nav); + // maxcount default + int navmaxcnt = post.getInt("search.navigation.maxcount", QueryParams.FACETS_STANDARD_MAXCOUNT); + if (navmaxcnt > 5) { + sb.setConfig(SwitchboardConstants.SEARCH_NAVIGATION_MAXCOUNT, navmaxcnt); + if (navmaxcnt != QueryParams.FACETS_STANDARD_MAXCOUNT) QueryParams.FACETS_STANDARD_MAXCOUNT = navmaxcnt; + } } if (post.containsKey("searchpage_default")) { // load defaults from defaults/yacy.init file @@ -174,6 +181,7 @@ public class ConfigSearchPage_p { prop.put("search.navigation.namespace", sb.getConfig("search.navigation", "").indexOf("namespace",0) >= 0 ? 1 : 0); prop.put("search.navigation.topics", sb.getConfig("search.navigation", "").indexOf("topics",0) >= 0 ? 1 : 0); prop.put("search.navigation.date", sb.getConfig("search.navigation", "").indexOf("date",0) >= 0 ? 
1 : 0); + prop.put("search.navigation.maxcount", sb.getConfigInt(SwitchboardConstants.SEARCH_NAVIGATION_MAXCOUNT, QueryParams.FACETS_STANDARD_MAXCOUNT)); prop.put("about.headline", sb.getConfig("about.headline", "About")); prop.put("about.body", sb.getConfig("about.body", "")); diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index fe9989f12..dafd120c9 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -200,6 +200,7 @@ import net.yacy.search.index.Fulltext; import net.yacy.search.index.Segment; import net.yacy.search.index.Segment.ReferenceReportCache; import net.yacy.search.query.AccessTracker; +import net.yacy.search.query.QueryParams; import net.yacy.search.query.SearchEvent; import net.yacy.search.query.SearchEventCache; import net.yacy.search.ranking.RankingProfile; @@ -1164,6 +1165,9 @@ public final class Switchboard extends serverSwitch { this.trail = new LinkedBlockingQueue(); + // set configurable ui defaults + QueryParams.FACETS_STANDARD_MAXCOUNT = sb.getConfigInt(SwitchboardConstants.SEARCH_NAVIGATION_MAXCOUNT, QueryParams.FACETS_STANDARD_MAXCOUNT); // max number of navigator/facet lines + this.log.config("Finished Switchboard Initialization"); } diff --git a/source/net/yacy/search/SwitchboardConstants.java b/source/net/yacy/search/SwitchboardConstants.java index 02f92caac..b0a9be1a6 100644 --- a/source/net/yacy/search/SwitchboardConstants.java +++ b/source/net/yacy/search/SwitchboardConstants.java @@ -491,6 +491,8 @@ public final class SwitchboardConstants { public static final String SEARCH_VERIFY = "search.verify"; public static final String SEARCH_VERIFY_DELETE = "search.verify.delete"; + public static final String SEARCH_NAVIGATION_MAXCOUNT = "search.navigation.maxcount"; // max lines displayed in standard search navigators/facets + /** * ranking+evaluation */ diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java index 9efbb8dcf..089fce57f 100644 --- a/source/net/yacy/search/query/QueryParams.java +++ b/source/net/yacy/search/query/QueryParams.java @@ -80,7 +80,7 @@ import org.apache.solr.util.DateFormatUtil; public final class QueryParams { - public static int FACETS_STANDARD_MAXCOUNT = 10000; + public static int FACETS_STANDARD_MAXCOUNT = 100; // max count of item lines in navigator public static int FACETS_DATE_MAXCOUNT = 640; public enum Searchdom { diff --git a/test/java/net/yacy/kelondro/rwi/ReferenceContainerTest.java b/test/java/net/yacy/kelondro/rwi/ReferenceContainerTest.java index 995fd016f..646b55f6e 100644 --- a/test/java/net/yacy/kelondro/rwi/ReferenceContainerTest.java +++ b/test/java/net/yacy/kelondro/rwi/ReferenceContainerTest.java @@ -89,7 +89,7 @@ public class ReferenceContainerTest { assertNotNull("getReference failed", wc); - // TODO: ReferenceContainer used for rwi results. As it distance doesn't persist after adding ref to container making the distance ranking obsolete -> remove or fix + // TODO: ReferenceContainer used for rwi results. As distance doesn't persist after adding ref to container making the distance ranking obsolete -> remove or fix System.out.println("-----------------------------------------------------------"); System.out.println("WordReference (word distance) before add to container: " + wentry.distance()); System.out.println("WordReference (word distance) after get from container: " + wc.distance());