yacy_search_server/yacy.init

###
### YaCy Init File
###
# These properties will be loaded upon installation.
# They are used only once for set-up.
# If you make changes to this file and want these to make any effect,
# you must delete the httpProxy.conf file in DATA/SETTINGS

# ----------------------------------------------------------------------------
# the HTTP service configurations

# port number where the server should bind to
# e.g. 8080
#      #eth0:8080
# 	   192.168.0.1:8080
port = 8080

#sometimes you may want yacy to bind to another port, than the one reachable from outside.
#then set bindPort to the port yacy should bind on, and port to the port, visible from outside
#to run yacy on port 8080, reachable from port 80, set bindPort=8080, port=80 and use
#iptables -t nat -A PREROUTING -p tcp -s 192.168.24.0/16 --dport 80 -j DNAT --to 192.168.24.1:8080
#(of course you need to customize the ips)
bindPort =

# SSL support:
#
# For a German manual see http://yacy-websuche.de/wiki/index.php/De:Interface%C3%9CberHTTPS
#
# English speaking user read below:
#
# With this you can access your peer using https://localhost:8080
#
# There are two possibilities to specify which certificate should
# be used by YaCy.
#
# 1) Create a new certificate:
#
#    *) For testing purposes, you can create a keystore with a self-signed certificate,
#       using the following command:
#       C:\> keytool -keystore mySrvKeystore -genkey -keyalg RSA -alias mycert
#
#    *) Then configure the keyStoreXXXX properties accordingly, e.g.
#       keyStore = c:/yacy/DATA/SETTINGS/mySrvKeystore
#       keyStorePassword = mypwd
#
# 2) Import an existing certificate:
#
#    Alternatively you can import an existing certificate in pkcs12 format into
#    the keystore.
#
#    This can be done by setting the pkcs12XXX properties accordingly, e.g.
#    pkcs12ImportFile = c:/temp/keystore.pkcs12
#    pkcs12ImportPwd = test
#
#    If the property keyStore is not specified, then a new keystore file
#    DATA/SETTINGS/myPeerKeystore will be created.

keyStore =
keyStorePassword =
pkcs12ImportFile =
pkcs12ImportPwd =

# peer-to-peer construction for distributed search
# we have several stages:
# 1st: a file within every distribution that has a list of URLs:
#      -> this is the superseed file
# 2nd: the files that can be retrieved by the superseeds' URLs
#      are called seed list-files.
#      -> the seed list-files contain IP/port combinations of running
#         AnomicHTTPProxies
# 3rd: the peers that are targeted within the seed files are called superpeers
# 4th: the superpeers hold and share a list of all client/search/crawl peers
#
# some superpeers should be able to create again seed list-files.
# These superpeers must upload their IP or their list of peer-IP's to a
# ftp location to create the seed list-file.
# Everyone who does so should mail his/her new seed location to mc<at>anomic.de
# The seed list-file location will then be included in the superseed file.
# This superseed file is then available at two locations:
# - it is included in every distribution and
# - updated through a specific URL-location
# we see the file name and the URL of the superseed here:
superseedFile=superseed.txt
superseedLocation=http://www.yacy.net/superseed.txt

# network definition
# we distinguish local and global networks. Each network type can have different user groups
# groups can be uncontrolled, moderated or controlled
# Each group has a group master, but in case the group is uncontrolled the only task of the
# group master is to provide a bootstrap file which contains peer addresses of peers that
# are member of the group. All attributes are given in a single network description string
# of the form:
# network = <group-name>:<network-name>:<network-domain>:{'uncontrolled'|'moderated'|'controlled'}:<bootstrap-uri>
# in case that the network is the uncontrolled global dht-managed community for everybody,
# the details defaults to:
# network = all:world:global:uncontrolled:http://yacy.net/
# the network-uri must have a sub-path yacy/seed.txt containing a list of urls pointing to the
# peer-address of peers within the group of that network
# several network definition strings can be listed

# clusters within a network:
# every network can have an unlimited number of clusters. Clusters may be also completely
# sealed and have no connection to other peers. When a cluster does not use the
# p2p protocol and the bootstrapping mechanism to contact other peers, we call them
# Robinson peers. They can appear in different 'visibilities':
# - privatepeer: no connection and no data exchange to any other peer
# - privatecluster: connections only to self-defined addresses (other peers in same mode)
# - publiccluster: like privatecluster, but visible and searchable by public p2p nodes
# - publicpeer: a single peer without cluster connection, but visible for p2p nodes
# all public robinson peers should use a peer tag string to be searchable if
# these tags appear in the search request
cluster.mode=publicpeer
cluster.peers.yacydomain=localpeer.yacy
cluster.peers.ipport=localhost:8080

# bootstrapLoadTimeout
# this is the time-out for loading of the seedlist files during bootstrapping
# the time should not be too long, since loading of the seedlist is not parallelized
# and an unsuccessful loading of a seed file may prevent a peer from reaching
# (at least) junior status. If the time-out is too short, there is the danger
# that the peer stays in virgin mode
bootstrapLoadTimeout = 6000

# time-out of client control socket in milliseconds
# since this applies only to the client-proxy connection,
# it can be rather short
# milliseconds
clientTimeout = 10000

# maximal number of httpd sessions
# a client may open several connections at once, and the httpdMaxActiveSessions value sets
# a limit on the number of concurrent connections
httpdMaxActiveSessions = 150
httpdMaxIdleSessions = 75
httpdMinIdleSessions = 5

# default root path for the file server
# may be overridden by the htdocs parameter
# users shall be encouraged to use the htdocs path for individual content,
# not this path defined here
htRootPath = htroot
htTemplatePath = htroot/env/templates

# individual htroot folder
# every user may publicise her/his own web pages
# these pages shall be placed in the path defined here
# the htdocs path shares its content with the htroot path
htDocsPath = DATA/HTDOCS

# the default files (typically index.html), if no file name is given
# The complete path to this file is created by combination with the rootPath
# you can set a list of defaults, separated by comma
# the first one is prioritized
defaultFiles = ConfigBasic.html,index.html,default.html,search.html,console.html,control.html,welcome.html,wiki.html,forum.html,blog.html,email.html,content.html,monitor.html,share.html,dir.html,readme.txt

# locale-options: YaCy supports localization.
# Web pages for special languages are located in the htLocalePath
# The htLocaleLang defines a comma-separated list of language options as <dir>/<named-language>
# (non-ASCII characters in <named-language> are written as HTML entities)
# the <dir> must exist as sub-path to htLocalePath
# the htLocaleSelection selects from the given locales, value=one-of-<dir>
htDefaultPath=htroot
htLocalePath=DATA/HTROOT/locale
htLocaleLang=default/English,de/Deutsch,fr/Fran&ccedil;ais,nl/Nederlands,it/Italiano,es/Espa&ntilde;ol,pt/Portugu&ecirc;s,fi/Suomi,se/Svenska,dk/Dansk,gr/E&lambda;&lambda;&eta;v&iota;&kappa;&alpha;,sk/Slovensky
htLocaleSelection=default

# virtual host for httpdFileServlet access
# for example http://<fileHost>/ shall access the file servlet and
# return the defaultFile at rootPath
# either way, http://<fileHost>/ denotes the same as http://localhost:<port>/
# for the preconfigured value 'localpeer', the URL is:
# http://localpeer/
fileHost = localpeer

# specify the path to the MIME matching file table
mimeConfig = httpd.mime

# a path to the file cache, used for the internal proxy and as crawl buffer
# This will be used if the server is addressed as a proxy
proxyCache = DATA/HTCACHE

# the maximum disc cache size for files in proxyCache in megabytes
proxyCacheSize = 200

# use the mostly direct mapping of URLs to Filenames
# makes it easy watching the content of the cache using file browsers
# problems arise when a file already exists where a new entry expects a directory
# or vice versa.
# when set to false, the file names are set to the hash of the URL and the
# directory is built from protocol, hostname and port, as with the old
# layout.
# the advantage of this scheme is that no directory/file collisions can
# occur.
# switching this flag will take effect after a restart of yacy.
# files that are present under the previously used layout will be renamed
# to the new location and thus be accessible immediately. so an accumulated
# cache is still usable after the switch.
# possible values are {tree, hash}
proxyCacheLayout = hash

# the migration flag shows, if the different layout shall be migrated from one to another
proxyCacheMigration = true

# the following mime-types are the whitelist for indexing
#
# parseableRealtimeMimeTypes: specifies mime-types that can be indexed on the fly
# parseableMimeTypes: specifies mime-types that can be indexed but not on the fly
parseableRealtimeMimeTypes=application/xhtml+xml,text/html,text/plain,text/sgml
parseableMimeTypes=
parseableMimeTypes.CRAWLER=
parseableMimeTypes.PROXY=
parseableMimeTypes.ICAP=
parseableMimeTypes.URLREDIRECTOR=

# media extension string
# a comma-separated list of extensions that denote media file formats
# this is important to recognize <a href> - tags as not-html reference
# These files will be excluded from indexing _(Please keep extensions in alphabetical order)_
mediaExt=7z,ace,aif,aiff,arj,asf,asx,avi,bin,bz2,css,db,dcm,deb,dll,dmg,doc,exe,gif,gz,hqx,ico,img,iso,jar,jpe,jpeg,jpg,lx,lxl,m4v,mov,mp3,mpeg,mpg,ogg,pdf,png,ppt,ps,ram,rar,rm,rpm,scr,sit,so,swf,sxc,sxd,sxi,sxw,tar,tbz,tgz,torrent,war,wav,wmv,xcf,xls,zip
# a comma-separated list of extensions; judging by the key name these denote
# parseable (text) formats -- NOTE(review): confirm against the consuming code
parseableExt=html,htm,txt,php,shtml,asp,aspx,jsp

# Promotion Strings
# These strings appear in the Web Mask of the YACY search client
# Set these Strings to customize your peer and give any message to
# other peer users
promoteSearchPageGreeting =

# the path to the PLASMA database of the web spider
dbPath=DATA/PLASMADB

# the path to the public reverse word index for text files (web pages)
# the primary path is relative to the data root, the secondary path is an absolute path
# when the secondary path should be equal to the primary, it must be declared empty
indexPrimaryPath=DATA/INDEX
indexSecondaryPath=

# the path to the LISTS files. Most lists are used to filter web content
listsPath=DATA/LISTS

# the path to the SKINS files.
skinPath=DATA/SKINS

# the yellow-list; URL's elements
# (the core of an URL; like 'yahoo' in 'de.yahoo.com')
# appearing in this list will not get a manipulated user agent string
proxyYellowList=yacy.yellow

# the black-list; URLs appearing in this list will not be loaded;
# instead always a 404 is returned
# all these files will be placed in the listsPath
BlackLists.class=de.anomic.plasma.urlPattern.defaultURLPattern
BlackLists.Shared=url.default.black
BlackLists.DefaultList=url.default.black

#these are not needed as default. they just keep the values from being deleted ...
proxy.BlackLists=url.default.black
crawler.BlackLists=url.default.black
dht.BlackLists=url.default.black
search.BlackLists=url.default.black
surftips.BlackLists=url.default.black


# cookie black-list and white-list files (presumably used by the proxy, judging by the key names)
# NOTE(review): the white list refers to the same file as the black list
# (cookie.default.black) -- confirm that this is intended
proxyCookieBlackList=cookie.default.black
proxyCookieWhiteList=cookie.default.black

# the blue-list;
# no search result is locally presented that has any word of the bluelist
# in the search words, the URL or the URL's description
plasmaBlueList=yacy.blue

# this proxy may in turn again access another proxy
# if you wish to do that, specify it here
# if you want to switch on the proxy use, set remoteProxyUse=true
# remoteProxyNoProxy is a no-proxy pattern list for the remote proxy
remoteProxyUse=false
remoteProxyUse4Yacy=true
remoteProxyUse4SSL=true

remoteProxyHost=192.168.2.2
remoteProxyPort=4239
remoteProxyUser=
remoteProxyPwd=

remoteProxyNoProxy=192.*,10.*,127.*,localhost

# the proxy may filter the content of transferred web pages
# the bluelist removes specific keywords from web pages
proxyBlueList=yacy.blue

# security settings
# we provide proxy and server security through a 2-stage security gate:
# 1st stage: firewall-like access control through ip filter for clients
# 2nd stage: password settings for proxy, server and server administrators
# by default, these settings are weak to simplify set-up and testing
# every user/administrator shall be encouraged to change these settings
# you can also change them online during run-time on
# http://localhost:8080/

# proxyClient: client-ip's that may connect the proxy for proxy service
# if several ip's are allowed then they must be separated by a ','
# any ip may contain the wildcard-sign '*'
#proxyClient=192.168.0.4
proxyClient=localhost,127.0.0.1,192.168.*,10.*

# YaCyHop: allow public usage of proxy for yacy-protocol
# this enables usage of the internal http proxy for everyone,
# if the file path starts with /yacy/
# This is used to enable anonymization of yacy protocol requests
# Instead of asking a remote peer directly, a peer in between is asked
# to prevent that the asked peer knows which peer asks.
YaCyHop=true

# serverClient: client-ip's that may connect to the web server,
# thus are allowed to use the search service
# if you set this to another value, search requests from others
# are blocked, but you will also be blocked from using others'
# search services.
serverClient=*

# use_proxyAccounts: set to true to restrict proxy-access to some identified users.
#use User_p.html to create some Users.
use_proxyAccounts=false

# serverAccount: a user:password - pair for web server access
# this is the access to the 'public' pages on the server
# should be always open, but you get the option here
# if set to a user:password, you get a conflict with the administration account
# future versions will check if the server is unprotected,
# because the p2p-index-sharing function will use the http server for
# data exchange.
# example
#serverAccount=admin:mysecretpassword
serverAccount=
serverAccountBase64MD5=

# adminAccount: a user:password - pair for administration of
# settings through the web interface
# should be set to a secret. By default it is without a password
# but you are encouraged to set it to another value on the page
# http://localhost:8080/
#adminAccount=admin:mysecretpassword
adminAccount=
adminAccountBase64MD5=

# if you are running a principal peer, you must update the following variables
# The upload method that should be used to upload the seed-list file to
# a public accessible webserver where it can be loaded by other peers.
#
# You can set the seedUploadMethod-Property to
# - None
# - Ftp
# - File
# - Scp (only if you have installed the optional addon)
#
seedUploadMethod=none

# The URL to the seed list file
seedURL=

# This is the most common method to upload the seed-list
#
# This is an ftp account with all relevant information.
# The update is only made if there had been changes in between.
seedFTPServer=
seedFTPAccount=
seedFTPPassword=
seedFTPPath=

# alternatively to an FTP account, a peer can also become a principal peer
# if the seed-list can be generated as a file and that file is also accessible from
# the internet. In this case, omit any ftp settings and set this path here.
# if this path stays empty, an ftp account is considered
# however, you must always set a seedURL because it is used to check if the
# file is actually accessible from the internet
seedFilePath=

# Settings needed to upload the seed-list file via scp
#
# Please note that this upload method can only be used if you have installed
# this optional upload method.
seedScpServer=
seedScpServerPort=
seedScpAccount=
seedScpPassword=
seedScpPath=

# every peer should have a name. indeed, we try to give every peer a unique ID,
# which is necessary for internal organization of the index sharing, but the
# peer's name is purely informal. No function but information is applied.
# please change this at your pleasure
peerName=anomic

# every peer periodically scans for other peers. you can set the time
# of the period here (minutes)
peerCycle=2

# The p2p maintenance can run in one of three online modes:
# - don't process jobs and only access what is available in the cache -> mode 0
# - process any job only if we are online, which is technically only the case
#   if the proxy is used -> mode 1
# - process jobs periodically, with periods according to peerCycle -> mode 2
onlineMode=2

# Debug mode for YACY network: this will trigger that also local ip's are
# accepted as peer addresses
yacyDebugMode=false

#staticIP if you have a static IP, you can use this setting
staticIP=

# each time the proxy starts up, it can trigger the local browser to show the
# status page. This is active by default, to make it easier for first-time
# users to understand what this application does. You can disable browser
# pop-up here or set a different start page, like the search page
# the browser type is optional and works only under certain conditions
#browserPopUpTrigger=false
browserPopUpTrigger=true
#browserPopUpPage=index.html
browserPopUpPage=Status.html
browserPopUpApplication=netscape

# the proxy saves its own seed information. It is positive for the network if
# the seed does not change its configuration often (or not at all).
# The reason for that is that the seed hash is the target for the
# distributed hash table distribution function.
# The following file will contain the saved seed:
yacyOwnSeedFile=DATA/YACYDB/mySeed.txt
yacyDB=DATA/YACYDB

# index sharing attributes: by default, sharing is on.
# If you want to use YaCy only for local indexing (robinson mode),
# you may switch this off
allowDistributeIndex=true
allowDistributeIndexWhileCrawling=false
allowReceiveIndex=true
allowUnlimitedReceiveIndexFrom=
indexReceiveBlockBlacklist=true

# the frequency is the number of links per minute, that the peer allows
# _every_ other peer to send to this peer
defaultWordReceiveFrequency=100
defaultLinkReceiveFrequency=30
# the default may be overridden for each peer individually, these
# settings are only available through the online interface

# prefetch parameters
# the prefetch depth assigns a specific depth to the prefetch mechanism
# prefetch of 0 means no prefetch; a prefetch of 1 means to prefetch all
# embedded URLs, but since embedded image links are loaded by the browser
# this means that only embedded anchors are prefetched additionally
# a prefetch of 2 would result in loading of all images and anchor pages
# of all embedded anchors. Be careful with this value, since even a prefetch
# of 2 would result in hundreds of prefetched URLs for each single proxy fill.
proxyPrefetchDepth=0
proxyStoreHTCache=true
proxyIndexingRemote=false
proxyIndexingLocalText=true
proxyIndexingLocalMedia=true

# From the 'IndexCreate' menu point you can also define a crawling start point.
# The crawling works the same way as the prefetch, but it is possible to
# assign a different crawling depth.
# Be careful with this number. Consider a branching factor of average 20;
# A prefetch-depth of 8 would index 25.600.000.000 pages, maybe the whole WWW.
crawlingDepth=2
crawlingIfOlder=525600
crawlingDomFilterDepth=-1
crawlingDomMaxPages=-1
indexText=true
indexMedia=true

# Filter for crawling; may be used to restrict a crawl to a specific domain
# URLs are only indexed and further crawled if they match this filter
crawlingFilter=.*
crawlingQ=false
storeHTCache=false
storeTXCache=true

# peers may initiate remote crawling tasks.
# every peer may allow or disallow to be used as crawling-peer;
# you can also set a maximum crawl depth that can be requested or accepted
# order=parameters for requester; response=parameters for responder
# these values apply only for senior-senior - communication
# The delay value is the number of seconds between two separate orders
crawlOrder=true
crawlOrderDepth=0
crawlOrderDelay=8
crawlResponse=true
crawlResponseDepth=0

# indexing-exclusion - rules
# These rules are important to reduce the number of words that are indexed
# We distinguish three different sets of stop-words:
# static   - excludes all words given in the file yacy.stopwords from indexing,
# dynamic  - excludes all words from indexing which are listed by statistic rules,
# parental - excludes all words from indexing which had been indexed in the parent web page.
xsstopw=true
xdstopw=true
xpstopw=true

# Topwords filtering
# If set to true, all stopwords (stopwords.yacy) are filtered from the topwords
# Change to false if requesting hits from peers with modified stopwords-file and using the unchanged client-version
filterOutStopwordsFromTopwords=true

# performance-settings
# delay-times for permanent loops (milliseconds)
# the idlesleep is the pause that a process sleeps if the last call to the
# process job was without execution of anything;
# the busysleep is the pause after a full job execution
# the prereq-value is a memory pre-requisite: that many bytes must
# be available/free in the heap; otherwise the loop is not executed
# and another idlesleep is performed
20_dhtdistribution_idlesleep=30000
20_dhtdistribution_busysleep=10000
20_dhtdistribution_memprereq=4194304
30_peerping_idlesleep=120000
30_peerping_busysleep=120000
30_peerping_memprereq=1048576
40_peerseedcycle_idlesleep=1800000
40_peerseedcycle_busysleep=1200000
40_peerseedcycle_memprereq=2097152
50_localcrawl_idlesleep=2000
50_localcrawl_busysleep=250
50_localcrawl_memprereq=4194304
50_localcrawl_isPaused=false
61_globalcrawltrigger_idlesleep=10000
61_globalcrawltrigger_busysleep=500
61_globalcrawltrigger_memprereq=2097152
61_globalcrawltrigger_isPaused=false
62_remotetriggeredcrawl_idlesleep=10000
62_remotetriggeredcrawl_busysleep=1000
62_remotetriggeredcrawl_memprereq=4194304
62_remotetriggeredcrawl_isPaused=false
70_cachemanager_idlesleep=1000
70_cachemanager_busysleep=0
70_cachemanager_memprereq=1048576
80_indexing_idlesleep=1000
80_indexing_busysleep=125
80_indexing_memprereq=6291456
82_crawlstack_idlesleep=5000
82_crawlstack_busysleep=10
82_crawlstack_memprereq=1048576
90_cleanup_idlesleep=300000
90_cleanup_busysleep=300000
90_cleanup_memprereq=0

# multiprocessor-settings
# you may want to run time-consuming processes on several processors
# the most time-consuming process is the indexing-Process
# We implemented an option to run several of these processes here
# setting the number of processes to Zero is not allowed
# If you have a double-processor system,
# a cluster value of '2' would be appropriate
80_indexing_cluster=1

# ram cache for database files
# one *_time setting per database file
# NOTE(review): the unit of the *_time values is not stated in this file -- confirm before tuning

# collection index
ramCacheRWI_time = 30000

# responseHeader.db
ramCacheHTTP_time = 1000

# urlHash.db
ramCacheLURL_time = 10000

# stack crawl thread db
ramCachePreNURL_time = 3000

# urlNotice.db
ramCacheNURL_time = 2000

# urlErr.db
ramCacheEURL_time = 1000

# seedDBs
ramCacheDHT_time = 1000

# message.db
ramCacheMessage_time = 500

# wiki.db
ramCacheWiki_time = 500

# blog.db
ramCacheBlog_time = 500

# news1.db
ramCacheNews_time = 1000

# robotsTxt.db
ramCacheRobots_time = 0

# crawlProfile.db
ramCacheProfiles_time = 500

# default memory settings for startup of yacy
# is valid in unix/shell and windows environments but
# not for first startup of YaCy

# -Xmx<size> and -Xms<size> maximum/init Java heap size
# both values should be equal,
# otherwise the YaCy-internal memory supervision does not work
javastart_Xmx=Xmx96m
javastart_Xms=Xms96m

# priority of the yacy-process
# is valid in unix/shell and windows environments but
# not for first startup of YaCy
# UNIX: corresponds to the nice-level
# WIN: -20=realtime;-15=high;-10=above;0=normal;10=below;20=low
javastart_priority=0

# performance properties for the word index cache
# the word cache counts below limit the number of word indexes that shall be
# held in the ram cache during indexing. When YaCy is shut down, this cache must be
# flushed to disc; this may last some minutes.
# NOTE(review): wordCacheInitCount is presumably the limit used during start-up and
# wordFlushSize the number of entries flushed at once -- confirm against the consuming code
# all three values must be plain integers: any trailing character (such as ';')
# becomes part of the value
wordCacheMaxCount = 20000
wordCacheInitCount = 30000
wordFlushSize = 500

# Specifies if yacy can be used as transparent http proxy.
#
# Please note that you also have to reconfigure your firewall
# before you can use yacy as transparent proxy. On linux this
# can be done like this:
#   iptables -t nat -A PREROUTING -p tcp -s 192.168.0.0/16 \
#   --dport 80 -j DNAT --to 192.168.0.1:8080
#
# With this iptables filter listed above all http traffic that
# comes from your private network (in this case 192.168.0.0)
# and goes to any webserver listening on port 80 will be forwarded
# by the firewall to yacy running on port 8080 (192.168.0.1:8080)
isTransparentProxy=false

# Specifies if yacy should use the http connection keep-alive feature
connectionKeepAliveSupport=true

# Specifies the timeout the proxy should use
proxy.clientTimeout = 30000

# Specifies if the proxy should send the via header according to RFC
proxy.sendViaHeader=true

# Specifies if the proxy should send the X-Forwarded-For header
proxy.sendXForwardedForHeader=true

# Configuration options needed to configure server port forwarding
portForwarding.Enabled=false
portForwarding.Type=none

# port forwarding via sch
portForwarding.sch=de.anomic.server.portForwarding.sch.serverPortForwardingSch
portForwarding.sch.UseProxy=false
portForwarding.sch.Port=
portForwarding.sch.Host=
portForwarding.sch.HostPort=22
portForwarding.sch.HostUser=
portForwarding.sch.HostPwd=

# port forwarding via upnp
portForwarding.upnp=de.anomic.server.portForwarding.upnp.serverPortForwardingUpnp

# msgForwarding: Specifies if yacy should forward received messages via
# email to the configured email address
msgForwardingEnabled=false
msgForwardingCmd=/usr/sbin/sendmail
msgForwardingTo=root@localhost

#onlineCautionDelay: delay time after proxy usage before crawling is resumed
onlineCautionDelay=10000

# Some configuration values for the crawler
crawler.clientTimeout=9000

# http crawler specific settings
crawler.http.acceptEncoding=gzip
crawler.http.acceptLanguage=en-us,en;q=0.5
crawler.http.acceptCharset=ISO-8859-1,utf-8;q=0.7,*;q=0.7
crawler.http.maxFileSize=262144

# ftp crawler specific settings
crawler.ftp.maxFileSize=262144

# maximum number of crawler threads
crawler.MaxActiveThreads = 30
crawler.MaxIdleThreads = 5

# maximum number of crawl-stacker threads
stacker.MaxActiveThreads = 50
stacker.MaxIdleThreads = 10
stacker.MinIdleThreads = 5

# maximum size of indexing queue
indexer.slots = 60

# specifies if yacy should set its own referer if no referer URL
# was set by the client.
useYacyReferer = true

# allow only 443(https-port) for https-proxy?
# if you want to tunnel other protocols, set to false
secureHttps = true

# specifies if the httpdFileHandler should cache
# the template-files from the htroot directory
enableTemplateCache = true

# specifies if the http post body should be transferred
# using content-encoding gzip during index transfer
# a) indexDistribution: which is done periodically if you have enabled
#    Index Distribution via IndexControl_p.html
# b) indexTransfer: which can be used to transfer the whole index of a peer
#    this can be started via IndexTransfer_p.html
# c) indexControl: which can be triggered manually via IndexControl_p.html to
#    transfer a chosen subset of the peer index
indexDistribution.gzipBody = true
indexTransfer.gzipBody = true
indexControl.gzipBody = true

# defining timeouts for index- transfer/distribution/control
indexControl.timeout = 60000
indexDistribution.timeout = 60000
indexTransfer.timeout = 120000

# defining max. allowed amount of open files during index- transfer/distribution
indexDistribution.maxOpenFiles = 800
indexTransfer.maxOpenFiles = 800

# sizes for index distribution
indexDistribution.minChunkSize = 5
indexDistribution.maxChunkSize = 1000
indexDistribution.startChunkSize = 50

indexDistribution.maxChunkFails = 1

# defines if the peer should reject incoming index transfer
# request if a given limit is reached
indexDistribution.transferRWIReceiptLimitEnabled = true

# defines the limit: if the word cache for DHT reaches this level,
# the peer reports itself as busy
indexDistribution.dhtReceiptLimit = 1000

# Distribution of Citation-Reference (CR-) files
# The distribution is done in two steps:
# first step to anonymize the records
# second step to forward to collecting peer
# to anonymize the data even against the intermediate peer
# a specific percentage is also sent again to other peers.
# for key-numbers please see de.anomic.plasma.plasmaRankingDistribution
CRDistOn       = true
CRDist0Path    = GLOBAL/010_owncr
CRDist0Method  = 1
CRDist0Percent = 0
CRDist0Target  =
CRDist1Path    = GLOBAL/014_othercr
CRDist1Method  = 9
CRDist1Percent = 30
CRDist1Target  = kaskelix.de:8080,yacy.dyndns.org:8000,suma-lab.de:8080

# Hash of the peer where you would like to store the data your installation collected.
storagePeerHash =

#use /env/page.html (and if it exists page.class) as super template,
# which includes the generated page in #[page]#.
#This option is for developers only, because it may be integrated by default, or not,
#but it's not useful to mix configurations with and without this supertemplate.
usePageTemplate = false

# Search sequence settings
# collection:
# time = time to get a RWI out of RAM cache, assortments and WORDS files
# count = maximum number of RWI-entries that shall be collected
#
# join:
# time = time to perform the join between all collected RWIs
# count = maximum number of entries that shall be joined
#
# presort:
# time = time to do a sort of the joined URL-records
# count = maximum number of entries that shall be pre-sorted
#
# urlfetch:
# time = time to fetch the real URLs from the LURL database
# count = maximum number of urls that shall be fetched
#
# postsort:
# time = time for final sort of URLs
# count = maximum number of URLs that shall be retrieved during sort
#
# filter:
# time = time to filter out unwanted urls (like redundant urls)
# count = maximum number of urls that shall be filtered
#
# snippetfetch:
# time = time to fetch snippets for selected URLs
# count = maximum number of snippets to be fetched
#
# all values are percent
# time-percent is the percent of total search time
# count-percent is the percent of total wanted urls in result
# we distinguish local and remote search times
searchProcessLocalTime_c = 44
searchProcessLocalCount_c = 10000000
searchProcessLocalTime_j = 8
searchProcessLocalCount_j = 1000000
searchProcessLocalTime_r = 8
searchProcessLocalCount_r =100000
searchProcessLocalTime_u = 20
searchProcessLocalCount_u = 10000
searchProcessLocalTime_o = 10
searchProcessLocalCount_o = 100
searchProcessLocalTime_f = 5
searchProcessLocalCount_f = 100
searchProcessLocalTime_s = 5
searchProcessLocalCount_s = 30

searchProcessRemoteTime_c = 44
searchProcessRemoteCount_c = 1000000
searchProcessRemoteTime_j = 8
searchProcessRemoteCount_j = 1000000
searchProcessRemoteTime_r = 8
searchProcessRemoteCount_r = 1000
searchProcessRemoteTime_u = 20
searchProcessRemoteCount_u = 1000
searchProcessRemoteTime_o = 10
searchProcessRemoteCount_o = 1000
searchProcessRemoteTime_f = 5
searchProcessRemoteCount_f = 100
searchProcessRemoteTime_s = 5
searchProcessRemoteCount_s = 10

# timeouts for snippet fetching in ms
# timeout_text is for text-snippets, timeout_media for media, e.g. images
timeout_text = 10000
timeout_media = 15000

# path to ranking directory containing ranking reference files
rankingPath = DATA/RANKING

# a list of domain name patterns that should not be cached by the httpc dns cache
httpc.nameCacheNoCachingPatterns = .*.ath.cx,.*.blogdns.*,.*.boldlygoingnowhere.org,.*.dnsalias.*,.*.dnsdojo.*,.*.dvrdns.org,.*.dyn-o-saur.com,.*.dynalias.*,.*.dyndns.*,.*.ftpaccess.cc,.*.game-host.org,.*.game-server.cc,.*.getmyip.com,.*.gotdns.*,.*.ham-radio-op.net,.*.hobby-site.com,.*.homedns.org,.*.homeftp.*,.*.homeip.net,.*.homelinux.*,.*.homeunix.*,.*.is-a-chef.*,.*.is-a-geek.*,.*.kicks-ass.*,.*.merseine.nu,.*.mine.nu,.*.myphotos.cc,.*.podzone.*,.*.scrapping.cc,.*.selfip.*,.*.servebbs.*,.*.serveftp.*,.*.servegame.org,.*.shacknet.nu

#externalRedirectors
#squid Redirector compatible
externalRedirector=

svnRevision=0

currentSkin=

# temporary flag for new database structure. set only true for testing
# ALL DATA THAT IS CREATED WITH THIS FLAG ON WILL BE VOID IN A FINAL VERSION
# table-types: RAM = 0, TREE = 1, FLEX = 2;
tableTypeForPreNURL=0

# flag to show if pages shall be usable for non-admin users
# this can be applied to the Surftips.html and yacysearch.html page
publicSurftips = true
publicSearchpage = true

# a Java Properties file containing a list of SOAP services that should be deployed
# additionally to the default services. E.g.
#    soap.serviceDeploymentList = DATA/SETTINGS/myServices.properties
#
# Each entry in the property file must have the following format:
#    servicename=fullClassname
#
# e.g.
#    test=org.myservices.test
#
# Servicename is the name that should be used to access the service, e.g.
# if the service name is "test" then the service can be reached using
#    http://localhost:8080/soap/test
#
# The WSDL document that belongs to the deployed service could be reached
# using
#    http://localhost:8080/soap/test?wsdl
#
soap.serviceDeploymentList =


# Wiki access rights
# the built-in wiki system allows by default only that the administrator is allowed to make changes
# this can be changed. There are three options:
# admin - only the admin has write right
# all   - everybody has write right
# user  - the admin and every user registered in the user db has write right
WikiAccess = admin

# Search Profiles
# we will support different search profiles
# this is currently only a single default profile
# If this profile setting is empty, a hard-coded profile from plasmaSearchRanking is used
rankingProfile =

#optional extern thumbnail program.
#the program must accept the invocation PROGRAM http://url /path/to/filename
thumbnailProgram =

# settings for the peer's local robots.txt
# the following restrictions are possible (comma-separated):
# - all       : entire domain is disallowed
# - blog      : the blog-pages
# - bookmarks : the bookmark-page
# - dirs      : all directories in htroot (standard setting, as there is no usable information in)
# - fileshare : all files in the peer's file share (DATA/HTDOCS/share)
# - homepage  : all files on the peer's home page (DATA/HTDOCS/www)
# - locked    : all servlets ending on '_p.*' (standard setting, as robots would need a password to access them anyways)
# - news      : the news-page
# - network   : the network-pages
# - status    : peer's status page
# - surftips  : the surftips-page
# - wiki      : the wiki-page
httpd.robots.txt = locked,dirs

# class to use for parsing wikicode
wikiParser.class = de.anomic.data.wikiCode