|
|
|
###
|
|
|
|
### YACY Init File
|
|
|
|
###
|
|
|
|
# These properties will be loaded upon installation.
|
|
|
|
# They are used only once for set-up.
|
|
|
|
# If you make changes to this file and want them to take effect,
|
|
|
|
# you must delete the httpProxy.conf file in DATA/SETTINGS
|
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------
|
|
|
|
# the http service configurations
|
|
|
|
|
|
|
|
# port number where the server should bind to
|
|
|
|
# e.g. 8080
|
|
|
|
# #eth0:8080
|
|
|
|
# 192.168.0.1:8080
|
|
|
|
port = 8080
|
|
|
|
|
|
|
|
# time-out of client control socket in milliseconds
|
|
|
|
# since this applies only to the client-proxy connection,
|
|
|
|
# it can be rather short
|
|
|
|
# milliseconds
|
|
|
|
clientTimeout = 30000
|
|
|
|
|
|
|
|
# maximal number of httpd sessions
|
|
|
|
# a client may open several connections at once, and the httpdMaxActiveSessions value sets
|
|
|
|
# a limit on the number of concurrent connections
|
|
|
|
httpdMaxActiveSessions = 150
|
|
|
|
httpdMaxIdleSessions = 75
|
|
|
|
httpdMinIdleSessions = 5
|
|
|
|
|
|
|
|
# default root path for the file server
|
|
|
|
# may be overridden by the htdocs parameter
|
|
|
|
# users shall be encouraged to use the htdocs path for individual content,
|
|
|
|
# not this path defined here
|
|
|
|
htRootPath = htroot
|
|
|
|
htTemplatePath = htroot/env/templates
|
|
|
|
|
|
|
|
# individual htroot folder
|
|
|
|
# every user may publicise her/his own web pages
|
|
|
|
# these pages shall be placed in the path defined here
|
|
|
|
# the htdocs path shares its content with the htroot path
|
|
|
|
htDocsPath = DATA/HTDOCS
|
|
|
|
|
|
|
|
# the default files (typically index.html), if no file name is given
|
|
|
|
# The complete path to this file is created by combination with the rootPath
|
|
|
|
# you can set a list of defaults, separated by comma
|
|
|
|
# the first one is prioritized
|
|
|
|
defaultFiles = ConfigBasic.html,index.html,default.html,search.html,console.html,control.html,welcome.html,wiki.html,forum.html,blog.html,email.html,content.html,monitor.html,share.html,dir.html,readme.txt
|
|
|
|
|
|
|
|
# locale-options: YaCy supports localization.
|
|
|
|
# Web pages for special languages are located in the htLocalePath
|
|
|
|
# The htLocaleLang defines a list of language options as <dir>/<named-language>
|
|
|
|
# the <dir> must exist as sub-path to htLocalePath
|
|
|
|
# the htLocaleSelection selects from the given locales, value=one-of-<dir>
|
|
|
|
htDefaultPath=htroot
|
|
|
|
htLocalePath=DATA/HTROOT/locale
|
|
|
|
# language display names are written in their native spelling
htLocaleLang=default/English,de/Deutsch,fr/Français,nl/Nederlands,it/Italiano,es/Español,pt/Português,fi/Suomi,se/Svenska,dk/Dansk,gr/Ελληνικά
|
|
|
|
htLocaleSelection=default
|
|
|
|
|
|
|
|
# virtual host for httpdFileServlet access
|
|
|
|
# for example http://<fileHost>/ shall access the file servlet and
|
|
|
|
# return the defaultFile at rootPath
|
|
|
|
# either way, http://<fileHost>/ denotes the same as http://localhost:<port>/
|
|
|
|
# for the preconfigured value 'localpeer', the URL is:
|
|
|
|
# http://localpeer/
|
|
|
|
fileHost = localpeer
|
|
|
|
|
|
|
|
# root path for message files
|
|
|
|
messPath = C:/AnomicServer
|
|
|
|
|
|
|
|
# specify the path to the MIME matching file table
|
|
|
|
mimeConfig = httpd.mime
|
|
|
|
|
|
|
|
# specify the path to message resource file
|
|
|
|
messConfig = httpd.messages
|
|
|
|
|
|
|
|
# proxy use. This server can also act as a caching proxy.
|
|
|
|
# to enable that function, set proxy=true
|
|
|
|
proxy=true
|
|
|
|
|
|
|
|
# a path to the proxy's file cache.
|
|
|
|
# This will be used if the server is addressed as a proxy
|
|
|
|
proxyCache = DATA/HTCACHE
|
|
|
|
|
|
|
|
# the proxy's maximum disc cache size in megabytes
|
|
|
|
# there should be enough space for the browsing load of an internet cafe
|
|
|
|
# running at 56kbit/s modem speed (this time not unusual)
|
|
|
|
# during 3 days, 8 hours a day
|
|
|
|
# necessary space = 3 * 8 * 60 * 60 * 56 / 8 = 604800 KB = ca. 590 MB
|
|
|
|
# since 600 MB is not much these days (it's below one GB!)
|
|
|
|
# we recommend using that space
|
|
|
|
#proxyCacheSize = 600
|
|
|
|
#for testing:
|
|
|
|
proxyCacheSize = 200
|
|
|
|
|
|
|
|
# the following mime-types are the whitelist for indexing
|
|
|
|
#
|
|
|
|
# parseableRealtimeMimeTypes: specifies mime-types that can be indexed on the fly
|
|
|
|
# parseableMimeTypes: specifies mime-types that can be indexed but not on the fly
|
|
|
|
parseableRealtimeMimeTypes=application/xhtml+xml,text/html,text/plain
|
|
|
|
parseableMimeTypes=
|
|
|
|
parseableMimeTypes.CRAWLER=
|
|
|
|
parseableMimeTypes.PROXY=
|
|
|
|
parseableMimeTypes.ICAP=
|
|
|
|
parseableMimeTypes.URLREDIRECTOR=
|
|
|
|
|
|
|
|
|
|
|
|
# media extension string
|
|
|
|
# a comma-separated list of extensions that denote media file formats
|
|
|
|
# this is important to recognize <a href> - tags as not-html reference
|
|
|
|
# These files will be excluded from indexing _(Please keep extensions in alphabetical order)_
|
|
|
|
mediaExt=7z,ace,arj,asf,asx,avi,bin,bz2,css,db,dcm,deb,doc,dll,dmg,gif,gz,hqx,ico,img,iso,jar,jpe,jpg,jpeg,lx,lxl,mpeg,mov,mp3,mpg,ogg,png,pdf,ppt,ps,ram,rar,rm,rpm,scr,sit,so,swf,sxc,sxd,sxi,sxw,tar,tbz,tgz,torrent,war,wmv,xcf,xls,zip
|
|
|
|
parseableExt=html,htm,txt,php,shtml,asp,aspx,jsp
|
|
|
|
|
|
|
|
# Promotion Strings
|
|
|
|
# These strings appear in the Web Mask of the YACY search client
|
|
|
|
# Set these Strings to customize your peer and give any message to
|
|
|
|
# other peer users
|
|
|
|
promoteSearchPageGreeting =
|
|
|
|
|
|
|
|
# the path to the PLASMA database, especially the reverse word index
|
|
|
|
dbPath=DATA/PLASMADB
|
|
|
|
|
|
|
|
# the path to the LISTS files. Most lists are used to filter web content
|
|
|
|
listsPath=DATA/LISTS
|
|
|
|
|
|
|
|
# the path to the SKINS files.
|
|
|
|
skinPath=DATA/SKINS
|
|
|
|
|
|
|
|
# the yellow-list; URL's elements
|
|
|
|
# (the core of a URL; like 'yahoo' in 'de.yahoo.com')
|
|
|
|
# appearing in this list will not get a manipulated user agent string
|
|
|
|
proxyYellowList=yacy.yellow
|
|
|
|
|
|
|
|
# the black-list; URLs appearing in this list will not be loaded;
|
|
|
|
# instead always a 404 is returned
|
|
|
|
# all these files will be placed in the listsPath
|
|
|
|
proxyBlackLists=url.default.black
|
|
|
|
proxyBlackListsActive=url.default.black
|
|
|
|
proxyBlackListsShared=url.default.black
|
|
|
|
proxyCookieBlackList=cookie.default.black
|
|
|
|
# NOTE(review): the white list below names the same file as proxyCookieBlackList
# (cookie.default.black); this looks like a copy/paste slip - confirm the intended file.
proxyCookieWhiteList=cookie.default.black
|
|
|
|
|
|
|
|
# the blue-list;
|
|
|
|
# no search result is locally presented that has any word of the bluelist
|
|
|
|
# in the search words, the URL or the URL's description
|
|
|
|
plasmaBlueList=yacy.blue
|
|
|
|
|
|
|
|
# this proxy may in turn again access another proxy
|
|
|
|
# if you wish to do that, specify it here
|
|
|
|
# if you want to switch on the proxy use, set remoteProxyUse=true
|
|
|
|
# remoteProxyNoProxy is a no-proxy pattern list for the remote proxy
|
|
|
|
remoteProxyUse=false
|
|
|
|
remoteProxyUse4Yacy=true
|
|
|
|
remoteProxyUse4SSL=true
|
|
|
|
|
|
|
|
remoteProxyHost=192.168.2.2
|
|
|
|
remoteProxyPort=4239
|
|
|
|
remoteProxyUser=
|
|
|
|
remoteProxyPwd=
|
|
|
|
|
|
|
|
remoteProxyNoProxy=192.*,10.*,127.*,localhost
|
|
|
|
|
|
|
|
# the proxy may filter the content of transferred web pages
|
|
|
|
# the bluelist removes specific keywords from web pages
|
|
|
|
proxyBlueList=yacy.blue
|
|
|
|
|
|
|
|
# security settings
|
|
|
|
# we provide proxy and server security through a 2-stage security gate:
|
|
|
|
# 1st stage: firewall-like access control through ip filter for clients
|
|
|
|
# 2nd stage: password settings for proxy, server and server administrators
|
|
|
|
# by default, these settings are weak to simplify set-up and testing
|
|
|
|
# every user/administrator shall be encouraged to change these settings
|
|
|
|
# you can change them also online during run-time on
|
|
|
|
# http://localhost:8080/
|
|
|
|
|
|
|
|
# proxyClient: client-ip's that may connect to the proxy for proxy service
|
|
|
|
# if several ip's are allowed then they must be separated by a ','
|
|
|
|
# any ip may contain the wildcard-sign '*'
|
|
|
|
#proxyClient=192.168.0.4
|
|
|
|
proxyClient=localhost,127.0.0.1,192.168.*,10.*
|
|
|
|
|
|
|
|
# serverClient: client-ip's that may connect to the web server,
|
|
|
|
# thus are allowed to use the search service
|
|
|
|
# if you set this to another value, search requests from others
|
|
|
|
# are blocked, but you will also be blocked from using others
|
|
|
|
# search services.
|
|
|
|
serverClient=*
|
|
|
|
|
|
|
|
### proxyAccount: a user:password - pair for proxy authentication
|
|
|
|
### leave empty for no authentication
|
|
|
|
### example:
|
|
|
|
##proxyAccount=jim:knopf
|
|
|
|
##proxyAccount=
|
|
|
|
##proxyAccountBase64MD5=
|
|
|
|
|
|
|
|
# use_proxyAccounts: set to true to restrict proxy-access to some identified users.
|
|
|
|
#use User_p.html to create some Users.
|
|
|
|
use_proxyAccounts=false
|
|
|
|
|
|
|
|
# serverAccount: a user:password - pair for web server access
|
|
|
|
# this is the access to the 'public' pages on the server
|
|
|
|
# should be always open, but you get the option here
|
|
|
|
# if set to a user:password, you get a conflict with the administration account
|
|
|
|
# future versions will check if the server is unprotected,
|
|
|
|
# because the p2p-index-sharing function will use the http server for
|
|
|
|
# data exchange.
|
|
|
|
# example
|
|
|
|
#serverAccount=dicke:berta
|
|
|
|
serverAccount=
|
|
|
|
serverAccountBase64MD5=
|
|
|
|
|
|
|
|
# adminAccount: a user:password - pair for administration of
|
|
|
|
# settings through the web interface
|
|
|
|
# should be set to a secret. By default it is without a password
|
|
|
|
# but you are encouraged to set it to another value on the page
|
|
|
|
# http://localhost:8080/
|
|
|
|
#adminAccount=admin:anomic
|
|
|
|
adminAccount=
|
|
|
|
adminAccountBase64MD5=
|
|
|
|
|
|
|
|
# peer-to-peer construction for distributed search
|
|
|
|
# we have several stages:
|
|
|
|
# 1st: a file within every distribution that has a list of URLs:
|
|
|
|
# -> this is the superseed file
|
|
|
|
# 2nd: the files that can be retrieved by the superseeds' URLs
|
|
|
|
# are called seed list-files.
|
|
|
|
# -> the seed list-files contain IP/port combinations of running
|
|
|
|
# AnomicHTTPProxies
|
|
|
|
# 3rd: the peers that are targeted within the seed files are called superpeers
|
|
|
|
# 4th: the superpeers hold and share a list of all client/search/crawl peers
|
|
|
|
#
|
|
|
|
# some superpeers should be able to create again seed list-files.
|
|
|
|
# These superpeers must upload their IP or their list of peer-IP's to a
|
|
|
|
# ftp location to create the seed list-file.
|
|
|
|
# Everyone who does so should mail his/her new seed location to mc<at>anomic.de
|
|
|
|
# The seed list-file location will then be included in the superseed file.
|
|
|
|
# This superseed file is available then at two locations:
|
|
|
|
# - it is included in every distribution and
|
|
|
|
# - updated through a specific URL-location
|
|
|
|
# we see the file name and the URL of the superseed here:
|
|
|
|
superseedFile=superseed.txt
|
|
|
|
superseedLocation=http://www.yacy.net/superseed.txt
|
|
|
|
|
|
|
|
|
|
|
|
# if you are running a principal peer, you must update the following variables
|
|
|
|
# The upload method that should be used to upload the seed-list file to
|
|
|
|
# a publicly accessible webserver where it can be loaded by other peers.
|
|
|
|
#
|
|
|
|
# You can set the seedUploadMethod-Property to
|
|
|
|
# - None
|
|
|
|
# - Ftp
|
|
|
|
# - File
|
|
|
|
# - Scp (only if you have installed the optional addon)
|
|
|
|
#
|
|
|
|
seedUploadMethod=
|
|
|
|
|
|
|
|
# The URL to the seed list file
|
|
|
|
seedURL=
|
|
|
|
|
|
|
|
# This is the most common method to upload the seed-list
|
|
|
|
#
|
|
|
|
# This is an ftp account with all relevant information.
|
|
|
|
# The update is only made if there had been changes in between.
|
|
|
|
seedFTPServer=
|
|
|
|
seedFTPAccount=
|
|
|
|
seedFTPPassword=
|
|
|
|
seedFTPPath=
|
|
|
|
|
|
|
|
# alternatively to an FTP account, a peer can also become a principal peer
|
|
|
|
# if the seed-list can be generated as a file and that file is also accessible from
|
|
|
|
# the internet. In this case, omit any ftp settings and set this path here.
|
|
|
|
# if this path stays empty, an ftp account is considered
|
|
|
|
# however, you must always set a seedURL because it is used to check if the
|
|
|
|
# file is actually accessible from the internet
|
|
|
|
seedFilePath=
|
|
|
|
|
|
|
|
# Settings needed to upload the seed-list file via scp
|
|
|
|
#
|
|
|
|
# Please note that this upload method can only be used if you have installed
|
|
|
|
# this optional upload method.
|
|
|
|
seedScpServer=
|
|
|
|
seedScpServerPort=
|
|
|
|
seedScpAccount=
|
|
|
|
seedScpPassword=
|
|
|
|
seedScpPath=
|
|
|
|
|
|
|
|
# every peer should have a name. indeed, we try to give every peer a unique ID,
|
|
|
|
# which is necessary for internal organization of the index sharing, but the
|
|
|
|
# peer's name is purely informal. No function but information is applied.
|
|
|
|
# please change this at your pleasure
|
|
|
|
peerName=anomic
|
|
|
|
|
|
|
|
# every peer periodically scans for other peers. you can set the time
|
|
|
|
# of the period here (minutes)
|
|
|
|
peerCycle=2
|
|
|
|
|
|
|
|
# The p2p maintenance can run in one of three online modes:
|
|
|
|
# - don't process jobs and only access what is available in the cache -> mode 0
|
|
|
|
# - process any job only if we are online, which is technically only the case
|
|
|
|
# if the proxy is used -> mode 1
|
|
|
|
# - process jobs periodically, with periods according to peerCycle -> mode 2
|
|
|
|
#onlineMode=1
|
|
|
|
onlineMode=2
|
|
|
|
|
|
|
|
# Debug mode for YACY network: this will trigger that also local ip's are
|
|
|
|
# accepted as peer addresses
|
|
|
|
yacyDebugMode=false
|
|
|
|
|
|
|
|
#staticIP if you have a static IP, you can use this setting
|
|
|
|
staticIP=
|
|
|
|
|
|
|
|
# each time the proxy starts up, it can trigger the local browser to show the
|
|
|
|
# status page. This is active by default, to make it easier for first-time
|
|
|
|
# users to understand what this application does. You can disable browser
|
|
|
|
# pop-up here or set a different start page, like the search page
|
|
|
|
# the browser type is optional and works only under certain conditions
|
|
|
|
#browserPopUpTrigger=false
|
|
|
|
browserPopUpTrigger=true
|
|
|
|
#browserPopUpPage=index.html
|
|
|
|
browserPopUpPage=Status.html
|
|
|
|
browserPopUpApplication=netscape
|
|
|
|
|
|
|
|
# the proxy saves its own seed information. It is positive for the network if
|
|
|
|
# the seed does not change its configuration often (or not at all).
|
|
|
|
# The reason for that is that the seed hash is the target for the
|
|
|
|
# distributed hash table distribution function.
|
|
|
|
# The following file will contain the saved seed:
|
|
|
|
yacyOwnSeedFile=DATA/YACYDB/mySeed.txt
|
|
|
|
yacyDB=DATA/YACYDB
|
|
|
|
|
|
|
|
# index sharing attributes: by default, sharing is on.
|
|
|
|
# If you want to use YaCy only for local indexing (robinson mode),
|
|
|
|
# you may switch this off
|
|
|
|
allowDistributeIndex=true
|
|
|
|
allowDistributeIndexWhileCrawling=false
|
|
|
|
allowReceiveIndex=true
|
|
|
|
allowUnlimitedReceiveIndexFrom=
|
|
|
|
indexReceiveBlockBlacklist=true
|
|
|
|
|
|
|
|
# the frequency is the number of links per minute, that the peer allows
|
|
|
|
# _every_ other peer to send to this peer
|
|
|
|
defaultWordReceiveFrequency=100
|
|
|
|
defaultLinkReceiveFrequency=30
|
|
|
|
# the default may be overridden for each peer individually, these
|
|
|
|
# settings are only available through the online interface
|
|
|
|
|
|
|
|
# prefetch parameters
|
|
|
|
# the prefetch depth assigns a specific depth to the prefetch mechanism
|
|
|
|
# prefetch of 0 means no prefetch; a prefetch of 1 means to prefetch all
|
|
|
|
# embedded URLs, but since embedded image links are loaded by the browser
|
|
|
|
# this means that only embedded anchors are prefetched additionally
|
|
|
|
# a prefetch of 2 would result in loading of all images and anchor pages
|
|
|
|
# of all embedded anchors. Be careful with this value, since even a prefetch
|
|
|
|
# of 2 would result in hundreds of prefetched URLs for each single proxy fill.
|
|
|
|
proxyPrefetchDepth=0
|
|
|
|
proxyStoreHTCache=true
|
|
|
|
proxyCrawlOrder=false
|
|
|
|
|
|
|
|
# From the 'IndexCreate' menu point you can also define a crawling start point.
|
|
|
|
# The crawling works the same way as the prefetch, but it is possible to
|
|
|
|
# assign a different crawling depth.
|
|
|
|
# Be careful with this number. Consider a branching factor of average 20;
|
|
|
|
# A prefetch-depth of 8 would index 25.600.000.000 pages, maybe the whole WWW.
|
|
|
|
crawlingDepth=2
|
|
|
|
crawlingIfOlder=525600
|
|
|
|
crawlingDomFilterDepth=-1
|
|
|
|
crawlingDomMaxPages=-1
|
|
|
|
localIndexing=true
|
|
|
|
|
|
|
|
# Filter for crawling; may be used to restrict a crawl to a specific domain
|
|
|
|
# URLs are only indexed and further crawled if they match this filter
|
|
|
|
crawlingFilter=.*
|
|
|
|
crawlingQ=false
|
|
|
|
storeHTCache=false
|
|
|
|
storeTXCache=true
|
|
|
|
|
|
|
|
# default crawl profile entries
|
|
|
|
# if these entries are empty, then a new entry will be generated
|
|
|
|
defaultProxyProfile=
|
|
|
|
defaultRemoteProfile=
|
|
|
|
|
|
|
|
# peers may initiate remote crawling tasks.
|
|
|
|
# every peer may allow or disallow to be used as crawling-peer;
|
|
|
|
# you can also set a maximum crawl depth that can be requested or accepted
|
|
|
|
# order=parameters for requester; response=parameters for responder
|
|
|
|
# these values apply only for senior-senior - communication
|
|
|
|
# The delay value is number of seconds between two separate orders
|
|
|
|
crawlOrder=true
|
|
|
|
crawlOrderDepth=0
|
|
|
|
crawlOrderDelay=8
|
|
|
|
crawlResponse=true
|
|
|
|
crawlResponseDepth=0
|
|
|
|
|
|
|
|
# indexing-exclusion - rules
|
|
|
|
# These rules are important to reduce the number of words that are indexed
|
|
|
|
# We distinguish three different sets of stop-words:
|
|
|
|
# static - excludes all words given in the file yacy.stopwords from indexing,
|
|
|
|
# dynamic - excludes all words from indexing which are listed by statistic rules,
|
|
|
|
# parental - excludes all words from indexing which had been indexed in the parent web page.
|
|
|
|
xsstopw=true
|
|
|
|
xdstopw=true
|
|
|
|
xpstopw=true
|
|
|
|
|
|
|
|
# performance-settings
|
|
|
|
# delay-times for permanent loops (milliseconds)
|
|
|
|
# the idlesleep is the pause that a process sleeps if the last call to the
|
|
|
|
# process job was without execution of anything;
|
|
|
|
# the busysleep is the pause after a full job execution
|
|
|
|
# the prereq-value is a memory pre-requisite: that many bytes must
|
|
|
|
# be available/free in the heap; otherwise the loop is not executed
|
|
|
|
# and another idlesleep is performed
|
|
|
|
20_dhtdistribution_idlesleep=20000
|
|
|
|
20_dhtdistribution_busysleep=2000
|
|
|
|
20_dhtdistribution_memprereq=8388608
|
|
|
|
30_peerping_idlesleep=120000
|
|
|
|
30_peerping_busysleep=120000
|
|
|
|
30_peerping_memprereq=1048576
|
|
|
|
40_peerseedcycle_idlesleep=1800000
|
|
|
|
40_peerseedcycle_busysleep=1200000
|
|
|
|
40_peerseedcycle_memprereq=4194304
|
|
|
|
50_localcrawl_idlesleep=10000
|
|
|
|
50_localcrawl_busysleep=100
|
|
|
|
50_localcrawl_memprereq=1048576
|
|
|
|
50_localcrawl_isPaused=false
|
|
|
|
61_globalcrawltrigger_idlesleep=10000
|
|
|
|
61_globalcrawltrigger_busysleep=200
|
|
|
|
61_globalcrawltrigger_memprereq=1048576
|
|
|
|
61_globalcrawltrigger_isPaused=false
|
|
|
|
62_remotetriggeredcrawl_idlesleep=10000
|
|
|
|
62_remotetriggeredcrawl_busysleep=200
|
|
|
|
62_remotetriggeredcrawl_memprereq=1048576
|
|
|
|
62_remotetriggeredcrawl_isPaused=false
|
|
|
|
70_cachemanager_idlesleep=5000
|
|
|
|
70_cachemanager_busysleep=0
|
|
|
|
70_cachemanager_memprereq=1048576
|
|
|
|
80_indexing_idlesleep=2000
|
|
|
|
80_indexing_busysleep=100
|
|
|
|
80_indexing_memprereq=2097152
|
|
|
|
82_crawlstack_idlesleep=5000
|
|
|
|
82_crawlstack_busysleep=0
|
|
|
|
82_crawlstack_memprereq=1048576
|
|
|
|
90_cleanup_idlesleep=300000
|
|
|
|
90_cleanup_busysleep=300000
|
|
|
|
90_cleanup_memprereq=0
|
|
|
|
|
|
|
|
# multiprocessor-settings
|
|
|
|
# you may want to run time-consuming processes on several processors
|
|
|
|
# the most time-consuming process is the indexing-Process
|
|
|
|
# We implemented an option to run several of these processes here
|
|
|
|
# setting the number of processes to Zero is not allowed
|
|
|
|
# If you have a double-processor system,
|
|
|
|
# a cluster value of '2' would be appropriate
|
|
|
|
80_indexing_cluster=1
|
|
|
|
|
|
|
|
# ram cache for database files
|
|
|
|
|
|
|
|
# ram cache for assortment cache cluster (for all 64 files)
|
|
|
|
ramCacheRWI = 8388608
|
|
|
|
|
|
|
|
# ram cache for responseHeader.db
|
|
|
|
ramCacheHTTP = 4194304
|
|
|
|
|
|
|
|
# ram cache for urlHash.db
|
|
|
|
ramCacheLURL = 4194304
|
|
|
|
|
|
|
|
# ram cache for urlNotice.db
|
|
|
|
ramCacheNURL = 4194304
|
|
|
|
|
|
|
|
# ram cache for urlErr.db
|
|
|
|
ramCacheEURL = 8192
|
|
|
|
|
|
|
|
# ram cache for seedDBs
|
|
|
|
ramCacheDHT = 8192
|
|
|
|
|
|
|
|
# ram cache for message.db
|
|
|
|
ramCacheMessage = 8192
|
|
|
|
|
|
|
|
# ram cache for wiki.db
|
|
|
|
ramCacheWiki = 8192
|
|
|
|
|
|
|
|
# ram cache for blog.db
|
|
|
|
ramCacheBlog = 2048
|
|
|
|
|
|
|
|
# ram cache for news1.db
|
|
|
|
ramCacheNews = 8192
|
|
|
|
|
|
|
|
# ram cache for robotsTxt.db
|
|
|
|
ramCacheRobots = 2097152
|
|
|
|
|
|
|
|
# ram cache for crawlProfile.db
|
|
|
|
ramCacheProfiles = 8192
|
|
|
|
|
|
|
|
# ram cache for stack crawl thread db
|
|
|
|
ramCachePreNURL = 4194304
|
|
|
|
|
|
|
|
# default memory settings for startup of yacy
|
|
|
|
# is only valid in unix/shell environments and
|
|
|
|
# not for first startup of YaCy
|
|
|
|
|
|
|
|
# -Xmx<size> set maximum Java heap size
|
|
|
|
javastart_Xmx=Xmx64m
|
|
|
|
|
|
|
|
# -Xms<size> set initial Java heap size
|
|
|
|
javastart_Xms=Xms10m
|
|
|
|
|
|
|
|
# performance properties for the word index cache
|
|
|
|
# wordCacheMaxLow/High is the number of word indexes that shall be held in the
|
|
|
|
# ram cache during indexing. When YaCy is shut down, this cache must be
|
|
|
|
# flushed to disc; this may last some minutes.
|
|
|
|
# The low value is valid for crawling tasks, the high value is valid for
|
|
|
|
# remote index transmissions and search requests
|
|
|
|
# maxWaitingWordFlush gives the number of seconds that the shutdown
|
|
|
|
# may last for the word flush
|
|
|
|
wordCacheMaxCount = 12000
|
|
|
|
|
|
|
|
# Specifies if yacy can be used as transparent http proxy.
|
|
|
|
#
|
|
|
|
# Please note that you also have to reconfigure your firewall
|
|
|
|
# before you can use yacy as transparent proxy. On linux this
|
|
|
|
# can be done like this:
|
|
|
|
# iptables -t nat -A PREROUTING -p tcp -s 192.168.0.0/16 \
|
|
|
|
# --dport 80 -j DNAT --to 192.168.0.1:8080
|
|
|
|
#
|
|
|
|
# With this iptables filter listed above all http traffic that
|
|
|
|
# comes from your private network (in this case 192.168.0.0)
|
|
|
|
# and goes to any webserver listening on port 80 will be forwarded
|
|
|
|
# by the firewall to yacy running on port 8080 (192.168.0.1:8080)
|
|
|
|
isTransparentProxy=false
|
|
|
|
|
|
|
|
# Specifies if yacy should use the http connection keep-alive feature
|
|
|
|
connectionKeepAliveSupport=true
|
|
|
|
|
|
|
|
# Specifies if the proxy should send the via header according to RFC
|
|
|
|
proxy.sendViaHeader=true
|
|
|
|
|
|
|
|
# Configuration options needed to configure server port forwarding
|
|
|
|
portForwardingEnabled=false
|
|
|
|
portForwardingUseProxy=false
|
|
|
|
portForwardingPort=
|
|
|
|
|
|
|
|
portForwardingHost=
|
|
|
|
portForwardingHostPort=22
|
|
|
|
portForwardingHostUser=
|
|
|
|
portForwardingHostPwd=
|
|
|
|
|
|
|
|
# msgForwarding: Specifies if yacy should forward received messages via
|
|
|
|
# email to the configured email address
|
|
|
|
msgForwardingEnabled=false
|
|
|
|
msgForwardingCmd=/usr/sbin/sendmail
|
|
|
|
msgForwardingTo=root@localhost
|
|
|
|
|
|
|
|
#onlineCautionDelay: delay time after proxy usage before crawling is resumed
|
|
|
|
onlineCautionDelay=30000
|
|
|
|
|
|
|
|
# Some configuration values for the crawler
|
|
|
|
crawler.acceptLanguage=en-us,en;q=0.5
|
|
|
|
crawler.acceptCharset=ISO-8859-1,utf-8;q=0.7,*;q=0.7
|
|
|
|
crawler.clientTimeout=9000
|
|
|
|
|
|
|
|
# maximum number of crawler threads
|
|
|
|
crawler.MaxActiveThreads = 10
|
|
|
|
crawler.MaxIdleThreads = 7
|
|
|
|
crawler.MinIdleThreads = 5
|
|
|
|
|
|
|
|
# maximum number of crawl-stacker threads
|
|
|
|
stacker.MaxActiveThreads = 50
|
|
|
|
stacker.MaxIdleThreads = 10
|
|
|
|
stacker.MinIdleThreads = 5
|
|
|
|
|
|
|
|
# maximum size of indexing queue
|
|
|
|
indexer.slots = 100
|
|
|
|
|
|
|
|
# specifies if yacy should set its own referer if no referer URL
|
|
|
|
# was set by the client.
|
|
|
|
useYacyReferer = true
|
|
|
|
|
|
|
|
# allow only 443(https-port) for https-proxy?
|
|
|
|
# if you want to tunnel other protocols, set to false
|
|
|
|
secureHttps = true
|
|
|
|
|
|
|
|
# specifies if the httpdFileHandler should cache
|
|
|
|
# the template-files from the htroot directory
|
|
|
|
enableTemplateCache = true
|
|
|
|
|
|
|
|
# specifies if the http post body should be transferred
|
|
|
|
# using content-encoding gzip during index transfer
|
|
|
|
# a) indexDistribution: which is done periodically if you have enabled
|
|
|
|
# Index Distribution via IndexControl_p.html
|
|
|
|
# b) indexTransfer: which can be used to transfer the whole index of a peer
|
|
|
|
# this can be started via IndexTransfer_p.html
|
|
|
|
# c) indexControl: which can be triggered manually via IndexControl_p.html to
|
|
|
|
# transfer a chosen subset of the peer index
|
|
|
|
indexDistribution.gzipBody = true
|
|
|
|
indexTransfer.gzipBody = true
|
|
|
|
indexControl.gzipBody = true
|
|
|
|
|
|
|
|
# defining timeouts for index- transfer/distribution/control
|
|
|
|
indexControl.timeout = 60000
|
|
|
|
indexDistribution.timeout = 60000
|
|
|
|
indexTransfer.timeout = 120000
|
|
|
|
|
|
|
|
# defining max. allowed amount of open files during index- transfer/distribution
|
|
|
|
indexDistribution.maxOpenFiles = 800
|
|
|
|
indexTransfer.maxOpenFiles = 800
|
|
|
|
|
|
|
|
# Distribution of Citation-Reference (CR-) files
|
|
|
|
# The distribution is done in two steps:
|
|
|
|
# first step to anonymize the records
|
|
|
|
# second step to forward to collecting peer
|
|
|
|
# to anonymize the data even against the intermediate peer
|
|
|
|
# a specific percentage is also sent again to other peers.
|
|
|
|
# for key-numbers please see de.anomic.plasma.plasmaRankingDistribution
|
|
|
|
CRDistOn = true
|
|
|
|
CRDist0Path = GLOBAL/010_owncr
|
|
|
|
CRDist0Method = 1
|
|
|
|
CRDist0Percent = 0
|
|
|
|
CRDist0Target =
|
|
|
|
CRDist1Path = GLOBAL/014_othercr
|
|
|
|
CRDist1Method = 9
|
|
|
|
CRDist1Percent = 30
|
|
|
|
CRDist1Target = kaskelix.de:8080,yacy.dyndns.org:8000,suma-lab.de:8080
|
|
|
|
|
|
|
|
#
|
|
|
|
storagePeerHash =
|
|
|
|
|
|
|
|
# Search sequence settings
|
|
|
|
# collection:
|
|
|
|
# time = time to get a RWI out of RAM cache, assortments and WORDS files
|
|
|
|
# count = maximum number of RWI-entries that shall be collected
|
|
|
|
#
|
|
|
|
# join:
|
|
|
|
# time = time to perform the join between all collected RWIs
|
|
|
|
# count = maximum number of entries that shall be joined
|
|
|
|
#
|
|
|
|
# presort:
|
|
|
|
# time = time to do a sort of the joined URL-records
|
|
|
|
# count = maximum number of entries that shall be pre-sorted
|
|
|
|
#
|
|
|
|
# urlfetch:
|
|
|
|
# time = time to fetch the real URLs from the LURL database
|
|
|
|
# count = maximum number of urls that shall be fetched
|
|
|
|
#
|
|
|
|
# postsort:
|
|
|
|
# time = time for final sort of URLs
|
|
|
|
# count = maximum number of URLs that shall be retrieved during sort
|
|
|
|
#
|
|
|
|
# filter:
|
|
|
|
# time = time to filter out unwanted urls (like redundant urls)
|
|
|
|
# count = maximum number of urls that shall be filtered
|
|
|
|
#
|
|
|
|
# snippetfetch:
|
|
|
|
# time = time to fetch snippets for selected URLs
|
|
|
|
# count = maximum number of snippets to be fetched
|
|
|
|
#
|
|
|
|
# all values are percent
|
|
|
|
# time-percent is the percent of total search time
|
|
|
|
# count-percent is the percent of total wanted urls in result
|
|
|
|
# we distinguish local and remote search times
|
|
|
|
searchProcessLocalTime_c = 25
|
|
|
|
searchProcessLocalCount_c = 10000000
|
|
|
|
searchProcessLocalTime_j = 10
|
|
|
|
searchProcessLocalCount_j = 1000000
|
|
|
|
searchProcessLocalTime_r = 10
|
|
|
|
searchProcessLocalCount_r =100000
|
|
|
|
searchProcessLocalTime_u = 30
|
|
|
|
searchProcessLocalCount_u = 10000
|
|
|
|
searchProcessLocalTime_o = 10
|
|
|
|
searchProcessLocalCount_o = 100
|
|
|
|
searchProcessLocalTime_f = 5
|
|
|
|
searchProcessLocalCount_f = 100
|
|
|
|
searchProcessLocalTime_s = 10
|
|
|
|
searchProcessLocalCount_s = 30
|
|
|
|
|
|
|
|
searchProcessRemoteTime_c = 25
|
|
|
|
searchProcessRemoteCount_c = 1000000
|
|
|
|
searchProcessRemoteTime_j = 10
|
|
|
|
searchProcessRemoteCount_j = 1000000
|
|
|
|
searchProcessRemoteTime_r = 10
|
|
|
|
searchProcessRemoteCount_r = 1000
|
|
|
|
searchProcessRemoteTime_u = 30
|
|
|
|
searchProcessRemoteCount_u = 1000
|
|
|
|
searchProcessRemoteTime_o = 10
|
|
|
|
searchProcessRemoteCount_o = 1000
|
|
|
|
searchProcessRemoteTime_f = 5
|
|
|
|
searchProcessRemoteCount_f = 100
|
|
|
|
searchProcessRemoteTime_s = 10
|
|
|
|
searchProcessRemoteCount_s = 10
|
|
|
|
|
|
|
|
# path to ranking directory containing ranking reference files
|
|
|
|
rankingPath = DATA/RANKING
|
|
|
|
|
|
|
|
# a list of domain name patterns that should not be cached by the httpc dns cache
|
|
|
|
httpc.nameCacheNoCachingPatterns = .*.dyndns.org, .*.dynalias.org
|
|
|
|
|
|
|
|
#externalRedirectors
|
|
|
|
#squid Redirector compatible
|
|
|
|
externalRedirector=
|
|
|
|
|
|
|
|
svnRevision=0
|
|
|
|
|
|
|
|
currentSkin=
|