You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
944 lines
37 KiB
944 lines
37 KiB
###
|
|
### YaCy Init File
|
|
###
|
|
# These properties will be loaded upon installation.
|
|
# They are used only once for set-up.
|
|
# If you make changes to this file and want them to take effect,
|
|
# you must delete the yacy.conf file in DATA/SETTINGS
|
|
|
|
# ----------------------------------------------------------------------------
|
|
# the HTTP service configurations
|
|
|
|
# port number where the server should bind to
|
|
# e.g. 8080
|
|
# #eth0:8080
|
|
# 192.168.0.1:8080
|
|
port = 8080
|
|
|
|
# use UPnP [true/false]
|
|
upnp.enabled = true
|
|
# remote host on UPnP device (for more than one connection)
|
|
upnp.remoteHost =
|
|
|
|
#sometimes you may want yacy to bind to another port than the one reachable from outside.
|
|
#then set bindPort to the port yacy should bind on, and port to the port, visible from outside
|
|
#to run yacy on port 8080, reachable from port 80, set bindPort=8080, port=80 and use
|
|
#iptables -t nat -A PREROUTING -p tcp -s 192.168.24.0/16 --dport 80 -j DNAT --to 192.168.24.1:8080
|
|
#(of course you need to customize the ips)
|
|
bindPort =
|
|
|
|
# SSL support:
|
|
#
|
|
# For a German manual see http://yacy-websuche.de/wiki/index.php/De:Interface%C3%9CberHTTPS
|
|
#
|
|
# English speaking user read below:
|
|
#
|
|
# With this you can access your peer using https://localhost:8080
|
|
#
|
|
# There are two possibilities to specify which certificate should
|
|
# be used by YaCy.
|
|
#
|
|
# 1) Create a new certificate:
|
|
#
|
|
# *) For testing purposes, you can create a keystore with a self-signed certificate,
|
|
# using the following command:
|
|
# C:\> keytool -keystore mySrvKeystore -genkey -keyalg RSA -alias mycert
|
|
#
|
|
# *) Then configure the keyStoreXXXX properties accordingly, e.g.
|
|
# keyStore = c:/yacy/DATA/SETTINGS/mySrvKeystore
|
|
# keyStorePassword = mypwd
|
|
#
|
|
# 2) Import an existing certificate:
|
|
#
|
|
# Alternatively you can import an existing certificate in pkcs12 format into
|
|
# the keystore.
|
|
#
|
|
# This can be done by setting the pkcs12XXX properties accordingly, e.g.
|
|
# pkcs12ImportFile = c:/temp/keystore.pkcs12
|
|
# pkcs12ImportPwd = test
|
|
#
|
|
# If the property keyStore is not specified, then a new keystore file
|
|
# DATA/SETTINGS/myPeerKeystore will be created.
|
|
|
|
keyStore =
|
|
keyStorePassword =
|
|
pkcs12ImportFile =
|
|
pkcs12ImportPwd =
|
|
|
|
# server tracking: maximum time a track entry is hold in the internal cache
|
|
# value is in milliseconds, default is one hour
|
|
server.maxTrackingTime = 3600000
|
|
|
|
# maximum number of tracks per host
|
|
server.maxTrackingCount = 1000
|
|
|
|
# maximum number of hosts that are tracked
|
|
server.maxTrackingHostCount = 100
|
|
|
|
# maximum file sizes: since some users experience problems with too large files
|
|
# the file size of database files can be limited. Larger files can be used to get a
|
|
# better IO performance and to use less RAM; however, if the size must be limited
|
|
# because of limitations of the file system, the maximum size can be set here
|
|
filesize.max.win = 2147483647
|
|
filesize.max.other = 8589934591
|
|
|
|
# Network Definition
|
|
# There can be separate YaCy networks, and managed sub-groups of the general network.
|
|
# The essentials of the network definition are attached in separate property files.
|
|
# The property here can also be a url where the definition can be loaded.
|
|
# In case of privately managed networks, this configuration must be changed BEFORE it is released
|
|
# to the members of the separated network peers.
|
|
network.unit.definition = defaults/yacy.network.freeworld.unit
|
|
#network.unit.definition = defaults/yacy.network.intranet.unit
|
|
network.group.definition = defaults/yacy.network.group
|
|
|
|
# Update process properties
|
|
# The update server location is given in the network.unit.definition,
|
|
# but the settings for update processing and cycles are individual.
|
|
# the update process can be either 'manual' (no automatic lookup for new versions),
|
|
# 'guided' (automatic lookup, but user is asked before update is performed),
|
|
# or 'auto' (whenever an update is available, the update is loaded and installed)
|
|
update.process = manual
|
|
# the cycle value applies only if the process is automatic or guided. The value means hours.
|
|
# There is currently a fixed minimum number of hours of 24 hours for updates
|
|
update.cycle = 168
|
|
# a version number blacklist can restrict automatic or guided updates to a specific
|
|
# range of version numbers. The restriction is done with a blacklist (standard regexpr)
|
|
# It is recommended to set this list to low developer version numbers
|
|
update.blacklist = ...[123]
|
|
# an update can also be restricted with a concept property, which can decide if an
|
|
# update is only valid if it either is a main release or any svn release including new development releases
|
|
# Valid keywords are 'main' and 'any'
|
|
update.concept = any
|
|
# the following values are set automatically:
|
|
# the lookup time when the last time a lookup to the network update server(s) where done
|
|
update.time.lookup = 0
|
|
# the download time when the last time a release was downloaded
|
|
update.time.download = 0
|
|
# the deploy time when the last update was done; milliseconds since epoch
|
|
update.time.deploy = 0
|
|
# delete old downloaded files after this amount of days to free disk space
|
|
# the latest release is always kept
|
|
update.deleteOld = 30
|
|
# only install sign files
|
|
update.onlySignedFiles = 1
|
|
|
|
# restart-option
|
|
# a peer can be re-started periodically
|
|
# restart.process can be either 'off' (no automatic restart) or 'time' (time- rule-based, see below)
|
|
restart.process = off
|
|
# the restart.cycle is the number of hours that must pass before a restart is done
|
|
restart.cycle = 20
|
|
# the restart.hour is a pattern that must match with the hour string (two-digit, 24h)
|
|
# when the restart should be performed
|
|
restart.hour = 03
|
|
# the following values are set automatically
|
|
restart.time = 0
|
|
|
|
# clusters within a network:
|
|
# every network can have an unlimited number of clusters. Clusters may be also completely
|
|
# sealed and have no connection to other peers. When a cluster does not use the
|
|
# p2p protocol and the bootstrapping mechanism to contact other peers, we call them
|
|
# Robinson peers. They can appear in different 'visibilities':
|
|
# - privatepeer: no connection and no data exchange to any other peer
|
|
# - privatecluster: connections only to self-defined addresses (other peers in same mode)
|
|
# - publiccluster: like privatecluster, but visible and searchable by public p2p nodes
|
|
# - publicpeer: a single peer without cluster connection, but visible for p2p nodes
|
|
# all public robinson peers should use a peer tag string to be searchable if in the
|
|
# search request these tags appear
|
|
cluster.mode=publicpeer
|
|
cluster.peers.yacydomain=localpeer.yacy
|
|
cluster.peers.ipport=localhost:8080
|
|
|
|
# bootstrapLoadTimeout
|
|
# this is the time-out for loading of the seedlist files during bootstrapping
|
|
# the time should not be too long, since loading of the seedlist is not parallelized
|
|
# and a not successful loading of a seed file may prevent a peer from becoming
|
|
# a (at least) junior status. If the time-out is too short, there is the danger
|
|
# that the peer stays in virgin mode
|
|
bootstrapLoadTimeout = 6000
|
|
|
|
# time-out of client control socket in milliseconds
|
|
# since this applies only to the client-proxy connection,
|
|
# it can be rather short
|
|
# milliseconds
|
|
clientTimeout = 10000
|
|
|
|
# maximal number of httpd sessions
|
|
# a client may open several connections at once, and the httpdMaxBusySessions value sets
|
|
# a limit on the number of concurrent connections
|
|
httpdMaxBusySessions = 200
|
|
|
|
# default root path for the file server
|
|
# may be overridden by the htdocs parameter
|
|
# users shall be encouraged to use the htdocs path for individual content,
|
|
# not this path defined here
|
|
htRootPath = htroot
|
|
htTemplatePath = htroot/env/templates
|
|
|
|
# the htroot path
|
|
# root path for the httpd file server
|
|
htDefaultPath=htroot
|
|
|
|
# individual htroot folder
|
|
# every user may publicize her/his own web pages
|
|
# these pages shall be placed in the path defined here
|
|
# the htdocs path shares its content with the htroot path
|
|
htDocsPath = DATA/HTDOCS
|
|
|
|
# alternative path for the repository path of the web server: the URL
|
|
# http://localhost:8080/repository
|
|
# points to DATA/HTDOCS/repository, but can be altered with this repository path
|
|
# hint: the repository path is the default path for intranet indexing. The easiest way
|
|
# to do an indexing of the local storage system is to set a path here for the repository
|
|
# that points to the root path of the files that shall be indexed
|
|
repositoryPath=DATA/HTDOCS/repository
|
|
|
|
# the default files (typically index.html), if no file name is given
|
|
# The complete path to this file is created by combination with the rootPath
|
|
# you can set a list of defaults, separated by comma
|
|
# the first one is preferred
|
|
defaultFiles = ConfigBasic.html,index.html,index.htm,default.html,search.html,console.html,control.html,welcome.html,wiki.html,forum.html,blog.html,email.html,content.html,monitor.html,share.html,dir.html,readme.txt
|
|
|
|
# locale-options: YaCy supports localization.
|
|
# Web pages for special languages are located in the htLocalePath
|
|
# The htLocaleLang defines a list of language options as <dir>/<named-language>
|
|
# the <dir> must exist as sub-path to htLocalePath
|
|
# the htLocaleSelection selects from the given locales, value=one-of-<dir>
|
|
locale.source=locales
|
|
locale.work=DATA/LOCALE/locales
|
|
locale.translated_html=DATA/LOCALE/htroot
|
|
locale.lang=default/English,de/Deutsch,fr/Français,nl/Nederlands,it/Italiano,es/Español,pt/Portugês,fi/Suomi,se/Svenska,dk/Dansk,gr/Eλληvικα,sk/Slovensky
|
|
locale.language=default
|
|
|
|
# virtual host for httpdFileServlet access
|
|
# for example http://<fileHost>/ shall access the file servlet and
|
|
# return the defaultFile at rootPath
|
|
# either way, http://<fileHost>/ denotes the same as http://localhost:<port>/
|
|
# for the preconfigured value 'localpeer', the URL is:
|
|
# http://localpeer/
|
|
fileHost = localpeer
|
|
|
|
# specify the path to the MIME matching file table
|
|
mimeTable = defaults/httpd.mime
|
|
|
|
# specify the path to the sessionid name file
|
|
sessionidNamesFile = defaults/sessionid.names
|
|
|
|
# a path to the file cache, used for the internal proxy and as crawl buffer
|
|
# This will be used if the server is addressed as a proxy
|
|
proxyCache = DATA/HTCACHE
|
|
|
|
# the maximum disc cache size for files in Cache in megabytes
|
|
# default: 32 Gigabyte
|
|
proxyCacheSize = 32768
|
|
|
|
# a path to the surrogate input directory
|
|
surrogates.in = DATA/SURROGATES/in
|
|
|
|
# a path to the surrogate output directory
|
|
surrogates.out = DATA/SURROGATES/out
|
|
|
|
# a path to the dictionaries directory
|
|
# this directory also contains subdirectories for input sources, the did-you-mean function and other
|
|
dictionaries = DATA/DICTIONARIES
|
|
|
|
# storage place for new releases
|
|
releases = DATA/RELEASE
|
|
|
|
# time limits for the crawler:
|
|
# these times (milliseconds) are the shortest times for an access of the crawler to the same domain
|
|
# the crawler may read files faster than that, but never from the same domain faster than these time intervals
|
|
# a delta of 500 milliseconds means that no more than two files are taken from the same server
|
|
# there is a hard-coded limit which prevents that the used time is shorter than these default times
|
|
# the time-limits are distinguished for local and global crawls: there is no limit for an intranet-crawl.
|
|
minimumLocalDelta = 0
|
|
minimumGlobalDelta = 500
|
|
|
|
# the following mime-types are a blacklist for indexing:
|
|
# parser.mime.deny: specifies mime-types that shall not be indexed
|
|
parser.mime.deny=
|
|
parser.extensions.deny=
|
|
|
|
# Promotion Strings
|
|
# These strings appear in the Web Mask of the YACY search client
|
|
# Set these Strings to customize your peer and give any message to
|
|
# other peer users
|
|
promoteSearchPageGreeting = P2P Web Search
|
|
# if the following property is set to true, the network name is used as greeting
|
|
promoteSearchPageGreeting.useNetworkName = false
|
|
# the following attributes can be used to define a custom image and home page on the search page
|
|
promoteSearchPageGreeting.homepage = http://yacy.net
|
|
promoteSearchPageGreeting.largeImage = /env/grafics/YaCyLogo_120ppi.png
|
|
promoteSearchPageGreeting.smallImage = /env/grafics/YaCyLogo_60ppi.png
|
|
|
|
# the path to the public reverse word index for text files (web pages)
|
|
# the primary path is relative to the data root, the secondary path is an absolute path
|
|
# when the secondary path should be equal to the primary, it must be declared empty
|
|
indexPrimaryPath=DATA/INDEX
|
|
|
|
# the commons are words that appear in the index more than 64k times in references.
|
|
# Since indexes with such size cannot be handled efficiently, they are sorted in such a way that references with high ranking
|
|
# are stored back into the index, and references with bad ranking are sorted out. Such sorted-out references can be stored
|
|
# for later use (but there is no such use at this time). If the sorted-out references should be stored, the following property should be
|
|
# set to true. If set to false, they are abandoned (deleted), and previously stored commons are removed.
|
|
index.storeCommons=false
|
|
|
|
# the path to the LISTS files. Most lists are used to filter web content
|
|
listsPath=DATA/LISTS
|
|
|
|
# path to additional databases, like messages, blog data and bookmarks
|
|
workPath=DATA/WORK
|
|
|
|
# the path to the SKINS files.
|
|
skinPath=DATA/SKINS
|
|
|
|
# the yellow-list; URL's elements
|
|
# (the core of an URL; like 'yahoo' in 'de.yahoo.com')
|
|
# appearing in this list will not get a manipulated user agent string
|
|
proxyYellowList=yacy.yellow
|
|
|
|
# the black-list; URLs appearing in this list will not be loaded;
|
|
# instead always a 404 is returned
|
|
# all these files will be placed in the listsPath
|
|
BlackLists.Shared=url.default.black
|
|
BlackLists.DefaultList=url.default.black
|
|
|
|
#these are not needed as default. they just keep the values from being deleted ...
|
|
proxy.BlackLists=url.default.black
|
|
crawler.BlackLists=url.default.black
|
|
dht.BlackLists=url.default.black
|
|
search.BlackLists=url.default.black
|
|
surftips.BlackLists=url.default.black
|
|
news.BlackLists=url.default.black
|
|
|
|
proxyCookieBlackList=cookie.default.black
|
|
proxyCookieWhiteList=cookie.default.black
|
|
|
|
# the blue-list;
|
|
# no search result is locally presented that has any word of the bluelist
|
|
# in the search words, the URL or the URL's description
|
|
plasmaBlueList=yacy.blue
|
|
|
|
# this proxy may in turn again access another proxy
|
|
# if you wish to do that, specify it here
|
|
# if you want to switch on the proxy use, set remoteProxyUse=true
|
|
# remoteProxyNoProxy is a no-proxy pattern list for the remote proxy
|
|
remoteProxyUse=false
|
|
remoteProxyUse4Yacy=true
|
|
remoteProxyUse4SSL=true
|
|
|
|
remoteProxyHost=192.168.2.2
|
|
remoteProxyPort=4239
|
|
remoteProxyUser=
|
|
remoteProxyPwd=
|
|
|
|
remoteProxyNoProxy=10\..*,127.*,172.(1[6-9]|2[0-9]|3[0-1])\..*,169.254.*,192.168.*,localhost
|
|
|
|
# the proxy may filter the content of transferred web pages
|
|
# the bluelist removes specific keywords from web pages
|
|
proxyBlueList=yacy.blue
|
|
|
|
# security settings
|
|
# we provide proxy and server security through a 2-stage security gate:
|
|
# 1st stage: firewall-like access control through ip filter for clients
|
|
# 2nd stage: password settings for proxy, server and server administrators
|
|
# by default, these settings are weak to simplify set-up and testing
|
|
# every user/administrator shall be encouraged to change these settings
|
|
# you can change them also online during run-time on
|
|
# http://localhost:8080/
|
|
|
|
# proxyClient: client-ip's that may connect the proxy for proxy service
|
|
# if several ip's are allowed then they must be separated by a ','
|
|
# any ip may contain the wildcard-sign '*'
|
|
#proxyClient=192.168.0.4
|
|
proxyClient=localhost,127.0.0.1,192.168.*,10\..*
|
|
|
|
# YaCyHop: allow public usage of proxy for yacy-protocol
|
|
# this enables usage of the internal http proxy for everyone,
|
|
# if the file path starts with /yacy/
|
|
# This is used to enable anonymization of yacy protocol requests
|
|
# Instead of asking a remote peer directly, a peer in between is asked
|
|
# to prevent that the asked peer knows which peer asks.
|
|
YaCyHop=true
|
|
|
|
# serverClient: client-ip's that may connect to the web server,
|
|
# thus are allowed to use the search service
|
|
# if you set this to another value, search requests from others
|
|
# are blocked, but you will also be blocked from using others
|
|
# search services.
|
|
serverClient=*
|
|
|
|
# use_proxyAccounts: set to true to restrict proxy-access to some identified users.
|
|
#use User_p.html to create some Users.
|
|
use_proxyAccounts=false
|
|
|
|
# adminAccount: a user:password - pair for administration of
|
|
# settings through the web interface
|
|
# should be set to a secret. By default it is without a password
|
|
# but you are encouraged to set it to another value on the page
|
|
# http://localhost:8080/ConfigBasic.html
|
|
#adminAccount=admin:mysecretpassword
|
|
adminAccount=
|
|
adminAccountBase64MD5=
|
|
|
|
# special access handling for users from localhost:
|
|
# access from localhost may be granted with administration authority
|
|
# if this flag is set. It is set to true by default to make usage of YaCy easy
|
|
# if you use YaCy on a headless server, you should set this to false
|
|
# or configure this on http://localhost:8080/ConfigBasic.html
|
|
# during the first 10 minutes of operation of YaCy;
|
|
# if the admin account password is still empty after 10 minutes a random
|
|
# password is generated and access is then ONLY from localhost, which will cause
|
|
# inaccessibility for installations on headless servers.
|
|
adminAccountForLocalhost=true
|
|
|
|
# if you are running a principal peer, you must update the following variables
|
|
# The upload method that should be used to upload the seed-list file to
|
|
# a public accessible webserver where it can be loaded by other peers.
|
|
#
|
|
# You can set the seedUploadMethod-Property to
|
|
# - None
|
|
# - Ftp
|
|
# - File
|
|
# - Scp (only if you have installed the optional addon)
|
|
#
|
|
seedUploadMethod=none
|
|
|
|
# This is the most common method to upload the seed-list
|
|
#
|
|
# This is an ftp account with all relevant information.
|
|
# The update is only made if there had been changes in between.
|
|
seedFTPServer=
|
|
seedFTPAccount=
|
|
seedFTPPassword=
|
|
seedFTPPath=
|
|
|
|
# alternatively to an FTP account, a peer can also become a principal peer
|
|
# if the seed-list can be generated as a file and that file is also accessible from
|
|
# the internet. In this case, omit any ftp settings and set this path here.
|
|
# if this path stays empty, an ftp account is considered
|
|
# however, you must always set a seedURL because it is used to check if the
|
|
# file is actually accessible from the internet
|
|
seedFilePath=
|
|
|
|
# Settings needed to upload the seed-list file via scp
|
|
#
|
|
# Please note that this upload method can only be used if you have installed
|
|
# this optional upload method.
|
|
seedScpServer=
|
|
seedScpServerPort=
|
|
seedScpAccount=
|
|
seedScpPassword=
|
|
seedScpPath=
|
|
|
|
# every peer should have a name. indeed, we try to give every peer an unique ID,
|
|
# which is necessary for internal organization of the index sharing, but the
|
|
# peer's name is purely informal. No function but information is applied.
|
|
# please change this at your pleasure
|
|
peerName=anomic
|
|
|
|
# every peer periodically scans for other peers. you can set the time
|
|
# of the period here (minutes)
|
|
peerCycle=2
|
|
|
|
# Debug mode for YACY network: this will trigger that also local ip's are
|
|
# accepted as peer addresses
|
|
yacyDebugMode=false
|
|
|
|
#staticIP if you have a static IP, you can use this setting
|
|
staticIP=
|
|
|
|
# each time YaCy starts up, it can trigger the local browser to show the
|
|
# status page. This is active by default, to make it easier for first-time
|
|
# users to understand what this application does. You can disable browser
|
|
# pop-up here or set a different start page, like the search page
|
|
# the browser type is optional and works only under certain conditions
|
|
browserPopUpTrigger=true
|
|
browserPopUpPage=ConfigBasic.html
|
|
browserPopUpApplication=firefox
|
|
|
|
# defines if the YaCy icon appears in the system tray on supported platforms
|
|
trayIcon=true
|
|
trayIcon.force=false
|
|
tray.label=YaCy
|
|
|
|
# index sharing attributes: by default, sharing is on.
|
|
# If you want to use YaCy only for local indexing (robinson mode),
|
|
# you may switch this off
|
|
allowDistributeIndex=true
|
|
allowDistributeIndexWhileCrawling=false
|
|
allowDistributeIndexWhileIndexing=true
|
|
allowReceiveIndex=true
|
|
allowUnlimitedReceiveIndexFrom=
|
|
indexReceiveBlockBlacklist=true
|
|
|
|
# the frequency is the number of links per minute, that the peer allows
|
|
# _every_ other peer to send to this peer
|
|
defaultWordReceiveFrequency=100
|
|
defaultLinkReceiveFrequency=30
|
|
# the default may be overridden for each peer individually, these
|
|
# settings are only available through the online interface
|
|
|
|
# prefetch parameters
|
|
# the prefetch depth assigns a specific depth to the prefetch mechanism
|
|
# prefetch of 0 means no prefetch; a prefetch of 1 means to prefetch all
|
|
# embedded URLs, but since embedded image links are loaded by the browser
|
|
# this means that only embedded anchors are prefetched additionally
|
|
# a prefetch of 2 would result in loading of all images and anchor pages
|
|
# of all embedded anchors. Be careful with this value, since even a prefetch
|
|
# of 2 would result in hundreds of prefetched URLs for each single proxy fill.
|
|
proxyPrefetchDepth=0
|
|
proxyStoreHTCache=true
|
|
proxyIndexingRemote=false
|
|
proxyIndexingLocalText=true
|
|
proxyIndexingLocalMedia=true
|
|
|
|
# proxy usage only for .yacy-Domains for autoconfig
|
|
proxyYacyOnly=false
|
|
|
|
# From the 'IndexCreate' menu point you can also define a crawling start point.
|
|
# The crawling works the same way as the prefetch, but it is possible to
|
|
# assign a different crawling depth.
|
|
# Be careful with this number. Consider a branching factor of average 20;
|
|
# A prefetch-depth of 8 would index 25.600.000.000 pages, maybe the whole WWW.
|
|
crawlingDepth=3
|
|
crawlingIfOlder=-1
|
|
crawlingDomFilterDepth=-1
|
|
crawlingDomMaxPages=-1
|
|
indexText=true
|
|
indexMedia=true
|
|
|
|
# Filter for crawling; may be used to restrict a crawl to a specific domain
|
|
# URLs are only indexed and further crawled if they match this filter
|
|
crawlingFilter=.*
|
|
crawlingQ=false
|
|
storeHTCache=true
|
|
storeTXCache=true
|
|
|
|
# peers may initiate remote crawling tasks.
|
|
# every peer may allow or disallow to be used as crawling-peer;
|
|
# you can also set a maximum crawl depth that can be requested or accepted
|
|
# order=parameters for requester; response=parameters for responder
|
|
# these values apply only for senior-senior - communication
|
|
# The delay value is number of seconds between two separate orders
|
|
crawlOrder=true
|
|
crawlOrderDepth=0
|
|
crawlOrderDelay=8
|
|
crawlResponse=true
|
|
crawlResponseDepth=0
|
|
|
|
# indexing-exclusion - rules
|
|
# These rules are important to reduce the number of words that are indexed
|
|
# We distinguish three different sets of stop-words:
|
|
# static - excludes all words given in the file yacy.stopwords from indexing,
|
|
# dynamic - excludes all words from indexing which are listed by statistic rules,
|
|
# parental - excludes all words from indexing which had been indexed in the parent web page.
|
|
xsstopw=true
|
|
xdstopw=true
|
|
xpstopw=true
|
|
|
|
# Topwords filtering
|
|
# If set to true, all stopwords (stopwords.yacy) are filtered from the topwords
|
|
# Change to false if requesting hits from peers with modified stopwords-file and using the unchanged client-version
|
|
filterOutStopwordsFromTopwords=true
|
|
|
|
# performance-settings
|
|
# delay-times for permanent loops (milliseconds)
|
|
# the idlesleep is the pause that a process sleeps if the last call to the
|
|
# process job was without execution of anything;
|
|
# the busysleep is the pause after a full job execution
|
|
# the prereq-value is a memory pre-requisite: that much bytes must
|
|
# be available/free in the heap; otherwise the loop is not executed
|
|
# and another idlesleep is performed
|
|
20_dhtdistribution_idlesleep=15000
|
|
20_dhtdistribution_busysleep=10000
|
|
20_dhtdistribution_memprereq=12582912
|
|
30_peerping_idlesleep=120000
|
|
30_peerping_busysleep=120000
|
|
30_peerping_memprereq=2097152
|
|
40_peerseedcycle_idlesleep=1800000
|
|
40_peerseedcycle_busysleep=1200000
|
|
40_peerseedcycle_memprereq=4194304
|
|
50_localcrawl_idlesleep=2000
|
|
50_localcrawl_busysleep=20
|
|
50_localcrawl_memprereq=12582912
|
|
50_localcrawl_isPaused=false
|
|
60_remotecrawlloader_idlesleep=60000
|
|
60_remotecrawlloader_busysleep=10000
|
|
60_remotecrawlloader_memprereq=12582912
|
|
60_remotecrawlloader_isPaused=false
|
|
62_remotetriggeredcrawl_idlesleep=10000
|
|
62_remotetriggeredcrawl_busysleep=1000
|
|
62_remotetriggeredcrawl_memprereq=12582912
|
|
62_remotetriggeredcrawl_isPaused=false
|
|
70_surrogates_idlesleep=10000
|
|
70_surrogates_busysleep=0
|
|
70_surrogates_memprereq=12582912
|
|
90_cleanup_idlesleep=300000
|
|
90_cleanup_busysleep=300000
|
|
90_cleanup_memprereq=0
|
|
|
|
# additional attributes:
|
|
# performanceIO is a percent-value. a value of 10 means, that 10% of the busysleep time
|
|
# is used to flush the RAM cache, which is the major part of the IO in YaCy
|
|
performanceProfile=defaults/yacy.init
|
|
performanceSpeed=100
|
|
performanceIO=10
|
|
|
|
# cleanup-process:
|
|
# properties for tasks that are performed during cleanup
|
|
cleanup.deletionProcessedNews = true
|
|
cleanup.deletionPublishedNews = true
|
|
|
|
# default memory settings for startup of yacy
|
|
# is valid in unix/shell and windows environments but
|
|
# not for first startup of YaCy
|
|
|
|
# -Xmx<size> and -Xms<size> maximum/init Java heap size
|
|
# if a high performance for large search indexes is wanted, then setting the values to equal number is recommended
|
|
# if YaCy shall be nice in not-only-yacy environments, then the Xms value may be lower
|
|
javastart_Xmx=Xmx600m
|
|
javastart_Xms=Xms180m
|
|
|
|
# YaCy is able to use RAM copies of database tables. This needs a lot of RAM
|
|
# To switch copying of file tables int RAM on, use this property
|
|
# this value is automatically set to true, if more than one gigabyte RAM is available
|
|
ramcopy=false
|
|
|
|
# some java versions may be limited to a specific array size
|
|
# of 134217727 entries. To prevent that tables of that size are generated,
|
|
# set this property to false
|
|
# If you want to have better performance and switch ramcopy on, try also to
|
|
# set this property to true
|
|
# this value is automatically set to true, if more than two gigabyte is available
|
|
exceed134217727=false
|
|
|
|
# priority of the yacy-process
|
|
# is valid in unix/shell and windows environments but
|
|
# not for first startup of YaCy
|
|
# UNIX: corresponds to the nice-level
|
|
# WIN: -20=realtime;-15=high;-10=above;0=normal;10=below;20=low
|
|
javastart_priority=10
|
|
|
|
# performance properties for the word index cache
|
|
# wordCacheMaxCount is the number of word indexes that shall be held in the
|
|
# ram cache during indexing. If you want to increase indexing speed, increase this
|
|
# value i.e. up to one million, but increase also the memory limit to a minimum of 2GB
|
|
wordCacheMaxCount = 100000
|
|
|
|
# Specifies if yacy can be used as transparent http proxy.
|
|
#
|
|
# Please note that you also have to reconfigure your firewall
|
|
# before you can use yacy as transparent proxy. On linux this
|
|
# can be done like this:
|
|
# iptables -t nat -A PREROUTING -p tcp -s 192.168.0.0/16 \
|
|
# --dport 80 -j DNAT --to 192.168.0.1:8080
|
|
#
|
|
# With this iptables filter listed above all http traffic that
|
|
# comes from your private network (in this case 192.168.0.0)
|
|
# and goes to any webserver listening on port 80 will be forwarded
|
|
# by the firewall to yacy running on port 8080 (192.168.0.1:8080)
|
|
isTransparentProxy=false
|
|
|
|
# Specifies if yacy should use the http connection keep-alive feature
|
|
connectionKeepAliveSupport=true
|
|
|
|
# Specifies the timeout the proxy should use
|
|
proxy.clientTimeout = 30000
|
|
|
|
# Specifies if the proxy should send the via header according to RFC
|
|
proxy.sendViaHeader=true
|
|
|
|
# Specifies if the proxy should send the X-Forwarded-For header
|
|
proxy.sendXForwardedForHeader=true
|
|
|
|
# Enable cookie monitoring
|
|
proxy.monitorCookies=false
|
|
|
|
# msgForwarding: Specifies if yacy should forward received messages via
|
|
# email to the configured email address
|
|
msgForwardingEnabled=false
|
|
msgForwardingCmd=/usr/sbin/sendmail
|
|
msgForwardingTo=root@localhost
|
|
|
|
#crawlPause: delay time after specific functions before crawling is resumed
|
|
crawlPause.proxy=15000
|
|
crawlPause.localsearch=9000
|
|
crawlPause.remotesearch=3000
|
|
|
|
# Some configuration values for the crawler
|
|
crawler.clientTimeout=9000
|
|
|
|
# http crawler specific settings; size in bytes
|
|
crawler.http.acceptEncoding=gzip
|
|
crawler.http.acceptLanguage=en-us,en;q=0.5
|
|
crawler.http.acceptCharset=ISO-8859-1,utf-8;q=0.7,*;q=0.7
|
|
crawler.http.maxFileSize=1048576
|
|
|
|
# ftp crawler specific settings; size in bytes
|
|
crawler.ftp.maxFileSize=1048576
|
|
|
|
# smb crawler specific settings: maximum size
|
|
crawler.smb.maxFileSize=100000000
|
|
|
|
# file crawler specific settings: maximum size
|
|
crawler.file.maxFileSize=100000000
|
|
|
|
# maximum number of crawler threads
|
|
crawler.MaxActiveThreads = 200
|
|
|
|
# maximum size of indexing queue
|
|
indexer.slots = 100
|
|
|
|
# maximum size of stacker queue
|
|
stacker.slots = 2000
|
|
|
|
# search domains. If set to false then that search is not available
|
|
search.text = true
|
|
search.image = true
|
|
search.audio = true
|
|
search.video = true
|
|
search.app = true
|
|
|
|
# number of search results displayed by default
|
|
search.items = 10
|
|
|
|
# specifies if yacy should set its own referer if no referer URL
|
|
# was set by the client.
|
|
useYacyReferer = false
|
|
|
|
# allow only 443(https-port) for https-proxy?
|
|
# if you want to tunnel other protocols, set to false
|
|
secureHttps = true
|
|
|
|
# specifies if the httpdFileHandler should cache
|
|
# the template-files from the htroot directory
|
|
enableTemplateCache = true
|
|
|
|
# specifies if the http post body should be transferred
|
|
# using content-encoding gzip during index transfer
|
|
# a) indexDistribution: which is done periodically if you have enabled
|
|
# Index Distribution via IndexControl_p.html
|
|
# b) indexTransfer: which can be used to transfer the whole index of a peer
|
|
# this can be started via IndexTransfer_p.html
|
|
# c) indexControl: which can be triggered manually via IndexControl_p.html to
|
|
# transfer a chosen subset of the peer index
|
|
indexDistribution.gzipBody = true
|
|
indexTransfer.gzipBody = true
|
|
indexControl.gzipBody = true
|
|
|
|
# defining timeouts for index- transfer/distribution/control
|
|
indexControl.timeout = 60000
|
|
indexDistribution.timeout = 60000
|
|
indexTransfer.timeout = 120000
|
|
|
|
# defining max. allowed amount of open files during index- transfer/distribution
|
|
indexDistribution.maxOpenFiles = 800
|
|
indexTransfer.maxOpenFiles = 800
|
|
|
|
# sizes for index distribution
|
|
indexDistribution.minChunkSize = 10
|
|
indexDistribution.maxChunkSize = 1000
|
|
indexDistribution.startChunkSize = 200
|
|
indexDistribution.maxChunkFails = 1
|
|
|
|
# Distribution of Citation-Reference (CR-) files
|
|
# The distribution is done in two steps:
|
|
# first step to anonymize the records
|
|
# second step to forward to a collecting peer
|
|
# to anonymize the data even against the intermediate peer
|
|
# a specific percentage is also sent again to other peers.
|
|
# for key-numbers please see de.anomic.plasma.plasmaRankingDistribution
|
|
CRDistOn = true
|
|
CRDist0Path = GLOBAL/010_owncr
|
|
CRDist0Method = 1
|
|
CRDist0Percent = 0
|
|
CRDist0Target =
|
|
CRDist1Path = GLOBAL/014_othercr
|
|
CRDist1Method = 9
|
|
CRDist1Percent = 30
|
|
CRDist1Target = kaskelix.de:8080,yacy.dyndns.org:8000
|
|
|
|
# Search sequence settings
|
|
# collection:
|
|
# time = time to get a RWI out of RAM cache, assortments and WORDS files
|
|
# count = maximum number of RWI-entries that shall be collected
|
|
#
|
|
# join:
|
|
# time = time to perform the join between all collected RWIs
|
|
# count = maximum number of entries that shall be joined
|
|
#
|
|
# presort:
|
|
# time = time to do a sort of the joined URL-records
|
|
# count = maximum number of entries that shall be pre-sorted
|
|
#
|
|
# urlfetch:
|
|
# time = time to fetch the real URLs from the LURL database
|
|
# count = maximum number of urls that shall be fetched
|
|
#
|
|
# postsort:
|
|
# time = time for final sort of URLs
|
|
# count = maximum number of URLs that shall be retrieved during sort
|
|
#
|
|
# filter:
|
|
# time = time to filter out unwanted urls (like redundant urls)
|
|
# count = maximum number of urls that shall be filtered
|
|
#
|
|
# snippetfetch:
|
|
# time = time to fetch snippets for selected URLs
|
|
# count = maximum number of snippets to be fetched
|
|
#
|
|
# all values are percent
|
|
# time-percent is the percent of total search time
|
|
# count-percent is the percent of total wanted urls in result
|
|
# we distinguish local and remote search times
|
|
searchProcessLocalTime_c = 44
|
|
searchProcessLocalCount_c = 10000000
|
|
searchProcessLocalTime_j = 8
|
|
searchProcessLocalCount_j = 1000000
|
|
searchProcessLocalTime_r = 8
|
|
searchProcessLocalCount_r =100000
|
|
searchProcessLocalTime_u = 20
|
|
searchProcessLocalCount_u = 10000
|
|
searchProcessLocalTime_o = 10
|
|
searchProcessLocalCount_o = 100
|
|
searchProcessLocalTime_f = 5
|
|
searchProcessLocalCount_f = 100
|
|
searchProcessLocalTime_s = 5
|
|
searchProcessLocalCount_s = 30
|
|
|
|
searchProcessRemoteTime_c = 44
|
|
searchProcessRemoteCount_c = 1000000
|
|
searchProcessRemoteTime_j = 8
|
|
searchProcessRemoteCount_j = 1000000
|
|
searchProcessRemoteTime_r = 8
|
|
searchProcessRemoteCount_r = 1000
|
|
searchProcessRemoteTime_u = 20
|
|
searchProcessRemoteCount_u = 1000
|
|
searchProcessRemoteTime_o = 10
|
|
searchProcessRemoteCount_o = 1000
|
|
searchProcessRemoteTime_f = 5
|
|
searchProcessRemoteCount_f = 100
|
|
searchProcessRemoteTime_s = 5
|
|
searchProcessRemoteCount_s = 10
|
|
|
|
# timeouts for snippet fetching in ms
|
|
# timeout_text is for text-snippets, timeout_media for media, e.g. images
|
|
timeout_text = 10000
|
|
timeout_media = 15000
|
|
|
|
# path to ranking directory containing ranking reference files
|
|
rankingPath = DATA/RANKING
|
|
|
|
# a list of domain name patterns that should not be cached by the httpc dns cache
|
|
httpc.nameCacheNoCachingPatterns = .*.ath.cx,.*.blogdns.*,.*.boldlygoingnowhere.org,.*.dnsalias.*,.*.dnsdojo.*,.*.dvrdns.org,.*.dyn-o-saur.com,.*.dynalias.*,.*.dyndns.*,.*.ftpaccess.cc,.*.game-host.org,.*.game-server.cc,.*.getmyip.com,.*.gotdns.*,.*.ham-radio-op.net,.*.hobby-site.com,.*.homedns.org,.*.homeftp.*,.*.homeip.net,.*.homelinux.*,.*.homeunix.*,.*.is-a-chef.*,.*.is-a-geek.*,.*.kicks-ass.*,.*.merseine.nu,.*.mine.nu,.*.myphotos.cc,.*.podzone.*,.*.scrapping.cc,.*.selfip.*,.*.servebbs.*,.*.serveftp.*,.*.servegame.org,.*.shacknet.nu
|
|
|
|
#externalRedirectors
|
|
#squid Redirector compatible
|
|
externalRedirector=
|
|
|
|
svnRevision=0
|
|
|
|
currentSkin=pdblue
|
|
|
|
# flag to show if pages shall be usable for non-admin users
|
|
# this can be applied to the Surftips.html and yacysearch.html page
|
|
publicSurftips = true
|
|
publicSearchpage = true
|
|
|
|
# Wiki access rights
|
|
# the built-in wiki system allows by default only that the administrator is allowed to make changes
|
|
# this can be changed. There are three options:
|
|
# admin - only the admin has write right
|
|
# all - everybody has write right
|
|
# user - the admin and every user registered in the user db has write right
|
|
WikiAccess = admin
|
|
|
|
# Search Profiles
|
|
# we will support different search profiles
|
|
# this is currently only a single default profile
|
|
# If this profile setting is empty, a hard-coded profile from plasmaSearchRanking is used
|
|
rankingProfile =
|
|
|
|
#optional extern thumbnail program.
|
|
#the program must accept the invocation PROGRAM http://url /path/to/filename
|
|
thumbnailProgram =
|
|
|
|
# settings for the peer's local robots.txt
|
|
# the following restrictions are possible (comma-separated):
|
|
# - all : entire domain is disallowed
|
|
# - blog : the blog-pages
|
|
# - bookmarks : the bookmark-page
|
|
# - dirs : all directories in htroot (standard setting, as there is no usable information in)
|
|
# - fileshare : all files in the peer's file share (DATA/HTDOCS/share)
|
|
# - homepage : all files on the peer's home page (DATA/HTDOCS/www)
|
|
# - locked : all servlets ending on '_p.*' (standard setting, as robots would need a password to access them anyways)
|
|
# - news : the news-page
|
|
# - network : the network-pages
|
|
# - status : peer's status page
|
|
# - surftips : the surftips-page
|
|
# - wiki : the wiki-page
|
|
httpd.robots.txt = locked,dirs
|
|
|
|
# class to use for parsing wikicode
|
|
wikiParser.class = de.anomic.data.wikiCode
|
|
|
|
# settings for automatic deletion of old entries in passive and potential seed-db
|
|
# time means max time (in days) a peer may not have been seen before it is deleted
|
|
routing.deleteOldSeeds.permission = true
|
|
routing.deleteOldSeeds.time = 30
|
|
|
|
# options to remember the default search engines when using the search compare features
|
|
compare_yacy.left = YaCy
|
|
compare_yacy.right = YaCy
|
|
|
|
# minimum free disk space for crawling (MiB)
|
|
disk.free = 3000
|
|
# minimum for DHT
|
|
disk.free.hardlimit = 1000
|
|
|
|
# minimum memory to accept dht-in (KB)
|
|
memory.acceptDHT = 50000
|
|
memory.disabledDHT = false
|
|
|
|
# setting if execution of CGI files is allowed or not
|
|
cgi.allow = false
|
|
cgi.suffixes = cgi,pl
|
|
|
|
# whether this is a version for a web browser
|
|
browserintegration = false
|
|
|
|
# content integration settings
|
|
content.phpbb3.urlstub = http://<mydomain>/
|
|
content.phpbb3.dbtype = mysql
|
|
content.phpbb3.dbhost = localhost
|
|
content.phpbb3.dbport = 3306
|
|
content.phpbb3.dbname = forum
|
|
content.phpbb3.tableprefix = phpbb_
|
|
content.phpbb3.dbuser = notroot
|
|
content.phpbb3.dbpw = joshua
|
|
content.phpbb3.ppf = 1000
|
|
content.phpbb3.dumpfile =
|
|
|
|
# segment assignment for index storage processes in YaCy:
|
|
# each process can store its index result in its own index segment
|
|
segment.process.receipts_tmp = default
|
|
segment.process.queries_tmp = default
|
|
segment.process.dhtin_tmp = default
|
|
segment.process.dhtout_tmp = default
|
|
segment.process.proxy_tmp = default
|
|
segment.process.localcrawling_tmp = default
|
|
segment.process.remotecrawling_tmp = default
|
|
segment.process.default_tmp = default
|
|
|
|
# search engine teaser: an about box in search results
|
|
# this is only shown, if the about.body is filled
|
|
about.headline =
|
|
about.body =
|
|
|
|
# search heuristics
|
|
heuristic.site = false
|
|
heuristic.scroogle = false
|
|
|