### ### YaCy Init File ### # These properties will be loaded upon installation. # They are used only once for set-up. # If you make changes to this file and want them to take effect, # you must delete the yacy.conf file in DATA/SETTINGS # ---------------------------------------------------------------------------- # port number where the server should bind to port = 8090 # optional ssl port (https port) the server should bind to port.ssl = 8443 # prefix for new default peer names peernameprefix=_anon # use UPnP [true/false] upnp.enabled = true # remote host on UPnP device (for more than one connection) upnp.remoteHost = #sometimes you may want yacy to bind to another port than the one reachable from outside. #then set bindPort to the port yacy should bind on, and port to the port visible from outside #to run yacy on port 8090, reachable from port 80, set bindPort=8090, port=80 and use #iptables -t nat -A PREROUTING -p tcp -s 192.168.24.0/16 --dport 80 -j DNAT --to 192.168.24.1:8090 #(of course you need to customize the ips) bindPort = # SSL support: # # For a German manual see http://yacy-websuche.de/wiki/index.php/De:Interface%C3%9CberHTTPS # # English-speaking users read below: # # With this you can access your peer using https://localhost:8443 # # There are two possibilities to specify which certificate should # be used by YaCy. # # 1) Create a new certificate: # # *) For testing purposes, you can create a keystore with a self-signed certificate, # using the following command: # C:\> keytool -keystore mySrvKeystore -genkey -keyalg RSA -alias mycert # # *) Then configure the keyStoreXXXX properties accordingly, e.g. # keyStore = c:/yacy/DATA/SETTINGS/mySrvKeystore # keyStorePassword = mypwd # # 2) Import an existing certificate: # # Alternatively you can import an existing certificate in pkcs12 format into # the keystore. # # This can be done by setting the pkcs12XXX properties accordingly, e.g.
# pkcs12ImportFile = c:/temp/keystore.pkcs12 # pkcs12ImportPwd = test # # If the property keyStore is not specified, then a new keystore file # DATA/SETTINGS/myPeerKeystore will be created. keyStore=defaults/freeworldKeystore keyStorePassword=freeworld pkcs12ImportFile = pkcs12ImportPwd = # the keyStore is only used, if server.https is set to true # if server.https=true, then the YaCy web interface is available at # https://localhost:<port.ssl>/ and at http://localhost:<port>/ server.https=false # property that collects the names of all servlets that have been used so far # that is used to track if the user has already done some configuration steps # if the user missed configuration steps that should be done, then a help system # is possible which leads the user based on the list of servlets that have been used # the list distinguishes called and submitted servlets server.servlets.called = server.servlets.submitted = # server tracking: maximum time a track entry is held in the internal cache # value is in milliseconds, default is one hour server.maxTrackingTime = 3600000 # maximum number of tracks per host server.maxTrackingCount = 1000 # maximum number of hosts that are tracked server.maxTrackingHostCount = 100 # maximum file sizes: since some users experience problems with too large files # the file size of database files can be limited. Larger files can be used to get a # better IO performance and to use less RAM; however, if the size must be limited # because of limitations of the file system, the maximum size can be set here filesize.max.win = 2147483647 filesize.max.other = 8589934591 # Network Definition # There can be separate YaCy networks, and managed sub-groups of the general network. # The essentials of the network definition are attached in separate property files. # The property here can also be a url where the definition can be loaded.
# In case of privately managed networks, this configuration must be changed BEFORE it is released # to the members of the separated network peers. network.unit.definition = defaults/yacy.network.freeworld.unit #network.unit.definition = defaults/yacy.network.webportal.unit #network.unit.definition = defaults/yacy.network.intranet.unit # distinguish intranet/internet IPs: # if this setting is set to true, then only URL-Hashes with 'intranet'-Flag are created, even if the # url is in the internet. This can be done to enhance the crawling speed dramatically since a DNS-lookup # to check if a host is in the internet or an intranet can be omitted. # This option is only valid if the network.unit.domain property is set to 'any' network.unit.domain.nocheck = false # in addition to non-dht networks a client may have its own agent name # this option is only used if the value is non-empty and network.unit.dht = false # that means it is not usable in YaCy p2p-configurations, only in private portal configurations network.unit.tenant.agent = # Update process properties # The update server location is given in the network.unit.definition, # but the settings for update processing and cycles are individual. # the update process can be either 'manual' (no automatic lookup for new versions), # 'guided' (automatic lookup, but user is asked before update is performed), # or 'auto' (whenever an update is available, the update is loaded and installed) update.process = manual # the cycle value applies only if the process is automatic or guided. The value means hours. # There is currently a fixed minimum number of hours of 24 hours for updates update.cycle = 168 # a version number blacklist can restrict automatic or guided updates to a specific # range of version numbers.
The restriction is done with a blacklist (standard regular expression) # It is recommended to set this list to low developer version numbers update.blacklist = # an update can also be restricted with a concept property, which can decide if an # update is only valid if it either is a main release or any release including new development releases # Valid keywords are 'main' and 'any' update.concept = any # the following values are set automatically: # the time when the last lookup to the network update server(s) was done update.time.lookup = 0 # the time when a release was last downloaded update.time.download = 0 # the deploy time when the last update was done; milliseconds since epoch update.time.deploy = 0 # delete old downloaded files after this amount of days to free disk space # the latest release is always kept update.deleteOld = 30 # only install signed files update.onlySignedFiles = 1 # restart-option # a peer can be re-started periodically # restart.process can be either 'off' (no automatic restart) or 'time' (time-rule-based, see below) restart.process = off # the restart.cycle is the number of hours that must pass before a restart is done restart.cycle = 20 # the restart.hour is a pattern that must match with the hour string (two-digit, 24h) # when the restart should be performed restart.hour = 03 # the following values are set automatically restart.time = 0 # clusters within a network: # every network can have an unlimited number of clusters. Clusters may be also completely # sealed and have no connection to other peers. When a cluster does not use the # p2p protocol and the bootstrapping mechanism to contact other peers, we call them # Robinson peers.
They can appear in different 'visibilities': # - privatepeer: no connection and no data exchange to any other peer # - privatecluster: connections only to self-defined addresses (other peers in same mode) # - publiccluster: like privatecluster, but visible and searcheable by public p2p nodes # - publicpeer: a single peer without cluster connection, but visible for p2p nodes # all public robinson peers should use a peer tag string to be searcheable if in the # search request these tags appear cluster.mode=publicpeer cluster.peers.yacydomain=localpeer.yacy cluster.peers.ipport=localhost:8090 # bootstrapLoadTimeout # this is the time-out for loading of the seedlist files during bootstraping # If the time-out is too short, there is the danger that the peer stays in virgin mode bootstrapLoadTimeout = 20000 # time-out of client control socket in milliseconds # since this applies only to the client-proxy connection, # it can be rather short # milliseconds clientTimeout = 10000 # maximal number of httpd sessions # a client may open several connections at once, and the httpdMaxBusySessions value sets # a limit on the number of concurrent connections httpdMaxBusySessions = 200 # default root path for the file server # may be overridden by the htdocs parameter # users shall be encouraged to use the htdocs path for individual content, # not this path defined here htRootPath = htroot # the htroot path # root path for the httpd file server htDefaultPath=htroot # individual htroot folder # every user may publicize her/his own web pages # these pages shall be placed in the path defined here # the htdocs path shares its content with the htroot path htDocsPath = DATA/HTDOCS # the default files (typically index.html), if no file name is given # The complete path to this file is created by combination with the rootPath # you can set a list of defaults, separated by comma # the first one is preferred defaultFiles = 
index.html,index.htm,default.html,search.html,console.html,control.html,welcome.html,wiki.html,forum.html,blog.html,email.html,content.html,monitor.html,share.html,dir.html,readme.txt # locale-options: YaCy supports localization. # Web pages for special languages are located in the htLocalePath # The htLocaleLang defines a list of language options as / # the must exist as sub-path to htLocalePath # the htLocaleSelection selects from the given locales, value=one-of- locale.source=locales locale.translated_html=DATA/LOCALE/htroot locale.language=default # virtual host for httpdFileServlet access # for example http:/// shall access the file servlet and # return the defaultFile at rootPath # either way, http:/// denotes the same as http://localhost:/ # for the preconfigured value 'localpeer', the URL is: # http://localpeer/ fileHost = localpeer # specify the path to the MIME matching file table mimeTable = defaults/httpd.mime # specify the path to the sessionid name file sessionidNamesFile = defaults/sessionid.names # a path to the file cache, used for the internal proxy and as crawl buffer # This will be used if the server is addressed as a proxy proxyCache = DATA/HTCACHE # the maximum disc cache size for files in Cache in megabytes # default: 4 Gigabyte proxyCacheSize = 4096 # you can use the proxy with fresh/stale rules or in a always-fresh mode proxyAlwaysFresh = false # a path to the surrogate input directory surrogates.in = DATA/SURROGATES/in # a path to the surrogate output directory surrogates.out = DATA/SURROGATES/out # a path to the dictionaries directory # this directory also contains subdirectories for input sources, the did-you-mean function and other dictionaries = DATA/DICTIONARIES # a path to the classification directory # each subdirectory is the name of a context (which becomes a navigator) with '.txt' files # containing texts to teach a bayesian filter. One of the files must be named 'negative.txt'. 
# The text files can be created with the Export functionality using the option "Only Text". classification = DATA/CLASSIFICATION # storage place for new releases releases = DATA/RELEASE # the following mime-types are a blacklist for indexing: # parser.mime.deny: specifies mime-types that shall not be indexed parser.mime.deny= parser.extensions.deny= parser.enableAudioTags=false # experimental single-page parser for pdf files: split one pdf into individual pages; # the key is the property name in the post arguments that gets a page number assigned, # page numbers start with 1 parser.pdf.individualpages=false parser.pdf.individualpages.key=page # Promotion Strings # These strings appear in the Web Mask of the YACY search client # Set these Strings to cusomize your peer and give any message to # other peer users promoteSearchPageGreeting = Web Search by the People, for the People # if the following property is set to true, the network name is used as greeting promoteSearchPageGreeting.useNetworkName = false # the following attributes can be used to define a custom image and home page on the search page promoteSearchPageGreeting.homepage = http://yacy.net promoteSearchPageGreeting.largeImage = /env/grafics/YaCyLogo_120ppi.png promoteSearchPageGreeting.smallImage = /env/grafics/YaCyLogo_60ppi.png # the path to the public reverse word index for text files (web pages) # the primary path is relative to the data root, the secondary path is an absolute path # when the secondary path should be equal to the primary, it must be declared empty indexPrimaryPath=DATA/INDEX # the path to index archive dumps indexArchivePath=DATA/ARCHIVE # the path to the LISTS files. Most lists are used to filter web content listsPath=DATA/LISTS # path to additional databases, like messages, blog data and bookmarks workPath=DATA/WORK # the path to the SKINS files. 
skinPath=DATA/SKINS # the yellow-list; URL's elements # (the core of an URL; like 'yahoo' in 'de.yahoo.com') # appearing in this list will not get a manipulated user agent string proxyYellowList=yacy.yellow # the black-list; URLs appearing in this list will not be loaded; # instead always a 404 is returned # all these files will be placed in the listsPath BlackLists.Shared=url.default.black BlackLists.DefaultList=url.default.black #these are not needed as default. they just keep the values from being deleted ... proxy.BlackLists=url.default.black crawler.BlackLists=url.default.black dht.BlackLists=url.default.black search.BlackLists=url.default.black surftips.BlackLists=url.default.black news.BlackLists=url.default.black # the blue-list; # no search result is locally presented that has any word of the bluelist # in the search words, the URL or the URL's description plasmaBlueList=yacy.blue # this proxy may in turn again access another proxy # if you wish to do that, specify it here # if you want to switch on the proxy use, set remoteProxyUse=true # remoteProxyNoProxy is a no-proxy pattern list for the remote proxy remoteProxyUse=false remoteProxyUse4SSL=true remoteProxyHost=192.168.2.2 remoteProxyPort=4239 remoteProxyUser= remoteProxyPwd= remoteProxyNoProxy=10\..*,127\..*,172\.(1[6-9]|2[0-9]|3[0-1])\..*,169\.254\..*,192\.168\..*,localhost,0:0:0:0:0:0:0:1 # the proxy may filter the content of transferred web pages # the bluelist removes specific keywords from web pages proxyBlueList=yacy.blue # security settings # we provide proxy and server security through a 2-stage security gate: # 1st stage: firewall-like access control through ip filter for clients # 2nd stage: password settings for proxy, server and server administrators # by default, these settings are weak to simplify set-up and testing # every user/administrator shall be encouraged to change these settings # you can change them also online during run-time on # http://localhost:8090/ # proxyClient:
client-ip's that may connect the proxy for proxy service # if several ip's are allowed then they must be separated by a ',' # regular expressions may be used #proxyClient=192.168.0.4 proxyClient=localhost,127\.0\.0\.1,192\.168\..*,10\..*,0:0:0:0:0:0:0:1.* # YaCyHop: allow public usage of proxy for yacy-protocol # this enables usage of the internal http proxy for everyone, # if the file path starts with /yacy/ # This is used to enable anonymization of yacy protocol requests # Instead of asking a remote peer directly, a peer in between is asked # to prevent that the asked peer knows which peer asks. YaCyHop=true # serverClient: client-ip's that may connect to the web server, # thus are allowed to use the search service # if you set this to another value, search requst from others # are blocked, but you will also be blocked from using others # search services. serverClient=* # use_proxyAccounts: set to true to restrict proxy-access to some identified users. #use User_p.html to create some Users. use_proxyAccounts=true # adminAccount: a user:password - pair for administration of # settings through the web interface # should be set to a secret. By default it is without a password # but you are encouraged to set it to another value on the page # http://localhost:8090/ConfigAccounts_p.html #adminAccount=admin:mysecretpassword adminAccount= adminAccountBase64MD5= adminAccountUserName=admin # special access handling for users from localhost: # access from localhost may be granted with administration authority # if this flag is set. 
It is set to true by default to make usage of YaCy easy # if you use YaCy on a headless server, you should set this to false # or configure this on http://localhost:8090/ConfigAccounts_p.html # during the first 10 minutes of operation of YaCy; # if the admin account password is still empty after 10 minutes a random # password is generated and access is then ONLY from localhost, which will cause # inaccessibility for installations on headless servers. adminAccountForLocalhost=true # adminAccountAllPages: if set to false, then all pages without the extension "_p" are # accessible without authorization. Some servlets may individually decide to use or request # administration rights. If adminAccountAllPages is set to true, then administration # rights are needed to access all pages without any exception. Setting adminAccountAllPages # to true therefore closes the YaCy web pages for everyone. adminAccountAllPages=false # adminRealm: an internal name (like a group name) for the login setting of the admin frontend # ATTENTION: changing this name will invalidate all current password hashes # - With DIGEST authentication mode this realm name is part of the generated password hashes # (RFC 2617 standard and recommendation). If you want to share password configuration # with additional machines they have to belong to the same realm # - authentication defaults to BASIC # - and can be configured in defaults/web.xml , tag <auth-method> #adminRealm=YaCy-AdminUI adminRealm=The YaCy access is limited to administrators. If you don't know the password, you can change it using /bin/passwd.sh # if you are running a principal peer, you must update the following variables # The upload method that should be used to upload the seed-list file to # a public accessible webserver where it can be loaded by other peers.
# # You can set the seedUploadMethod-Property to # - None # - Ftp # - File # - Scp (only if you have installed the optional addon) # seedUploadMethod=none # This is the most common method to upload the seed-list # # This is an ftp account with all relevant information. # The update is only made if there had been changes in between. seedFTPServer= seedFTPAccount= seedFTPPassword= seedFTPPath= # alternatively to an FTP account, a peer can also become a principal peer # if the seed-list can be generated as a file and that file is also accessible from # the internet. In this case, omit any ftp settings and set this path here. # if this path stays empty, an ftp account is considered # however, you must always set a seedURL because it is used to check if the # file is actually accessible from the internet seedFilePath= # Settings needed to upload the seed-list file via scp # # Please note that this upload method can only be used if you have installed # this optional upload method. seedScpServer= seedScpServerPort= seedScpAccount= seedScpPassword= seedScpPath= # every peer periodically scans for other peers. you can set the time # of the period here (minutes) peerCycle=2 # debug flags debug.search.local.dht.off=false debug.search.local.solr.off=false debug.search.remote.dht.off=false debug.search.remote.dht.testlocal=false debug.search.remote.solr.off=false debug.search.remote.solr.testlocal=false #staticIP if you have a static IP, you can use this setting staticIP= # each time YaCy starts up, it can trigger the local browser to show the # status page. This is active by default, to make it easier for first-time # users to understand what this application does. You can disable browser # pop-up here or set a different start page, like the search page browserPopUpTrigger=true browserPopUpPage=index.html # a forward page can be given for the index.html page # when a user accesses the index.html page, he/she is forwarded to the page # as given by indexForward. 
This is by default not defined which means 'no forward' indexForward = # defines if the YaCy icon appears in the system tray on supported platforms tray.icon.enabled=true tray.icon.force=false tray.icon.label=YaCy tray.menu.enabled=true # index sharing attributes: by default, sharing is on. # If you want to use YaCy only for local indexing (robinson mode), # you may switch this off allowDistributeIndex=true allowDistributeIndexWhileCrawling=false allowDistributeIndexWhileIndexing=true allowReceiveIndex=true allowReceiveIndex.search=true indexReceiveBlockBlacklist=true # the frequency is the number of links per minute, that the peer allowes # _every_ other peer to send to this peer defaultWordReceiveFrequency=100 defaultLinkReceiveFrequency=30 # the default may be overridden for each peer individually, these # settings are only available through the online interface # prefetch parameters # the prefetch depth assigns a specific depth to the prefetch mechanism # prefetch of 0 means no prefetch; a prefetch of 1 means to prefetch all # embedded URLs, but since embedded image links are loaded by the browser # this means that only embedded anchors are prefetched additionally # a prefetch of 2 would result in loading of all images and anchor pages # of all embedded anchors. Be careful with this value, since even a prefetch # of 2 would result in hundreds of prefetched URLs for each single proxy fill. 
proxyPrefetchDepth=0 proxyStoreHTCache=true proxyIndexingRemote=false proxyIndexingLocalText=true proxyIndexingLocalMedia=true # proxy usage only for .yacy-Domains for autoconfig proxyYacyOnly=false # enable proxy via url (/proxy.html?url=http://yacy.net) proxyURL=false proxyURL.access=127.0.0.1,0:0:0:0:0:0:0:1 # which urls to rewrite to /proxy.html?url=x (values: all, domainlist) proxyURL.rewriteURLs=domainlist proxyURL.useforresults=false # Autocrawl configuration autocrawl=false autocrawl.index.text=true autocrawl.index.media=true autocrawl.ratio=50 autocrawl.rows=100 autocrawl.days=1 autocrawl.query=*:* autocrawl.deep.depth=3 autocrawl.shallow.depth=1 # From the 'IndexCreate' menu point you can also define a crawling start point. # The crawling works the same way as the prefetch, but it is possible to # assign a different crawling depth. # Be careful with this number. Consider a branching factor of average 20; # A prefetch-depth of 8 would index 25.600.000.000 pages, maybe the whole WWW. crawlingDepth=3 crawlingDirectDocByURL=true crawlingIfOlder=-1 crawlingDomFilterDepth=-1 crawlingDomMaxPages=-1 indexText=true indexMedia=true # Filter for crawling; may be used to restrict a crawl to a specific domain # URLs are only indexed and further crawled if they match this filter crawlingFilter=.* crawlingQ=true followFrames=true obeyHtmlRobotsNoindex=true obeyHtmlRobotsNofollow=false storeHTCache=true storeTXCache=true # peers may initiate remote crawling tasks. 
# every peer may allow or disallow to be used as crawling-peer; # you can also set a maximum crawl depth that can be requested or accepted # order=parameters for requester; response=parameters for responder # these values apply only for senior-senior - communication # The delay value is number of seconds between two separate orders # crawlOrder: default value for remote crawl starts # crawlResponse: set to true if a peer should retrieve remote crawl urls from other peers crawlOrder=true crawlOrderDepth=0 crawlResponse=false crawlResponseDepth=0 # indexing-exclusion - rules # These rules are important to reduce the number of words that are indexed # We distinguish three different sets of stop-words: # static - excludes all words given in the file yacy.stopwords from indexing, # dynamic - excludes all words from indexing which are listed by statistic rules, # parental - excludes all words from indexing which had been indexed in the parent web page. xsstopw=true xdstopw=true xpstopw=true # Topwords filtering # If set to true, all stopwords (stopwords.yacy) are filtered from the topwords # Change to false if requesting hits from peers with modified stopwords-file and using the unchanged client-version filterOutStopwordsFromTopwords=true # crawling steering: must-match/must-not-match crawlingIPMustMatch=.* crawlingIPMustNotMatch= # the default country codes are all codes for countries in Europe crawlingCountryMustMatch=AD,AL,AT,BA,BE,BG,BY,CH,CY,CZ,DE,DK,EE,ES,FI,FO,FR,GG,GI,GR,HR,HU,IE,IM,IS,IT,JE,LI,LT,LU,LV,MC,MD,MK,MT,NL,NO,PL,PT,RO,RU,SE,SI,SJ,SK,SM,TR,UA,UK,VA,YU # collections for index data separation # these collections can either be used to produce search tenants. # The collection is used in the site-parameter in the GSA interface. # Collections are assigned during crawl-time and defined in the crawl start. # The YaCyScheme field collection_sxt must be switched on to use this field.
collection=user # performance-settings # delay-times for permanent loops (milliseconds) # the idlesleep is the pause that an proces sleeps if the last call to the # process job was without execution of anything; # the busysleep is the pause after a full job execution # the prereq-value is a memory pre-requisite: that much bytes must # be available/free in the heap; othervise the loop is not executed # and another idlesleep is performed 20_dhtdistribution_idlesleep=30000 20_dhtdistribution_busysleep=15000 20_dhtdistribution_memprereq=12582912 20_dhtdistribution_loadprereq=2.0 30_peerping_idlesleep=30000 30_peerping_busysleep=30000 30_peerping_memprereq=2097152 30_peerping_loadprereq=4.0 40_peerseedcycle_idlesleep=1800000 40_peerseedcycle_busysleep=1200000 40_peerseedcycle_memprereq=4194304 40_peerseedcycle_loadprereq=2.0 50_localcrawl_idlesleep=2000 50_localcrawl_busysleep=10 50_localcrawl_memprereq=25165824 50_localcrawl_loadprereq=6.0 50_localcrawl_isPaused=false 55_autocrawl_idlesleep=10000 55_autocrawl_busysleep=10000 55_autocrawl_memprereq=25165824 55_autocrawl_loadprereq=6.0 60_remotecrawlloader_idlesleep=4000 60_remotecrawlloader_busysleep=800 60_remotecrawlloader_memprereq=12582912 60_remotecrawlloader_loadprereq=8.0 60_remotecrawlloader_isPaused=false 62_remotetriggeredcrawl_idlesleep=2000 62_remotetriggeredcrawl_busysleep=200 62_remotetriggeredcrawl_memprereq=12582912 62_remotetriggeredcrawl_loadprereq=8.0 62_remotetriggeredcrawl_isPaused=false 70_surrogates_idlesleep=10000 70_surrogates_busysleep=0 70_surrogates_memprereq=12582912 70_surrogates_loadprereq=8.0 720_ccimport_idlesleep=100 720_ccimport_busysleep=1000 720_ccimport_memprereq=1048576 720_ccimport_loadprereq=8.0 730_ccfilter_idlesleep=100 730_ccfilter_busysleep=1000 730_ccfilter_memprereq=1048576 730_ccfilter_loadprereq=8.0 85_scheduler_idlesleep=60000 85_scheduler_busysleep=60000 85_scheduler_memprereq=1048576 85_scheduler_loadprereq=4.0 90_cleanup_idlesleep=300000 90_cleanup_busysleep=300000 
90_cleanup_memprereq=0 90_cleanup_loadprereq=16.0 reindexSolr_idlesleep=1000 reindexSolr_busysleep=1 reindexSolr_memprereq=10485760 reindexSolr_loadprereq=9.0 # additional attributes: # performanceIO is a percent-value. a value of 10 means, that 10% of the busysleep time # is used to flush the RAM cache, which is the major part of the IO in YaCy performanceProfile=defaults/yacy.init performanceSpeed=100 performanceIO=10 # cleanup-process: # properties for tasks that are performed during cleanup cleanup.deletionProcessedNews = true cleanup.deletionPublishedNews = true cleanup.failedSearchURLtimeout = 86400000 # default memory settings for startup of yacy # is valid in unix/shell and windows environments but # not for first startup of YaCy # -Xmx and -Xms maximum/init Java heap size # if a high performance for large search indexes is wanted, then setting the values to equal number is recommended # if YaCy shall be nice in not-only-yacy environments, then the Xms value may be lower javastart_Xmx=Xmx600m javastart_Xms=Xms90m # YaCy is able to use RAM copies of database tables. This needs a lot of RAM. # To switch on copying of file tables int RAM, there must be enough memory # The memory that is available at startup time is used to switch the feature on # The tableCachingLimit is the amount of free RAM at startup time to switch on the feature tableCachingLimit=419430400 # some java versions may be limited to a specific array size # of 134217727 entries. 
To prevent that tables of that size are generated, # set this property to false # If you want to have better performance and switch ramcopy on, try also to # set this property to true # this value is automatically set to true, if more than two gigabyte is available exceed134217727=false # priority of the yacy-process # is valid in unix/shell and windows environments but # not for first startup of YaCy # UNIX: corresponds to the nice-level # WIN: -20=realtime;-15=high;-10=above;0=normal;10=below;20=low javastart_priority=10 # performance properties for the word index cache # wordCacheMaxLow/High is the number of word indexes that shall be held in the # ram cache during indexing. If you want to increase indexing speed, increase this # value i.e. up to one million, but increase also the memory limit to a minimum of 2GB wordCacheMaxCount = 50000 # Specifies if yacy can be used as transparent http proxy. # # Please note that you also have to reconfigure your firewall # before you can use yacy as transparent proxy. 
On linux this # can be done like this: # iptables -t nat -A PREROUTING -p tcp -s 192.168.0.0/16 \ # --dport 80 -j DNAT --to 192.168.0.1:8090 # # With this iptables filter listed above all http traffic that # comes from your private network (in this case 192.168.0.0) # and goes to any webserver listening on port 80 will be forwarded # by the firewall to yacy running on port 8090 (192.168.0.1:8090) isTransparentProxy=false # Specifies the timeout the proxy should use proxy.clientTimeout = 60000 # Specifies if the proxy should send the via header according to RFC proxy.sendViaHeader=true # Specifies if the proxy should send the X-Forwarded-For header proxy.sendXForwardedForHeader=true # Enable cookie monitoring proxy.monitorCookies=false # msgForwarding: Specifies if yacy should forward received messages via # email to the configured email address msgForwardingEnabled=false msgForwardingCmd=/usr/sbin/sendmail msgForwardingTo=root@localhost #crawlPause: delay time after specific functions before crawling is resumed crawlPause.proxy=10 crawlPause.localsearch=50 crawlPause.remotesearch=10 # Some configuration values for the crawler crawler.clientTimeout=30000 # http crawler specific settings; size in bytes crawler.http.accept=text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8 crawler.http.acceptEncoding=gzip crawler.http.acceptLanguage=en-us,en;q=0.5 crawler.http.acceptCharset=ISO-8859-1,utf-8;q=0.7,*;q=0.7 crawler.http.maxFileSize=10485760 crawler.http.FollowRedirects=true crawler.http.RecordRedirects=false # ftp crawler specific settings; size in bytes crawler.ftp.maxFileSize=10485760 # smb crawler specific settings: maximum size crawler.smb.maxFileSize=100000000 # file crawler specific settings: maximum size crawler.file.maxFileSize=100000000 # maximum number of crawler threads crawler.MaxActiveThreads = 200 # maximum number of same hosts in crawler threads crawler.MaxSameHostInQueue = 20 # default latency is the start value of the average of remote server
response time crawler.defaultAverageLatency = 500 # the latency factor is a factor that is applied to the average remote server latency. # The result is the minimum remote server access delay time crawler.latencyFactor = 0.5 # The onDemandLimit is the maximum number of crawl queues that are concurrently opened # at the same time. If the number of hosts exceeds this number, onDemand queues are opened # which are opened each time a queue is accessed which creates high IO load. On the other # hand, having too many entries in onDemandLimit may exceed the maximum number of file # pointers. You can increase this number in /proc/sys/fs/file-max and adopt it to the number # defined here crawler.onDemandLimit = 1000 # maximum size of indexing queue indexer.slots = 100 # maximum size of stacker queue stacker.slots = 2000 # search options: show advanced options on main search page search.options = true # search domains. If set to false then that search is not available search.text = true search.image = true search.audio = false search.video = false search.app = false # number of search results per page displayed by default search.items = 10 # target for search results; this is the href target attribute inside every search result link # possible values: # "_blank" (new window), "_self" (same window), "_parent" (the parent frame of a frameset), # "_top" (top of all frames), "searchresult" (a default custom page name for search results) # a special pattern can be given for exceptions to the default target according to urls search.target = _self search.target.special = _self search.target.special.pattern = # search result lines may show additional information for each search hit # these information pieces may be switched on or off search.result.show.date = true search.result.show.size = false search.result.show.metadata = false search.result.show.parser = false search.result.show.citation = true search.result.show.pictures = false search.result.show.cache = true 
search.result.show.proxy = false search.result.show.hostbrowser = true search.result.show.vocabulary = false search.result.show.vocabulary.omit = search.result.show.snapshots = false # search navigators: comma-separated list of default values for search navigation. # can be temporarily different if search string is given with different navigation values # assigning no value(s) means that no navigation is shown search.navigation=location,hosts,authors,namespace,topics,filetype,protocol,language #search.navigation=location,hosts,authors,namespace,topics,filetype,protocol,language,collections,date # search result verification and snippet fetch caching rules # each search result can be verified by loading the link from the web # this can be enhanced using a cache. In some cases it may be appropriate # to not verify the link at all and do not compute a snippet # the possible cases are: # nocache: no use of web cache, load all snippets online # iffresh: use the cache if the cache exists and is fresh otherwise load online # ifexist: use the cache if the cache exists or load online # cacheonly: never go online, use all content from cache. If no cache entry exists, # consider content nevertheless as available and show result without snippet # false: no link verification and no snippet generation: # all search results are valid without verification search.verify = ifexist search.excludehosts= search.excludehosth= # in case that a link verification fails then the corresponding index reference can be # deleted to clean up the index. If this property is set then failed index verification in # the cases of nocache, iffresh and ifexist causes an index deletion search.verify.delete = true # remote search details remotesearch.maxcount = 10 remotesearch.maxtime = 3000 remotesearch.result.store=true # Maximum size allowed (in bytes) for a remote document result to be stored to local index. Defaults to -1, which means no limit.
remotesearch.result.store.maxsize=-1 remotesearch.maxload.rwi=8.0 remotesearch.maxload.solr=4.0 # specifies if yacy should set its own referer if no referer URL # was set by the client. useYacyReferer = false # specifies if the http post body should be transferred # using content-encoding gzip during index transfer # a) indexDistribution: which is done periodically if you have enabled # Index Distribution via IndexControl_p.html # b) indexTransfer: which can be used to transfer the whole index of a peer # this can be started via IndexTransfer_p.html # c) indexControl: which can be triggered manually via IndexControl_p.html to # transfer a chosen subset of the peer index indexDistribution.gzipBody = true indexTransfer.gzipBody = true indexControl.gzipBody = true # defining timeouts for index- transfer/distribution/control indexControl.timeout = 60000 indexDistribution.timeout = 60000 indexTransfer.timeout = 120000 # defining max. allowed amount of open files during index- transfer/distribution indexDistribution.maxOpenFiles = 800 indexTransfer.maxOpenFiles = 800 # sizes for index distribution indexDistribution.minChunkSize = 10 indexDistribution.maxChunkSize = 1000 indexDistribution.startChunkSize = 200 indexDistribution.maxChunkFails = 1 # limit of references per term & blob to the youngest of this value # a value of <= 0 disables this feature (no limit) # a value of e.g.
100000 can improve stability and reduce load while searching very popular words index.maxReferences = 0 # Search sequence settings # collection: # time = time to get a RWI out of RAM cache, assortments and WORDS files # count = maximum number of RWI-entries that shall be collected # # join: # time = time to perform the join between all collected RWIs # count = maximum number of entries that shall be joined # # presort: # time = time to do a sort of the joined URL-records # count = maximum number of entries that shall be pre-sorted # # urlfetch: # time = time to fetch the real URLs from the LURL database # count = maximum number of urls that shall be fetched # # postsort: # time = time for final sort of URLs # count = maximum number of URLs that shall be retrieved during sort # # filter: # time = time to filter out unwanted urls (like redundant urls) # count = maximum number of urls that shall be filtered # # snippetfetch: # time = time to fetch snippets for selected URLs # count = maximum number of snippets to be fetched # # all values are percent # time-percent is the percent of total search time # count-percent is the percent of total wanted urls in result # we distinguish local and remote search times searchProcessLocalTime_c = 44 searchProcessLocalCount_c = 10000000 searchProcessLocalTime_j = 8 searchProcessLocalCount_j = 1000000 searchProcessLocalTime_r = 8 searchProcessLocalCount_r =100000 searchProcessLocalTime_u = 20 searchProcessLocalCount_u = 10000 searchProcessLocalTime_o = 10 searchProcessLocalCount_o = 100 searchProcessLocalTime_f = 5 searchProcessLocalCount_f = 100 searchProcessLocalTime_s = 5 searchProcessLocalCount_s = 30 searchProcessRemoteTime_c = 44 searchProcessRemoteCount_c = 1000000 searchProcessRemoteTime_j = 8 searchProcessRemoteCount_j = 1000000 searchProcessRemoteTime_r = 8 searchProcessRemoteCount_r = 1000 searchProcessRemoteTime_u = 20 searchProcessRemoteCount_u = 1000 searchProcessRemoteTime_o = 10 searchProcessRemoteCount_o = 1000
searchProcessRemoteTime_f = 5 searchProcessRemoteCount_f = 100 searchProcessRemoteTime_s = 5 searchProcessRemoteCount_s = 10 # timeouts for snippet fetching in ms # timeout_text is for text-snippets, timeout_media for media, e.g. images timeout_text = 10000 timeout_media = 15000 # a list of domain name patterns that should not be cached by the httpc dns cache httpc.nameCacheNoCachingPatterns = .*.ath.cx,.*.blogdns.*,.*.boldlygoingnowhere.org,.*.dnsalias.*,.*.dnsdojo.*,.*.dvrdns.org,.*.dyn-o-saur.com,.*.dynalias.*,.*.dyndns.*,.*.ftpaccess.cc,.*.game-host.org,.*.game-server.cc,.*.getmyip.com,.*.gotdns.*,.*.ham-radio-op.net,.*.hobby-site.com,.*.homedns.org,.*.homeftp.*,.*.homeip.net,.*.homelinux.*,.*.homeunix.*,.*.is-a-chef.*,.*.is-a-geek.*,.*.kicks-ass.*,.*.merseine.nu,.*.mine.nu,.*.myphotos.cc,.*.podzone.*,.*.scrapping.cc,.*.selfip.*,.*.servebbs.*,.*.serveftp.*,.*.servegame.org,.*.shacknet.nu #externalRedirectors #squid Redirector compatible externalRedirector= # the Yacy Version this config was created with Version= # old version value (keep to allow conversion of .conf, until next main release > 1.83) svnRevision=0 currentSkin=pdbootstrap # flag to show if pages shall be usable for non-admin users # this can be applied to the Surftips.html and yacysearch.html page publicSurftips = true publicSearchpage = true # flag to show if the top navigation bar shall be shown to all users # if this is disabled, then the user must navigate manually from the search page # to /Status.html to get the main menu bar back publicTopmenu = true # Wiki access rights # the built-in wiki system allows by default only that the administrator is allowed to make changes # this can be changed.
There are three options: # admin - only the admin has write right # all - everybody has write right # user - the admin and every user registered in the user db has write right WikiAccess = admin # Search Profiles # we will support different search profiles # If this profile setting is empty, a hard-coded profile is used to initialise the values search.ranking.rwi.profile = # The boost fields contain all fields which shall be searched together with a boost. non-mentioned fields are not searched. # Boost queries are added to all queries; functions evaluate a value which is either added or multiplied with the ranking. # The field boostfunctionmode can be either 'add' or 'multiply' to describe the mode. # All boost methods > 0 must have names to be able to select this name with a query, with the syntax /name # The boostfields setting is of special importance as these are the fields used to query for search terms search.ranking.solr.collection.boostname.tmpa.0=Default Profile search.ranking.solr.collection.boostfields.tmpa.0=url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,host_s^6.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^3.0,keywords^2.0,description_txt^1.5,author^1.0 search.ranking.solr.collection.filterquery.tmpa.0= search.ranking.solr.collection.boostquery.tmpa.0=crawldepth_i:0^0.8\ncrawldepth_i:1^0.4 search.ranking.solr.collection.boostfunction.tmpb.0= search.ranking.solr.collection.boostname.tmpa.1=Date Profile: sort by date in descending order for a '/date' usage search.ranking.solr.collection.boostfields.tmpa.1=url_paths_sxt^0.1,title^0.1,text_t^0.1 search.ranking.solr.collection.filterquery.tmpa.1= search.ranking.solr.collection.boostquery.tmpa.1= search.ranking.solr.collection.boostfunction.tmpb.1=recip(ms(NOW,last_modified),3.16e-11,1,1) search.ranking.solr.collection.boostname.tmpa.2=Intranet Profile: when a search is done on a single domain only, i.e.
if a site:-operator is used search.ranking.solr.collection.boostfields.tmpa.2=url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^3.0,h3_txt^2.0,keywords^2.0,description_txt^1.5,author^1.0 search.ranking.solr.collection.filterquery.tmpa.2= search.ranking.solr.collection.boostquery.tmpa.2=fuzzy_signature_unique_b:true^10.0 search.ranking.solr.collection.boostfunction.tmpb.2= search.ranking.solr.collection.boostname.tmpa.3=_unused3 search.ranking.solr.collection.boostfields.tmpa.3=text_t^1.0 search.ranking.solr.collection.filterquery.tmpa.3= search.ranking.solr.collection.boostquery.tmpa.3=crawldepth_i:0^0.8\ncrawldepth_i:1^0.4 search.ranking.solr.collection.boostfunction.tmpb.3= # the following values are used to identify duplicate content search.ranking.solr.doubledetection.minlength=3 search.ranking.solr.doubledetection.quantrate=0.5f # Another attribute for double content is a 'greedy' ignoring of a http url is present for each https and vice versa # The same may be true for documents with leading 'www.' subdomain and without. # The following attributes will cause that https is preferred over http and with-www is preferred over without-www search.ranking.uniqueheuristic.preferhttps = false search.ranking.uniqueheuristic.preferwwwprefix = true #optional extern thumbnail program. 
#the program must accept the invocation PROGRAM http://url /path/to/filename thumbnailProgram = # settings for the peer's local robots.txt # the following restrictions are possible (comma-separated): # - all : entire domain is disallowed # - blog : the blog-pages # - bookmarks : the bookmark-page # - dirs : all directories in htroot (standard setting, as there is no usable information in) # - fileshare : all files in the peer's file share (DATA/HTDOCS/share) # - homepage : all files on the peer's home page (DATA/HTDOCS/www) # - locked : all servlets ending on '_p.*' (standard setting, as robots would need a password to access them anyways) # - news : the news-page # - network : the network-pages # - status : peer's status page # - surftips : the surftips-page # - wiki : the wiki-page httpd.robots.txt = locked,dirs,bookmarks,network,news,status,profile # class to use for parsing wikicode wikiParser.class = de.anomic.data.wikiCode # settings for automatic deletion of old entries in passive and potential seed-db # time means max time (in days) a peer may not have been seen before it is deleted routing.deleteOldSeeds.permission = true routing.deleteOldSeeds.time = 30 # options to remember the default search engines when using the search compare features compare_yacy.left = YaCy compare_yacy.right = startpage.com # minimum free disk space for crawling (MiB) disk.free = 3000 # minimum for DHT disk.free.hardlimit = 1000 # ResourceObserver settings # We apply the naming of control circuit states to resources observer limit values (steady-state value, over/undershot) # under/overshot states in the system are supposed to be regulated to match the steady-state value # autoregulation of resource states # ATTENTION: be aware that using the autoregulate-option causes that the search index data is DELETED as soon as threshold-values are reached! 
# the autoregulate function starts working if resources reach over/undershot values and the auto-regulation tries to regulate to the steadystate value resource.disk.free.autoregulate=true resource.disk.used.autoregulate=false # the target steady-state of minimum disk space left (MB) resource.disk.free.min.steadystate=4096 # the undershot below the steady-state of minimum disk free as absolute size (MB) resource.disk.free.min.undershot=2048 # the target steady-state of maximum disk space for YaCy (MB) resource.disk.used.max.steadystate=2097152 # the overshot above the steady-state of disk space for YaCy (absolute) (MB) resource.disk.used.max.overshot=4194304 # minimum memory to accept dht-in (MiB) memory.acceptDHTabove = 50 memory.disabledDHT = false # whether using standard memory strategy - or try generation memory strategy memory.standardStrategy = true # setting if execution of CGI files is allowed or not cgi.allow = false cgi.suffixes = cgi,pl # content integration settings content.phpbb3.urlstub = http:/// content.phpbb3.dbtype = mysql content.phpbb3.dbhost = localhost content.phpbb3.dbport = 3306 content.phpbb3.dbname = forum content.phpbb3.tableprefix = phpbb_ content.phpbb3.dbuser = notroot content.phpbb3.dbpw = joshua content.phpbb3.ppf = 1000 content.phpbb3.dumpfile = # search engine teaser: an about box in search results # this is only shown, if the about.body is filled about.headline=Please support YaCy! about.body=
If you run a YaCy server, feel free to replace our donation plea with your own support message, use the Portal Configuration servlet.
donation.iframesource=http://yacy.net/include/donate.html donation.iframetarget=env/donate.html # search heuristics heuristic.site = false heuristic.searchresults = false heuristic.searchresults.crawlglobal = false heuristic.opensearch = false # colours for generic design # white color_background = #FFFFFF # dark blue/grey color_text = #18294A # success/green color_legend = #5cb85c # brand/blue color_tableheader = #84B3DE # dark/light grey (for tables) color_tableitem = #dddddd color_tableitem2 = #eeeeee # light red color_tablebottom = #F2DEDE color_borderline = #888888 color_signbad = #990000 color_signgood = #009900 color_signother = #000099 # dark blue color_searchheadline = #2145ca # green / success/3*2 color_searchurl = #1c65ba color_searchurlhover = #1c65ba # federated index storage and federated search functionality # federated search means that other search engines may be used together with the built-in indexing. # each federated search may be able to be used as remote indexing service and/or as remote search service. # a typical use case for a federated search is a concurrent search from opensearch sources. # a typical use case for a remote indexing service is a remote solr index. YaCy supports remote solr indexes. # solr indexes can be filled if enabled is set to true # the remote index scheme is the same as produced by the SolrCell; see http://wiki.apache.org/solr/ExtractingRequestHandler # because this default scheme is used the default example scheme can be used as solr configuration # to use this, do the following: # - set federated.service.solr.indexing.enabled = true # - download solr from http://www.apache.org/dyn/closer.cgi/lucene/solr/ # - extract the solr (3.1) package, 'cd example' and start solr with 'java -jar start.jar' # - start yacy and then start a crawler. The crawler will fill both, YaCy and solr indexes. 
# - to check what's in solr after indexing, open http://localhost:8983/solr/admin/ federated.service.solr.indexing.enabled = false federated.service.solr.indexing.url = http://127.0.0.1:8983/solr federated.service.solr.indexing.sharding = MODULO_HOST_MD5 # the lazy attribute causes that fields containing "" or 0 are not added and not written federated.service.solr.indexing.lazy = true federated.service.solr.indexing.timeout = 60000 federated.service.solr.indexing.writeEnabled = true # temporary definition of backend services to use. # After the migration a rwi+solr combination is used, the solr contains the content of the previously used metadata-db. # To get a handle for a migration, these values are defined as temporary, if the migration starts the values are renamed # and defined with different default values. # The citation service is used for ranking; this is a reverse linking index. It should be on before and after the migration. # It can be switched off if only a remote solr index is used.
core.service.fulltext = true core.service.rwi.tmp = true core.service.citation.tmp = true core.service.webgraph.tmp = false # Augmentation settings parserAugmentation = false parserAugmentation.RDFa = false # Content control settings contentcontrol.enabled = false contentcontrol.bookmarklist = contentcontrol contentcontrol.mandatoryfilterlist = yacy contentcontrol.smwimport.enabled = false contentcontrol.smwimport.baseurl = contentcontrol.smwimport.purgelistoninit = true contentcontrol.smwimport.targetlist = contentcontrol contentcontrol.smwimport.defaultcategory = yacy # host browser settings browser.autoload = false browser.load4everyone = false # greedy learning: fast information acquisition heuristic for new peers # to make greedy learning work, it must be enabled in the network definition # the user may switch it off at any time, but if the automatic learning limit is reached # then the active flag is set to false automatically and this will switch to that state # automatically by the cleanup process each time if the user switches it on again. # While the switch is on, it will cause that the user-submitted search will be done along # with some heuristics like: loading linked documents and adding a twitter search. # When the learning mode is finished, the user may switch on individual heuristics by himself. greedylearning.active = true # postprocessing steering postprocessing.maximum_load = 2.5 postprocessing.minimum_ram = 536870912 postprocessing.partialUpdate = true # Custom user agents for 'allip' networks: # This user agent is only available if the network is set to 'allip' (which is a non-limited domain 'network' # without p2p options). Changing this will NOT change the default YaCy user agent, it will only provide an # agent which is available at crawl start within 'allip'. The userAgent.name is the identifier for the # robots.txt file which YaCy always obeys for the given name or a wildcard for robot types.
# If any part of this custom user agent name or string includes the phrase 'yacy', it will be IGNORED # to prevent fraud, DoS or bad behavior in the name of YaCy. # To use this user agent option, you must define completely different names and strings # and remove the given example here, which will be ignored by default. crawler.userAgent.name = yacybot crawler.userAgent.string = yacybot ($$SYSTEM$$) http://yacy.net/bot.html crawler.userAgent.minimumdelta = 500 crawler.userAgent.clienttimeout = 10000 # experiments with timeout requests timeoutrequests = true # interface decorations decoration.audio = false decoration.grafics.linkstructure = true decoration.hostanalysis = false decoration.simpleheadernavbar = navbar-default