###
### YaCy Init File
###
# These properties will be loaded upon installation.
# They are used only once for set-up.
# If you make changes to this file and want them to take effect,
# you must delete the httpProxy.conf file in DATA/SETTINGS

# ----------------------------------------------------------------------------
# the HTTP service configurations

# port number where the server should bind to
# e.g. 8080
#      #eth0:8080
#      192.168.0.1:8080
port = 8080

# sometimes you may want yacy to bind to another port than the one reachable from outside.
# then set bindPort to the port yacy should bind on, and port to the port visible from outside.
# to run yacy on port 8080, reachable from port 80, set bindPort=8080, port=80 and use
# iptables -t nat -A PREROUTING -p tcp -s 192.168.24.0/16 --dport 80 -j DNAT --to 192.168.24.1:8080
# (of course you need to customize the ips)
bindPort =

# SSL support:
#
# For a German manual see http://yacy-websuche.de/wiki/index.php/De:Interface%C3%9CberHTTPS
#
# English speaking users read below:
#
# With this you can access your peer using https://localhost:8080
#
# There are two possibilities to specify which certificate should
# be used by YaCy.
#
# 1) Create a new certificate:
#
#    *) For testing purposes, you can create a keystore with a self-signed certificate,
#       using the following command:
#       C:\> keytool -keystore mySrvKeystore -genkey -keyalg RSA -alias mycert
#
#    *) Then configure the keyStoreXXXX properties accordingly, e.g.
#       keyStore = c:/yacy/DATA/SETTINGS/mySrvKeystore
#       keyStorePassword = mypwd
#
# 2) Import an existing certificate:
#
#    Alternatively you can import an existing certificate in pkcs12 format into
#    the keystore.
#
#    This can be done by setting the pkcs12XXX properties accordingly, e.g.
#       pkcs12ImportFile = c:/temp/keystore.pkcs12
#       pkcs12ImportPwd = test
#
# If the property keyStore is not specified, then a new keystore file
# DATA/SETTINGS/myPeerKeystore will be created.
keyStore =
keyStorePassword =
pkcs12ImportFile =
pkcs12ImportPwd =

# server tracking: maximum time a track entry is held in the internal cache
# value is in milliseconds, default is one hour
server.maxTrackingTime = 3600000

# Network Definition
# There can be separate YaCy networks, and managed sub-groups of the general network.
# The essentials of the network definition are attached in separate property files.
# The property here can also be a url where the definition can be loaded.
# In case of privately managed networks, this configuration must be changed BEFORE it is released
# to the members of the separated network peers.
network.unit.definition = yacy.network.unit
network.group.definition = yacy.network.group

# Update process properties
# The update server location is given in the network.unit.definition,
# but the settings for update processing and cycles are individual.
# the update process can be either 'manual' (no automatic lookup for new versions),
# 'guided' (automatic lookup, but user is asked before update is performed),
# or 'auto' (whenever an update is available, the update is loaded and installed)
update.process = manual
# the cycle value applies only if the process is automatic or guided. The value means hours.
# There is currently a fixed minimum of 24 hours for updates
update.cycle = 168
# a version number blacklist can restrict automatic or guided updates to a specific
# range of version numbers. The restriction is done with a blacklist (standard regular expression)
# It is recommended to set this list to low developer version numbers
update.blacklist = ....[123]
# an update can also be restricted with a concept property, which can decide if an
# update is only valid if it either is a main release or any svn release including new development releases
# Valid keywords are 'main' and 'any'
update.concept = any
# the following values are set automatically:
# the time when the last lookup to the network update server(s) was done
update.time.lookup = 0
# the time when the last release was downloaded
update.time.download = 0
# the deploy time when the last update was done; milliseconds since epoch
update.time.deploy = 0

# clusters within a network:
# every network can have an unlimited number of clusters. Clusters may be also completely
# sealed and have no connection to other peers. When a cluster does not use the
# p2p protocol and the bootstrapping mechanism to contact other peers, we call them
# Robinson peers. They can appear in different 'visibilities':
# - privatepeer: no connection and no data exchange to any other peer
# - privatecluster: connections only to self-defined addresses (other peers in same mode)
# - publiccluster: like privatecluster, but visible and searchable by public p2p nodes
# - publicpeer: a single peer without cluster connection, but visible for p2p nodes
# all public robinson peers should use a peer tag string to be searchable if in the
# search request these tags appear
cluster.mode=publicpeer
cluster.peers.yacydomain=localpeer.yacy
cluster.peers.ipport=localhost:8080

# bootstrapLoadTimeout
# this is the time-out for loading of the seedlist files during bootstrapping
# the time should not be too long, since loading of the seedlist is not parallelized
# and an unsuccessful loading of a seed file may prevent a peer from reaching
# (at least) junior status. If the time-out is too short, there is the danger
# that the peer stays in virgin mode
bootstrapLoadTimeout = 6000

# time-out of client control socket in milliseconds
# since this applies only to the client-proxy connection,
# it can be rather short
# milliseconds
clientTimeout = 10000

# maximal number of httpd sessions
# a client may open several connections at once, and the httpdMaxActiveSessions value sets
# a limit on the number of concurrent connections
httpdMaxActiveSessions = 150
httpdMaxIdleSessions = 75
httpdMinIdleSessions = 5

# default root path for the file server
# may be overridden by the htdocs parameter
# users shall be encouraged to use the htdocs path for individual content,
# not this path defined here
htRootPath = htroot
htTemplatePath = htroot/env/templates

# the htroot path
# root path for the httpd file server
htDefaultPath=htroot

# individual htroot folder
# every user may publicise her/his own web pages
# these pages shall be placed in the path defined here
# the htdocs path shares its content with the htroot path
htDocsPath = DATA/HTDOCS

# the default files (typically index.html), if no file name is given
# The complete path to this file is created by combination with the rootPath
# you can set a list of defaults, separated by comma
# the first one is prioritized
defaultFiles = ConfigBasic.html,index.html,default.html,search.html,console.html,control.html,welcome.html,wiki.html,forum.html,blog.html,email.html,content.html,monitor.html,share.html,dir.html,readme.txt

# locale-options: YaCy supports localization.
# Web pages for special languages are located in the htLocalePath
# The htLocaleLang defines a list of language options as DIR/NAME
# (e.g. de/Deutsch); the DIR must exist as sub-path to htLocalePath
# the htLocaleSelection selects from the given locales, value=one-of-DIR
# NOTE(review): the DIR/NAME placeholders above were reconstructed from the
# locale.lang value below - confirm against the upstream file
locale.source=locales
locale.work=DATA/LOCALE/locales
locale.translated_html=DATA/LOCALE/htroot
locale.lang=default/English,de/Deutsch,fr/Français,nl/Nederlands,it/Italiano,es/Español,pt/Portugês,fi/Suomi,se/Svenska,dk/Dansk,gr/Eλληvικα,sk/Slovensky
locale.language=default

# virtual host for httpdFileServlet access
# for example http://FILEHOST/ shall access the file servlet and
# return the defaultFile at rootPath
# either way, http://FILEHOST/ denotes the same as http://localhost:PORT/
# for the preconfigured value 'localpeer', the URL is:
# http://localpeer/
fileHost = localpeer

# specify the path to the MIME matching file table
mimeConfig = httpd.mime

# a path to the file cache, used for the internal proxy and as crawl buffer
# This will be used if the server is addressed as a proxy
proxyCache = DATA/HTCACHE

# the maximum disc cache size for files in proxyCache in megabytes
proxyCacheSize = 100
proxyCacheSize__pro = 1024

# storage place for new releases
releases = DATA/RELEASE

# use the mostly direct mapping of URLs to Filenames
# makes it easy watching the content of the cache using file browsers
# problems arise when a file already exists where a new entry expects a directory
# or vice versa.
# when set to false, the file names are set to the hash of the URL and the
# directory is built from protocol, hostname and port, as with the old
# layout.
# the advantage of this scheme is that no directory/file collisions can
# occur.
# switching this flag will take effect after a restart of yacy.
# files that are present under the previously used layout will be renamed
# to the new location and thus be accessible immediately. so an accumulated
# cache is still usable after the switch.
# possible values are {tree, hash}
proxyCacheLayout = hash

# the migration flag shows if the different layout shall be migrated from one to another
proxyCacheMigration = true

# the following mime-types are the whitelist for indexing
#
# parseableRealtimeMimeTypes: specifies mime-types that can be indexed on the fly
# parseableMimeTypes: specifies mime-types that can be indexed but not on the fly
# NOTE(review): the entry 'application/x-redhat packet manager' contains spaces and
# does not look like a valid MIME type; it is kept unchanged - confirm whether it is needed
parseableRealtimeMimeTypes=application/xhtml+xml,text/html,text/plain,text/sgml
parseableMimeTypes=
parseableMimeTypes__pro=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-vnd.oasis.opendocument.text,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml
parseableMimeTypes.CRAWLER=
parseableMimeTypes.CRAWLER__pro=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-vnd.oasis.opendocument.text,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml
# MIME whitelists for the PROXY and ICAP components (same list as for the crawler)
parseableMimeTypes.PROXY=
parseableMimeTypes.PROXY__pro=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-vnd.oasis.opendocument.text,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml
parseableMimeTypes.ICAP=
parseableMimeTypes.ICAP__pro=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-vnd.oasis.opendocument.text,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml
parseableMimeTypes.URLREDIRECTOR=
parseableMimeTypes.URLREDIRECTOR__pro=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-vnd.oasis.opendocument.text,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml

# media extension string
# a comma-separated list of extensions that denote media file formats
# this is important to recognize anchor (a href) tags as non-html references
# These files will be excluded from indexing _(Please keep extensions in alphabetical order)_
mediaExt=7z,ace,aif,aiff,arj,asf,asx,avi,bin,bmp,bz2,css,db,dcm,deb,doc,dll,dmg,exe,gif,gz,hqx,ico,img,iso,jar,jpe,jpg,jpeg,lx,lxl,m4v,mpeg,mov,mp3,mpg,ogg,png,pdf,ppt,ps,ram,rar,rm,rpm,scr,sit,so,swf,sxc,sxd,sxi,sxw,tar,tbz,tgz,torrent,war,wav,wmv,xcf,xls,zip
parseableExt=html,htm,txt,php,shtml,asp,aspx,jsp

# Promotion Strings
# These strings appear in the Web Mask of the YACY search client
# Set these Strings to customize your peer and give any message to
# other peer users
promoteSearchPageGreeting =

# the path to the PLASMA database of the web spider
dbPath=DATA/PLASMADB

# the path to the public reverse word index for text files (web pages)
# the primary path is relative to the data root, the secondary path is an absolute path
# when the secondary path should be equal to the primary, it must be declared empty
indexPrimaryPath=DATA/INDEX
indexSecondaryPath=

# the path to the LISTS files. Most lists are used to filter web content
listsPath=DATA/LISTS

# the path to the SKINS files.
skinPath=DATA/SKINS

# the yellow-list; URL's elements
# (the core of an URL; like 'yahoo' in 'de.yahoo.com')
# appearing in this list will not get a manipulated user agent string
proxyYellowList=yacy.yellow

# the black-list; URLs appearing in this list will not be loaded;
# instead always a 404 is returned
# all these files will be placed in the listsPath
BlackLists.class=de.anomic.plasma.urlPattern.defaultURLPattern
BlackLists.Shared=url.default.black
BlackLists.DefaultList=url.default.black

# these are not needed as default. they just keep the values from being deleted ...
proxy.BlackLists=url.default.black
crawler.BlackLists=url.default.black
dht.BlackLists=url.default.black
search.BlackLists=url.default.black
surftips.BlackLists=url.default.black
news.BlackLists=url.default.black

proxyCookieBlackList=cookie.default.black
# NOTE(review): the white list points at the same file as the black list - confirm this is intended
proxyCookieWhiteList=cookie.default.black

# the blue-list;
# no search result is locally presented that has any word of the bluelist
# in the search words, the URL or the URL's description
plasmaBlueList=yacy.blue

# this proxy may in turn again access another proxy
# if you wish to do that, specify it here
# if you want to switch on the proxy use, set remoteProxyUse=true
# remoteProxyNoProxy is a no-proxy pattern list for the remote proxy
remoteProxyUse=false
remoteProxyUse4Yacy=true
remoteProxyUse4SSL=true
remoteProxyHost=192.168.2.2
remoteProxyPort=4239
remoteProxyUser=
remoteProxyPwd=
remoteProxyNoProxy=192.*,10.*,127.*,localhost

# the proxy may filter the content of transferred web pages
# the bluelist removes specific keywords from web pages
proxyBlueList=yacy.blue

# security settings
# we provide proxy and server security through a 2-stage security gate:
# 1st stage: firewall-like access control through ip filter for clients
# 2nd stage: password settings for proxy, server and server administrators
# by default, these settings are weak to simplify set-up and testing
# every user/administrator shall be encouraged to change these settings
# you can change them also online during run-time on
# http://localhost:8080/

# proxyClient: client-ip's that may connect the proxy for proxy service
# if several ip's are allowed then they must be separated by a ','
# any ip may contain the wildcard-sign '*'
#proxyClient=192.168.0.4
proxyClient=localhost,127.0.0.1,192.168.*,10.*

# YaCyHop: allow public usage of proxy for yacy-protocol
# this enables usage of the internal http proxy for everyone,
# if the file path starts with /yacy/
# This is used to enable anonymization of yacy protocol requests
# Instead of asking a remote peer directly, a peer in between is asked
# to prevent that the asked peer knows which peer asks.
YaCyHop=true

# serverClient: client-ip's that may connect to the web server,
# thus are allowed to use the search service
# if you set this to another value, search requests from others
# are blocked, but you will also be blocked from using others'
# search services.
serverClient=*

# use_proxyAccounts: set to true to restrict proxy-access to some identified users.
# use User_p.html to create some Users.
use_proxyAccounts=false

# serverAccount: a user:password - pair for web server access
# this is the access to the 'public' pages on the server
# should be always open, but you get the option here
# if set to a user:password, you get a conflict with the administration account
# future versions will check if the server is unprotected,
# because the p2p-index-sharing function will use the http server for
# data exchange.
# example
#serverAccount=admin:mysecretpassword
serverAccount=
serverAccountBase64MD5=

# adminAccount: a user:password - pair for administration of
# settings through the web interface
# should be set to a secret. By default it is without a password
# but you are encouraged to set it to another value on the page
# http://localhost:8080/
#adminAccount=admin:mysecretpassword
adminAccount=
adminAccountBase64MD5=

# if you are running a principal peer, you must update the following variables
# The upload method that should be used to upload the seed-list file to
# a publicly accessible webserver where it can be loaded by other peers.
#
# You can set the seedUploadMethod-Property to
# - None
# - Ftp
# - File
# - Scp (only if you have installed the optional addon)
#
# NOTE(review): the value below is lowercase 'none' while the list above says 'None' -
# confirm whether the lookup is case-sensitive
seedUploadMethod=none

# The URL to the seed list file
seedURL=

# This is the most common method to upload the seed-list
#
# This is an ftp account with all relevant information.
# The update is only made if there had been changes in between.
seedFTPServer=
seedFTPAccount=
seedFTPPassword=
seedFTPPath=

# alternatively to an FTP account, a peer can also become a principal peer
# if the seed-list can be generated as a file and that file is also accessible from
# the internet. In this case, omit any ftp settings and set this path here.
# if this path stays empty, an ftp account is considered
# however, you must always set a seedURL because it is used to check if the
# file is actually accessible from the internet
seedFilePath=

# Settings needed to upload the seed-list file via scp
#
# Please note that this upload method can only be used if you have installed
# this optional upload method.
seedScpServer=
seedScpServerPort=
seedScpAccount=
seedScpPassword=
seedScpPath=

# every peer should have a name. indeed, we try to give every peer a unique ID,
# which is necessary for internal organization of the index sharing, but the
# peer's name is purely informal. No function but information is applied.
# please change this at your pleasure
peerName=anomic

# every peer periodically scans for other peers. you can set the time
# of the period here (minutes)
peerCycle=2

# The p2p maintenance can run in one of three online modes:
# - don't process jobs and only access what is available in cache -> mode 0
# - process any job only if we are online, which is technically only the case
#   if the proxy is used -> mode 1
# - process jobs periodically, with periods according to peerCycle -> mode 2
onlineMode=2

# Debug mode for YACY network: this will trigger that also local ip's are
# accepted as peer addresses
yacyDebugMode=false

# staticIP: if you have a static IP, you can use this setting
staticIP=

# each time the proxy starts up, it can trigger the local browser to show the
# status page. This is active by default, to make it easier for first-time
# users to understand what this application does. You can disable browser
# pop-up here or set a different start page, like the search page
# the browser type is optional and works only under certain conditions
#browserPopUpTrigger=false
browserPopUpTrigger=true
#browserPopUpPage=index.html
browserPopUpPage=Status.html
browserPopUpApplication=netscape

# the proxy saves its own seed information. It is positive for the network if
# the seed does not change its configuration often (or not at all).
# The reason for that is that the seed hash is the target for the
# distributed hash table distribution function.
# The following file will contain the saved seed:
yacyOwnSeedFile=DATA/YACYDB/mySeed.txt
yacyDB=DATA/YACYDB

# index sharing attributes: by default, sharing is on.
# If you want to use YaCy only for local indexing (robinson mode),
# you may switch this off
allowDistributeIndex=true
allowDistributeIndexWhileCrawling=false
allowDistributeIndexWhileIndexing=true
allowReceiveIndex=true
allowUnlimitedReceiveIndexFrom=
indexReceiveBlockBlacklist=true

# the frequency is the number of links per minute that the peer allows
# _every_ other peer to send to this peer
defaultWordReceiveFrequency=100
defaultLinkReceiveFrequency=30
# the default may be overridden for each peer individually, these
# settings are only available through the online interface

# prefetch parameters
# the prefetch depth assigns a specific depth to the prefetch mechanism
# prefetch of 0 means no prefetch; a prefetch of 1 means to prefetch all
# embedded URLs, but since embedded image links are loaded by the browser
# this means that only embedded anchors are prefetched additionally
# a prefetch of 2 would result in loading of all images and anchor pages
# of all embedded anchors. Be careful with this value, since even a prefetch
# of 2 would result in hundreds of prefetched URLs for each single proxy fill.
proxyPrefetchDepth=0
proxyStoreHTCache=true
proxyIndexingRemote=false
proxyIndexingLocalText=true
proxyIndexingLocalMedia=true

# From the 'IndexCreate' menu point you can also define a crawling start point.
# The crawling works the same way as the prefetch, but it is possible to
# assign a different crawling depth.
# Be careful with this number. Consider a branching factor of average 20;
# A prefetch-depth of 8 would index 25.600.000.000 pages, maybe the whole WWW.
crawlingDepth=3
# NOTE(review): 525600 = 365*24*60, so the unit is presumably minutes (one year) - confirm
crawlingIfOlder=525600
crawlingDomFilterDepth=-1
crawlingDomMaxPages=-1
indexText=true
indexMedia=true

# Filter for crawling; may be used to restrict a crawl to a specific domain
# URLs are only indexed and further crawled if they match this filter
crawlingFilter=.*
crawlingQ=false
storeHTCache=false
storeTXCache=true

# peers may initiate remote crawling tasks.
# every peer may allow or disallow to be used as crawling-peer;
# you can also set a maximum crawl depth that can be requested or accepted
# order=parameters for requester; response=parameters for responder
# these values apply only for senior-senior - communication
# The delay value is the number of seconds between two separate orders
crawlOrder=true
crawlOrderDepth=0
crawlOrderDelay=8
crawlResponse=true
crawlResponseDepth=0

# indexing-exclusion - rules
# These rules are important to reduce the number of words that are indexed
# We distinguish three different sets of stop-words:
# static   - excludes all words given in the file yacy.stopwords from indexing,
# dynamic  - excludes all words from indexing which are listed by statistic rules,
# parental - excludes all words from indexing which had been indexed in the parent web page.
xsstopw=true
xdstopw=true
xpstopw=true

# Topwords filtering
# If set to true, all stopwords (stopwords.yacy) are filtered from the topwords
# Change to false if requesting hits from peers with modified stopwords-file and using the unchanged client-version
filterOutStopwordsFromTopwords=true

# performance-settings
# delay-times for permanent loops (milliseconds)
# the idlesleep is the pause that a process sleeps if the last call to the
# process job was without execution of anything;
# the busysleep is the pause after a full job execution
# the prereq-value is a memory pre-requisite: that many bytes must
# be available/free in the heap; otherwise the loop is not executed
# and another idlesleep is performed
20_dhtdistribution_idlesleep=30000
20_dhtdistribution_busysleep=10000
20_dhtdistribution_memprereq=6291456
30_peerping_idlesleep=120000
30_peerping_busysleep=120000
30_peerping_memprereq=1048576
40_peerseedcycle_idlesleep=1800000
40_peerseedcycle_busysleep=1200000
40_peerseedcycle_memprereq=2097152
50_localcrawl_idlesleep=2000
50_localcrawl_busysleep=250
50_localcrawl_busysleep__pro=100
50_localcrawl_memprereq=4194304
50_localcrawl_isPaused=false
61_globalcrawltrigger_idlesleep=10000
61_globalcrawltrigger_busysleep=500
61_globalcrawltrigger_memprereq=2097152
61_globalcrawltrigger_isPaused=false
62_remotetriggeredcrawl_idlesleep=10000
62_remotetriggeredcrawl_busysleep=1000
62_remotetriggeredcrawl_memprereq=6291456
62_remotetriggeredcrawl_isPaused=false
70_cachemanager_idlesleep=1000
70_cachemanager_busysleep=0
70_cachemanager_memprereq=1048576
80_indexing_idlesleep=1000
80_indexing_busysleep=200
80_indexing_busysleep__pro=10
80_indexing_memprereq=6291456
82_crawlstack_idlesleep=5000
82_crawlstack_busysleep=50
82_crawlstack_busysleep__pro=10
82_crawlstack_memprereq=1048576
90_cleanup_idlesleep=300000
90_cleanup_busysleep=300000
90_cleanup_memprereq=0

# cleanup-process:
# properties for tasks that are performed during cleanup
cleanup.deletionProcessedNews = true
cleanup.deletionPublishedNews = true

# multiprocessor-settings
# you may want to run time-consuming processes on several processors
# the most time-consuming process is the indexing-Process
# We implemented an option to run several of these processes here
# setting the number of processes to Zero is not allowed
# If you have a double-processor system,
# a cluster value of '2' would be appropriate
80_indexing_cluster=1

# ram cache init timeout for database files
# ram cache for collection index
ramCacheRWI_time = 30000

# ram cache for responseHeader.db
ramCacheHTTP_time = 1000

# ram cache for urlHash.db
ramCacheLURL_time = 10000

# ram cache for stack crawl thread db
ramCachePreNURL_time = 3000

# ram cache for urlNotice.db
ramCacheNURL_time = 2000

# ram cache for urlErr.db
ramCacheEURL_time = 1000

# ram cache for seedDBs
ramCacheDHT_time = 1000

# ram cache for message.db
ramCacheMessage_time = 500

# ram cache for wiki.db
ramCacheWiki_time = 500

# ram cache for blog.db
ramCacheBlog_time = 500

# ram cache for news1.db
ramCacheNews_time = 1000

# ram cache for robotsTxt.db
ramCacheRobots_time = 0

# ram cache for crawlProfile.db
ramCacheProfiles_time= 500

# default memory settings for startup of yacy
# is valid in unix/shell and windows environments but
# not for first startup of YaCy

# -Xmx and -Xms maximum/init Java heap size
# both values should be equal,
# otherwise the YaCy-internal memory supervision does not work
javastart_Xmx=Xmx96m
javastart_Xms=Xms96m
javastart_Xmx__pro=Xmx512m
javastart_Xms__pro=Xms512m

# priority of the yacy-process
# is valid in unix/shell and windows environments but
# not for first startup of YaCy
# UNIX: corresponds to the nice-level
# WIN: -20=realtime;-15=high;-10=above;0=normal;10=below;20=low
javastart_priority=0

# performance properties for the word index cache
# wordCacheMaxLow/High is the number of word indexes that shall be held in the
# ram cache during indexing. When YaCy is shut down, this cache must be
# flushed to disc; this may last some minutes.
wordCacheMaxCount = 20000
wordCacheInitCount = 30000
wordFlushSize = 500
wordCacheMaxCount__pro = 60000
wordCacheInitCount__pro = 80000
wordFlushSize__pro = 1000

# Specifies if yacy can be used as transparent http proxy.
#
# Please note that you also have to reconfigure your firewall
# before you can use yacy as transparent proxy. On linux this
# can be done like this:
# iptables -t nat -A PREROUTING -p tcp -s 192.168.0.0/16 \
#          --dport 80 -j DNAT --to 192.168.0.1:8080
#
# With this iptables filter listed above all http traffic that
# comes from your private network (in this case 192.168.0.0)
# and goes to any webserver listening on port 80 will be forwarded
# by the firewall to yacy running on port 8080 (192.168.0.1:8080)
isTransparentProxy=false

# Specifies if yacy should use the http connection keep-alive feature
connectionKeepAliveSupport=true

# Specifies the timeout the proxy should use
proxy.clientTimeout = 30000

# Specifies if the proxy should send the via header according to RFC
proxy.sendViaHeader=true

# Specifies if the proxy should send the X-Forwarded-For header
proxy.sendXForwardedForHeader=true

# Configuration options needed to configure server port forwarding
portForwarding.Enabled=false
portForwarding.Type=none

# port forwarding via sch
portForwarding.sch=de.anomic.server.portForwarding.sch.serverPortForwardingSch
portForwarding.sch.UseProxy=false
portForwarding.sch.Port=
portForwarding.sch.Host=
portForwarding.sch.HostPort=22
portForwarding.sch.HostUser=
portForwarding.sch.HostPwd=

# port forwarding via upnp
portForwarding.upnp=de.anomic.server.portForwarding.upnp.serverPortForwardingUpnp

# msgForwarding: Specifies if yacy should forward received messages via
# email to the configured email address
msgForwardingEnabled=false
msgForwardingCmd=/usr/sbin/sendmail
msgForwardingTo=root@localhost

# onlineCautionDelay: delay time after proxy usage before crawling is resumed
onlineCautionDelay=10000

# Some configuration values for the crawler
crawler.clientTimeout=9000

# http crawler specific settings; size in bytes
crawler.http.acceptEncoding=gzip
crawler.http.acceptLanguage=en-us,en;q=0.5
crawler.http.acceptCharset=ISO-8859-1,utf-8;q=0.7,*;q=0.7
crawler.http.maxFileSize=262144
crawler.http.maxFileSize__pro=1048576

# ftp crawler specific settings; size in bytes
crawler.ftp.maxFileSize=262144 crawler.ftp.maxFileSize__pro=1048576 # maximum number of crawler threads crawler.MaxActiveThreads = 30 crawler.MaxIdleThreads = 5 # maximum number of crawl-stacker threads stacker.MaxActiveThreads = 50 stacker.MaxIdleThreads = 10 stacker.MinIdleThreads = 5 # maximum size of indexing queue indexer.slots = 40 indexer.slots__pro = 80 # specifies if yacy should set its own referer if no referer URL # was set by the client. useYacyReferer = true useYacyReferer__pro = false # allow only 443(https-port) for https-proxy? # if you want to tunnel other protocols, set to false secureHttps = true # specifies if the httpdFileHandler should cache # the template-files from the htroot directory enableTemplateCache = true # specifies if the http post body should be transferred # using content-encoding gzip during index transfer # a) indexDistribution: which is done periodically if you have enabled # Index Distribution via IndexControl_p.html # b) indexTransfer: which can be used to transfer the whole index of a peer # this can be started via IndexTransfer_p.html # c) indexControl: which can be triggered manually via IndexControl_p.html to # transfer a chosen subset of the peer index indexDistribution.gzipBody = true indexTransfer.gzipBody = true indexControl.gzipBody = true # defining timeouts for index- transfer/distribution/control indexControl.timeout = 60000 indexDistribution.timeout = 60000 indexTransfer.timeout = 120000 # defining max.
allowed amount of open files during index- transfer/distribution indexDistribution.maxOpenFiles = 800 indexTransfer.maxOpenFiles = 800 # sizes for index distribution indexDistribution.minChunkSize = 10 indexDistribution.maxChunkSize = 1000 indexDistribution.startChunkSize = 200 indexDistribution.maxChunkFails = 1 # defines if the peer should reject incoming index transfer # requests if a given limit is reached indexDistribution.transferRWIReceiptLimitEnabled = true # if the word cache for DHT reaches this level, # the peer reports itself as busy indexDistribution.dhtReceiptLimit = 1000 # Distribution of Citation-Reference (CR-) files # The distribution is done in two steps: # first step to anonymize the records # second step to forward to collecting peer # to anonymize the data even against the intermediate peer # a specific percentage is also sent again to other peers. # for key-numbers please see de.anomic.plasma.plasmaRankingDistribution CRDistOn = true CRDist0Path = GLOBAL/010_owncr CRDist0Method = 1 CRDist0Percent = 0 CRDist0Target = CRDist1Path = GLOBAL/014_othercr CRDist1Method = 9 CRDist1Percent = 30 CRDist1Target = kaskelix.de:8080,yacy.dyndns.org:8000,suma-lab.de:8080 # Hash of the peer where you would like to store the data your installation collected.
storagePeerHash = # Search sequence settings # collection: # time = time to get a RWI out of RAM cache, assortments and WORDS files # count = maximum number of RWI-entries that shall be collected # # join: # time = time to perform the join between all collected RWIs # count = maximum number of entries that shall be joined # # presort: # time = time to do a sort of the joined URL-records # count = maximum number of entries that shall be pre-sorted # # urlfetch: # time = time to fetch the real URLs from the LURL database # count = maximum number of urls that shall be fetched # # postsort: # time = time for final sort of URLs # count = maximum number of URLs that shall be retrieved during sort # # filter: # time = time to filter out unwanted urls (like redundant urls) # count = maximum number of urls that shall be filtered # # snippetfetch: # time = time to fetch snippets for selected URLs # count = maximum number of snippets to be fetched # # all values are percent # time-percent is the percent of total search time # count-percent is the percent of total wanted urls in result # we distinguish local and remote search times searchProcessLocalTime_c = 44 searchProcessLocalCount_c = 10000000 searchProcessLocalTime_j = 8 searchProcessLocalCount_j = 1000000 searchProcessLocalTime_r = 8 searchProcessLocalCount_r =100000 searchProcessLocalTime_u = 20 searchProcessLocalCount_u = 10000 searchProcessLocalTime_o = 10 searchProcessLocalCount_o = 100 searchProcessLocalTime_f = 5 searchProcessLocalCount_f = 100 searchProcessLocalTime_s = 5 searchProcessLocalCount_s = 30 searchProcessRemoteTime_c = 44 searchProcessRemoteCount_c = 1000000 searchProcessRemoteTime_j = 8 searchProcessRemoteCount_j = 1000000 searchProcessRemoteTime_r = 8 searchProcessRemoteCount_r = 1000 searchProcessRemoteTime_u = 20 searchProcessRemoteCount_u = 1000 searchProcessRemoteTime_o = 10 searchProcessRemoteCount_o = 1000 searchProcessRemoteTime_f = 5 searchProcessRemoteCount_f = 100 searchProcessRemoteTime_s
= 5 searchProcessRemoteCount_s = 10 # timeouts for snippet fetching in ms # timeout_text is for text-snippets, timeout_media for media, e.g. images timeout_text = 10000 timeout_media = 15000 # path to ranking directory containing ranking reference files rankingPath = DATA/RANKING # a list of domain name patterns that should not be cached by the httpc dns cache httpc.nameCacheNoCachingPatterns = .*.ath.cx,.*.blogdns.*,.*.boldlygoingnowhere.org,.*.dnsalias.*,.*.dnsdojo.*,.*.dvrdns.org,.*.dyn-o-saur.com,.*.dynalias.*,.*.dyndns.*,.*.ftpaccess.cc,.*.game-host.org,.*.game-server.cc,.*.getmyip.com,.*.gotdns.*,.*.ham-radio-op.net,.*.hobby-site.com,.*.homedns.org,.*.homeftp.*,.*.homeip.net,.*.homelinux.*,.*.homeunix.*,.*.is-a-chef.*,.*.is-a-geek.*,.*.kicks-ass.*,.*.merseine.nu,.*.mine.nu,.*.myphotos.cc,.*.podzone.*,.*.scrapping.cc,.*.selfip.*,.*.servebbs.*,.*.serveftp.*,.*.servegame.org,.*.shacknet.nu #externalRedirectors #squid Redirector compatible externalRedirector= svnRevision=0 currentSkin= # temporary flag for new database structure. set only true for testing # ALL DATA THAT IS CREATED WITH THIS FLAG ON WILL BE VOID IN A FINAL VERSION # table-types: RAM = 0, TREE = 1, FLEX = 2; tableTypeForPreNURL=0 # flag to show if pages shall be usable for non-admin users # this can be applied to the Surftips.html and yacysearch.html page publicSurftips = true publicSearchpage = true # a Java Properties file containing a list of SOAP services that should be deployed # in addition to the default services. E.g. # soap.serviceDeploymentList = DATA/SETTINGS/myServices.properties # # One entry in the property file must have the following format: # servicename=fullClassname # # e.g. # test=org.myservices.test # # Servicename is the name that should be used to access the service, e.g.
# if the service name is "test" then the service can be reached using # http://localhost:8080/soap/test # # The WSDL document that belongs to the deployed service can be reached # using # http://localhost:8080/soap/test?wsdl # soap.serviceDeploymentList = # Wiki access rights # the built-in wiki system by default allows only the administrator to make changes; # this can be changed. There are three options: # admin - only the admin has write right # all - everybody has write right # user - the admin and every user registered in the user db has write right WikiAccess = admin # Search Profiles # we will support different search profiles # this is currently only a single default profile # If this profile setting is empty, a hard-coded profile from plasmaSearchRanking is used rankingProfile = #optional external thumbnail program. #the program must accept the invocation PROGRAM http://url /path/to/filename thumbnailProgram = # settings for the peer's local robots.txt # the following restrictions are possible (comma-separated): # - all : entire domain is disallowed # - blog : the blog-pages # - bookmarks : the bookmark-page # - dirs : all directories in htroot (standard setting, as there is no usable information in them) # - fileshare : all files in the peer's file share (DATA/HTDOCS/share) # - homepage : all files on the peer's home page (DATA/HTDOCS/www) # - locked : all servlets ending on '_p.*' (standard setting, as robots would need a password to access them anyway) # - news : the news-page # - network : the network-pages # - status : peer's status page # - surftips : the surftips-page # - wiki : the wiki-page httpd.robots.txt = locked,dirs # class to use for parsing wikicode wikiParser.class = de.anomic.data.wikiCode # settings for automatic deletion of old entries in passive and potential seed-db # time means max time (in days) a peer may not have been seen before it is deleted routing.deleteOldSeeds.permission = true
routing.deleteOldSeeds.permission__pro = false routing.deleteOldSeeds.time = 7 routing.deleteOldSeeds.time__pro = 30