- robots.txt is a servlet now - no need to rewrite the whole file each time a section is added or removed - user-defined disallows, added manually, won't be overwritten anymore - new config-setting: httpd.robots.txt, holding names of the disallowed sections git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3423 6c8d7289-2bf4-0310-a012-ef5d649a1542pull/1/head
parent
9623bf7bbe
commit
88245e44d8
@ -0,0 +1,71 @@
|
|||||||
|
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
import de.anomic.http.httpHeader;
|
||||||
|
import de.anomic.http.httpdRobotsTxtConfig;
|
||||||
|
import de.anomic.plasma.plasmaSwitchboard;
|
||||||
|
import de.anomic.server.serverObjects;
|
||||||
|
import de.anomic.server.serverSwitch;
|
||||||
|
import de.anomic.server.servletProperties;
|
||||||
|
|
||||||
|
public class robots {
|
||||||
|
|
||||||
|
public static servletProperties respond(httpHeader header, serverObjects post, serverSwitch env) {
|
||||||
|
final servletProperties prop = new servletProperties();
|
||||||
|
final httpdRobotsTxtConfig rbc = ((plasmaSwitchboard)env).robotstxtConfig;
|
||||||
|
|
||||||
|
if (rbc.isAllDisallowed()) {
|
||||||
|
prop.put(httpdRobotsTxtConfig.ALL, 1);
|
||||||
|
} else {
|
||||||
|
if (rbc.isBlogDisallowed()) prop.put(httpdRobotsTxtConfig.ALL + "_" + httpdRobotsTxtConfig.BLOG, 1);
|
||||||
|
if (rbc.isBookmarksDisallowed()) prop.put(httpdRobotsTxtConfig.ALL + "_" + httpdRobotsTxtConfig.BOOKMARKS, 1);
|
||||||
|
if (rbc.isFileshareDisallowed()) prop.put(httpdRobotsTxtConfig.ALL + "_" + httpdRobotsTxtConfig.FILESHARE, 1);
|
||||||
|
if (rbc.isHomepageDisallowed()) prop.put(httpdRobotsTxtConfig.ALL + "_" + httpdRobotsTxtConfig.HOMEPAGE, 1);
|
||||||
|
if (rbc.isNetworkDisallowed()) prop.put(httpdRobotsTxtConfig.ALL + "_" + httpdRobotsTxtConfig.NETWORK, 1);
|
||||||
|
if (rbc.isNewsDisallowed()) prop.put(httpdRobotsTxtConfig.ALL + "_" + httpdRobotsTxtConfig.NEWS, 1);
|
||||||
|
if (rbc.isStatusDisallowed()) prop.put(httpdRobotsTxtConfig.ALL + "_" + httpdRobotsTxtConfig.STATUS, 1);
|
||||||
|
if (rbc.isSurftipsDisallowed()) prop.put(httpdRobotsTxtConfig.ALL + "_" + httpdRobotsTxtConfig.SURFTIPS, 1);
|
||||||
|
if (rbc.isWikiDisallowed()) prop.put(httpdRobotsTxtConfig.ALL + "_" + httpdRobotsTxtConfig.WIKI, 1);
|
||||||
|
|
||||||
|
if (rbc.isLockedDisallowed() || rbc.isDirsDisallowed()) {
|
||||||
|
final ArrayList[] p = getFiles(env.getConfig(plasmaSwitchboard.HTROOT_PATH, plasmaSwitchboard.HTROOT_PATH_DEFAULT));
|
||||||
|
if (rbc.isLockedDisallowed()) {
|
||||||
|
prop.put(httpdRobotsTxtConfig.LOCKED, p[0].size());
|
||||||
|
for (int i=0; i<p[0].size(); i++)
|
||||||
|
prop.put(httpdRobotsTxtConfig.ALL + "_" + httpdRobotsTxtConfig.LOCKED + "_" + i + "_page", p[0].get(i));
|
||||||
|
}
|
||||||
|
if (rbc.isDirsDisallowed()) {
|
||||||
|
prop.put(httpdRobotsTxtConfig.DIRS, p[1].size());
|
||||||
|
for (int i=0; i<p[1].size(); i++)
|
||||||
|
prop.put(httpdRobotsTxtConfig.ALL + "_" + httpdRobotsTxtConfig.DIRS + "_" + i + "_dir", p[1].get(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return prop;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static ArrayList[] getFiles(String htrootPath) {
|
||||||
|
final File htroot = new File(htrootPath);
|
||||||
|
if (!htroot.exists()) return null;
|
||||||
|
final ArrayList htrootFiles = new ArrayList();
|
||||||
|
final ArrayList htrootDirs = new ArrayList();
|
||||||
|
final String[] htroots = htroot.list();
|
||||||
|
File file;
|
||||||
|
for (int i=0, dot; i<htroots.length; i++) {
|
||||||
|
if (htroots[i].equals("www")) continue;
|
||||||
|
file = new File(htroot, htroots[i]);
|
||||||
|
if (file.isDirectory()) {
|
||||||
|
htrootDirs.add("/" + file.getName());
|
||||||
|
} else if (
|
||||||
|
(dot = htroots[i].lastIndexOf('.')) < 2 ||
|
||||||
|
htroots[i].charAt(dot - 2) == '_' && htroots[i].charAt(dot - 1) == 'p'
|
||||||
|
) {
|
||||||
|
htrootFiles.add("/" + file.getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new ArrayList[] { htrootFiles, htrootDirs };
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,54 @@
|
|||||||
|
# robots.txt for #[clientname]#.yacy
|
||||||
|
|
||||||
|
User-agent: *
|
||||||
|
|
||||||
|
#(all)#
|
||||||
|
|
||||||
|
#{dirs}#
|
||||||
|
# dirs
|
||||||
|
Disallow: /#[dir]##{/dirs}#
|
||||||
|
|
||||||
|
#{locked}#
|
||||||
|
# locked
|
||||||
|
Disallow: /#[page]##{/locked}#
|
||||||
|
|
||||||
|
#(wiki)#::
|
||||||
|
# wiki
|
||||||
|
Disallow: /Wiki.html#(/wiki)#
|
||||||
|
|
||||||
|
#(blog)#::
|
||||||
|
# blog
|
||||||
|
Disallow: /Blog.html
|
||||||
|
Disallow: /Blog.rss
|
||||||
|
Disallow: /Blog.xml#(/blog)#
|
||||||
|
|
||||||
|
#(news)#::
|
||||||
|
# news
|
||||||
|
Disallow: /News.html#(/news)#
|
||||||
|
|
||||||
|
#(status)#::
|
||||||
|
# status
|
||||||
|
Disallow: /Status.html#(/status)#
|
||||||
|
|
||||||
|
#(network)#::
|
||||||
|
# network
|
||||||
|
Disallow: /Network.html
|
||||||
|
Disallow: /Network.csv
|
||||||
|
Disallow: /Network.xml#(/network)#
|
||||||
|
|
||||||
|
#(homepage)#::
|
||||||
|
# homepage
|
||||||
|
Disallow: /www#(/homepage)#
|
||||||
|
|
||||||
|
#(fileshare)#::
|
||||||
|
# fileshare
|
||||||
|
Disallow: /share#(/fileshare)#
|
||||||
|
|
||||||
|
#(surftips)#::
|
||||||
|
# surftips
|
||||||
|
Disallow: /Surftips.html#(/surftips)#
|
||||||
|
|
||||||
|
::
|
||||||
|
# all
|
||||||
|
Disallow: /
|
||||||
|
#(/all)#
|
@ -0,0 +1,220 @@
|
|||||||
|
// httpdRobotsTxtConfig.java
|
||||||
|
// ---------
|
||||||
|
// part of YaCy
|
||||||
|
// (C) by Michael Peter Christen; mc@anomic.de
|
||||||
|
// first published on http://www.anomic.de
|
||||||
|
// Frankfurt, Germany, 2007
|
||||||
|
// Created 22.02.2007
|
||||||
|
//
|
||||||
|
// This file is contributed by Franz Brauße
|
||||||
|
//
|
||||||
|
// $LastChangedDate: $
|
||||||
|
// $LastChangedRevision: $
|
||||||
|
// $LastChangedBy: $
|
||||||
|
//
|
||||||
|
// This program is free software; you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation; either version 2 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with this program; if not, write to the Free Software
|
||||||
|
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
|
//
|
||||||
|
// Using this software in any meaning (reading, learning, copying, compiling,
|
||||||
|
// running) means that you agree that the Author(s) is (are) not responsible
|
||||||
|
// for cost, loss of data or any harm that may be caused directly or indirectly
|
||||||
|
// by usage of this softare or this documentation. The usage of this software
|
||||||
|
// is on your own risk. The installation and usage (starting/running) of this
|
||||||
|
// software may allow other people or application to access your computer and
|
||||||
|
// any attached devices and is highly dependent on the configuration of the
|
||||||
|
// software which must be done by the user of the software; the author(s) is
|
||||||
|
// (are) also not responsible for proper configuration and usage of the
|
||||||
|
// software, even if provoked by documentation provided together with
|
||||||
|
// the software.
|
||||||
|
//
|
||||||
|
// Any changes to this file according to the GPL as documented in the file
|
||||||
|
// gpl.txt aside this file in the shipment you received can be done to the
|
||||||
|
// lines that follows this copyright notice here, but changes must not be
|
||||||
|
// done inside the copyright notive above. A re-distribution must contain
|
||||||
|
// the intact and unchanged copyright notice.
|
||||||
|
// Contributions and changes to the program code must be marked as such.
|
||||||
|
|
||||||
|
package de.anomic.http;
|
||||||
|
|
||||||
|
import de.anomic.plasma.plasmaSwitchboard;
|
||||||
|
import de.anomic.server.serverSwitch;
|
||||||
|
|
||||||
|
public final class httpdRobotsTxtConfig {
|
||||||
|
|
||||||
|
public static final String WIKI = "wiki";
|
||||||
|
public static final String BLOG = "blog";
|
||||||
|
public static final String BOOKMARKS = "bookmarks";
|
||||||
|
public static final String HOMEPAGE = "homepage";
|
||||||
|
public static final String FILESHARE = "fileshare";
|
||||||
|
public static final String SURFTIPS = "surftips";
|
||||||
|
public static final String NEWS = "news";
|
||||||
|
public static final String STATUS = "status";
|
||||||
|
public static final String LOCKED = "locked";
|
||||||
|
public static final String DIRS = "dirs";
|
||||||
|
public static final String NETWORK = "network";
|
||||||
|
public static final String ALL = "all";
|
||||||
|
|
||||||
|
private boolean allDisallowed = false;
|
||||||
|
private boolean lockedDisallowed = true;
|
||||||
|
private boolean dirsDisallowed = true;
|
||||||
|
private boolean wikiDisallowed = false;
|
||||||
|
private boolean blogDisallowed = false;
|
||||||
|
private boolean fileshareDisallowed = false;
|
||||||
|
private boolean homepageDisallowed = false;
|
||||||
|
private boolean newsDisallowed = false;
|
||||||
|
private boolean statusDisallowed = false;
|
||||||
|
private boolean networkDisallowed = false;
|
||||||
|
private boolean surftipsDisallowed = false;
|
||||||
|
private boolean bookmarksDisallowed = false;
|
||||||
|
|
||||||
|
public httpdRobotsTxtConfig() { }
|
||||||
|
|
||||||
|
public httpdRobotsTxtConfig(String[] active) {
|
||||||
|
if (active == null) return;
|
||||||
|
for (int i=0; i<active.length; i++) {
|
||||||
|
if (active[i] == null) continue;
|
||||||
|
if (active[i].equals(BLOG)) { this.blogDisallowed = true; continue; }
|
||||||
|
if (active[i].equals(WIKI)) { this.wikiDisallowed = true; continue; }
|
||||||
|
if (active[i].equals(BOOKMARKS)) { this.bookmarksDisallowed = true; continue; }
|
||||||
|
if (active[i].equals(HOMEPAGE)) { this.homepageDisallowed = true; continue; }
|
||||||
|
if (active[i].equals(FILESHARE)) { this.fileshareDisallowed = true; continue; }
|
||||||
|
if (active[i].equals(SURFTIPS)) { this.surftipsDisallowed = true; continue; }
|
||||||
|
if (active[i].equals(NEWS)) { this.newsDisallowed = true; continue; }
|
||||||
|
if (active[i].equals(STATUS)) { this.statusDisallowed = true; continue; }
|
||||||
|
if (active[i].equals(NETWORK)) { this.networkDisallowed = true; continue; }
|
||||||
|
if (active[i].equals(LOCKED)) { this.lockedDisallowed = true; continue; }
|
||||||
|
if (active[i].equals(DIRS)) { this.dirsDisallowed = true; continue; }
|
||||||
|
if (active[i].equals(ALL)) { this.allDisallowed = true; continue; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static httpdRobotsTxtConfig init(serverSwitch env) {
|
||||||
|
String cfg = env.getConfig(plasmaSwitchboard.ROBOTS_TXT, plasmaSwitchboard.ROBOTS_TXT_DEFAULT);
|
||||||
|
if (cfg == null) return new httpdRobotsTxtConfig();
|
||||||
|
return new httpdRobotsTxtConfig(cfg.split(","));
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
if (this.allDisallowed) return ALL;
|
||||||
|
StringBuffer sb = new StringBuffer();
|
||||||
|
if (this.blogDisallowed) sb.append(BLOG).append(",");
|
||||||
|
if (this.bookmarksDisallowed) sb.append(BOOKMARKS).append(",");
|
||||||
|
if (this.dirsDisallowed) sb.append(DIRS).append(",");
|
||||||
|
if (this.fileshareDisallowed) sb.append(FILESHARE).append(",");
|
||||||
|
if (this.homepageDisallowed) sb.append(HOMEPAGE).append(",");
|
||||||
|
if (this.lockedDisallowed) sb.append(LOCKED).append(",");
|
||||||
|
if (this.networkDisallowed) sb.append(NETWORK).append(",");
|
||||||
|
if (this.newsDisallowed) sb.append(NEWS).append(",");
|
||||||
|
if (this.statusDisallowed) sb.append(STATUS).append(",");
|
||||||
|
if (this.surftipsDisallowed) sb.append(SURFTIPS).append(",");
|
||||||
|
if (this.wikiDisallowed) sb.append(WIKI).append(",");
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isAllDisallowed() {
|
||||||
|
return allDisallowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setAllDisallowed(boolean allDisallowed) {
|
||||||
|
this.allDisallowed = allDisallowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isLockedDisallowed() {
|
||||||
|
return lockedDisallowed || this.allDisallowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLockedDisallowed(boolean lockedDisallowed) {
|
||||||
|
this.lockedDisallowed = lockedDisallowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isDirsDisallowed() {
|
||||||
|
return dirsDisallowed || this.allDisallowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDirsDisallowed(boolean dirsDisallowed) {
|
||||||
|
this.dirsDisallowed = dirsDisallowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isBlogDisallowed() {
|
||||||
|
return blogDisallowed || this.allDisallowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setBlogDisallowed(boolean blogDisallowed) {
|
||||||
|
this.blogDisallowed = blogDisallowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isBookmarksDisallowed() {
|
||||||
|
return bookmarksDisallowed || this.allDisallowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setBookmarksDisallowed(boolean bookmarksDisallowed) {
|
||||||
|
this.bookmarksDisallowed = bookmarksDisallowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isFileshareDisallowed() {
|
||||||
|
return fileshareDisallowed || this.allDisallowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFileshareDisallowed(boolean fileshareDisallowed) {
|
||||||
|
this.fileshareDisallowed = fileshareDisallowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isHomepageDisallowed() {
|
||||||
|
return homepageDisallowed || this.allDisallowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setHomepageDisallowed(boolean homepageDisallowed) {
|
||||||
|
this.homepageDisallowed = homepageDisallowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isNetworkDisallowed() {
|
||||||
|
return networkDisallowed || this.allDisallowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setNetworkDisallowed(boolean networkDisallowed) {
|
||||||
|
this.networkDisallowed = networkDisallowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isNewsDisallowed() {
|
||||||
|
return newsDisallowed || this.allDisallowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setNewsDisallowed(boolean newsDisallowed) {
|
||||||
|
this.newsDisallowed = newsDisallowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isStatusDisallowed() {
|
||||||
|
return statusDisallowed || this.allDisallowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setStatusDisallowed(boolean statusDisallowed) {
|
||||||
|
this.statusDisallowed = statusDisallowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isSurftipsDisallowed() {
|
||||||
|
return surftipsDisallowed || this.allDisallowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSurftipsDisallowed(boolean surftipsDisallowed) {
|
||||||
|
this.surftipsDisallowed = surftipsDisallowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isWikiDisallowed() {
|
||||||
|
return wikiDisallowed || this.allDisallowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setWikiDisallowed(boolean wikiDisallowed) {
|
||||||
|
this.wikiDisallowed = wikiDisallowed;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in new issue