yacy_search_server/htroot/ConfigHeuristics_p.java

// ConfigHeuristics_p.java
// --------------------
// (C) 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 26.06.2010 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2012-12-19 $
// $LastChangedRevision: $
// $LastChangedBy: reger $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


import java.io.File;

import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.storage.Configuration;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.crawler.robots.RobotsTxtEntry;
import net.yacy.data.WorkTables;
import net.yacy.search.Switchboard;

import java.io.IOException;
import java.net.MalformedURLException;
import java.util.Iterator;
import java.util.Locale;

import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.federate.FederateSearchManager;

import net.yacy.cora.federate.solr.SchemaConfiguration;
import net.yacy.cora.storage.Files;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.schema.WebgraphSchema;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;

public class ConfigHeuristics_p {

    public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {

        final Switchboard sb = (Switchboard) env;
        final serverObjects prop = new serverObjects();

        String osderrmsg = "";
        if (post != null) {

            // store this call as api call
            sb.tables.recordAPICall(post, "ConfigHeuristics.html", WorkTables.TABLE_API_TYPE_CONFIGURATION, "heuristic settings");

            if (post.containsKey("site_on")) sb.setConfig(SwitchboardConstants.HEURISTIC_SITE, true);
            if (post.containsKey("site_off")) sb.setConfig(SwitchboardConstants.HEURISTIC_SITE, false);
            if (post.containsKey("searchresult_on")) sb.setConfig(SwitchboardConstants.HEURISTIC_SEARCHRESULTS, true);
            if (post.containsKey("searchresult_off")) sb.setConfig(SwitchboardConstants.HEURISTIC_SEARCHRESULTS, false);
            if (post.containsKey("searchresultglobal_on")) sb.setConfig(SwitchboardConstants.HEURISTIC_SEARCHRESULTS_CRAWLGLOBAL, true);
            if (post.containsKey("searchresultglobal_off")) sb.setConfig(SwitchboardConstants.HEURISTIC_SEARCHRESULTS_CRAWLGLOBAL, false);
            if (post.containsKey("opensearch_on")) {
                sb.setConfig(SwitchboardConstants.HEURISTIC_OPENSEARCH, true);
                // re-read config
                FederateSearchManager.getManager().init(sb.getDataPath().getAbsolutePath()+ "DATA/SETTINGS/heuristicopensearch.conf");
                if (FederateSearchManager.getManager().getSize() == 0) {
                    osderrmsg = "no active search targets are configured";
                }
            }
            if (post.containsKey("opensearch_off")) sb.setConfig(SwitchboardConstants.HEURISTIC_OPENSEARCH, false);
            if (post.containsKey("discoverosd")) {
                final boolean metafieldavailable = sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_rel_s.name())
                        && (sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_protocol_s.name()) && sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_urlstub_s.name()));
                if (metafieldavailable) {
                    //OpenSearchConnector osc = new OpenSearchConnector(sb, false);
                    if (FederateSearchManager.getManager().discoverFromSolrIndex(sb)) {
                        osderrmsg = "started background search for target systems, refresh page after some minutes";
                    } else {
                        osderrmsg = "Error: webgraph Solr index not enabled";
                    }
                } else {                    
                    osderrmsg = "Solr webgraph index needs to be available and fields target_rel_s, target_protocol_s, target_urlstub_s on";
                }
            }

            final String tmpurl = post.get("ossys_newurl");
            // if user entered new opensearch url but hit the wrong button, simulate "add" button
            if (tmpurl != null && !tmpurl.isEmpty()) post.put("addnewosd", 1);

            if (post.containsKey("addnewosd")) {
                // add new entry to config file
                final String tmpname = post.get("ossys_newtitle");
                if (tmpname != null && tmpurl !=null) {
                    if (!tmpname.isEmpty() && !tmpurl.isEmpty() && tmpurl.toLowerCase(Locale.ROOT).contains("{searchterms}")) {
                     	/* Check eventual robots.txt policy */
                      	RobotsTxtEntry robotsEntry = null;
						try {
							MultiProtocolURL templateURL = new MultiProtocolURL(tmpurl);

							if (sb.robots != null) {
								robotsEntry = sb.robots.getEntry(templateURL,
										ClientIdentification.yacyInternetCrawlerAgent);
							}

							if (robotsEntry != null && robotsEntry.isDisallowed(templateURL)) {
								osderrmsg = "URL template is disallowed by the host robots.xt";
							} else {
								final String tmpcomment = post.get("ossys_newcomment");
								FederateSearchManager.getManager().addOpenSearchTarget(tmpname, tmpurl, false,
										tmpcomment);
							}
						} catch (final MalformedURLException ex) {
							osderrmsg = "URL template is malformed.";
						}
                    } else {
                    	osderrmsg = "Url template must contain '{searchTerms}'";
                    }
                }
            }

            if (post.containsKey("setopensearch")) {
                // read index schema table flags
                writeopensearchcfg (sb,post);
             }

            if (post.containsKey("switchsolrfieldson")) {              
                final boolean metafieldavailable = sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_rel_s.name()) 
                    && ( sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_protocol_s.name()) && sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_urlstub_s.name()) ) ;
                
                if (!metafieldavailable) {
                    SchemaConfiguration.Entry entry;
                    entry = sb.index.fulltext().getWebgraphConfiguration().get(WebgraphSchema.target_rel_s.name());
                    if (entry != null && !entry.enabled()) {
                        entry.setEnable(true);
                    }
                    entry = sb.index.fulltext().getWebgraphConfiguration().get(WebgraphSchema.target_protocol_s.name());
                    if (entry != null && !entry.enabled()) {
                        entry.setEnable(true);
                    }
                    entry = sb.index.fulltext().getWebgraphConfiguration().get(WebgraphSchema.target_urlstub_s.name());
                    if (entry != null && !entry.enabled()) {
                        entry.setEnable(true);
                    }
                    try {
                        sb.index.fulltext().getWebgraphConfiguration().commit();
                    } catch (final IOException e) {
                    }
                    sb.setConfig(SwitchboardConstants.CORE_SERVICE_WEBGRAPH, true); 
                }
            }

            // copy default opensearch heuristic config with sample entries
            if (post.containsKey("copydefaultosdconfig") || post.containsKey("resettodefaultosdlist")) {
                // prepare a solr index profile switch list
                final File osdDefaultConfig = new File(sb.getAppPath(), "defaults/heuristicopensearch.conf");
                final File osdConfig = new File(sb.getDataPath(), "DATA/SETTINGS/heuristicopensearch.conf");
                if ((post.containsKey("resettodefaultosdlist") || !osdConfig.exists()) && osdDefaultConfig.exists()) {
                    try {
                        Files.copy(osdDefaultConfig, osdConfig);
                        File defdir = new File(sb.dataPath, "DATA/SETTINGS/federatecfg");
                        if (!defdir.exists()) {
                            Files.copy(new File(sb.appPath, "defaults/federatecfg"), defdir);
                        }
                    } catch (final IOException ex) {
                        osderrmsg = "file I/O error during copy";
                    }
                } else {osderrmsg = "config file exists or default doesn't exist";}
            }
        }

        final boolean showmetafieldbutton = sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_rel_s.name())
                && (sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_protocol_s.name()) && sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_urlstub_s.name()))
                && sb.getConfigBool(SwitchboardConstants.CORE_SERVICE_WEBGRAPH, false);
        if (!showmetafieldbutton) prop.put("osdsolrfieldswitch",1);
        prop.put("site.checked", sb.getConfigBool(SwitchboardConstants.HEURISTIC_SITE, false) ? 1 : 0);
        prop.put("searchresult.checked", sb.getConfigBool(SwitchboardConstants.HEURISTIC_SEARCHRESULTS, false) ? 1 : 0);
        prop.put("searchresultglobal.checked", sb.getConfigBool(SwitchboardConstants.HEURISTIC_SEARCHRESULTS_CRAWLGLOBAL, false) ? 1 : 0);
        prop.put("opensearch.checked", sb.getConfigBool(SwitchboardConstants.HEURISTIC_OPENSEARCH, false) ? 1 : 0);

        // display config file content
        final File f = new File (sb.getDataPath(),"DATA/SETTINGS/heuristicopensearch.conf");

        try {
            Configuration p = new Configuration(f);
            int c = 0;
            boolean dark = false;
            Iterator<Configuration.Entry> i = p.entryIterator();
            while (i.hasNext()) {
                SchemaConfiguration.Entry e = i.next();
                prop.put("osdcfg_" + c + "_dark", dark ? 1 : 0);
                dark = !dark;
                prop.put("osdcfg_" + c + "_checked", e.enabled() ? 1 : 0);
                prop.putHTML("osdcfg_" + c + "_title", e.key());
                prop.putHTML("osdcfg_" + c + "_comment", e.getComment() != null ? e.getComment() : "");

                String tmps = e.getValue();
                prop.putHTML("osdcfg_" + c + "_url", tmps);
                final int lastpos = tmps.lastIndexOf("/");
                if (lastpos > 6) { // after http://x or ftp://x
                    tmps = tmps.substring(0, lastpos);
                    prop.putHTML("osdcfg_" + c + "_urlhostlink", tmps);
                } else prop.putHTML("osdcfg_" + c + "_urlhostlink", "#");

                c++;
            }
            prop.put("osdcfg", c);
        } catch (final IOException e1) {
            ConcurrentLog.warn("OpenSearchConnector", "file not found " + f.getAbsolutePath());
            prop.put("osdcfg", 0);
        }
        prop.putHTML("osderrmsg",osderrmsg);
        return prop;
    }

    private static void writeopensearchcfg(final Switchboard sb, final serverObjects post) {
        // read index schema table flags

        final File f = new File(sb.getDataPath(), "DATA/SETTINGS/heuristicopensearch.conf");
        try {
            Configuration cfg = new Configuration(f);
            final Iterator<Configuration.Entry> cfgentries = cfg.entryIterator();
            Configuration.Entry entry;
            boolean modified = false; // flag to remember changes
            while (cfgentries.hasNext()) {
                entry = cfgentries.next();
                final String sfn = post.get("ossys_url_" + entry.key());
                if (sfn != null) {
                    if (!sfn.equals(entry.getValue())) {
                        entry.setValue(sfn);
                        modified = true;
                    }
                }
                // set enable flag
                String v = post.get("ossys_" + entry.key());
                boolean c = v != null && v.equals("checked");
                if (entry.enabled() != c) {
                    entry.setEnable(c);
                    modified = true;
                }
                // delete entry from config
                v = post.get("ossys_del_" + entry.key());
                c = v != null && v.equals("checked");
                if (c) {
                    cfgentries.remove();
                    modified = true;
                }
            }
            if (modified) { // save settings to config file if modified
                try {
                    cfg.commit();
                } catch (final IOException ex) {
                }
            }
        } catch (final IOException e) {
            ConcurrentLog.warn("OpenSearchConnector", "file not found " + f.getAbsolutePath());
        }
        
        // re-read config (and create/update work table)
        if (sb.getConfigBool(SwitchboardConstants.HEURISTIC_OPENSEARCH, true)) {
            FederateSearchManager.getManager().init(f.getAbsolutePath());
        }
    }
}