// ConfigHeuristics_p.java // -------------------- // (C) 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany // first published 26.06.2010 on http://yacy.net // // This is a part of YaCy, a peer-to-peer based web search engine // // $LastChangedDate: 2012-12-19 $ // $LastChangedRevision: $ // $LastChangedBy: reger $ // // LICENSE // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import java.io.File; import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.storage.Configuration; import net.yacy.cora.util.ConcurrentLog; import net.yacy.crawler.robots.RobotsTxtEntry; import net.yacy.data.WorkTables; import net.yacy.search.Switchboard; import java.io.IOException; import java.net.MalformedURLException; import java.util.Iterator; import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.cora.federate.FederateSearchManager; import net.yacy.cora.federate.solr.SchemaConfiguration; import net.yacy.cora.storage.Files; import net.yacy.search.SwitchboardConstants; import net.yacy.search.schema.WebgraphSchema; import net.yacy.server.serverObjects; import net.yacy.server.serverSwitch; public class ConfigHeuristics_p { public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) { final Switchboard sb = (Switchboard) env; final serverObjects prop = new serverObjects(); String osderrmsg = ""; if (post != null) { // store this call as api call sb.tables.recordAPICall(post, "ConfigHeuristics.html", WorkTables.TABLE_API_TYPE_CONFIGURATION, "heuristic settings"); if (post.containsKey("site_on")) sb.setConfig(SwitchboardConstants.HEURISTIC_SITE, true); if (post.containsKey("site_off")) sb.setConfig(SwitchboardConstants.HEURISTIC_SITE, false); if (post.containsKey("searchresult_on")) sb.setConfig(SwitchboardConstants.HEURISTIC_SEARCHRESULTS, true); if (post.containsKey("searchresult_off")) sb.setConfig(SwitchboardConstants.HEURISTIC_SEARCHRESULTS, false); if (post.containsKey("searchresultglobal_on")) sb.setConfig(SwitchboardConstants.HEURISTIC_SEARCHRESULTS_CRAWLGLOBAL, true); if (post.containsKey("searchresultglobal_off")) sb.setConfig(SwitchboardConstants.HEURISTIC_SEARCHRESULTS_CRAWLGLOBAL, false); if (post.containsKey("opensearch_on")) { sb.setConfig(SwitchboardConstants.HEURISTIC_OPENSEARCH, true); // re-read config FederateSearchManager.getManager().init(sb.getDataPath().getAbsolutePath()+ "DATA/SETTINGS/heuristicopensearch.conf"); if (FederateSearchManager.getManager().getSize() == 0) { osderrmsg = "no active search targets are configured"; } } if (post.containsKey("opensearch_off")) sb.setConfig(SwitchboardConstants.HEURISTIC_OPENSEARCH, false); if (post.containsKey("discoverosd")) { final boolean metafieldavailable = sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_rel_s.name()) && (sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_protocol_s.name()) && sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_urlstub_s.name())); if (metafieldavailable) { //OpenSearchConnector osc = new OpenSearchConnector(sb, false); if (FederateSearchManager.getManager().discoverFromSolrIndex(sb)) { osderrmsg = "started background search for target systems, refresh page after some minutes"; } else { osderrmsg = "Error: webgraph Solr index not enabled"; } } else { osderrmsg = "Solr webgraph index needs to be available and fields target_rel_s, target_protocol_s, target_urlstub_s on"; } } final String tmpurl = post.get("ossys_newurl"); // if user entered new opensearch url but hit the wrong button, simulate "add" button if (tmpurl != null && !tmpurl.isEmpty()) post.put("addnewosd", 1); if (post.containsKey("addnewosd")) { // add new entry to config file final String tmpname = post.get("ossys_newtitle"); if (tmpname != null && tmpurl !=null) { if (!tmpname.isEmpty() && !tmpurl.isEmpty() && tmpurl.toLowerCase().contains("{searchterms}")) { /* Check eventual robots.txt policy */ RobotsTxtEntry robotsEntry = null; try { MultiProtocolURL templateURL = new MultiProtocolURL(tmpurl); if (sb.robots != null) { robotsEntry = sb.robots.getEntry(templateURL, ClientIdentification.yacyInternetCrawlerAgent); } if (robotsEntry != null && robotsEntry.isDisallowed(templateURL)) { osderrmsg = "URL template is disallowed by the host robots.xt"; } else { final String tmpcomment = post.get("ossys_newcomment"); FederateSearchManager.getManager().addOpenSearchTarget(tmpname, tmpurl, false, tmpcomment); } } catch (final MalformedURLException ex) { osderrmsg = "URL template is malformed."; } } else { osderrmsg = "Url template must contain '{searchTerms}'"; } } } if (post.containsKey("setopensearch")) { // read index schema table flags writeopensearchcfg (sb,post); } if (post.containsKey("switchsolrfieldson")) { final boolean metafieldavailable = sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_rel_s.name()) && ( sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_protocol_s.name()) && sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_urlstub_s.name()) ) ; if (!metafieldavailable) { SchemaConfiguration.Entry entry; entry = sb.index.fulltext().getWebgraphConfiguration().get(WebgraphSchema.target_rel_s.name()); if (entry != null && !entry.enabled()) { entry.setEnable(true); } entry = sb.index.fulltext().getWebgraphConfiguration().get(WebgraphSchema.target_protocol_s.name()); if (entry != null && !entry.enabled()) { entry.setEnable(true); } entry = sb.index.fulltext().getWebgraphConfiguration().get(WebgraphSchema.target_urlstub_s.name()); if (entry != null && !entry.enabled()) { entry.setEnable(true); } try { sb.index.fulltext().getWebgraphConfiguration().commit(); } catch (final IOException e) { } sb.setConfig(SwitchboardConstants.CORE_SERVICE_WEBGRAPH, true); } } // copy default opensearch heuristic config with sample entries if (post.containsKey("copydefaultosdconfig") || post.containsKey("resettodefaultosdlist")) { // prepare a solr index profile switch list final File osdDefaultConfig = new File(sb.getAppPath(), "defaults/heuristicopensearch.conf"); final File osdConfig = new File(sb.getDataPath(), "DATA/SETTINGS/heuristicopensearch.conf"); if ((post.containsKey("resettodefaultosdlist") || !osdConfig.exists()) && osdDefaultConfig.exists()) { try { Files.copy(osdDefaultConfig, osdConfig); File defdir = new File(sb.dataPath, "DATA/SETTINGS/federatecfg"); if (!defdir.exists()) { Files.copy(new File(sb.appPath, "defaults/federatecfg"), defdir); } } catch (final IOException ex) { osderrmsg = "file I/O error during copy"; } } else {osderrmsg = "config file exists or default doesn't exist";} } } final boolean showmetafieldbutton = sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_rel_s.name()) && (sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_protocol_s.name()) && sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_urlstub_s.name())) && sb.getConfigBool(SwitchboardConstants.CORE_SERVICE_WEBGRAPH, false); if (!showmetafieldbutton) prop.put("osdsolrfieldswitch",1); prop.put("site.checked", sb.getConfigBool(SwitchboardConstants.HEURISTIC_SITE, false) ? 1 : 0); prop.put("searchresult.checked", sb.getConfigBool(SwitchboardConstants.HEURISTIC_SEARCHRESULTS, false) ? 1 : 0); prop.put("searchresultglobal.checked", sb.getConfigBool(SwitchboardConstants.HEURISTIC_SEARCHRESULTS_CRAWLGLOBAL, false) ? 1 : 0); prop.put("opensearch.checked", sb.getConfigBool(SwitchboardConstants.HEURISTIC_OPENSEARCH, false) ? 1 : 0); // display config file content final File f = new File (sb.getDataPath(),"DATA/SETTINGS/heuristicopensearch.conf"); try { Configuration p = new Configuration(f); int c = 0; boolean dark = false; Iterator i = p.entryIterator(); while (i.hasNext()) { SchemaConfiguration.Entry e = i.next(); prop.put("osdcfg_" + c + "_dark", dark ? 1 : 0); dark = !dark; prop.put("osdcfg_" + c + "_checked", e.enabled() ? 1 : 0); prop.putHTML("osdcfg_" + c + "_title", e.key()); prop.putHTML("osdcfg_" + c + "_comment", e.getComment() != null ? e.getComment() : ""); String tmps = e.getValue(); prop.putHTML("osdcfg_" + c + "_url", tmps); final int lastpos = tmps.lastIndexOf("/"); if (lastpos > 6) { // after http://x or ftp://x tmps = tmps.substring(0, lastpos); prop.putHTML("osdcfg_" + c + "_urlhostlink", tmps); } else prop.putHTML("osdcfg_" + c + "_urlhostlink", "#"); c++; } prop.put("osdcfg", c); } catch (final IOException e1) { ConcurrentLog.warn("OpenSearchConnector", "file not found " + f.getAbsolutePath()); prop.put("osdcfg", 0); } prop.putHTML("osderrmsg",osderrmsg); return prop; } private static void writeopensearchcfg(final Switchboard sb, final serverObjects post) { // read index schema table flags final File f = new File(sb.getDataPath(), "DATA/SETTINGS/heuristicopensearch.conf"); try { Configuration cfg = new Configuration(f); final Iterator cfgentries = cfg.entryIterator(); Configuration.Entry entry; boolean modified = false; // flag to remember changes while (cfgentries.hasNext()) { entry = cfgentries.next(); final String sfn = post.get("ossys_url_" + entry.key()); if (sfn != null) { if (!sfn.equals(entry.getValue())) { entry.setValue(sfn); modified = true; } } // set enable flag String v = post.get("ossys_" + entry.key()); boolean c = v != null && v.equals("checked"); if (entry.enabled() != c) { entry.setEnable(c); modified = true; } // delete entry from config v = post.get("ossys_del_" + entry.key()); c = v != null && v.equals("checked"); if (c) { cfgentries.remove(); modified = true; } } if (modified) { // save settings to config file if modified try { cfg.commit(); } catch (final IOException ex) { } } } catch (final IOException e) { ConcurrentLog.warn("OpenSearchConnector", "file not found " + f.getAbsolutePath()); } // re-read config (and create/update work table) if (sb.getConfigBool(SwitchboardConstants.HEURISTIC_OPENSEARCH, true)) { FederateSearchManager.getManager().init(f.getAbsolutePath()); } } }