refactor opensearch heuristic

introduce FederateSearchManager handling the search heuristic to external systems via specific FederateSearchConnectors, which provide the query() functionality, the translation to the YaCy schema via .toYaCySchema() and the search() routine to deliver results to search events, which is generally implemented in the abstract connector. The manager now enforces a min. 15s delay between calls to external systems. Besides the OpenSearchConnector a SolrFederateSearchConnector is available. It uses an additional config file for field name translation. default heuristicopensearch.conf: - openbdb.com removed - seems to no longer deliver results - config via solrconnector to datacite.org added (large technical library archive)
10 years ago · 24f68a4eb7
parent 3b51636ecb
commit 24f68a4eb7
12 changed files with 971 additions and 238 deletions
--- a/defaults/federatecfg/datacite.solr.schema
+++ b/defaults/federatecfg/datacite.solr.schema
@ -0,0 +1,32 @@
 ## API datacite.org
 ## This service is also available as an API. We use Solr Search Handler for our API calls, the endpoint is: http://search.datacite.org/api.
 ## Please check Solr's common query parameters documentation in order to understand how to use API.
 ## Examples
 ##    http://search.datacite.org/api?q=wind simple search for wind
 ##    http://search.datacite.org/api?q=wind&fl=doi,title&rows=5 search for wind, retrieve only doi and title, and return (at max.) 5 results
 ##    http://search.datacite.org/api?q=wind&fl=doi,title&wt=csv csv output
 ##    http://search.datacite.org/api?q=wind&fl=doi,title&wt=json&indent=true json output
 ## YaCy solrconnector specific settings
 ## the base url to access the system
 _baseurl = http://search.datacite.org/
 ## Solr core, is appended to the _baseurl
 _corename = api
 ## some systems store an identifier instead of a url for the resource; this prefix is prepended to the identifier found in _skufieldname
 _skuprefix = http://dx.doi.org/
 ## the field name of the url of resource (in yacy/solr = sku)
 _skufieldname = doi
 ## field mappings
 ## YaCyFieldname = remoteFieldname
 keywords = subject
 author = creator
 publisher_t = publisher
 title = title
 description_txt = description
 language_s = language
 text_t = description
 size_i = size
 coordinate_p = geoLocationPoint
--- a/defaults/heuristicopensearch.conf
+++ b/defaults/heuristicopensearch.conf
@ -14,8 +14,15 @@
 #Blekko = http://blekko.com/ws/{searchTerms}+/rss  # get 20 results from blekko
 #Faroo-News = http://www.faroo.com/api?q={searchTerms}&start={startIndex}&length=20&l=en&src=news&f=rss  # get results from Faroo news-search
 #openBDB = http://www.openbdb.com/b/{searchTerms}.xml  # Open Book Database
 #WordPress.com = http://en.search.wordpress.com/?q={searchTerms}&f=feed&page={startPage?}  #Search WordPress.com Blogs
 #Sueddeutsche.de = http://suche.sueddeutsche.de/query/{searchTerms}?output=rss # Sueddeutsche Zeitung Artikel Archiv
 #Los Angeles Times = http://framework.latimes.com/?s={searchTerms}&feed=rss2
 #Archive-It = http://archive-it.org/seam/resource/opensearch?q={searchTerms}&n=20 # archiving cultural heritage on the web 
 ## In addition to OpenSearch systems other connectors are available to query foreign systems
 ## the syntax is
 ##      SystemName = cfgfile:_connectortype_:_schemaconfig_
 ##      where    cfgfile:        is a fixed prefix (to signal this is not an opensearch url)
 ##               _connectortype_ is the type of connector to use (available is: solrconnector)
 ##               _schemaconfig_  is the config file with field name mappings (the file has to exist in DATA/SETTINGS/federatecfg)
 #datacite.org = cfgfile:solrconnector:datacite.solr.schema  # International Consortium for data citation
--- a/htroot/ConfigHeuristics_p.java
+++ b/htroot/ConfigHeuristics_p.java
@ -25,7 +25,6 @@
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 import com.google.common.io.Files;
 import java.io.File;
@ -37,9 +36,10 @@ import net.yacy.search.Switchboard;
 import java.io.IOException;
 import java.util.Iterator;
 import net.yacy.cora.federate.FederateSearchManager;
 import net.yacy.cora.federate.opensearch.OpenSearchConnector;
 import net.yacy.cora.federate.solr.SchemaConfiguration;
 import net.yacy.cora.storage.Files;
 import net.yacy.search.SwitchboardConstants;
 import net.yacy.search.schema.WebgraphSchema;
 import net.yacy.server.serverObjects;
@ -66,9 +66,9 @@ public class ConfigHeuristics_p {
            if (post.containsKey("searchresultglobal_off")) sb.setConfig(SwitchboardConstants.HEURISTIC_SEARCHRESULTS_CRAWLGLOBAL, false);
            if (post.containsKey("opensearch_on")) {
                sb.setConfig(SwitchboardConstants.HEURISTIC_OPENSEARCH, true);
-                // re-read config (and create work table)
+                // re-read config
-                OpenSearchConnector os = new OpenSearchConnector(sb, true);
+                FederateSearchManager.getManager().init(sb.getDataPath().getAbsolutePath()+ "DATA/SETTINGS/heuristicopensearch.conf");
-                if (os.getSize() == 0) {
+                if (FederateSearchManager.getManager().getSize() == 0) {
                    osderrmsg = "no active search targets are configured";
                }
            }
@ -77,8 +77,8 @@ public class ConfigHeuristics_p {
                final boolean metafieldavailable = sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_rel_s.name())
                        && (sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_protocol_s.name()) && sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_urlstub_s.name()));
                if (metafieldavailable) {
-                    OpenSearchConnector osc = new OpenSearchConnector(sb, false);
+                    //OpenSearchConnector osc = new OpenSearchConnector(sb, false);
-                    if (osc.discoverFromSolrIndex(sb)) {
+                    if (FederateSearchManager.getManager().discoverFromSolrIndex(sb)) {
                        osderrmsg = "started background search for target systems, refresh page after some minutes";
                    } else {
                        osderrmsg = "Error: webgraph Solr index not enabled";
@ -98,8 +98,7 @@ public class ConfigHeuristics_p {
                if (tmpname != null && tmpurl !=null) {
                    if (!tmpname.isEmpty() && !tmpurl.isEmpty() && tmpurl.toLowerCase().contains("{searchterms}")) {
                        final String tmpcomment = post.get("ossys_newcomment");
-                        OpenSearchConnector osc = new OpenSearchConnector(sb,false);
+                        FederateSearchManager.getManager().addOpenSearchTarget(tmpname,tmpurl,false,tmpcomment);
                        osc.add (tmpname,tmpurl,false,tmpcomment);
                    } else osderrmsg = "Url template must contain '{searchTerms}'";
                    }
                }
@ -143,6 +142,10 @@ public class ConfigHeuristics_p {
                if ((post.containsKey("resettodefaultosdlist") || !osdConfig.exists()) && osdDefaultConfig.exists()) {
                    try {
                        Files.copy(osdDefaultConfig, osdConfig);
                        File defdir = new File(sb.dataPath, "DATA/SETTINGS/federatecfg");
                        if (!defdir.exists()) {
                            Files.copy(new File(sb.appPath, "defaults/federatecfg"), defdir);
                        }
                    } catch (final IOException ex) {
                        osderrmsg = "file I/O error during copy";
                    }
@ -240,7 +243,7 @@ public class ConfigHeuristics_p {
        // re-read config (and create/update work table)
        if (sb.getConfigBool(SwitchboardConstants.HEURISTIC_OPENSEARCH, true)) {
-            new OpenSearchConnector(sb, true);
+            FederateSearchManager.getManager().init(f.getAbsolutePath());
        }
    }
 }
--- a/htroot/ConfigNetwork_p.java
+++ b/htroot/ConfigNetwork_p.java
@ -127,8 +127,8 @@ public class ConfigNetwork_p
                    sb.peers.mySeed().setPeerTags(MapTools.string2set(normalizedList(post.get("peertags")), ","));
                }
-                sb.setConfig("cluster.mode", post.get(SwitchboardConstants.CLUSTER_MODE, SwitchboardConstants.CLUSTER_MODE_PUBLIC_PEER));
+                sb.setConfig(SwitchboardConstants.CLUSTER_MODE, post.get(SwitchboardConstants.CLUSTER_MODE, SwitchboardConstants.CLUSTER_MODE_PUBLIC_PEER));
-                sb.setConfig("cluster.peers.ipport", checkIPPortList(post.get("cluster.peers.ipport", "")));
+                sb.setConfig(SwitchboardConstants.CLUSTER_PEERS_IPPORT, checkIPPortList(post.get(SwitchboardConstants.CLUSTER_PEERS_IPPORT, "")));
                sb.setConfig(
                    "cluster.peers.yacydomain",
                    checkYaCyDomainList(post.get("cluster.peers.yacydomain", "")));
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@ -45,7 +45,7 @@ import net.yacy.cora.document.encoding.UTF8;
 import net.yacy.cora.document.feed.RSSMessage;
 import net.yacy.cora.document.id.DigestURL;
 import net.yacy.cora.document.id.MultiProtocolURL;
-import net.yacy.cora.federate.opensearch.OpenSearchConnector;
+import net.yacy.cora.federate.FederateSearchManager;
 import net.yacy.cora.federate.yacy.CacheStrategy;
 import net.yacy.cora.geo.GeoLocation;
 import net.yacy.cora.lod.vocabulary.Tagging;
@ -719,10 +719,10 @@ public class yacysearch {
                    sb.heuristicSite(theSearch, modifier.sitehost);
                }
                if ( heuristicBlekko >= 0  && authenticated && !stealthmode ) {
-                    OpenSearchConnector.query(sb, theSearch);
+                    FederateSearchManager.getManager().search(theSearch);
                }
                if (sb.getConfigBool(SwitchboardConstants.HEURISTIC_OPENSEARCH, false) && authenticated && !stealthmode) {
-                    OpenSearchConnector.query(sb, theSearch);
+                    FederateSearchManager.getManager().search(theSearch);
                }
            }
--- a/source/net/yacy/cora/federate/AbstractFederateSearchConnector.java
+++ b/source/net/yacy/cora/federate/AbstractFederateSearchConnector.java
@ -0,0 +1,197 @@
 /**
 * AbstractFederateSearchConnector.java
 * Copyright 2015 by Burkhard Buelte
 * First released 19.01.2015 at http://yacy.net
 *
 * This library is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 2.1 of the License, or (at your option)
 * any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program in the file lgpl21.txt If not, see
 * <http://www.gnu.org/licenses/>.
 */
 package net.yacy.cora.federate;
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import javax.servlet.http.HttpServletResponse;
 import net.yacy.cora.document.id.DigestURL;
 import net.yacy.cora.federate.solr.SchemaConfiguration;
 import net.yacy.cora.federate.solr.SchemaDeclaration;
 import net.yacy.cora.federate.solr.SolrType;
 import net.yacy.cora.sorting.ReversibleScoreMap;
 import net.yacy.cora.storage.Configuration;
 import net.yacy.cora.util.ConcurrentLog;
 import net.yacy.kelondro.data.meta.URIMetadataNode;
 import net.yacy.search.Switchboard;
 import net.yacy.search.query.SearchEvent;
 import net.yacy.search.schema.CollectionSchema;
 import org.apache.solr.common.SolrDocument;
 /**
 * Base implementation class for federated search connectors, providing the
 * basic functionality to search non-YaCy systems
 * <ul>
 * <li> init() to read the config file
 * <li> toYaCySchema() to convert remote schema fields to YaCy internal schema
 * names, intended to be used by query() implementations
 * <li> query() needs to be implemented in specific connectors
 * <li> search() calls query() in a thread and adds the results to the search event.
 * </ul>
 * Subclasses need to implement query() and may override toYaCySchema() if more
 * than a basic field mapping is needed
 */
 abstract public class AbstractFederateSearchConnector implements FederateSearchConnector {
    public String instancename; // just an identifying name
    protected SchemaConfiguration localcfg; // the schema conversion cfg for each fieldname, yacyname = remote fieldname
    public long lastaccesstime = -1; // last time accessed, used for search delay calculation
    protected String baseurl;
    /**
     * Inits the connector with the remote field names and matches to yacy
     * schema and other specific settings from config file. Every connector
     * needs at least a query target (where to query) and some definition to
     * convert the remote search result to the internal result presentation
     * (field mapping)
     *
     * @param instance internal name of this connector instance
     * @param cfgFileName path of the schema config file, e.g. DATA/SETTINGS/federatecfg/instanceName.schema
     * @return true if the config file could be read and contains a mapping for
     * sku or _skufieldname, false otherwise
     */
    @Override
    public boolean init(String instance, String cfgFileName) {
        this.instancename = instance;
        File instanceCfgFile = new File(cfgFileName);
        if (instanceCfgFile.exists()) {
            try {
                this.localcfg = new SchemaConfiguration(instanceCfgFile);
            } catch (IOException ex) {
                ConcurrentLog.config(this.instancename, "error reading schema " + cfgFileName);
                return false;
            }
            // mandatory to contain a mapping for "sku" or alternatively "_skufieldname" for a conversion to a final url
            if (this.localcfg.contains(CollectionSchema.sku) || this.localcfg.contains("_skufieldname")) {
                return true;
            } else {
                ConcurrentLog.config(this.instancename, "mandatory mapping for sku or _skufieldname missing in " + cfgFileName);
                return false;
            }
        } else {
            this.localcfg = null;
            return false;
        }
    }
    /**
     * Queries a remote system in a separate thread and adds the results to the
     * search event and, if addResultsToLocalIndex is true, to the crawler.
     *
     * @param theSearch search event receiving the results
     */
    @Override
    public void search(final SearchEvent theSearch) {
        final Thread job = new Thread() {
            @Override
            public void run() {
                Thread.currentThread().setName("heuristic:" + instancename);
                theSearch.oneFeederStarted();
                List<URIMetadataNode> doclist = null;
                try {
                    doclist = query(theSearch.getQuery());
                    if (doclist != null) {
                        Map<String, LinkedHashSet<String>> snippets = new HashMap<String, LinkedHashSet<String>>(); // add nodes doesn't allow null
                        Map<String, ReversibleScoreMap<String>> facets = new HashMap<String, ReversibleScoreMap<String>>(); // add nodes doesn't allow null
                        theSearch.addNodes(doclist, facets, snippets, false, instancename, doclist.size());
                        for (URIMetadataNode doc : doclist) {
                            theSearch.addHeuristic(doc.hash(), instancename, false);
                        }
                    }
                } finally {
                    // that's all we need to display the search result; report the
                    // termination even if query() or addNodes() failed, otherwise
                    // the feeder would stay registered as started forever
                    theSearch.oneFeederTerminated();
                }
                // optional: add to crawler to get the full resource (later)
                if (doclist != null && !doclist.isEmpty() && theSearch.addResultsToLocalIndex) {
                    Collection<DigestURL> urls = new ArrayList<DigestURL>();
                    for (URIMetadataNode doc : doclist) {
                        urls.add(doc.url());
                    }
                    Switchboard.getSwitchboard().addToCrawler(urls, false);
                }
            }
        };
        job.start();
    }
    /**
     * Converts a remote schema result to YaCy schema using the fieldname
     * mapping provided as config file
     *
     * @param doc remote result (with remote fieldnames)
     * @return metadata node with field names according to the YaCy schema
     */
    protected URIMetadataNode toYaCySchema(final SolrDocument doc) {
        // determine the url of the resource: either mapped directly via "sku" or
        // built from the field named by "_skufieldname" with an optional "_skuprefix"
        String urlstr;
        if (localcfg.contains("sku"))  {
            urlstr = (String) doc.getFieldValue(localcfg.get("sku").getValue());
        } else {
            urlstr = (String) doc.getFieldValue(localcfg.get("_skufieldname").getValue());
            if (this.localcfg.contains("_skuprefix")) {
                String skuprefix = this.localcfg.get("_skuprefix").getValue();
                urlstr = skuprefix + urlstr;
            }
        }
        URIMetadataNode newdoc = new URIMetadataNode(urlstr);
        Iterator<Configuration.Entry> it = localcfg.entryIterator();
        while (it.hasNext()) {
            Configuration.Entry et = it.next();
            String yacyfieldname = et.key(); // config defines    yacyfieldname = remotefieldname
            String remotefieldname = et.getValue();
            if (remotefieldname != null && !remotefieldname.isEmpty()) {
                if (Switchboard.getSwitchboard().index.fulltext().getDefaultConfiguration().contains(yacyfieldname)) { // check if in local config
                    SchemaDeclaration est = CollectionSchema.valueOf(yacyfieldname);
                    if (est.isMultiValued()) {
                        if (doc.getFieldValues(remotefieldname) != null) {
                            newdoc.addField(yacyfieldname, doc.getFieldValues(remotefieldname));
                        }
                    } else {
                        if (doc.getFieldValue(remotefieldname) != null) {
                            Object val = doc.getFirstValue(remotefieldname);
                            // watch out for type conversion
                            try {
                                if (est.getType() == SolrType.num_integer && val instanceof String) {
                                    newdoc.setField(yacyfieldname, Integer.parseInt((String) val));
                                } else {
                                    newdoc.setField(yacyfieldname, val);
                                }
                            } catch (Exception ex) {
                                continue; // catch possible parse or type mismatch, skip the field
                            }
                        }
                    }
                }
            }
        }
        newdoc.addField(CollectionSchema.httpstatus_i.name(), HttpServletResponse.SC_OK); // yacy required
        return newdoc;
    }
 }
--- a/source/net/yacy/cora/federate/FederateSearchConnector.java
+++ b/source/net/yacy/cora/federate/FederateSearchConnector.java
@ -0,0 +1,62 @@
 /**
 * FederateSearchConnector.java
 * Copyright 2015 by Burkhard Buelte
 * First released 19.01.2015 at http://yacy.net
 *
 * This library is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 2.1 of the License, or (at your option)
 * any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program in the file lgpl21.txt If not, see
 * <http://www.gnu.org/licenses/>.
 */
 package net.yacy.cora.federate;
 import java.util.List;
 import net.yacy.kelondro.data.meta.URIMetadataNode;
 import net.yacy.search.query.QueryParams;
 import net.yacy.search.query.SearchEvent;
 /**
 * Contract of a query connector that searches a non-YaCy system and gathers
 * its query results (used for the YaCy heuristic options).
 */
 public interface FederateSearchConnector {
    /**
     * Loads the configuration of this connector. A connector needs at least a
     * query target (where to query) and a definition of how to convert the
     * remote search result to the internal result presentation (field mapping).
     *
     * @param instanceName identifying name of this connector instance
     * @param cfg connector specific config parameter
     * @return true on success, false otherwise
     */
    boolean init(String instanceName, String cfg);
    /**
     * Queries the remote system and adds the result metadata to the result
     * list of the given search event. If SearchEvent.addResultsToLocalIndex is
     * set (=default) the result urls are added to the crawler.
     *
     * @param theSearch search event receiving the results
     */
    void search(SearchEvent theSearch);
    /**
     * Queries the remote system and returns the search result with field names
     * according to the YaCy schema.
     *
     * @param query the query to send
     * @return result (metadata) in YaCy schema format
     */
    List<URIMetadataNode> query(QueryParams query);
 }
--- a/source/net/yacy/cora/federate/FederateSearchManager.java
+++ b/source/net/yacy/cora/federate/FederateSearchManager.java
@ -0,0 +1,427 @@
 /**
 * FederateSearchManager.java
 * Copyright 2015 by Burkhard Buelte
 * First released 19.01.2015 at http://yacy.net
 *
 * This library is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 2.1 of the License, or (at your option)
 * any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program in the file lgpl21.txt If not, see
 * <http://www.gnu.org/licenses/>.
 */
 package net.yacy.cora.federate;
 import net.yacy.cora.federate.opensearch.OpenSearchConnector;
 import java.io.File;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
 import net.yacy.cora.document.analysis.Classification;
 import net.yacy.cora.document.id.MultiProtocolURL;
 import net.yacy.cora.federate.solr.connector.SolrConnector;
 import net.yacy.cora.federate.yacy.CacheStrategy;
 import net.yacy.cora.storage.Configuration;
 import net.yacy.cora.storage.Configuration.Entry;
 import net.yacy.cora.storage.Files;
 import net.yacy.cora.util.ConcurrentLog;
 import net.yacy.document.parser.xml.opensearchdescriptionReader;
 import net.yacy.kelondro.data.meta.URIMetadataNode;
 import net.yacy.kelondro.util.Bitfield;
 import net.yacy.search.Switchboard;
 import net.yacy.search.SwitchboardConstants;
 import net.yacy.search.query.QueryGoal;
 import net.yacy.search.query.QueryModifier;
 import net.yacy.search.query.QueryParams;
 import net.yacy.search.query.SearchEvent;
 import net.yacy.search.schema.WebgraphSchema;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
 /**
 * Handling of queries to configured remote OpenSearch systems.
 */
 public class FederateSearchManager {
    private final int accessDelay = 15000; // minimum delay between two accesses to the same remote system (in ms), checked in getBest()
    private File confFile = null; // set by the constructor to DATA/SETTINGS/heuristicopensearch.conf below the data path
    private HashSet<AbstractFederateSearchConnector> conlist; // list of the successfully initialized connectors
    protected Configuration cfg; // content of confFile, read by the constructor
    private static FederateSearchManager manager = null; // self reference for static access via getManager()
    /**
     * Creates the manager and one connector for every enabled entry of
     * DATA/SETTINGS/heuristicopensearch.conf. If that file does not exist, the
     * defaults from the application path are copied first. The new instance
     * registers itself for static access via getManager().
     *
     * @param sb the switchboard providing data and application path; if null
     * no configuration is read and the connector list stays empty
     */
    public FederateSearchManager(Switchboard sb) {
        super();
        this.conlist = new HashSet<AbstractFederateSearchConnector>();
        // from here we need Switchboard settings
        if (sb == null) {
            return;
        }
        // Data needed  active  name, url(template), desc, rule-when-to-use, specifics
        confFile = new File(sb.getDataPath(), "DATA/SETTINGS/heuristicopensearch.conf");
        if (!confFile.exists()) {
            try {
                Files.copy(new File(sb.appPath, "defaults/heuristicopensearch.conf"), confFile);
                File defdir = new File(sb.dataPath, "DATA/SETTINGS/federatecfg");
                if (!defdir.exists()) {
                    // NOTE(review): a directory is passed to Files.copy here - confirm that it supports directories
                    Files.copy(new File(sb.appPath, "defaults/federatecfg"), defdir);
                }
            } catch (IOException ex) {
                // best effort: without defaults the manager simply starts with what is available
                ConcurrentLog.warn("FederateSearchManager", "could not copy default configuration: " + ex.getMessage());
            }
        }
        // read settings config file
        if (confFile.exists()) {
            try {
                cfg = new Configuration(confFile);
                Iterator<Entry> it = cfg.entryIterator();
                while (it.hasNext()) {
                    Entry cfgentry = it.next();
                    String url = cfgentry.getValue();
                    if (cfgentry.enabled() && url != null && !url.isEmpty()) {
                        String name = cfgentry.key();
                        if (url.startsWith("cfgfile:")) { // is cfgfile with field mappings (no opensearch url)
                            // format    prefix:connectortype:configfilename
                            // example   cfgfile:solrconnector:testsys.solr.schema
                            String[] parts = url.split(":");
                            // require all three parts, a malformed entry must not abort the whole initialization
                            if (parts.length >= 3 && parts[1].equalsIgnoreCase("solrconnector")) {
                                SolrFederateSearchConnector sfc = new SolrFederateSearchConnector();
                                if (sfc.init(name, sb.getDataPath()+ "/DATA/SETTINGS/federatecfg/" + parts[2])) {
                                    conlist.add(sfc);
                                }
                            } else {
                                ConcurrentLog.config("FederateSearchManager", "Error in configuration of: " + url);
                            }
                        } else { // handle opensearch url template
                            OpenSearchConnector osc = new OpenSearchConnector();
                            if (osc.init(name, url)) {
                                conlist.add(osc);
                            }
                        }
                    }
                }
            } catch (IOException ex) {
                ConcurrentLog.logException(ex);
            }
        }
        manager = this; // reference for static access via .getManager()
    }
    /**
     * Returns the manager instance. Only one instance is supposed to be
     * running; if none exists yet it is created from the current Switchboard.
     *
     * @return the manager instance
     */
    public static FederateSearchManager getManager() {
        if (manager != null) {
            return manager;
        }
        manager = new FederateSearchManager(Switchboard.getSwitchboard());
        return manager;
    }
    /**
     * Passes the search event to the configured remote systems selected by
     * getBest(). Nothing is done if the event is null or the search query
     * domain is LOCAL.
     *
     * @param theSearch search event receiving the results
     */
    public void search(SearchEvent theSearch) {
        if (theSearch == null || theSearch.query.isLocal()) {
            return;
        }
        for (AbstractFederateSearchConnector connector : getBest(theSearch.getQuery())) {
            connector.search(theSearch);
        }
    }
    /**
     * Sends a query to the configured remote systems selected by getBest() and
     * collects their results.
     *
     * @param query the query to send
     * @return list of results according to YaCy schema, or null if the query
     * is not sent (see condition below)
     */
    public List<URIMetadataNode> query(QueryParams query) {
        // NOTE(review): search() skips LOCAL queries while this method only handles
        // them; query(String) builds a LOCAL query and relies on this - confirm intent
        if (query.isLocal()) {
            List<URIMetadataNode> sdl = new ArrayList<URIMetadataNode>();
            Set<AbstractFederateSearchConnector> picklist = getBest(query);
            for (AbstractFederateSearchConnector fsc : picklist) {
                // a connector may deliver null instead of a list, which addAll() would reject
                List<URIMetadataNode> part = fsc.query(query);
                if (part != null) {
                    sdl.addAll(part);
                }
            }
            return sdl;
        } else {
            return null;
        }
    }
    /**
     * Wraps a plain search string into query parameters (search domain LOCAL,
     * max. 100 results starting at offset 0) and delegates to
     * query(QueryParams).
     *
     * @param querystr the search string
     * @return list of remote query results according to YaCy schema
     */
    public List<URIMetadataNode> query(String querystr) {
        final QueryGoal goal = new QueryGoal(querystr);
        final Switchboard switchboard = Switchboard.getSwitchboard();
        final Bitfield constraint = new Bitfield();
        final QueryParams params = new QueryParams(
                goal, new QueryModifier(), Integer.MAX_VALUE, "",
                Classification.ContentDomain.ALL,
                "", // lang
                null, CacheStrategy.IFFRESH,
                100, // count
                0, // offset
                ".*", // urlmask
                null, null,
                QueryParams.Searchdom.LOCAL,
                constraint, false, null,
                MultiProtocolURL.TLD_any_zone_filter,
                "", false,
                switchboard.index, switchboard.getRanking(),
                "", // userAgent
                false, false,
                0.0, 0.0, -1.0,
                new String[0]);
        return query(params);
    }
    /**
     * Adds an OpenSearch target system to the config file and, if it is
     * active, a matching connector to the list of connectors.
     *
     * @param name name of the target system, must not be null or empty
     * @param urlTemplate query template url
     * @param active true if the target shall be enabled
     * @param comment comment stored with the config entry
     * @return true if the entry was written to the config file, false if no
     * config file is known, the name is missing or the file could not be read
     * or written
     */
    public boolean addOpenSearchTarget(String name, String urlTemplate, boolean active, String comment) {
        if (confFile == null || name == null || name.isEmpty()) {
            return false;
        }
        try {
            Configuration conf = new Configuration(confFile);
            conf.add(name, null, active);
            Configuration.Entry e = conf.get(name);
            e.setValue(urlTemplate);
            e.setEnable(active);
            e.setComment(comment);
            conf.put(name, e);
            try {
                conf.commit();
                if (active) {
                    OpenSearchConnector osd = new OpenSearchConnector();
                    if (osd.init(name, urlTemplate)) {
                        conlist.add(osd);
                    }
                }
            } catch (final IOException ex) {
                ConcurrentLog.warn("FederateSearchManager", "config file write error");
                ConcurrentLog.logException(ex);
                return false; // the entry was not stored, do not report success
            }
            return true;
        } catch (final IOException e1) {
            ConcurrentLog.logException(e1);
            return false;
        }
    }
    /**
     * Reports how many connectors are currently held in the connector list.
     *
     * @return number of active remote query target systems
     */
    public int getSize() {
        final int connectorCount = this.conlist.size();
        return connectorCount;
    }
    /**
     * Selects the target systems to use for a search. Currently this only
     * enforces the minimum access delay: a connector is picked if its last
     * access is longer ago than accessDelay (15 sec).
     *
     * @param query the query to select targets for (currently not evaluated)
     * @return set of connectors which may be queried now
     */
    protected Set<AbstractFederateSearchConnector> getBest(final QueryParams query) {
        final HashSet<AbstractFederateSearchConnector> ready = new HashSet<AbstractFederateSearchConnector>();
        for (final AbstractFederateSearchConnector candidate : conlist) {
            // earliest point in time the system may be accessed again
            final long nextAllowedAccess = candidate.lastaccesstime + accessDelay;
            if (nextAllowedAccess >= System.currentTimeMillis()) {
                continue; // accessed too recently, skip for this search
            }
            ready.add(candidate);
        }
        return ready;
    }
    /**
     * Discover opensearch description links from the local (embedded) Solr
     * webgraph index and add the found systems to the config file. Candidates
     * are the webgraph entries matching the query target_rel_s:search; every
     * system found is added as a disabled entry.
     *
     * @param sb the switchboard providing the index and the configuration
     * @return true if the background discover job was started (or the index
     * holds no candidate links), false if the job was not started
     */
    public boolean discoverFromSolrIndex(Switchboard sb) {
        if (sb == null) {
            return false;
        }
        // check if needed Solr fields are available (selected)
        if (!sb.index.fulltext().useWebgraph()) {
            ConcurrentLog.severe("FederateSearchManager", "Error on connecting to embedded Solr webgraph index");
            return false;
        }
        final SolrConnector connector = sb.index.fulltext().getWebgraphConnector();
        final boolean metafieldavailable = sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_rel_s.name())
                && (sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_protocol_s.name()) && sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_urlstub_s.name()))
                && sb.getConfigBool(SwitchboardConstants.CORE_SERVICE_WEBGRAPH, false);
        if (!metafieldavailable) {
            ConcurrentLog.warn("FederateSearchManager", "webgraph option and webgraph Schema fields target_rel_s, target_protocol_s and target_urlstub_s must be switched on");
            return false;
        }
        // the solr search
        final String webgraphquerystr = WebgraphSchema.target_rel_s.getSolrFieldName() + ":search";
        final String[] webgraphqueryfields = {WebgraphSchema.target_protocol_s.getSolrFieldName(), WebgraphSchema.target_urlstub_s.getSolrFieldName()};
        // alternatively target_protocol_s + "://" +target_host_s + target_path_s
        final long numfound;
        try {
            // request a single row only, just to learn the total number of hits
            final SolrDocumentList docList = connector.getDocumentListByQuery(webgraphquerystr, null, 0, 1, webgraphqueryfields);
            // the paging loop below treats a null result as "no documents", do the same here
            numfound = docList == null ? 0 : docList.getNumFound();
            if (numfound == 0) {
                ConcurrentLog.info("FederateSearchManager", "no results found, abort discover job");
                return true;
            }
            ConcurrentLog.info("FederateSearchManager", "start checking " + Long.toString(numfound) + " found index results");
        } catch (final IOException ex) {
            ConcurrentLog.logException(ex);
            return false;
        }
        final long stoptime = System.currentTimeMillis() + 1000 * 3600; // make sure job doesn't run forever
        // job to iterate through Solr index to find links to opensearchdescriptions
        // started as background job as connect timeouts may cause it run a long time
        final Thread job = new Thread() {
            @Override
            public void run() {
                try {
                    final int chunksize = 20; // number of result documents checked per Solr request
                    boolean doloop = true;
                    int loopnr = 0;
                    final Set<String> dblmem = new HashSet<String>(); // temp memory for already checked url
                    while (doloop) {
                        ConcurrentLog.info("FederateSearchManager", "start Solr query loop at " + Integer.toString(loopnr * chunksize) + " of " + Long.toString(numfound));
                        final SolrDocumentList docList = connector.getDocumentListByQuery(webgraphquerystr, null, loopnr * chunksize, chunksize, webgraphqueryfields);
                        loopnr++;
                        if (stoptime < System.currentTimeMillis()) {// stop after max 1h (the chunk just fetched is still processed)
                            doloop = false;
                            ConcurrentLog.info("FederateSearchManager", "long running discover task aborted");
                        }
                        if (docList != null && docList.size() > 0) {
                            for (final SolrDocument sdoc : docList) {
                                final String hrefurltxt = sdoc.getFieldValue(WebgraphSchema.target_protocol_s.getSolrFieldName()) + "://" + sdoc.getFieldValue(WebgraphSchema.target_urlstub_s.getSolrFieldName());
                                try {
                                    final URL url = new URL(hrefurltxt);
                                    //TODO: check Blacklist
                                    if (dblmem.add(url.getAuthority())) { // use only the authority part to detect double entries
                                        final opensearchdescriptionReader os = new opensearchdescriptionReader(hrefurltxt);
                                        if (os.getRSSorAtomUrl() != null) {
                                            // add found system to config file (as disabled entry)
                                            // NOTE(review): an already configured entry with the same short name is presumably overwritten - confirm
                                            if (addOpenSearchTarget(os.getShortName(), os.getRSSorAtomUrl(), false, os.getItem("LongName"))) {
                                                ConcurrentLog.info("FederateSearchManager", "added " + os.getShortName() + " " + hrefurltxt);
                                            } else {
                                                ConcurrentLog.warn("FederateSearchManager", "could not add " + os.getShortName() + " " + hrefurltxt);
                                            }
                                        } else {
                                            ConcurrentLog.info("FederateSearchManager", "osd.xml check failed (no RSS or Atom support) for " + hrefurltxt);
                                        }
                                    }
                                } catch (final MalformedURLException ignored) {
                                    // deliberately skipped: an index entry that does not form a valid url cannot be checked
                                }
                            }
                        } else {
                            // no (more) documents: end of result list reached
                            doloop = false;
                        }
                    }
                    ConcurrentLog.info("FederateSearchManager", "finisched Solr query (checked " + Integer.toString(dblmem.size()) + " unique opensearchdescription links found in " + Long.toString(numfound) + " results)");
                } catch (final IOException ex) {
                    ConcurrentLog.logException(ex);
                }
            }
        };
        job.start();
        return true;
    }
    /**
     * Read or reread the opensearch config file and initialize the connectors
     * for all enabled entries. An entry value is either an opensearch url
     * template or a reference of the form cfgfile:connectortype:cfg-file
     * (example cfgfile:solrconnector:testsys.solr.schema), whose cfg-file is
     * looked up in the directory federatecfg next to the config file.
     *
     * @param cfgFileName path of the config file
     * @return always true in the current implementation (read errors are only
     * logged)
     */
    public boolean init(String cfgFileName) {
        confFile = new File(cfgFileName);
        if (confFile.exists()) {
            try {
                cfg = new Configuration(confFile);
                if (!this.conlist.isEmpty()) this.conlist.clear(); // prevent double entries
                Iterator<Entry> it = cfg.entryIterator();
                while (it.hasNext()) {
                    Entry cfgentry = it.next();
                    if (!cfgentry.enabled()) { // hold only enabled in memory
                        continue;
                    }
                    String name = cfgentry.key();
                    String url = cfgentry.getValue();
                    if (url == null || url.isEmpty()) {
                        continue;
                    }
                    if (url.startsWith("cfgfile:")) { // is cfgfile with field mappings (no opensearch url)
                        // config entry has 3 parts separated by :    1=cfgfile 2=connectortype 3=relative path to connector-cfg-file
                        // example   cfgfile:solrconnector:testsys.solr.schema
                        // limit 3 keeps a path containing ':' intact; the length check guards against incomplete entries
                        String[] parts = url.split(":", 3);
                        if (parts.length == 3 && !parts[2].isEmpty() && parts[1].equalsIgnoreCase("solrconnector")) {
                            SolrFederateSearchConnector sfc = new SolrFederateSearchConnector();
                            if (sfc.init(name, confFile.getParent()+"/federatecfg/"+parts[2])) {
                                conlist.add(sfc);
                            }
                        } else {
                            ConcurrentLog.config("FederateSearchManager", "Init error in configuration of: " + url);
                        }
                    } else { // handle opensearch url template
                        OpenSearchConnector osd = new OpenSearchConnector();
                        if (osd.init(name, url)) {
                            conlist.add(osd);
                        }
                    }
                }
            } catch (IOException ex) {
                ConcurrentLog.logException(ex);
            }
        }
        return true;
    }
 }
--- a/source/net/yacy/cora/federate/SolrFederateSearchConnector.java
+++ b/source/net/yacy/cora/federate/SolrFederateSearchConnector.java
@ -0,0 +1,119 @@
 /**
 * SolrFederateSearchConnector.java
 * Copyright 2015 by Burkhard Buelte
 * First released 19.01.2015 at http://yacy.net
 *
 * This library is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 2.1 of the License, or (at your option)
 * any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program in the file lgpl21.txt If not, see
 * <http://www.gnu.org/licenses/>.
 */
 package net.yacy.cora.federate;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 import net.yacy.cora.federate.solr.connector.RemoteSolrConnector;
 import net.yacy.cora.federate.solr.connector.SolrConnector;
 import net.yacy.cora.federate.solr.instance.RemoteInstance;
 import net.yacy.cora.util.ConcurrentLog;
 import net.yacy.kelondro.data.meta.URIMetadataNode;
 import net.yacy.search.query.QueryParams;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
 /**
 * Search connecter to collect query results from remote Solr systems which
 * provide results as Solr documents
 */
 public class SolrFederateSearchConnector extends AbstractFederateSearchConnector {
    private String corename;
    @Override
    public boolean init(String instance, String cfgFileName) {
        boolean initResult = super.init(instance, cfgFileName); // init local schema cfg
        if (initResult) {
            if (this.localcfg.contains("_baseurl")) {
                setBaseurl(this.localcfg.get("_baseurl").getValue());
            } else {
                ConcurrentLog.config(instance, "no _baseurl given in config file "+cfgFileName);
                initResult = false;
            }
            if (this.localcfg.contains("_corename")) {
                setCoreName(this.localcfg.get("_corename").getValue());
            } else {
                ConcurrentLog.config(instance, "no _corename given in config file "); // not mandatory
                this.corename = "";
            }
        }
        return initResult;
    }
    public void setBaseurl(String url) {
        if (url.endsWith("/")) {
            this.baseurl = url;
        } else {
            this.baseurl = url + "/";
        }
    }
    public void setCoreName(String core) {
        this.corename = core;
    }
    /**
     * Core query implementation
     * all query and search routines will use this routine to query the remote system
     *
     * @param query
     * @return list of solr documents (metadata) accordng to local YaCy internal schema
     */
    @Override
    public List<URIMetadataNode> query(QueryParams query) {
        List<URIMetadataNode> docs = new ArrayList<URIMetadataNode>();
        Collection<String> remotecorename = new ArrayList<String>();
        remotecorename.add(corename);
        ModifiableSolrParams msp = new SolrQuery(query.getQueryGoal().getQueryString(false));
        msp.add(CommonParams.QT, "/"); // important to override default append of /select
        msp.add(CommonParams.ROWS, Integer.toString(query.itemsPerPage));
        try {
            RemoteInstance instance = new RemoteInstance(baseurl, remotecorename, corename, 20000);
            try {
                SolrConnector solrConnector = new RemoteSolrConnector(instance, false, null);
                try {
                    this.lastaccesstime = System.currentTimeMillis();
                    SolrDocumentList docList = solrConnector.getDocumentListByParams(msp);
                    // convert to YaCy schema documentlist
                    for (SolrDocument doc : docList) {
                        URIMetadataNode anew = toYaCySchema(doc);
                        docs.add(anew);
                    }
                } catch (IOException | SolrException e) {
                } finally {
                    solrConnector.close();
                }
            } catch (Throwable ee) {
            } finally {
                instance.close();
            }
        } catch (IOException eee) {
        }
        return docs;
    }
 }
--- a/source/net/yacy/cora/federate/opensearch/OpenSearchConnector.java
+++ b/source/net/yacy/cora/federate/opensearch/OpenSearchConnector.java
@ -19,107 +19,45 @@
 */
 package net.yacy.cora.federate.opensearch;
 import java.io.File;
 import java.io.IOException;
 import java.net.MalformedURLException;
-import java.net.URL;
+import java.util.ArrayList;
-import java.util.HashSet;
+import java.util.Arrays;
-import java.util.Iterator;
+import java.util.List;
-import java.util.Set;
+import net.yacy.cora.document.encoding.UTF8;
-
+import net.yacy.cora.document.feed.RSSFeed;
-import net.yacy.cora.federate.solr.connector.SolrConnector;
+import net.yacy.cora.document.feed.RSSMessage;
-import net.yacy.cora.storage.Configuration;
+import net.yacy.cora.document.feed.RSSReader;
 import net.yacy.cora.document.id.DigestURL;
 import net.yacy.cora.document.id.MultiProtocolURL;
 import net.yacy.cora.federate.AbstractFederateSearchConnector;
 import net.yacy.cora.federate.FederateSearchConnector;
 import net.yacy.cora.protocol.ClientIdentification;
 import net.yacy.cora.protocol.http.HTTPClient;
 import net.yacy.cora.util.ConcurrentLog;
-import net.yacy.cora.util.SpaceExceededException;
+import net.yacy.document.TextParser;
-import net.yacy.document.parser.xml.opensearchdescriptionReader;
+import net.yacy.kelondro.data.meta.URIMetadataNode;
-import net.yacy.kelondro.blob.Tables;
+import net.yacy.search.query.QueryParams;
-import net.yacy.search.Switchboard;
+import net.yacy.search.schema.CollectionSchema;
 import net.yacy.search.SwitchboardConstants;
 import net.yacy.search.query.SearchEvent;
 import net.yacy.search.schema.WebgraphSchema;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
 /**
 * Handling of queries to remote OpenSearch systems. Iterates to a list of
- * configured systems until number of needed results are available. Uses a
+ * configured systems until number of needed results are available.
 * temporary work table to store search template urls for the iteration during
 * search.
 */
-public class OpenSearchConnector {
+public class OpenSearchConnector extends AbstractFederateSearchConnector implements FederateSearchConnector {
    private File confFile = null; // later initialized to DATA/SETTINGS/heuristicopensearch.conf
    private int size = 0; // remember the size of active opensearch targets
    public OpenSearchConnector(Switchboard sb, boolean createworktable) {
        super();
        if (sb == null) {
            return;
        }
        confFile = new File(sb.getDataPath(), "DATA/SETTINGS/heuristicopensearch.conf");
        if (createworktable) { // read from config file and create worktable
            sb.tables.clear("opensearchsys");
            try {
                Configuration cfg = new Configuration(confFile);
                // copy active opensearch systems to a work table (opensearchsys)
                Iterator<Configuration.Entry> cfgentries = cfg.entryIterator();
                while (cfgentries.hasNext()) {
                    Configuration.Entry e = cfgentries.next();
                    if (e.enabled()) {
                        String title = e.key(); // get the title
                        String urlstr = e.getValue(); // get the search template url
-                        Tables.Data row = new Tables.Data();
+    @Override
-                        row.put("title", title);
+    public boolean init(final String name, final String urltemplate) {
-                        row.put("url", urlstr);
+        this.baseurl = urltemplate;
-                        try {
+        this.instancename = name;
-                            sb.tables.insert("opensearchsys", row);
+        this.localcfg = null; // no field mapping needed
-                        } catch (final SpaceExceededException ex) {
+        return true;
                            ConcurrentLog.logException(ex);
                        }
                    }
                }
                size = sb.tables.size("opensearchsys");
            } catch (final IOException ex) {
                ConcurrentLog.logException(ex);
            }
        }
    }
    /**
     * Sends a search request to remote systems listed in worktable until the
     * searchevent contains less than needed results. Depending on already
     * collected search results none to all configured systems are queried to
     * complete available search results.
     * if query search domain is LOCAL procedure does nothing.
     */
    static public void query(Switchboard sb, SearchEvent theSearch) {
        if (theSearch != null && sb != null) {
            if (!theSearch.query.isLocal()) {
                try {
                    Iterator<Tables.Row> ossysworktable = sb.tables.iterator("opensearchsys");
                    //int needres = theSearch.query.neededResults(); // get number of needed results
                    while (ossysworktable.hasNext() /*&& theSearch.query.getResultCount() < needres*/) {
                        Tables.Row row = ossysworktable.next();
                        String osurl = row.get("url", "");
                        String name = row.get("title", "");
                        sb.heuristicRSS(parseSearchTemplate(osurl, theSearch.query.getQueryGoal().getQueryString(false), 0, theSearch.query.itemsPerPage), theSearch, name);
                    }
                } catch (final IOException ex) {
                    ConcurrentLog.warn("OpenSearchConnector.query", "failed reading table opensearchsys");
                }
            }
        }
    }
    /**
     * replace Opensearchdescription search template parameter with actual values
     */
-    private static String parseSearchTemplate(String searchurltemplate, String query, int start, int rows) {
+    private String parseSearchTemplate(String searchurltemplate, String query, int start, int rows) {
        String tmps = searchurltemplate.replaceAll("\\?}", "}"); // some optional parameters may include question mark '{param?}='
        tmps = tmps.replace("{startIndex}", Integer.toString(start));
        tmps = tmps.replace("{startPage}", "");
@ -131,138 +69,76 @@ public class OpenSearchConnector {
    }
    /**
-     * add a opensearch target system to the config file
+     * queries remote system and returns the resultlist (waits until results
     * transmitted or timeout). This is the main access routine used for the
     * search and query operation. For the internal access delay, the
     * this.lastaccesstime value also needs to be set here.
     *
     * @return query results (metadata) with fields according to YaCy schema
     */
-    public boolean add(String name, String url, boolean active, String comment) {
+    @Override
-        if (confFile == null) {
+    public List<URIMetadataNode> query(QueryParams query) {
-            return false;
+        List<URIMetadataNode> docs = new ArrayList<URIMetadataNode>();
        }
        // see http://www.loc.gov/standards/sru/
        String searchurl = this.parseSearchTemplate(baseurl, query.getQueryGoal().getQueryString(false), 0, query.itemsPerPage);
        try {
-            Configuration conf = new Configuration(confFile);
+            MultiProtocolURL aurl = new MultiProtocolURL(MultiProtocolURL.unescape(searchurl));
-            if (name != null && !name.isEmpty()) {
+            try {
-                conf.add(name, null, active);
+                this.lastaccesstime = System.currentTimeMillis();
-                Configuration.Entry e = conf.get(name);
+                final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyIntranetCrawlerAgent);
-                e.setValue(url);
+                byte[] result = httpClient.GETbytes(aurl, null, null, false);
-                e.setEnable(active);
+                RSSReader rssReader =  RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result);
-                e.setComment(comment);
+                if (rssReader != null) {
-                conf.put(name, e);
+                    final RSSFeed feed = rssReader.getFeed();
-                try {
+                    if (feed != null) {
-                    conf.commit();
+                        for (final RSSMessage item : feed) {
-                } catch (final IOException ex) {
+                            try {
-                    ConcurrentLog.warn("OpenSearchConnector.add", "config file write error");
+                                DigestURL uri = new DigestURL(item.getLink());
-                }
+
-                return true;
+                                URIMetadataNode doc = new URIMetadataNode(uri);
-            }
+                                doc.setField(CollectionSchema.charset_s.getSolrFieldName(), UTF8.charset.name());
-        } catch (final IOException e1) {
+                                doc.setField(CollectionSchema.author.getSolrFieldName(), item.getAuthor());
-            ConcurrentLog.logException(e1);
+                                doc.setField(CollectionSchema.title.getSolrFieldName(), item.getTitle());
-            return false;
+                                doc.setField(CollectionSchema.language_s.getSolrFieldName(), item.getLanguage());
-        }
+                                doc.setField(CollectionSchema.last_modified.getSolrFieldName(), item.getPubDate());
-        return false;
+                                final String mime = TextParser.mimeOf(uri);
-    }
+                                if (mime != null) {
-
+                                    doc.setField(CollectionSchema.content_type.getSolrFieldName(), mime);
-    /**
+                                }
-     * Get the number of active remote opensearch target systems
+                                if (item.getCategory().isEmpty()) {
-     */
+                                    doc.setField(CollectionSchema.keywords.getSolrFieldName(), Arrays.toString(item.getSubject()));
-    public int getSize() {
+                                } else {
-        return size;
+                                    doc.setField(CollectionSchema.keywords.getSolrFieldName(), Arrays.toString(item.getSubject()) + " " + item.getCategory());
-    }
+                                }
-
+                                doc.setField(CollectionSchema.publisher_t.getSolrFieldName(), item.getCopyright());
    /**
     * Discover opensearch description links from local (embedded) Solr index using
     * meta data field 'outboundlinks_tag_txt' and add found systems to the
     * config file
     *  
     * @return true if background discover job was started, false if job not started
     */
    public boolean discoverFromSolrIndex(final Switchboard sb) {
        if (sb == null) {
            return false;
        }
        // check if needed Solr fields are available (selected)
        if (!sb.index.fulltext().useWebgraph()) {
            ConcurrentLog.severe("OpenSearchConnector.Discover", "Error on connecting to embedded Solr webgraph index");
            return false;
        }
        final SolrConnector connector = sb.index.fulltext().getWebgraphConnector();
        final boolean metafieldavailable = sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_rel_s.name()) 
                && ( sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_protocol_s.name()) && sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_urlstub_s.name()) ) 
                && sb.getConfigBool(SwitchboardConstants.CORE_SERVICE_WEBGRAPH, false);
        if (!metafieldavailable) {
            ConcurrentLog.warn("OpenSearchConnector.Discover", "webgraph option and webgraph Schema fields target_rel_s, target_protocol_s and target_urlstub_s must be switched on");
            return false;
        }
        // the solr query
        final String webgraphquerystr = WebgraphSchema.target_rel_s.getSolrFieldName() + ":search";
        final String[] webgraphqueryfields = { WebgraphSchema.target_protocol_s.getSolrFieldName() , WebgraphSchema.target_urlstub_s.getSolrFieldName()};
        // alternatively target_protocol_s + "://" +target_host_s + target_path_s
        final long numfound;
        try {
            SolrDocumentList docList = connector.getDocumentListByQuery(webgraphquerystr, null, 0, 1, webgraphqueryfields);
            numfound = docList.getNumFound();
            if (numfound == 0) {
                ConcurrentLog.info("OpenSearchConnector.Discover", "no results found, abort discover job");
                return true;
            }
            ConcurrentLog.info("OpenSearchConnector.Discover", "start checking " + Long.toString(numfound) + " found index results");
        } catch (final IOException ex) {
            ConcurrentLog.logException(ex);
            return false;
        }
-        final long stoptime = System.currentTimeMillis() + 1000 * 3600; // make sure job doesn't run forever
+                                // TODO: we likely got only a search related snippet (take is as text content)
                                // we need a way to differentiate metadata from full crawl data in the index (would be also good for rwi transferred/received metadata)
                                // or considere to add this to snippet cache, without adding text_t
                                doc.setField(CollectionSchema.text_t.getSolrFieldName(), item.getDescriptions());
-        // job to iterate through Solr index to find links to opensearchdescriptions
+                                if (item.getLat() != 0.0 && item.getLon() != 0.0) {
-        // started as background job as connect timeouts may cause it run a long time
+                                    doc.setField(CollectionSchema.coordinate_p.getSolrFieldName(), item.getLat() + "," + item.getLon());
-        final Thread job = new Thread() {
+                                }
-            @Override
+                                if (item.getSize() > 0) {
-            public void run() {
+                                    doc.setField(CollectionSchema.size_i.getSolrFieldName(), item.getSize());
                try {
                    boolean doloop = true;
                    int loopnr = 0;
                    Set<String> dblmem = new HashSet<String>(); // temp memory for already checked url
                    while (doloop) {
                        ConcurrentLog.info("OpenSearchConnector.Discover", "start Solr query loop at " + Integer.toString(loopnr * 20) + " of " + Long.toString(numfound));
                        SolrDocumentList docList = connector.getDocumentListByQuery(webgraphquerystr, null, loopnr * 20, 20,webgraphqueryfields); // check chunk of 20 result documents
                        loopnr++;
                        if (stoptime < System.currentTimeMillis()) {// stop after max 1h
                            doloop = false;
                            ConcurrentLog.info("OpenSearchConnector.Discover", "long running discover task aborted");
                        }
                        if (docList != null && docList.size() > 0) {
                            Iterator<SolrDocument> docidx = docList.iterator();
                            while (docidx.hasNext()) {
                                SolrDocument sdoc = docidx.next();
                                String hrefurltxt = sdoc.getFieldValue(WebgraphSchema.target_protocol_s.getSolrFieldName()) + "://" + sdoc.getFieldValue(WebgraphSchema.target_urlstub_s.getSolrFieldName());
                                try {
                                    URL url = new URL(hrefurltxt);
                                    //TODO: check Blacklist
                                    if (dblmem.add(url.getAuthority())) { // use only main path to detect double entries
                                        opensearchdescriptionReader os = new opensearchdescriptionReader(hrefurltxt);
                                        if (os.getRSSorAtomUrl() != null) {
                                            // add found system to config file
                                            add(os.getShortName(), os.getRSSorAtomUrl(), false, os.getItem("LongName"));
                                            ConcurrentLog.info("OpenSearchConnector.Discover", "added " + os.getShortName() + " " + hrefurltxt);
                                        } else {
                                            ConcurrentLog.info("OpenSearchConnector.Discover", "osd.xml check failed (no RSS or Atom support) for " + hrefurltxt);
                                        }
                                    }
                                } catch (final MalformedURLException ex) {
                                }
                                docs.add(doc);
                            } catch (final MalformedURLException e) {
                            }
                        } else {
                            doloop = false;
                        }
                        ConcurrentLog.info("OpenSerachConnector", "received " + docs.size() + " results from " + this.instancename);
                    }
                    ConcurrentLog.info("OpenSearchConnector.Discover", "finisched Solr query (checked " + Integer.toString(dblmem.size()) + " unique opensearchdescription links found in " + Long.toString(numfound) + " results)");
                } catch (final IOException ex) {
                    ConcurrentLog.logException(ex);
                }
            } catch (IOException ex) {
                ConcurrentLog.logException(ex);
                ConcurrentLog.info("OpenSearchConnector", "no connection to " + searchurl);
            }
-        };
+        } catch (MalformedURLException ee) {
-        job.start();
+            ConcurrentLog.warn("OpenSearchConnector", "malformed url " + searchurl);
-        return true;
+        }
        return docs;
    }
 }
--- a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java
+++ b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java
@ -69,7 +69,7 @@ public class URIMetadataNode extends SolrDocument {
    private static final long serialVersionUID = -256046934741561968L;
    protected String keywords = null;
-    protected DigestURL url = null;
+    protected DigestURL url;
    protected Bitfield flags = null;
    protected int imagec = -1, audioc = -1, videoc = -1, appc = -1;
    protected double lat = Double.NaN, lon = Double.NaN;
@ -150,7 +150,6 @@ public class URIMetadataNode extends SolrDocument {
        for (String name : doc.getFieldNames()) {
            this.addField(name, doc.getFieldValue(name));
        }
        this.snippet = "";
        Float scorex = (Float) doc.getFieldValue("score"); // this is a special field containing the ranking score of a search result
        this.score = scorex == null ? 0.0f : scorex.floatValue();
        final byte[] hash = ASCII.getBytes(getString(CollectionSchema.id)); // TODO: can we trust this id ?
@ -169,6 +168,24 @@ public class URIMetadataNode extends SolrDocument {
        this.score = scorex;
    }
    public URIMetadataNode (final String urlstr) {
        super();
        try {
            url = new DigestURL(urlstr);
            this.setField(CollectionSchema.sku.name(), url.toNormalform(true));
            this.setField(CollectionSchema.id.name(), ASCII.String(url.hash()));
        } catch (final MalformedURLException e) {
            ConcurrentLog.logException(e);
            this.url = null;
        }
    }
    public URIMetadataNode(DigestURL theurl) {
        super();
        url = theurl;
        this.setField(CollectionSchema.sku.name(), url.toNormalform(true));
        this.setField(CollectionSchema.id.name(), ASCII.String(url.hash()));
    }
    /**
     * Get the content domain of a document. This first tries to derive the content domain from the mime type
     * and, if that fails, falls back to the content domain derived from the file extension.
--- a/source/net/yacy/search/Switchboard.java
+++ b/source/net/yacy/search/Switchboard.java
@ -714,16 +714,7 @@ public final class Switchboard extends serverSwitch {
        this.log.info("surrogates.out Path = " + this.surrogatesOutPath.getAbsolutePath());
        this.surrogatesOutPath.mkdirs();
 */
-        // copy opensearch heuristic config (if not exist)
+        
        final File osdConfig = new File(getDataPath(), "DATA/SETTINGS/heuristicopensearch.conf");
        if (!osdConfig.exists()) {
            final File osdDefaultConfig = new File(getAppPath(), "defaults/heuristicopensearch.conf");
            this.log.info("heuristic.opensearch list Path = " + osdDefaultConfig.getAbsolutePath());
            try {
                Files.copy(osdDefaultConfig, osdConfig);
            } catch (final IOException ex) { }
        }
        // create the release download directory
        this.releasePath =
            getDataPath(SwitchboardConstants.RELEASE_PATH, SwitchboardConstants.RELEASE_PATH_DEFAULT);
@ -3615,7 +3606,9 @@ public final class Switchboard extends serverSwitch {
     * @param urlpattern the search query url (e.g. http://search.org?query=searchword)
     * @param searchEvent
     * @param feedName short/internal name of the remote system
     * @deprecated use FederateSearchManager(SearchEvent) instead
     */
    @Deprecated // not used (since 2015-01-18, v1.81)
    public final void heuristicRSS(
        final String urlpattern,
        final SearchEvent searchEvent,