yacy_search_server/htroot/api/schema.java

/**
 *  schema_p
 *  Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
 *  First released 13.01.2012 at http://yacy.net
 *
 *  $LastChangedDate: 2011-04-14 00:04:23 +0200 (Do, 14 Apr 2011) $
 *  $LastChangedRevision: 7653 $
 *  $LastChangedBy: orbiter $
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program in the file lgpl21.txt
 *  If not, see <http://www.gnu.org/licenses/>.
 */

import net.yacy.cora.federate.solr.SchemaConfiguration;
import net.yacy.cora.federate.solr.SchemaDeclaration;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.search.Switchboard;
import net.yacy.search.schema.CollectionSchema;
import net.yacy.search.schema.WebgraphSchema;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
import net.yacy.server.servletProperties;

public class schema {

    public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {
        // return variable that accumulates replacements
        final servletProperties prop = new servletProperties();
        final Switchboard sb = (Switchboard) env;

        String schemaName = CollectionSchema.CORE_NAME;
        if (post != null) schemaName = post.get("core", schemaName); 
        
        // write schema
        int c = 0;
        SchemaConfiguration solrSchema = schemaName.equals(CollectionSchema.CORE_NAME) ? sb.index.fulltext().getDefaultConfiguration() : sb.index.fulltext().getWebgraphConfiguration();
        SchemaDeclaration[] cc = schemaName.equals(CollectionSchema.CORE_NAME) ? CollectionSchema.values() : WebgraphSchema.values();
        for (SchemaDeclaration field : cc) {
            if (solrSchema.contains(field.name())) {
                addField(prop, c, field);
                c++;
            }
        }
        if (schemaName.equals(CollectionSchema.CORE_NAME)) {
            // add additional coordinate field for collection1
            if (solrSchema.contains(CollectionSchema.coordinate_p)) {
                addField(prop, c, CollectionSchema.coordinate_p_0_coordinate);
                c++;
                addField(prop, c, CollectionSchema.coordinate_p_1_coordinate);
                c++;
            }
        }
            
        //if (solrScheme.contains(YaCySchema.author)) {addField(prop, c, YaCySchema.author_sxt);}
        prop.put("fields", c);

        if (schemaName.equals(CollectionSchema.CORE_NAME)) {
            prop.put("copyFieldAuthor", solrSchema.contains(CollectionSchema.author) ? 1 : 0);
            
            prop.put("solruniquekey", CollectionSchema.id.getSolrFieldName());
            prop.put("solrdefaultsearchfield",
                    solrSchema.contains(CollectionSchema.text_t) ? CollectionSchema.text_t.getSolrFieldName() :
                    solrSchema.contains(CollectionSchema.fuzzy_signature_text_t) ? CollectionSchema.fuzzy_signature_text_t.getSolrFieldName() :
                    solrSchema.contains(CollectionSchema.h1_txt) ? CollectionSchema.h1_txt.getSolrFieldName() :
                    CollectionSchema.id.getSolrFieldName()
                    );
        } else {
            prop.put("copyFieldAuthor", 0);
            
            prop.put("solruniquekey", WebgraphSchema.id.getSolrFieldName());
            prop.put("solrdefaultsearchfield",
                    solrSchema.contains(WebgraphSchema.target_linktext_t) ? WebgraphSchema.target_linktext_t.getSolrFieldName() :
                    solrSchema.contains(WebgraphSchema.target_name_t) ? WebgraphSchema.target_name_t.getSolrFieldName() :
                    solrSchema.contains(WebgraphSchema.target_alt_t) ? WebgraphSchema.target_alt_t.getSolrFieldName() :
                    WebgraphSchema.id.getSolrFieldName()
                    );
        }     

        // add CORS Access header
        final ResponseHeader outgoingHeader = new ResponseHeader(200);
        outgoingHeader.put(HeaderFramework.CORS_ALLOW_ORIGIN, "*");
        prop.setOutgoingHeader(outgoingHeader);   
        
        // return rewrite properties
        return prop;
    }
    
    private static void addField(servletProperties prop, int c, SchemaDeclaration field) {
        prop.put("fields_" + c + "_solrname", field.getSolrFieldName());
        prop.put("fields_" + c + "_type", field.getType().printName());
        prop.put("fields_" + c + "_comment", field.getComment());
        prop.put("fields_" + c + "_indexedChecked", field.isIndexed() ? 1 : 0);
        prop.put("fields_" + c + "_storedChecked", field.isStored() ? 1 : 0);
        prop.put("fields_" + c + "_multiValuedChecked", field.isMultiValued() ? 1 : 0);
        prop.put("fields_" + c + "_omitNormsChecked", field.isOmitNorms() ? 1 : 0);
    }
}
added automatic generation of a solr schema.xml file 13 years ago			`/**`
			`* schema_p`
			`* Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany`
			`* First released 13.01.2012 at http://yacy.net`
			`*`
			`* $LastChangedDate: 2011-04-14 00:04:23 +0200 (Do, 14 Apr 2011) $`
			`* $LastChangedRevision: 7653 $`
			`* $LastChangedBy: orbiter $`
			`*`
			`* This library is free software; you can redistribute it and/or`
			`* modify it under the terms of the GNU Lesser General Public`
			`* License as published by the Free Software Foundation; either`
			`* version 2.1 of the License, or (at your option) any later version.`
			`*`
			`* This library is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* Lesser General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Lesser General Public License`
			`* along with this program in the file lgpl21.txt`
			`* If not, see <http://www.gnu.org/licenses/>.`
			`*/`

introduced a second core named 'webgraph'. This core will hold the link structure, but is not filled yet. To have the opportunity of a second core, multi-core functionality had to be implemented to the deep-embedded solr: - migrated the solr_40 directory content to a subdirectory 'collection1'; the previously used default core is now called collection1 - added solr_40/webgraph subdirectory as second core - added a servlet configuration for the second core 'webgraph' in /IndexSchema_p.html - added instance handling as addition to solr connections: all solr connectors are now instances of an solr 'instance' object; this required a complete re-design of the solr embedding - migrated also caching and sharding ontop of new instance handling - migrated the search apis to handle now the access to a specific core, the default core named 'collection1' - migrated the remote solr search interface to access shards of cores; for the yacy remote search the default core is now called 'solr'; using the peer address as solr address - migrated the solr backup and restore process: old backups cannot be used after this migration! - redesign of solr instance handling in all methods which access the instances: they cannot hold copies of these instances any more; the must retrieve the actuall connection object every time they want to write to it (this solves also some bugs when switching the index/network) - added another schema 'solr.webgraph.schema', the old solr.keys.list is replaced by solr.collection.schema 12 years ago			`import net.yacy.cora.federate.solr.SchemaConfiguration;`
			`import net.yacy.cora.federate.solr.SchemaDeclaration;`
made the index schema retrieval public and allow cross-domain retrieval 12 years ago			`import net.yacy.cora.protocol.HeaderFramework;`
added automatic generation of a solr schema.xml file 13 years ago			`import net.yacy.cora.protocol.RequestHeader;`
made the index schema retrieval public and allow cross-domain retrieval 12 years ago			`import net.yacy.cora.protocol.ResponseHeader;`
added automatic generation of a solr schema.xml file 13 years ago			`import net.yacy.search.Switchboard;`
introduced a second core named 'webgraph'. This core will hold the link structure, but is not filled yet. To have the opportunity of a second core, multi-core functionality had to be implemented to the deep-embedded solr: - migrated the solr_40 directory content to a subdirectory 'collection1'; the previously used default core is now called collection1 - added solr_40/webgraph subdirectory as second core - added a servlet configuration for the second core 'webgraph' in /IndexSchema_p.html - added instance handling as addition to solr connections: all solr connectors are now instances of an solr 'instance' object; this required a complete re-design of the solr embedding - migrated also caching and sharding ontop of new instance handling - migrated the search apis to handle now the access to a specific core, the default core named 'collection1' - migrated the remote solr search interface to access shards of cores; for the yacy remote search the default core is now called 'solr'; using the peer address as solr address - migrated the solr backup and restore process: old backups cannot be used after this migration! - redesign of solr instance handling in all methods which access the instances: they cannot hold copies of these instances any more; the must retrieve the actuall connection object every time they want to write to it (this solves also some bugs when switching the index/network) - added another schema 'solr.webgraph.schema', the old solr.keys.list is replaced by solr.collection.schema 12 years ago			`import net.yacy.search.schema.CollectionSchema;`
			`import net.yacy.search.schema.WebgraphSchema;`
refactoring 12 years ago			`import net.yacy.server.serverObjects;`
			`import net.yacy.server.serverSwitch;`
made the index schema retrieval public and allow cross-domain retrieval 12 years ago			`import net.yacy.server.servletProperties;`
added automatic generation of a solr schema.xml file 13 years ago
made the index schema retrieval public and allow cross-domain retrieval 12 years ago			`public class schema {`
added automatic generation of a solr schema.xml file 13 years ago
introduced a second core named 'webgraph'. This core will hold the link structure, but is not filled yet. To have the opportunity of a second core, multi-core functionality had to be implemented to the deep-embedded solr: - migrated the solr_40 directory content to a subdirectory 'collection1'; the previously used default core is now called collection1 - added solr_40/webgraph subdirectory as second core - added a servlet configuration for the second core 'webgraph' in /IndexSchema_p.html - added instance handling as addition to solr connections: all solr connectors are now instances of an solr 'instance' object; this required a complete re-design of the solr embedding - migrated also caching and sharding ontop of new instance handling - migrated the search apis to handle now the access to a specific core, the default core named 'collection1' - migrated the remote solr search interface to access shards of cores; for the yacy remote search the default core is now called 'solr'; using the peer address as solr address - migrated the solr backup and restore process: old backups cannot be used after this migration! - redesign of solr instance handling in all methods which access the instances: they cannot hold copies of these instances any more; the must retrieve the actuall connection object every time they want to write to it (this solves also some bugs when switching the index/network) - added another schema 'solr.webgraph.schema', the old solr.keys.list is replaced by solr.collection.schema 12 years ago			`public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {`
added automatic generation of a solr schema.xml file 13 years ago			`// return variable that accumulates replacements`
made the index schema retrieval public and allow cross-domain retrieval 12 years ago			`final servletProperties prop = new servletProperties();`
added automatic generation of a solr schema.xml file 13 years ago			`final Switchboard sb = (Switchboard) env;`

introduced a second core named 'webgraph'. This core will hold the link structure, but is not filled yet. To have the opportunity of a second core, multi-core functionality had to be implemented to the deep-embedded solr: - migrated the solr_40 directory content to a subdirectory 'collection1'; the previously used default core is now called collection1 - added solr_40/webgraph subdirectory as second core - added a servlet configuration for the second core 'webgraph' in /IndexSchema_p.html - added instance handling as addition to solr connections: all solr connectors are now instances of an solr 'instance' object; this required a complete re-design of the solr embedding - migrated also caching and sharding ontop of new instance handling - migrated the search apis to handle now the access to a specific core, the default core named 'collection1' - migrated the remote solr search interface to access shards of cores; for the yacy remote search the default core is now called 'solr'; using the peer address as solr address - migrated the solr backup and restore process: old backups cannot be used after this migration! - redesign of solr instance handling in all methods which access the instances: they cannot hold copies of these instances any more; the must retrieve the actuall connection object every time they want to write to it (this solves also some bugs when switching the index/network) - added another schema 'solr.webgraph.schema', the old solr.keys.list is replaced by solr.collection.schema 12 years ago			`String schemaName = CollectionSchema.CORE_NAME;`
			`if (post != null) schemaName = post.get("core", schemaName);`

Full redesign of solr connection architecture. This was done to support multiple solr cores instead of just one. Therefore it is now necessary to distinguish between solr server connections (called an 'Instance') and a connection to a single solr core. One Instance may now have multiple connector classes assigned to it, each connecting to a single core. To support multiple cores it is also necessary to distinguish between the connection configuration and the configuration of the index schema. We will have multiple schema configurations in the future, each for every solr core. This caused that the IndexFederated servlet had to be split into two parts, the new Servlet for the Schema editor is now in the IndexSchema Servlet. 12 years ago			`// write schema`
added automatic generation of a solr schema.xml file 13 years ago			`int c = 0;`
introduced a second core named 'webgraph'. This core will hold the link structure, but is not filled yet. To have the opportunity of a second core, multi-core functionality had to be implemented to the deep-embedded solr: - migrated the solr_40 directory content to a subdirectory 'collection1'; the previously used default core is now called collection1 - added solr_40/webgraph subdirectory as second core - added a servlet configuration for the second core 'webgraph' in /IndexSchema_p.html - added instance handling as addition to solr connections: all solr connectors are now instances of an solr 'instance' object; this required a complete re-design of the solr embedding - migrated also caching and sharding ontop of new instance handling - migrated the search apis to handle now the access to a specific core, the default core named 'collection1' - migrated the remote solr search interface to access shards of cores; for the yacy remote search the default core is now called 'solr'; using the peer address as solr address - migrated the solr backup and restore process: old backups cannot be used after this migration! - redesign of solr instance handling in all methods which access the instances: they cannot hold copies of these instances any more; the must retrieve the actuall connection object every time they want to write to it (this solves also some bugs when switching the index/network) - added another schema 'solr.webgraph.schema', the old solr.keys.list is replaced by solr.collection.schema 12 years ago			`SchemaConfiguration solrSchema = schemaName.equals(CollectionSchema.CORE_NAME) ? sb.index.fulltext().getDefaultConfiguration() : sb.index.fulltext().getWebgraphConfiguration();`
			`SchemaDeclaration[] cc = schemaName.equals(CollectionSchema.CORE_NAME) ? CollectionSchema.values() : WebgraphSchema.values();`
			`for (SchemaDeclaration field : cc) {`
Full redesign of solr connection architecture. This was done to support multiple solr cores instead of just one. Therefore it is now necessary to distinguish between solr server connections (called an 'Instance') and a connection to a single solr core. One Instance may now have multiple connector classes assigned to it, each connecting to a single core. To support multiple cores it is also necessary to distinguish between the connection configuration and the configuration of the index schema. We will have multiple schema configurations in the future, each for every solr core. This caused that the IndexFederated servlet had to be split into two parts, the new Servlet for the Schema editor is now in the IndexSchema Servlet. 12 years ago			`if (solrSchema.contains(field.name())) {`
- add the copyField author_sxt only if author exists - set the solr default search field according to existing fields 12 years ago			`addField(prop, c, field);`
Add possibility to set custom Solr field names for the YaCy default Solr attributes. - Changing the format of YaCy's solr.key.list while maintaining backward compatibility Federated index config screens adjusted accordingly - modified the Solr update request to use a 3 min Solr autocommit interval 13 years ago			`c++;`
added automatic generation of a solr schema.xml file 13 years ago			`}`
			`}`
fix for schema export to consider also automatically generated coordinate fields 12 years ago			`if (schemaName.equals(CollectionSchema.CORE_NAME)) {`
			`// add additional coordinate field for collection1`
			`if (solrSchema.contains(CollectionSchema.coordinate_p)) {`
			`addField(prop, c, CollectionSchema.coordinate_p_0_coordinate);`
			`c++;`
			`addField(prop, c, CollectionSchema.coordinate_p_1_coordinate);`
			`c++;`
			`}`
			`}`

bugfixes and more logging for solr connector 12 years ago			`//if (solrScheme.contains(YaCySchema.author)) {addField(prop, c, YaCySchema.author_sxt);}`
added automatic generation of a solr schema.xml file 13 years ago			`prop.put("fields", c);`
- added @SuppressWarnings to unused servlet method parameters - removed unnecessary casts - removed unnecessary throw statements 13 years ago
introduced a second core named 'webgraph'. This core will hold the link structure, but is not filled yet. To have the opportunity of a second core, multi-core functionality had to be implemented to the deep-embedded solr: - migrated the solr_40 directory content to a subdirectory 'collection1'; the previously used default core is now called collection1 - added solr_40/webgraph subdirectory as second core - added a servlet configuration for the second core 'webgraph' in /IndexSchema_p.html - added instance handling as addition to solr connections: all solr connectors are now instances of an solr 'instance' object; this required a complete re-design of the solr embedding - migrated also caching and sharding ontop of new instance handling - migrated the search apis to handle now the access to a specific core, the default core named 'collection1' - migrated the remote solr search interface to access shards of cores; for the yacy remote search the default core is now called 'solr'; using the peer address as solr address - migrated the solr backup and restore process: old backups cannot be used after this migration! - redesign of solr instance handling in all methods which access the instances: they cannot hold copies of these instances any more; the must retrieve the actuall connection object every time they want to write to it (this solves also some bugs when switching the index/network) - added another schema 'solr.webgraph.schema', the old solr.keys.list is replaced by solr.collection.schema 12 years ago			`if (schemaName.equals(CollectionSchema.CORE_NAME)) {`
			`prop.put("copyFieldAuthor", solrSchema.contains(CollectionSchema.author) ? 1 : 0);`

			`prop.put("solruniquekey", CollectionSchema.id.getSolrFieldName());`
			`prop.put("solrdefaultsearchfield",`
			`solrSchema.contains(CollectionSchema.text_t) ? CollectionSchema.text_t.getSolrFieldName() :`
			`solrSchema.contains(CollectionSchema.fuzzy_signature_text_t) ? CollectionSchema.fuzzy_signature_text_t.getSolrFieldName() :`
			`solrSchema.contains(CollectionSchema.h1_txt) ? CollectionSchema.h1_txt.getSolrFieldName() :`
			`CollectionSchema.id.getSolrFieldName()`
			`);`
			`} else {`
			`prop.put("copyFieldAuthor", 0);`

			`prop.put("solruniquekey", WebgraphSchema.id.getSolrFieldName());`
			`prop.put("solrdefaultsearchfield",`
			`solrSchema.contains(WebgraphSchema.target_linktext_t) ? WebgraphSchema.target_linktext_t.getSolrFieldName() :`
			`solrSchema.contains(WebgraphSchema.target_name_t) ? WebgraphSchema.target_name_t.getSolrFieldName() :`
			`solrSchema.contains(WebgraphSchema.target_alt_t) ? WebgraphSchema.target_alt_t.getSolrFieldName() :`
			`WebgraphSchema.id.getSolrFieldName()`
			`);`
			`}`
made the index schema retrieval public and allow cross-domain retrieval 12 years ago
			`// add CORS Access header`
			`final ResponseHeader outgoingHeader = new ResponseHeader(200);`
			`outgoingHeader.put(HeaderFramework.CORS_ALLOW_ORIGIN, "*");`
			`prop.setOutgoingHeader(outgoingHeader);`

added automatic generation of a solr schema.xml file 13 years ago			`// return rewrite properties`
			`return prop;`
			`}`
- add the copyField author_sxt only if author exists - set the solr default search field according to existing fields 12 years ago
introduced a second core named 'webgraph'. This core will hold the link structure, but is not filled yet. To have the opportunity of a second core, multi-core functionality had to be implemented to the deep-embedded solr: - migrated the solr_40 directory content to a subdirectory 'collection1'; the previously used default core is now called collection1 - added solr_40/webgraph subdirectory as second core - added a servlet configuration for the second core 'webgraph' in /IndexSchema_p.html - added instance handling as addition to solr connections: all solr connectors are now instances of an solr 'instance' object; this required a complete re-design of the solr embedding - migrated also caching and sharding ontop of new instance handling - migrated the search apis to handle now the access to a specific core, the default core named 'collection1' - migrated the remote solr search interface to access shards of cores; for the yacy remote search the default core is now called 'solr'; using the peer address as solr address - migrated the solr backup and restore process: old backups cannot be used after this migration! - redesign of solr instance handling in all methods which access the instances: they cannot hold copies of these instances any more; the must retrieve the actuall connection object every time they want to write to it (this solves also some bugs when switching the index/network) - added another schema 'solr.webgraph.schema', the old solr.keys.list is replaced by solr.collection.schema 12 years ago			`private static void addField(servletProperties prop, int c, SchemaDeclaration field) {`
- add the copyField author_sxt only if author exists - set the solr default search field according to existing fields 12 years ago			`prop.put("fields_" + c + "_solrname", field.getSolrFieldName());`
			`prop.put("fields_" + c + "_type", field.getType().printName());`
			`prop.put("fields_" + c + "_comment", field.getComment());`
			`prop.put("fields_" + c + "_indexedChecked", field.isIndexed() ? 1 : 0);`
			`prop.put("fields_" + c + "_storedChecked", field.isStored() ? 1 : 0);`
			`prop.put("fields_" + c + "_multiValuedChecked", field.isMultiValued() ? 1 : 0);`
			`prop.put("fields_" + c + "_omitNormsChecked", field.isOmitNorms() ? 1 : 0);`
			`}`
added automatic generation of a solr schema.xml file 13 years ago			`}`