without the file extension. This part of the file path is removed from
the multi-field url_paths_sxt, which now no longer has the file name as
the last part of the path list.
The same applies to the new fields source_file_name_s and
target_file_name_s in the webgraph schema.
// request and response headers may be zero in case that we process surrogates
this.requestHeader=newRequestHeader();
this.responseHeader=newResponseHeader(200);
this.responseHeader.put(HeaderFramework.CONTENT_TYPE,Classification.ext2mime(request.url().getFileExtension(),"text/plain"));// tell parser how to handle the content
this.responseHeader.put(HeaderFramework.CONTENT_TYPE,Classification.ext2mime(MultiProtocolURI.getFileExtension(request.url().getFileName()),"text/plain"));// tell parser how to handle the content
@ -194,7 +194,7 @@ public final class TextParser {
try{
idioms=parsers(location,mimeType);
}catch(finalParser.Failuree){
finalStringerrorMsg="Parser Failure for extension '"+location.getFileExtension()+"' or mimetype '"+mimeType+"': "+e.getMessage();
finalStringerrorMsg="Parser Failure for extension '"+MultiProtocolURI.getFileExtension(location.getFileName())+"' or mimetype '"+mimeType+"': "+e.getMessage();
AbstractParser.log.logWarning(errorMsg);
thrownewParser.Failure(errorMsg,location);
}
@ -218,7 +218,7 @@ public final class TextParser {
try{
idioms=parsers(location,mimeType);
}catch(finalParser.Failuree){
finalStringerrorMsg="Parser Failure for extension '"+location.getFileExtension()+"' or mimetype '"+mimeType+"': "+e.getMessage();
finalStringerrorMsg="Parser Failure for extension '"+MultiProtocolURI.getFileExtension(location.getFileName())+"' or mimetype '"+mimeType+"': "+e.getMessage();
AbstractParser.log.logWarning(errorMsg);
thrownewParser.Failure(errorMsg,location);
}
@ -252,7 +252,7 @@ public final class TextParser {
finalInputStreamsourceStream
)throwsParser.Failure{
if(AbstractParser.log.isFine())AbstractParser.log.logFine("Parsing '"+location+"' from stream");
if(AbstractParser.log.isFine())AbstractParser.log.logFine("Parsing "+location+" with mimeType '"+mimeType+"' and file extension '"+fileExt+"' from byte[]");
@ -152,8 +152,9 @@ public enum CollectionSchema implements SchemaDeclaration {
publisher_url_s(SolrType.string,true,true,false,false,false,"publisher url as defined in http://support.google.com/plus/answer/1713826?hl=de"),
url_protocol_s(SolrType.string,true,true,false,false,false,"the protocol of the url"),
url_paths_sxt(SolrType.string,true,true,true,false,true,"all path elements in the url"),
url_file_name_s(SolrType.string,true,true,false,false,false,"the file name (which is the string after the last '/' and before the query part from '?' on) without the file extension"),
url_file_ext_s(SolrType.string,true,true,false,false,false,"the file name extension"),
url_paths_sxt(SolrType.string,true,true,true,false,true,"all path elements in the url hpath (see: http://www.ietf.org/rfc/rfc1738.txt) without the file name"),
url_parameter_i(SolrType.num_integer,true,true,false,false,false,"number of key-value pairs in search part of the url"),
url_parameter_key_sxt(SolrType.string,true,true,true,false,false,"the keys from key-value pairs in the search part of the url"),
url_parameter_value_sxt(SolrType.string,true,true,true,false,false,"the values from key-value pairs in the search part of the url"),