@ -32,16 +32,13 @@ import java.io.File;
import java.io.FileInputStream ;
import java.io.FileInputStream ;
import java.io.IOException ;
import java.io.IOException ;
import java.io.InputStream ;
import java.io.InputStream ;
import java.text.Collator ;
import java.util.ArrayList ;
import java.util.ArrayList ;
import java.util.HashMap ;
import java.util.HashMap ;
import java.util.HashSet ;
import java.util.HashSet ;
import java.util.List ;
import java.util.List ;
import java.util.Locale ;
import java.util.Map ;
import java.util.Map ;
import java.util.Set ;
import java.util.Set ;
import java.util.TreeMap ;
import java.util.concurrent.ConcurrentHashMap ;
import java.util.TreeSet ;
import net.yacy.document.parser.bzipParser ;
import net.yacy.document.parser.bzipParser ;
import net.yacy.document.parser.csvParser ;
import net.yacy.document.parser.csvParser ;
@ -73,18 +70,11 @@ public final class TextParser {
private static final Log log = new Log ( "PARSER" ) ;
private static final Log log = new Log ( "PARSER" ) ;
// use a collator to relax the distinction between lowercase and uppercase letters
private static final Map < String , Idiom > mime2parser = new ConcurrentHashMap < String , Idiom > ( ) ;
private static final Collator insensitiveCollator = Collator . getInstance ( Locale . US ) ;
private static final Map < String , Idiom > ext2parser = new ConcurrentHashMap < String , Idiom > ( ) ;
static {
private static final Map < String , String > ext2mime = new ConcurrentHashMap < String , String > ( ) ;
insensitiveCollator . setStrength ( Collator . SECONDARY ) ;
private static final Map < String , Object > denyMime = new ConcurrentHashMap < String , Object > ( ) ;
insensitiveCollator . setDecomposition ( Collator . NO_DECOMPOSITION ) ;
private static final Map < String , Object > denyExtensionx = new ConcurrentHashMap < String , Object > ( ) ;
}
private static final Map < String , Idiom > mime2parser = new TreeMap < String , Idiom > ( insensitiveCollator ) ;
private static final Map < String , Idiom > ext2parser = new TreeMap < String , Idiom > ( insensitiveCollator ) ;
private static final Map < String , String > ext2mime = new TreeMap < String , String > ( insensitiveCollator ) ;
private static final Set < String > denyMime = new TreeSet < String > ( insensitiveCollator ) ;
private static final Set < String > denyExtension = new TreeSet < String > ( insensitiveCollator ) ;
static {
static {
initParser ( new bzipParser ( ) ) ;
initParser ( new bzipParser ( ) ) ;
@ -130,6 +120,7 @@ public final class TextParser {
}
}
if ( prototypeMime ! = null ) for ( String ext : parser . supportedExtensions ( ) ) {
if ( prototypeMime ! = null ) for ( String ext : parser . supportedExtensions ( ) ) {
ext = ext . toLowerCase ( ) ;
String s = ext2mime . get ( ext ) ;
String s = ext2mime . get ( ext ) ;
if ( s ! = null ) log . logSevere ( "parser for extension '" + ext + "' was set to mime '" + s + "', overwriting with new mime '" + prototypeMime + "'." ) ;
if ( s ! = null ) log . logSevere ( "parser for extension '" + ext + "' was set to mime '" + s + "', overwriting with new mime '" + prototypeMime + "'." ) ;
ext2mime . put ( ext , prototypeMime ) ;
ext2mime . put ( ext , prototypeMime ) ;
@ -137,6 +128,7 @@ public final class TextParser {
for ( String ext : parser . supportedExtensions ( ) ) {
for ( String ext : parser . supportedExtensions ( ) ) {
// process the extensions
// process the extensions
ext = ext . toLowerCase ( ) ;
Idiom p0 = ext2parser . get ( ext ) ;
Idiom p0 = ext2parser . get ( ext ) ;
if ( p0 ! = null ) log . logSevere ( "parser for extension '" + ext + "' was set to '" + p0 . getName ( ) + "', overwriting with new parser '" + parser . getName ( ) + "'." ) ;
if ( p0 ! = null ) log . logSevere ( "parser for extension '" + ext + "' was set to '" + p0 . getName ( ) + "', overwriting with new parser '" + parser . getName ( ) + "'." ) ;
ext2parser . put ( ext , parser ) ;
ext2parser . put ( ext , parser ) ;
@ -318,7 +310,8 @@ public final class TextParser {
String ext = url . getFileExtension ( ) ;
String ext = url . getFileExtension ( ) ;
Idiom idiom ;
Idiom idiom ;
if ( ext ! = null & & ext . length ( ) > 0 ) {
if ( ext ! = null & & ext . length ( ) > 0 ) {
if ( denyExtension . contains ( ext ) ) throw new ParserException ( "file extension '" + ext + "' is denied (1)" , url ) ;
ext = ext . toLowerCase ( ) ;
if ( denyExtensionx . containsKey ( ext ) ) throw new ParserException ( "file extension '" + ext + "' is denied (1)" , url ) ;
idiom = ext2parser . get ( ext ) ;
idiom = ext2parser . get ( ext ) ;
if ( idiom ! = null ) idioms . add ( idiom ) ;
if ( idiom ! = null ) idioms . add ( idiom ) ;
}
}
@ -326,14 +319,14 @@ public final class TextParser {
// check given mime type
// check given mime type
if ( mimeType1 ! = null ) {
if ( mimeType1 ! = null ) {
mimeType1 = normalizeMimeType ( mimeType1 ) ;
mimeType1 = normalizeMimeType ( mimeType1 ) ;
if ( denyMime . contains ( mimeType1 ) ) throw new ParserException ( "mime type '" + mimeType1 + "' is denied (1)" , url ) ;
if ( denyMime . contains Key ( mimeType1 ) ) throw new ParserException ( "mime type '" + mimeType1 + "' is denied (1)" , url ) ;
idiom = mime2parser . get ( mimeType1 ) ;
idiom = mime2parser . get ( mimeType1 ) ;
if ( idiom ! = null & & ! idioms . contains ( idiom ) ) idioms . add ( idiom ) ;
if ( idiom ! = null & & ! idioms . contains ( idiom ) ) idioms . add ( idiom ) ;
}
}
// check mime type computed from extension
// check mime type computed from extension
String mimeType2 = ext2mime . get ( ext ) ;
String mimeType2 = ext2mime . get ( ext ) ;
if ( mimeType2 = = null | | denyMime . contains ( mimeType2 ) ) return idioms ; // in this case we are a bit more lazy
if ( mimeType2 = = null | | denyMime . contains Key ( mimeType2 ) ) return idioms ; // in this case we are a bit more lazy
idiom = mime2parser . get ( mimeType2 ) ;
idiom = mime2parser . get ( mimeType2 ) ;
if ( idiom ! = null & & ! idioms . contains ( idiom ) ) idioms . add ( idiom ) ;
if ( idiom ! = null & & ! idioms . contains ( idiom ) ) idioms . add ( idiom ) ;
@ -346,15 +339,15 @@ public final class TextParser {
public static String supportsMime ( String mimeType ) {
public static String supportsMime ( String mimeType ) {
if ( mimeType = = null ) return null ;
if ( mimeType = = null ) return null ;
mimeType = normalizeMimeType ( mimeType ) ;
mimeType = normalizeMimeType ( mimeType ) ;
if ( denyMime . contains ( mimeType ) ) return "mime type '" + mimeType + "' is denied (2)" ;
if ( denyMime . contains Key ( mimeType ) ) return "mime type '" + mimeType + "' is denied (2)" ;
if ( mime2parser . get ( mimeType ) = = null ) return "no parser for mime '" + mimeType + "' available" ;
if ( mime2parser . get ( mimeType ) = = null ) return "no parser for mime '" + mimeType + "' available" ;
return null ;
return null ;
}
}
public static String supportsExtension ( final DigestURI url ) {
public static String supportsExtension ( final DigestURI url ) {
String ext = url . getFileExtension ( ) ;
String ext = url . getFileExtension ( ) .toLowerCase ( ) ;
if ( ext = = null | | ext . length ( ) = = 0 ) return null ;
if ( ext = = null | | ext . length ( ) = = 0 ) return null ;
if ( denyExtension . contains ( ext ) ) return "file extension '" + ext + "' is denied (2)" ;
if ( denyExtension x . contains Key ( ext ) ) return "file extension '" + ext + "' is denied (2)" ;
String mimeType = ext2mime . get ( ext ) ;
String mimeType = ext2mime . get ( ext ) ;
if ( mimeType = = null ) return "no parser available" ;
if ( mimeType = = null ) return "no parser available" ;
Idiom idiom = mime2parser . get ( mimeType ) ;
Idiom idiom = mime2parser . get ( mimeType ) ;
@ -368,11 +361,12 @@ public final class TextParser {
}
}
public static String mimeOf ( String ext ) {
public static String mimeOf ( String ext ) {
return ext2mime . get ( ext );
return ext2mime . get ( ext .toLowerCase ( ) );
}
}
private static String normalizeMimeType ( String mimeType ) {
private static String normalizeMimeType ( String mimeType ) {
if ( mimeType = = null ) return "application/octet-stream" ;
if ( mimeType = = null ) return "application/octet-stream" ;
mimeType = mimeType . toLowerCase ( ) ;
final int pos = mimeType . indexOf ( ';' ) ;
final int pos = mimeType . indexOf ( ';' ) ;
return ( ( pos < 0 ) ? mimeType . trim ( ) : mimeType . substring ( 0 , pos ) . trim ( ) ) ;
return ( ( pos < 0 ) ? mimeType . trim ( ) : mimeType . substring ( 0 , pos ) . trim ( ) ) ;
}
}
@ -382,13 +376,13 @@ public final class TextParser {
String n ;
String n ;
for ( String s : denyList . split ( "," ) ) {
for ( String s : denyList . split ( "," ) ) {
n = normalizeMimeType ( s ) ;
n = normalizeMimeType ( s ) ;
if ( n ! = null & & n . length ( ) > 0 ) denyMime . add( n ) ;
if ( n ! = null & & n . length ( ) > 0 ) denyMime . put( n , null ) ;
}
}
}
}
public static String getDenyMime ( ) {
public static String getDenyMime ( ) {
String s = "" ;
String s = "" ;
for ( String d : denyMime ) s + = d + "," ;
for ( String d : denyMime .keySet ( ) ) s + = d + "," ;
if ( s . length ( ) > 0 ) s = s . substring ( 0 , s . length ( ) - 1 ) ;
if ( s . length ( ) > 0 ) s = s . substring ( 0 , s . length ( ) - 1 ) ;
return s ;
return s ;
}
}
@ -396,22 +390,22 @@ public final class TextParser {
public static void grantMime ( String mime , boolean grant ) {
public static void grantMime ( String mime , boolean grant ) {
String n = normalizeMimeType ( mime ) ;
String n = normalizeMimeType ( mime ) ;
if ( n = = null | | n . length ( ) = = 0 ) return ;
if ( n = = null | | n . length ( ) = = 0 ) return ;
if ( grant ) denyMime . remove ( n ) ; else denyMime . add( n ) ;
if ( grant ) denyMime . remove ( n ) ; else denyMime . put( n , null ) ;
}
}
public static void setDenyExtension ( String denyList ) {
public static void setDenyExtension ( String denyList ) {
denyExtension . clear ( ) ;
denyExtension x . clear ( ) ;
for ( String s : denyList . split ( "," ) ) denyExtension . add ( s ) ;
for ( String s : denyList . split ( "," ) ) denyExtension x. put ( s , null ) ;
}
}
public static String getDenyExtension ( ) {
public static String getDenyExtension ( ) {
String s = "" ;
String s = "" ;
for ( String d : denyExtension ) s + = d + "," ;
for ( String d : denyExtension x. keySet ( ) ) s + = d + "," ;
s = s . substring ( 0 , s . length ( ) - 1 ) ;
s = s . substring ( 0 , s . length ( ) - 1 ) ;
return s ;
return s ;
}
}
public static void grantExtension ( String ext , boolean grant ) {
public static void grantExtension ( String ext , boolean grant ) {
if ( grant ) denyExtension . remove ( ext ) ; else denyExtension . add ( ext ) ;
if ( grant ) denyExtension x . remove ( ext ) ; else denyExtension x. put ( ext , null ) ;
}
}
}
}