|
|
|
@ -52,6 +52,7 @@ import java.io.FileInputStream;
|
|
|
|
|
import java.io.FilenameFilter;
|
|
|
|
|
import java.io.IOException;
|
|
|
|
|
import java.io.InputStream;
|
|
|
|
|
import java.io.UnsupportedEncodingException;
|
|
|
|
|
import java.net.MalformedURLException;
|
|
|
|
|
import java.net.URI;
|
|
|
|
|
import java.util.Arrays;
|
|
|
|
@ -600,6 +601,10 @@ public final class plasmaParser {
|
|
|
|
|
}
|
|
|
|
|
return doc;
|
|
|
|
|
|
|
|
|
|
} catch (UnsupportedEncodingException e) {
|
|
|
|
|
String errorMsg = "Unsupported charset encoding: " + e.getMessage();
|
|
|
|
|
this.theLogger.logSevere("Unable to parse '" + location + "'. " + errorMsg, e);
|
|
|
|
|
throw new ParserException(errorMsg,location,plasmaCrawlEURL.DENIED_UNSUPPORTED_CHARSET);
|
|
|
|
|
} catch (Exception e) {
|
|
|
|
|
// Interrupted- and Parser-Exceptions should pass through
|
|
|
|
|
if (e instanceof InterruptedException) throw (InterruptedException) e;
|
|
|
|
|