* added class ListDirs to provide generic listing of directories in system directories and jar-files

* yacy now runs when its classes are packaged in a jar-file (-> build-jar ant-target)


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4971 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
f1ori 17 years ago
parent f2e2d09916
commit b6301a54fa

@ -929,6 +929,7 @@
<pathelement location="${htroot}"/>
<pathelement location="${lib}/tar.jar" />
<pathelement location="${libx}" />
<fileset dir="${lib}" includes="**/*.jar" />
<fileset dir="${libx}" includes="**/*.jar" />
</classpath>
<arg line="-stop"/>

@ -38,6 +38,7 @@ import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
@ -49,6 +50,8 @@ import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import de.anomic.crawler.ErrorURL;
import de.anomic.htmlFilter.htmlFilterContentScraper;
@ -63,6 +66,7 @@ import de.anomic.plasma.parser.ParserInfo;
import de.anomic.server.serverFileUtils;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyURL;
import de.anomic.tools.ListDirs;
public final class plasmaParser {
public static final String PARSER_MODE_PROXY = "PROXY";
@ -438,87 +442,83 @@ public final class plasmaParser {
serverLog.logInfo("PARSER","Searching for additional content parsers in package " + plasmaParserPkgName);
// getting an uri to the parser subpackage
String packageURI = plasmaParser.class.getResource("/"+plasmaParserPkgName.replace('.','/')).toString();
String packageURI = plasmaParser.class.getResource("/"+plasmaParserPkgName.replace('.','/')).toString() + "/";
serverLog.logFine("PARSER", "Parser directory is " + packageURI);
// open the parser directory
File parserDir = new File(new URI(packageURI));
if ((parserDir == null) || (!parserDir.exists()) || (!parserDir.isDirectory())) return;
/*
* loop through all subdirectories and test if we can
* find an additional parser class
*/
File[] parserDirectories = parserDir.listFiles(parserDirectoryFilter);
if (parserDirectories == null) return;
for (int parserDirNr=0; parserDirNr< parserDirectories.length; parserDirNr++) {
File currentDir = parserDirectories[parserDirNr];
serverLog.logFine("PARSER", "Searching in directory " + currentDir.toString());
String[] parserClasses = currentDir.list(parserFileNameFilter);
if (parserClasses == null) continue;
ListDirs parserDir = new ListDirs(packageURI);
ArrayList<String> parserClasses = parserDir.listFiles(".*/parser/[^/]+/[^/]+Parser\\.class");
if (parserClasses == null) return;
final Pattern patternGetClassName = Pattern.compile(".*/([^/]+)\\.class");
final Pattern patternGetFullClassName = Pattern.compile(".*(/[^/]+/[^/]+)\\.class");
for (int parserNr=0; parserNr<parserClasses.length; parserNr++) {
serverLog.logFine("PARSER", "Testing parser class " + parserClasses[parserNr]);
String className = parserClasses[parserNr].substring(0,parserClasses[parserNr].indexOf(".class"));
String fullClassName = plasmaParserPkgName + "." + currentDir.getName() + "." + className;
try {
// trying to load the parser class by its name
Class<?> parserClass = Class.forName(fullClassName);
Object theParser0 = parserClass.newInstance();
if (!(theParser0 instanceof Parser)) continue;
Parser theParser = (Parser) theParser0;
// testing if all needed libx libraries are available
String[] neededLibx = theParser.getLibxDependences();
StringBuffer neededLibxBuf = new StringBuffer();
if (neededLibx != null) {
for (int libxId=0; libxId < neededLibx.length; libxId++) {
if (javaClassPath.indexOf(neededLibx[libxId]) == -1) {
throw new Exception("Missing dependency detected: '" + neededLibx[libxId] + "'.");
}
neededLibxBuf.append(neededLibx[libxId])
.append(",");
}
if (neededLibxBuf.length()>0) neededLibxBuf.deleteCharAt(neededLibxBuf.length()-1);
}
// loading the list of mime-types that are supported by this parser class
Hashtable<String, String> supportedMimeTypes = theParser.getSupportedMimeTypes();
// creating a parser info object
ParserInfo parserInfo = new ParserInfo();
parserInfo.parserClass = parserClass;
parserInfo.parserClassName = fullClassName;
parserInfo.libxDependencies = neededLibx;
parserInfo.supportedMimeTypes = supportedMimeTypes;
parserInfo.parserVersionNr = (theParser).getVersion();
parserInfo.parserName = (theParser).getName();
Iterator<String> mimeTypeIterator = supportedMimeTypes.keySet().iterator();
while (mimeTypeIterator.hasNext()) {
String mimeType = mimeTypeIterator.next();
availableParserList.put(mimeType, parserInfo);
serverLog.logInfo("PARSER", "Found functional parser for mimeType '" + mimeType + "'." +
"\n\tName: " + parserInfo.parserName +
"\n\tVersion: " + parserInfo.parserVersionNr +
"\n\tClass: " + parserInfo.parserClassName +
((neededLibxBuf.length()>0)?"\n\tDependencies: " + neededLibxBuf.toString():""));
}
} catch (Exception e) { /* we can ignore this for the moment */
serverLog.logWarning("PARSER", "Parser '" + className + "' doesn't work correctly and will be ignored.\n [" + e.getClass().getName() + "]: " + e.getMessage());
e.printStackTrace();
} catch (Error e) { /* we can ignore this for the moment */
serverLog.logWarning("PARSER", "Parser '" + className + "' doesn't work correctly and will be ignored.\n [" + e.getClass().getName() + "]: " + e.getMessage());
e.printStackTrace();
}
}
}
for (String parserClassFile: parserClasses) {
serverLog.logFine("PARSER", "Testing parser class " + parserClassFile);
Matcher matcherClassName = patternGetClassName.matcher(parserClassFile);
matcherClassName.find();
String className = matcherClassName.group(1);
Matcher matcherFullClassName = patternGetFullClassName.matcher(parserClassFile);
matcherFullClassName.find();
String fullClassName = plasmaParserPkgName + matcherFullClassName.group(1).replace("/", ".");
try {
// trying to load the parser class by its name
Class<?> parserClass = Class.forName(fullClassName);
Object theParser0 = parserClass.newInstance();
if (!(theParser0 instanceof Parser)) continue;
Parser theParser = (Parser) theParser0;
// testing if all needed libx libraries are available
String[] neededLibx = theParser.getLibxDependences();
StringBuffer neededLibxBuf = new StringBuffer();
if (neededLibx != null) {
for (int libxId=0; libxId < neededLibx.length; libxId++) {
if (javaClassPath.indexOf(neededLibx[libxId]) == -1) {
throw new Exception("Missing dependency detected: '" + neededLibx[libxId] + "'.");
}
neededLibxBuf.append(neededLibx[libxId])
.append(",");
}
if (neededLibxBuf.length()>0) neededLibxBuf.deleteCharAt(neededLibxBuf.length()-1);
}
// loading the list of mime-types that are supported by this parser class
Hashtable<String, String> supportedMimeTypes = theParser.getSupportedMimeTypes();
// creating a parser info object
ParserInfo parserInfo = new ParserInfo();
parserInfo.parserClass = parserClass;
parserInfo.parserClassName = fullClassName;
parserInfo.libxDependencies = neededLibx;
parserInfo.supportedMimeTypes = supportedMimeTypes;
parserInfo.parserVersionNr = (theParser).getVersion();
parserInfo.parserName = (theParser).getName();
Iterator<String> mimeTypeIterator = supportedMimeTypes.keySet().iterator();
while (mimeTypeIterator.hasNext()) {
String mimeType = mimeTypeIterator.next();
availableParserList.put(mimeType, parserInfo);
serverLog.logInfo("PARSER", "Found functional parser for mimeType '" + mimeType + "'." +
"\n\tName: " + parserInfo.parserName +
"\n\tVersion: " + parserInfo.parserVersionNr +
"\n\tClass: " + parserInfo.parserClassName +
((neededLibxBuf.length()>0)?"\n\tDependencies: " + neededLibxBuf.toString():""));
}
} catch (Exception e) { /* we can ignore this for the moment */
serverLog.logWarning("PARSER", "Parser '" + className + "' doesn't work correctly and will be ignored.\n [" + e.getClass().getName() + "]: " + e.getMessage());
e.printStackTrace();
} catch (Error e) { /* we can ignore this for the moment */
serverLog.logWarning("PARSER", "Parser '" + className + "' doesn't work correctly and will be ignored.\n [" + e.getClass().getName() + "]: " + e.getMessage());
e.printStackTrace();
}
}
} catch (Exception e) {
serverLog.logSevere("PARSER", "Unable to determine all installed parsers. " + e.getMessage());
serverLog.logSevere("PARSER", "Unable to determine all installed parsers. " + e.toString());
}
}

@ -46,18 +46,22 @@ package de.anomic.server.logging;
import java.io.File;
import java.io.FilenameFilter;
import java.net.URI;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Set;
import java.util.ArrayList;
import java.util.logging.Handler;
import java.util.logging.LogManager;
import java.util.logging.LogRecord;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import de.anomic.plasma.plasmaParser;
import de.anomic.server.logging.logParsers.LogParser;
import de.anomic.tools.ListDirs;
public class LogalizerHandler extends Handler {
@ -80,17 +84,17 @@ public class LogalizerHandler extends Handler {
String packageURI = plasmaParser.class.getResource("/"+logParserPackage.replace('.','/')).toString();
if (debug) System.out.println("LogParser directory is " + packageURI);
File parserDir = new File(new URI(packageURI));
//System.out.println(parserDir.toString());
String [] parserDirFiles = parserDir.list(parserNameFilter);
if(parserDirFiles == null && debug) {
System.err.println("Can't find any parsers in "+parserDir.getAbsolutePath());
parserDirFiles = new String[0];
ListDirs parserDir = new ListDirs(packageURI);
ArrayList<String> parserDirFiles = parserDir.listFiles(".*\\.class");
if(parserDirFiles.size() == 0 && debug) {
System.out.println("Can't find any parsers in "+parserDir.toString());
}
//System.out.println(parserDirFiles.length);
for (int i=0; i<parserDirFiles.length; i++) {
String tmp = parserDirFiles[i].substring(0,parserDirFiles[i].indexOf(".class"));
Class<?> tempClass = Class.forName(logParserPackage+"."+tmp);
for(String filename: parserDirFiles) {
final Pattern patternGetClassName = Pattern.compile(".*/([^/]+)\\.class");
Matcher matcherClassName = patternGetClassName.matcher(filename);
matcherClassName.find();
String className = matcherClassName.group(1);
Class<?> tempClass = Class.forName(logParserPackage+"."+className);
if (tempClass.isInterface()) {
if (debug) System.out.println(tempClass.getName() + " is an Interface");
} else {
@ -115,6 +119,8 @@ public class LogalizerHandler extends Handler {
e.printStackTrace();
} catch (IllegalAccessException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} catch (URISyntaxException e) {
e.printStackTrace();
}

@ -0,0 +1,108 @@
// ListDirs.java
// (C) 2008 by Florian Richter <Florian_Richter@gmx.de>
// first published 06.07.2008 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: $
// $LastChangedRevision: $
// $LastChangedBy: $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.tools;
import java.io.File;
import java.io.FileFilter;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
import java.util.regex.Pattern;
import de.anomic.server.logging.serverLog;
/**
 * Lists the files found below a location that is either a directory in the
 * file system or a directory inside a jar file.
 *
 * <p>The location is given as a URI string. A string starting with
 * {@code jar:} (for example {@code jar:file:/path/app.jar!/some/package/})
 * is treated as a jar location; any other string is handed to
 * {@link File#File(URI)} and treated as a directory.</p>
 *
 * <p>In jar mode the returned names are the jar entry names (relative to the
 * jar root, separated by {@code /}). In directory mode they are the paths of
 * the files found recursively below the directory, with the platform
 * separator replaced by {@code /} so that one regular expression works for
 * both modes and on every platform.</p>
 */
public class ListDirs {

    /** Scheme prefix that marks a jar location. */
    private static final String JAR_PREFIX = "jar:";

    /** The URI string this instance was created with. */
    private final String uri;

    /** Start directory in directory mode; {@code null} in jar mode. */
    private final File directory;

    /**
     * Names of all non-directory jar entries below the requested path;
     * {@code null} in directory mode.
     */
    private final ArrayList<String> jarEntryNames;

    /**
     * Creates a lister for the given location.
     *
     * <p>In jar mode the jar is opened, its entry names are read and the jar
     * is closed again before the constructor returns, so no file handle is
     * kept open for the lifetime of this object.</p>
     *
     * @param uri location to list, either {@code jar:<file-uri>!/<path>} or a
     *            URI accepted by {@link File#File(URI)}
     * @throws IOException if the jar file cannot be opened or read
     * @throws URISyntaxException if the URI cannot be parsed, or if a jar URI
     *             has no {@code !} separator or does not point to a local file
     */
    public ListDirs(String uri) throws IOException, URISyntaxException {
        this.uri = uri;
        if (uri.startsWith(JAR_PREFIX)) {
            int separator = uri.indexOf('!');
            if (separator < 0) {
                throw new URISyntaxException(uri, "jar URI without '!' separator");
            }
            // Going through URI/File decodes escapes such as %20 in the jar path.
            File jarLocation;
            try {
                jarLocation = new File(new URI(uri.substring(JAR_PREFIX.length(), separator)));
            } catch (IllegalArgumentException e) {
                throw new URISyntaxException(uri, "jar URI does not point to a local file");
            }
            // Jar entry names carry no leading slash, so drop the one after '!'.
            String inner = uri.substring(separator + 1);
            String pathInJar = inner.startsWith("/") ? inner.substring(1) : inner;
            this.directory = null;
            this.jarEntryNames = readJarEntryNames(jarLocation, pathInJar);
        } else {
            this.directory = new File(new URI(uri));
            this.jarEntryNames = null;
        }
    }

    /**
     * Returns all files below the location whose complete name matches the
     * given regular expression.
     *
     * @param regex regular expression that must match the whole file name
     * @return the matching names; empty if nothing matches, never {@code null}
     */
    public ArrayList<String> listFiles(String regex) {
        // Compile once instead of once per candidate file.
        Pattern pattern = Pattern.compile(regex);
        ArrayList<String> matches = new ArrayList<String>();
        for (String file : getAllFiles()) {
            if (pattern.matcher(file).matches()) {
                matches.add(file);
            }
        }
        return matches;
    }

    /** Collects the names of all files below the location, unfiltered. */
    private ArrayList<String> getAllFiles() {
        if (jarEntryNames != null) {
            return new ArrayList<String>(jarEntryNames);
        }
        ArrayList<String> files = new ArrayList<String>(50);
        for (File file : getFilesRecursive(directory)) {
            // Normalise separators so '/'-based patterns also match on Windows.
            files.add(file.toString().replace(File.separatorChar, '/'));
        }
        return files;
    }

    /** Reads the names of all non-directory entries of the jar that start with the prefix. */
    private static ArrayList<String> readJarEntryNames(File jarLocation, String prefix) throws IOException {
        ArrayList<String> names = new ArrayList<String>(50);
        JarFile jar = new JarFile(jarLocation);
        try {
            Enumeration<JarEntry> entries = jar.entries();
            while (entries.hasMoreElements()) {
                String name = entries.nextElement().getName();
                // Entries ending in '/' are directories and are skipped.
                if (name.startsWith(prefix) && !name.endsWith("/")) {
                    names.add(name);
                }
            }
        } finally {
            jar.close();
        }
        return names;
    }

    /** Walks the directory tree below {@code start} and returns every non-directory file. */
    private ArrayList<File> getFilesRecursive(File start) {
        ArrayList<File> completeList = new ArrayList<File>();
        File[] fileList = start.listFiles();
        if (fileList == null) {
            // Not a directory, or it cannot be read: nothing to list.
            return completeList;
        }
        for (File file : fileList) {
            if (file.isDirectory()) {
                completeList.addAll(getFilesRecursive(file));
            } else {
                completeList.add(file);
            }
        }
        return completeList;
    }

    /**
     * Returns the URI string this instance was created with.
     */
    @Override
    public String toString() {
        return this.uri;
    }
}
Loading…
Cancel
Save