- restructuring to allow different import tasks to be controlled via one gui - adding possibility to import a single assortment file - adding possibility to set the cache size that should be used git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1504 6c8d7289-2bf4-0310-a012-ef5d649a1542pull/1/head
parent
56936139ae
commit
6a99304b2b
@ -0,0 +1,113 @@
|
||||
package de.anomic.plasma.dbImport;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
import de.anomic.plasma.plasmaSwitchboard;
|
||||
import de.anomic.server.logging.serverLog;
|
||||
|
||||
public abstract class AbstractImporter extends Thread implements dbImporter{
|
||||
|
||||
protected int jobID;
|
||||
protected String jobType;
|
||||
protected serverLog log;
|
||||
protected boolean stopped = false;
|
||||
protected boolean paused = false;
|
||||
|
||||
protected plasmaSwitchboard sb;
|
||||
protected File importPath;
|
||||
protected int cacheSize;
|
||||
|
||||
protected long globalStart = System.currentTimeMillis();
|
||||
protected long globalEnd;
|
||||
protected String error;
|
||||
|
||||
public AbstractImporter(plasmaSwitchboard theSb) {
|
||||
super(theSb.dbImportManager.runningJobs,"");
|
||||
this.sb = theSb;
|
||||
}
|
||||
|
||||
public String getError() {
|
||||
return this.error;
|
||||
}
|
||||
|
||||
public void init(File theImportPath) {
|
||||
this.importPath = theImportPath;
|
||||
|
||||
this.jobID = this.sb.dbImportManager.getJobID();
|
||||
this.log = new serverLog("IMPORT_" + this.jobType + "_" + this.jobID);
|
||||
this.setName("IMPORT_" + this.jobType + "_" + this.sb.dbImportManager.getJobID());
|
||||
}
|
||||
|
||||
public void startIt() {
|
||||
this.start();
|
||||
}
|
||||
|
||||
public void stopIt() throws InterruptedException {
|
||||
this.stopped = true;
|
||||
this.continueIt();
|
||||
this.join();
|
||||
}
|
||||
|
||||
public void pauseIt() {
|
||||
synchronized(this) {
|
||||
this.paused = true;
|
||||
}
|
||||
}
|
||||
|
||||
public void continueIt() {
|
||||
synchronized(this) {
|
||||
if (this.paused) {
|
||||
this.paused = false;
|
||||
this.notifyAll();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isPaused() {
|
||||
synchronized(this) {
|
||||
return this.paused;
|
||||
}
|
||||
}
|
||||
|
||||
protected boolean isAborted() {
|
||||
synchronized(this) {
|
||||
if (this.paused) {
|
||||
try {
|
||||
this.wait();
|
||||
}
|
||||
catch (InterruptedException e){}
|
||||
}
|
||||
}
|
||||
|
||||
return (this.stopped) || Thread.currentThread().isInterrupted();
|
||||
}
|
||||
|
||||
public boolean isStopped() {
|
||||
return this.isAlive();
|
||||
}
|
||||
|
||||
public int getJobID() {
|
||||
return this.jobID;
|
||||
}
|
||||
|
||||
public long getTotalRuntime() {
|
||||
return (this.globalEnd == 0)?System.currentTimeMillis()-this.globalStart:this.globalEnd-this.globalStart;
|
||||
}
|
||||
|
||||
public long getElapsedTime() {
|
||||
return System.currentTimeMillis()-this.globalStart;
|
||||
}
|
||||
|
||||
public String getJobType() {
|
||||
return this.jobType;
|
||||
}
|
||||
|
||||
public File getImportPath() {
|
||||
return this.importPath;
|
||||
}
|
||||
|
||||
public abstract long getEstimatedTime();
|
||||
public abstract String getJobName();
|
||||
public abstract int getProcessingStatusPercent();
|
||||
|
||||
}
|
@ -0,0 +1,115 @@
|
||||
package de.anomic.plasma.dbImport;
|
||||
|
||||
import java.util.Vector;
|
||||
|
||||
import de.anomic.plasma.plasmaSwitchboard;
|
||||
import de.anomic.server.logging.serverLog;
|
||||
|
||||
public class dbImportManager {
|
||||
|
||||
public final Vector finishedJobs = new Vector();
|
||||
public final ThreadGroup runningJobs = new ThreadGroup("ImporterThreads");
|
||||
public int currMaxJobNr = 0;
|
||||
private plasmaSwitchboard sb;
|
||||
|
||||
public dbImportManager(plasmaSwitchboard theSb) {
|
||||
this.sb = theSb;
|
||||
}
|
||||
|
||||
public int getJobID() {
|
||||
int jobID;
|
||||
synchronized(runningJobs) {
|
||||
jobID = currMaxJobNr;
|
||||
currMaxJobNr++;
|
||||
}
|
||||
return jobID;
|
||||
}
|
||||
|
||||
public dbImporter[] getRunningImporter() {
|
||||
Thread[] importThreads = new Thread[runningJobs.activeCount()*2];
|
||||
int activeCount = runningJobs.enumerate(importThreads);
|
||||
dbImporter[] importers = new dbImporter[activeCount];
|
||||
for (int i=0; i<activeCount; i++) {
|
||||
importers[i] = (dbImporter) importThreads[i];
|
||||
}
|
||||
return importers;
|
||||
}
|
||||
|
||||
public dbImporter[] getFinishedImporter() {
|
||||
return (dbImporter[]) finishedJobs.toArray(new dbImporter[finishedJobs.size()]);
|
||||
}
|
||||
|
||||
public dbImporter getImporterByID(int jobID) {
|
||||
|
||||
Thread[] importThreads = new Thread[this.runningJobs.activeCount()*2];
|
||||
int activeCount = this.runningJobs.enumerate(importThreads);
|
||||
|
||||
for (int i=0; i < activeCount; i++) {
|
||||
dbImporter currThread = (dbImporter) importThreads[i];
|
||||
if (currThread.getJobID() == Integer.valueOf(jobID).intValue()) {
|
||||
return currThread;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public dbImporter getNewImporter(String type) {
|
||||
if (type == null) return null;
|
||||
if (type.length() == 0) return null;
|
||||
|
||||
dbImporter newImporter = null;
|
||||
if (type.equals("plasmaDB")) {
|
||||
newImporter = new plasmaDbImporter(this.sb);
|
||||
} else if (type.equalsIgnoreCase("ASSORTMENT")) {
|
||||
newImporter = new plasmaWordIndexAssortmentImporter(this.sb);
|
||||
}
|
||||
return newImporter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Can be used to close all still running importer threads
|
||||
* e.g. on server shutdown
|
||||
*/
|
||||
public void close() {
|
||||
/* waiting for all threads to finish */
|
||||
int threadCount = runningJobs.activeCount();
|
||||
Thread[] threadList = new Thread[threadCount];
|
||||
threadCount = runningJobs.enumerate(threadList);
|
||||
|
||||
if (threadCount == 0) return;
|
||||
|
||||
serverLog log = new serverLog("DB-IMPORT");
|
||||
try {
|
||||
// trying to gracefull stop all still running sessions ...
|
||||
log.logInfo("Signaling shutdown to " + threadCount + " remaining dbImporter threads ...");
|
||||
for ( int currentThreadIdx = 0; currentThreadIdx < threadCount; currentThreadIdx++ ) {
|
||||
Thread currentThread = threadList[currentThreadIdx];
|
||||
if (currentThread.isAlive()) {
|
||||
((plasmaDbImporter)currentThread).stopIt();
|
||||
}
|
||||
}
|
||||
|
||||
// waiting a few ms for the session objects to continue processing
|
||||
try { Thread.sleep(500); } catch (InterruptedException ex) {}
|
||||
|
||||
// interrupting all still running or pooled threads ...
|
||||
log.logInfo("Sending interruption signal to " + runningJobs.activeCount() + " remaining dbImporter threads ...");
|
||||
runningJobs.interrupt();
|
||||
|
||||
// we need to use a timeout here because of missing interruptable session threads ...
|
||||
log.logFine("Waiting for " + runningJobs.activeCount() + " remaining dbImporter threads to finish shutdown ...");
|
||||
for ( int currentThreadIdx = 0; currentThreadIdx < threadCount; currentThreadIdx++ ) {
|
||||
Thread currentThread = threadList[currentThreadIdx];
|
||||
if (currentThread.isAlive()) {
|
||||
log.logFine("Waiting for dbImporter thread '" + currentThread.getName() + "' [" + currentThreadIdx + "] to finish shutdown.");
|
||||
try { currentThread.join(500); } catch (InterruptedException ex) {}
|
||||
}
|
||||
}
|
||||
|
||||
log.logInfo("Shutdown of remaining dbImporter threads finished.");
|
||||
} catch (Exception e) {
|
||||
log.logSevere("Unexpected error while trying to shutdown all remaining dbImporter threads.",e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,31 @@
|
||||
package de.anomic.plasma.dbImport;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
import de.anomic.plasma.plasmaSwitchboard;
|
||||
|
||||
public interface dbImporter {
|
||||
|
||||
// functions to pause and continue importing
|
||||
public boolean isPaused();
|
||||
public void pauseIt();
|
||||
public void continueIt();
|
||||
public void stopIt() throws InterruptedException;
|
||||
public boolean isStopped();
|
||||
|
||||
// getting status information
|
||||
public long getTotalRuntime();
|
||||
public long getElapsedTime();
|
||||
public long getEstimatedTime();
|
||||
public int getProcessingStatusPercent();
|
||||
|
||||
public int getJobID();
|
||||
public String getJobName();
|
||||
public String getJobType();
|
||||
public File getImportPath();
|
||||
public String getError();
|
||||
public String getStatus();
|
||||
|
||||
public void init(File importPath, int cacheSize);
|
||||
public void startIt();
|
||||
}
|
@ -0,0 +1,122 @@
|
||||
package de.anomic.plasma.dbImport;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Iterator;
|
||||
|
||||
import de.anomic.plasma.plasmaSwitchboard;
|
||||
import de.anomic.plasma.plasmaWordIndex;
|
||||
import de.anomic.plasma.plasmaWordIndexAssortment;
|
||||
import de.anomic.plasma.plasmaWordIndexEntryContainer;
|
||||
|
||||
public class plasmaWordIndexAssortmentImporter extends AbstractImporter implements dbImporter{
|
||||
|
||||
private int importStartSize;
|
||||
private int wordEntityCount = 0;
|
||||
private int wordEntryCount = 0;
|
||||
|
||||
private File importAssortmentFile;
|
||||
private plasmaWordIndexAssortment assortmentFile;
|
||||
|
||||
public plasmaWordIndexAssortmentImporter(plasmaSwitchboard sb) {
|
||||
super(sb);
|
||||
this.jobType = "ASSORTMENT";
|
||||
}
|
||||
|
||||
public void init(File importAssortmentFile, int cacheSize) {
|
||||
super.init(importAssortmentFile);
|
||||
this.importAssortmentFile = importAssortmentFile;
|
||||
this.cacheSize = cacheSize;
|
||||
if (this.cacheSize < 2*1024*1024) this.cacheSize = 8*1024*1024;
|
||||
|
||||
String errorMsg = null;
|
||||
if (!importAssortmentFile.getName().matches("indexAssortment0[0-6][0-9]\\.db")) errorMsg = "AssortmentFile '" + importAssortmentFile + "' has an invalid name.";
|
||||
if (!importAssortmentFile.exists()) errorMsg = "AssortmentFile '" + importAssortmentFile + "' does not exist.";
|
||||
else if (importAssortmentFile.isDirectory()) errorMsg = "AssortmentFile '" + importAssortmentFile + "' is a directory.";
|
||||
else if (!importAssortmentFile.canRead()) errorMsg = "AssortmentFile '" + importAssortmentFile + "' is not readable.";
|
||||
else if (!importAssortmentFile.canWrite()) errorMsg = "AssortmentFile '" + importAssortmentFile + "' is not writeable.";
|
||||
|
||||
|
||||
File importAssortmentPath = null;
|
||||
int assortmentNr = -1;
|
||||
try {
|
||||
importAssortmentPath = new File(importAssortmentFile.getParent());
|
||||
assortmentNr = Integer.valueOf(importAssortmentFile.getName().substring("indexAssortment".length(),"indexAssortment".length()+3)).intValue();
|
||||
if (assortmentNr <1 || assortmentNr > 64) {
|
||||
errorMsg = "AssortmentFile '" + importAssortmentFile + "' has an invalid name.";
|
||||
}
|
||||
} catch (NumberFormatException e) {
|
||||
errorMsg = "Unable to parse the assortment file number.";
|
||||
}
|
||||
|
||||
if (errorMsg != null) {
|
||||
this.log.logSevere(errorMsg);
|
||||
throw new IllegalStateException(errorMsg);
|
||||
}
|
||||
|
||||
|
||||
this.log.logInfo("Initializing source assortment file");
|
||||
this.assortmentFile = new plasmaWordIndexAssortment(importAssortmentPath,assortmentNr,8*1024*1024, this.log);
|
||||
this.importStartSize = this.assortmentFile.size();
|
||||
}
|
||||
|
||||
public long getEstimatedTime() {
|
||||
return (this.wordEntityCount==0)?0:this.assortmentFile.size()*((System.currentTimeMillis()-this.globalStart)/this.wordEntityCount);
|
||||
}
|
||||
|
||||
public String getJobName() {
|
||||
return this.getImportPath().toString();
|
||||
}
|
||||
|
||||
public int getProcessingStatusPercent() {
|
||||
return (this.wordEntityCount)/((this.importStartSize<100)?1:(this.importStartSize)/100);
|
||||
}
|
||||
|
||||
public String getStatus() {
|
||||
StringBuffer theStatus = new StringBuffer();
|
||||
|
||||
theStatus.append("#Word Entities=").append(this.wordEntityCount).append("\n");
|
||||
theStatus.append("#Word Entries=").append(this.wordEntryCount);
|
||||
|
||||
return theStatus.toString();
|
||||
}
|
||||
|
||||
public void run() {
|
||||
try {
|
||||
Iterator contentIter = this.assortmentFile.content();
|
||||
while (contentIter.hasNext()) {
|
||||
this.wordEntityCount++;
|
||||
|
||||
byte[][] row = (byte[][]) contentIter.next();
|
||||
String hash = new String(row[0]);
|
||||
plasmaWordIndexEntryContainer container;
|
||||
try {
|
||||
container = this.assortmentFile.row2container(hash, row);
|
||||
} catch (NullPointerException e) {
|
||||
this.log.logWarning("NullpointerException detected in row with hash '" + hash + "'.");
|
||||
if (this.wordEntityCount < this.importStartSize) continue;
|
||||
return;
|
||||
}
|
||||
this.wordEntryCount += container.size();
|
||||
|
||||
// importing entity container to home db
|
||||
this.sb.wordIndex.addEntries(container, true);
|
||||
|
||||
if (this.wordEntityCount % 500 == 0) {
|
||||
this.log.logFine(this.wordEntityCount + " word entities processed so far.");
|
||||
}
|
||||
if (this.wordEntryCount % 2000 == 0) {
|
||||
this.log.logFine(this.wordEntryCount + " word entries processed so far.");
|
||||
}
|
||||
if (isAborted()) break;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
this.error = e.toString();
|
||||
this.log.logSevere("Error detected",e);
|
||||
} finally {
|
||||
this.globalEnd = System.currentTimeMillis();
|
||||
this.sb.dbImportManager.finishedJobs.add(this);
|
||||
this.assortmentFile.close();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in new issue