- moved OAI-PMH source list file from SETTINGS to DICTIONARIES/harvesting

- added a convenience method to LoaderDispatcher for loading files from the web

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6455 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 2889b9426e
commit 19f31bb043

@ -36,12 +36,10 @@ import java.util.Set;
import java.util.TreeSet;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.LoaderDispatcher;
import net.yacy.document.parser.csvParser;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.retrieval.Response;
import de.anomic.search.Switchboard;
@ -58,7 +56,7 @@ import de.anomic.search.Switchboard;
public class OAIPMHImporter extends Thread implements Importer, Comparable<OAIPMHImporter> {
private static int importerCounter = 0;
private static int importerCounter = Integer.MAX_VALUE;
public static TreeSet<OAIPMHImporter> startedJobs = new TreeSet<OAIPMHImporter>();
public static TreeSet<OAIPMHImporter> runningJobs = new TreeSet<OAIPMHImporter>();
@ -73,7 +71,7 @@ public class OAIPMHImporter extends Thread implements Importer, Comparable<OAIPM
private int serialNumber;
public OAIPMHImporter(LoaderDispatcher loader, DigestURI source) {
this.serialNumber = importerCounter++;
this.serialNumber = importerCounter--;
this.loader = loader;
this.recordsCount = 0;
this.chunkCount = 0;
@ -175,7 +173,7 @@ public class OAIPMHImporter extends Thread implements Importer, Comparable<OAIPM
TreeSet<String> list = new TreeSet<String>();
// read roar
File roar = new File(Switchboard.getSwitchboard().getRootPath(), "DATA/SETTINGS/roar.csv");
File roar = new File(Switchboard.getSwitchboard().dictionariesPath, "harvesting/roar.csv");
DigestURI roarSource;
try {
roarSource = new DigestURI("http://roar.eprints.org/index.php?action=csv", null);
@ -185,9 +183,7 @@ public class OAIPMHImporter extends Thread implements Importer, Comparable<OAIPM
}
if (!roar.exists()) try {
// load the file from the net
Response response = loader.load(roarSource, false, true, CrawlProfile.CACHE_STRATEGY_NOCACHE);
byte[] b = response.getContent();
FileUtils.copy(b, roar);
loader.load(roarSource, CrawlProfile.CACHE_STRATEGY_NOCACHE, roar);
} catch (IOException e) {
e.printStackTrace();
}

@ -27,6 +27,7 @@
package net.yacy.repository;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Writer;
@ -44,6 +45,7 @@ import net.yacy.document.parser.html.ContentScraper;
import net.yacy.document.parser.html.TransformerWriter;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.retrieval.FTPLoader;
@ -93,8 +95,7 @@ public final class LoaderDispatcher {
public Response load(
final DigestURI url,
final boolean forText,
final boolean global
) throws IOException {
final boolean global) throws IOException {
return load(request(url, forText, global), forText);
}
@ -102,11 +103,28 @@ public final class LoaderDispatcher {
final DigestURI url,
final boolean forText,
final boolean global,
int cacheStratgy
) throws IOException {
int cacheStratgy) throws IOException {
return load(request(url, forText, global), forText, cacheStratgy);
}
/**
 * Loads the resource at the given url and stores its content in the given file.
 * The content is first written to a sibling file named like the target with a
 * ".tmp" suffix and then renamed to the target, so that the target never
 * holds a partially written result.
 *
 * @param url the url of the resource to load
 * @param cacheStratgy the cache strategy, passed unchanged to {@code load(url, cacheStratgy)}
 * @param targetFile the file that shall receive the loaded content
 * @throws IOException if no content was loaded, if the target directory cannot be
 *         created, or if the temporary file cannot be moved to the target
 */
public void load(final DigestURI url, int cacheStratgy, File targetFile) throws IOException {
    byte[] b = load(url, cacheStratgy);
    if (b == null) throw new IOException("load == null");
    // work on the absolute form so that a bare file name still has a parent directory
    File target = targetFile.getAbsoluteFile();
    File tmp = new File(target.getPath() + ".tmp");
    File parent = target.getParentFile();
    if (parent != null && !parent.exists() && !parent.mkdirs() && !parent.isDirectory()) {
        throw new IOException("cannot create directory " + parent);
    }
    // transaction-safe writing
    FileUtils.copy(b, tmp);
    if (!tmp.renameTo(target)) {
        // renameTo may refuse to overwrite an existing file; remove the old one and retry once
        boolean replaced = target.exists() && target.delete() && tmp.renameTo(target);
        if (!replaced) {
            // do not leave the temporary file behind and do not report success
            tmp.delete();
            throw new IOException("cannot rename " + tmp + " to " + target);
        }
    }
}
/**
 * Loads the resource at the given url and returns the content of the response.
 * The request is generated with forText = false and global = true.
 *
 * @param url the url of the resource to load
 * @param cacheStratgy the cache strategy, passed unchanged to the underlying load call
 * @return the content of the response
 * @throws IOException if the underlying load call throws it
 */
public byte[] load(final DigestURI url, int cacheStratgy) throws IOException {
    final boolean forText = false;
    final boolean global = true;
    return load(request(url, forText, global), forText, cacheStratgy).getContent();
}
/**
* generate a request object
* @param url the target url

Loading…
Cancel
Save