- moved OAI-PMH source list file from SETTINGS to DICTIONARIES/harvesting

- added a convenience method to LoaderDispatcher for loading files from the web

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6455 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 2889b9426e
commit 19f31bb043

@ -36,12 +36,10 @@ import java.util.Set;
import java.util.TreeSet; import java.util.TreeSet;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.LoaderDispatcher; import net.yacy.repository.LoaderDispatcher;
import net.yacy.document.parser.csvParser; import net.yacy.document.parser.csvParser;
import de.anomic.crawler.CrawlProfile; import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.retrieval.Response;
import de.anomic.search.Switchboard; import de.anomic.search.Switchboard;
@ -58,7 +56,7 @@ import de.anomic.search.Switchboard;
public class OAIPMHImporter extends Thread implements Importer, Comparable<OAIPMHImporter> { public class OAIPMHImporter extends Thread implements Importer, Comparable<OAIPMHImporter> {
private static int importerCounter = 0; private static int importerCounter = Integer.MAX_VALUE;
public static TreeSet<OAIPMHImporter> startedJobs = new TreeSet<OAIPMHImporter>(); public static TreeSet<OAIPMHImporter> startedJobs = new TreeSet<OAIPMHImporter>();
public static TreeSet<OAIPMHImporter> runningJobs = new TreeSet<OAIPMHImporter>(); public static TreeSet<OAIPMHImporter> runningJobs = new TreeSet<OAIPMHImporter>();
@ -73,7 +71,7 @@ public class OAIPMHImporter extends Thread implements Importer, Comparable<OAIPM
private int serialNumber; private int serialNumber;
public OAIPMHImporter(LoaderDispatcher loader, DigestURI source) { public OAIPMHImporter(LoaderDispatcher loader, DigestURI source) {
this.serialNumber = importerCounter++; this.serialNumber = importerCounter--;
this.loader = loader; this.loader = loader;
this.recordsCount = 0; this.recordsCount = 0;
this.chunkCount = 0; this.chunkCount = 0;
@ -175,7 +173,7 @@ public class OAIPMHImporter extends Thread implements Importer, Comparable<OAIPM
TreeSet<String> list = new TreeSet<String>(); TreeSet<String> list = new TreeSet<String>();
// read roar // read roar
File roar = new File(Switchboard.getSwitchboard().getRootPath(), "DATA/SETTINGS/roar.csv"); File roar = new File(Switchboard.getSwitchboard().dictionariesPath, "harvesting/roar.csv");
DigestURI roarSource; DigestURI roarSource;
try { try {
roarSource = new DigestURI("http://roar.eprints.org/index.php?action=csv", null); roarSource = new DigestURI("http://roar.eprints.org/index.php?action=csv", null);
@ -185,9 +183,7 @@ public class OAIPMHImporter extends Thread implements Importer, Comparable<OAIPM
} }
if (!roar.exists()) try { if (!roar.exists()) try {
// load the file from the net // load the file from the net
Response response = loader.load(roarSource, false, true, CrawlProfile.CACHE_STRATEGY_NOCACHE); loader.load(roarSource, CrawlProfile.CACHE_STRATEGY_NOCACHE, roar);
byte[] b = response.getContent();
FileUtils.copy(b, roar);
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
} }

@ -27,6 +27,7 @@
package net.yacy.repository; package net.yacy.repository;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.Writer; import java.io.Writer;
@ -44,6 +45,7 @@ import net.yacy.document.parser.html.ContentScraper;
import net.yacy.document.parser.html.TransformerWriter; import net.yacy.document.parser.html.TransformerWriter;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
import de.anomic.crawler.CrawlProfile; import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.retrieval.FTPLoader; import de.anomic.crawler.retrieval.FTPLoader;
@ -93,8 +95,7 @@ public final class LoaderDispatcher {
public Response load( public Response load(
final DigestURI url, final DigestURI url,
final boolean forText, final boolean forText,
final boolean global final boolean global) throws IOException {
) throws IOException {
return load(request(url, forText, global), forText); return load(request(url, forText, global), forText);
} }
@ -102,11 +103,28 @@ public final class LoaderDispatcher {
final DigestURI url, final DigestURI url,
final boolean forText, final boolean forText,
final boolean global, final boolean global,
int cacheStratgy int cacheStratgy) throws IOException {
) throws IOException {
return load(request(url, forText, global), forText, cacheStratgy); return load(request(url, forText, global), forText, cacheStratgy);
} }
/**
 * Load the content of a URL and store it in a file.
 * The content is first written to a temporary file (target path + ".tmp")
 * and then renamed to the target, so an incomplete download never appears
 * under the final file name.
 *
 * @param url the url to load
 * @param cacheStratgy the cache strategy that is passed on to load(url, cacheStratgy)
 * @param targetFile the file that shall receive the loaded content
 * @throws IOException if no content was loaded, if the target directory cannot
 *         be created, or if the content cannot be written or moved into place
 */
public void load(final DigestURI url, int cacheStratgy, File targetFile) throws IOException {
    byte[] b = load(url, cacheStratgy);
    if (b == null) throw new IOException("load == null");

    // work on the absolute form: a relative target without a directory part
    // would otherwise return null from getParentFile()
    File target = targetFile.getAbsoluteFile();
    File parent = target.getParentFile();
    if (parent != null && !parent.exists() && !parent.mkdirs() && !parent.isDirectory()) {
        throw new IOException("cannot create directory " + parent);
    }

    // transaction-safe writing
    File tmp = new File(target.getPath() + ".tmp");
    FileUtils.copy(b, tmp);
    if (!tmp.renameTo(target)) {
        // renameTo reports failure only through its return value;
        // do not leave the temporary file behind and tell the caller
        tmp.delete();
        throw new IOException("cannot rename " + tmp + " to " + target);
    }
}
/**
 * Load the content of a URL as a byte array.
 * The request is generated with forText = false and global = true.
 *
 * @param url the url to load
 * @param cacheStratgy the cache strategy that is handed over to the loader
 * @return the content of the loaded response
 * @throws IOException propagated from the underlying load call
 */
public byte[] load(final DigestURI url, int cacheStratgy) throws IOException {
    final boolean forText = false;
    final boolean global = true;
    return load(request(url, forText, global), forText, cacheStratgy).getContent();
}
/** /**
* generate a request object * generate a request object
* @param url the target url * @param url the target url

Loading…
Cancel
Save