Very first working version of the OAI-PMH importer: if given the right URL, the importer can read and index ListRecords XML files and calculate the correct resumption URL, which is then offered as the next default starting point for the importer URL input.
No automatic harvesting yet; this will be done later. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6443 6c8d7289-2bf4-0310-a012-ef5d649a1542 pull/1/head
parent
58616d99e4
commit
350d13e153
@ -1,90 +0,0 @@
|
||||
package net.yacy.document.importer;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import javax.xml.parsers.SAXParser;
|
||||
import javax.xml.parsers.SAXParserFactory;
|
||||
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.helpers.DefaultHandler;
|
||||
|
||||
public class ResumptionTokenReader extends DefaultHandler {
|
||||
|
||||
// class variables
|
||||
private final StringBuilder buffer;
|
||||
private boolean parsingValue;
|
||||
private ResumptionToken token;
|
||||
private SAXParser saxParser;
|
||||
private InputStream stream;
|
||||
private Attributes atts;
|
||||
|
||||
public ResumptionTokenReader(final InputStream stream) throws IOException {
|
||||
this.buffer = new StringBuilder();
|
||||
this.parsingValue = false;
|
||||
this.token = null;
|
||||
this.stream = stream;
|
||||
this.atts = null;
|
||||
final SAXParserFactory factory = SAXParserFactory.newInstance();
|
||||
try {
|
||||
this.saxParser = factory.newSAXParser();
|
||||
this.saxParser.parse(this.stream, this);
|
||||
} catch (SAXException e) {
|
||||
e.printStackTrace();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
} catch (ParserConfigurationException e) {
|
||||
e.printStackTrace();
|
||||
throw new IOException(e.getMessage());
|
||||
} finally {
|
||||
try {
|
||||
this.stream.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public ResumptionToken getToken() {
|
||||
return this.token;
|
||||
}
|
||||
|
||||
/*
|
||||
<resumptionToken expirationDate="2009-10-31T22:52:14Z"
|
||||
completeListSize="226"
|
||||
cursor="0">688</resumptionToken>
|
||||
*/
|
||||
|
||||
public void run() {
|
||||
|
||||
}
|
||||
|
||||
public void startElement(final String uri, final String name, final String tag, final Attributes atts) throws SAXException {
|
||||
if ("resumptionToken".equals(tag)) {
|
||||
this.parsingValue = true;
|
||||
this.atts = atts;
|
||||
}
|
||||
}
|
||||
|
||||
public void endElement(final String uri, final String name, final String tag) {
|
||||
if (tag == null) return;
|
||||
if ("resumptionToken".equals(tag)) {
|
||||
this.token = new ResumptionToken(
|
||||
atts.getValue("expirationDate"),
|
||||
Integer.parseInt(atts.getValue("completeListSize")),
|
||||
Integer.parseInt(atts.getValue("cursor")),
|
||||
Integer.parseInt(buffer.toString().trim()));
|
||||
this.buffer.setLength(0);
|
||||
this.parsingValue = false;
|
||||
}
|
||||
}
|
||||
|
||||
public void characters(final char ch[], final int start, final int length) {
|
||||
if (parsingValue) {
|
||||
buffer.append(ch, start, length);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in new issue