Add a responseHeader to the solr index export with a format identifier

and export parameter (in accordance with response xml format) for easier
format detection on import.
pull/122/head
reger 8 years ago
parent 9697209ef6
commit 9ad4d16829

@ -176,7 +176,13 @@ public class DCEntry extends MultiMapSolrParams {
}
}
private static String bestU(String[] urls) {
/**
* From the given list of urls selects the best to access the resource,
* possibly expanding a short identifier (doi, nbn or ietf) to its resolved url
* @param urls list of identifier / url
* @return best/resolved url
*/
private String bestU(String[] urls) {
if (urls.length > 1) { // with only one ... no choice
for (String uu: urls) {
if (uu.startsWith("http://") && (uu.endsWith(".html") || uu.endsWith(".htm") || uu.endsWith(".pdf") || uu.endsWith(".doc") || uu.endsWith(".rss") || uu.endsWith(".xml"))) return uu;

@ -180,17 +180,23 @@ public class SurrogateReader extends DefaultHandler implements Runnable {
}
/**
* Check for format string in responseHeader "yacy.index.export.solr.xml"
* (introduced v1.92/9188 2017-04-30) or guess format by existing "<response>"
* and "<result>" or "<doc>" tag in the first 1024 characters.
*
* @return true when inputStream is likely to contain a rich and full-text Solr xml data dump (see IndexExport_p.html)
*/
private boolean isSolrDump() {
boolean res = false;
byte[] b = new byte[100];
byte[] b = new byte[1024];
int nbRead = -1;
try {
nbRead = this.inputStream.read(b);
if (nbRead > 0) {
String s = new String(b, 0, nbRead, StandardCharsets.UTF_8);
if ((s.contains("<response>") && s.contains("<result>")) || s.startsWith("<doc>")) {
if (s.contains("format=\"yacy.index.export.solr.xml\"")) {
res = true;
} else if ((s.contains("<response>") && s.contains("<result>")) || s.startsWith("<doc>")) {
res = true;
}
}

@ -803,6 +803,12 @@ public final class Fulltext {
if (this.format == ExportFormat.solr) {
    // Open a Solr-style xml response. The responseHeader carries a format
    // identifier and the export query, so a reader can recognise the dump
    // from the first bytes of the file.
    pw.println("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
    pw.println("<response>");
    pw.println("<lst name=\"responseHeader\">");
    pw.println(" <str format=\"yacy.index.export.solr.xml\"/>");
    // The start tag must be closed with '>'; without it the header is not
    // well-formed xml and a strict parser rejects the whole dump.
    pw.println(" <lst name=\"params\">");
    // NOTE(review): the query is written unescaped; a query containing '<' or
    // '&' would also break the xml - confirm whether escaping is needed here.
    pw.println(" <str name=\"q\">" + this.query + "</str>");
    pw.println(" </lst>");
    pw.println("</lst>");
    pw.println("<result>");
}
if (this.dom) {

Loading…
Cancel
Save