- fixes to csv parser

- automatic OAI-PMH import: clicking a single link in the provided resource list now starts the import

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6449 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent 9b6762ec2e
commit 11f7da06ed

@ -49,7 +49,7 @@ public class IndexImportOAIPMHList_p {
int cnt = 0;
for (String root: oaiRoots) {
prop.put("source_table_" + cnt + "_dark", (dark) ? "1" : "0");
prop.put("source_table_" + cnt + "_source", root);
prop.put("source_table_" + cnt + "_source", "<a href=\"/IndexImportOAIPMH_p.html?importroot=&urlstartall=" + root + "\" target=\"_top\">" + root+ "</a>");
dark = !dark;
cnt++;
}

@ -35,14 +35,14 @@
Import all records that follow according to resumption elements into index<br />
<input name="urlstartall" type="text" value="" size="80" />
<input name="importroot" type="submit" value="import this source" />
or&nbsp;<input name="getlist" type="submit" value="import from a list" />
#(optiongetlist)#::or&nbsp;<input name="getlist" type="submit" value="import from a list" />#(/optiongetlist)#
#(status)#::<p>Import started!</p>::<p>Bad input data: #[message]# </p>#(/status)#
</fieldset>
</form>
<iframe name="OAI-PMH Import List"
src="/IndexImportOAIPMHList_p.html#(iframetype)#::?import=::?source=#(/iframetype)#"
width="100%"
height="340"
height="420"
frameborder="0"
scrolling="auto"
id="list">

@ -44,8 +44,9 @@ public class IndexImportOAIPMH_p {
prop.put("import-one", 0);
prop.put("status", 0);
prop.put("defaulturl", "");
prop.put("iframetype", (OAIPMHImporter.runningJobs.size() + OAIPMHImporter.startedJobs.size() + OAIPMHImporter.finishedJobs.size() == 0) ? 0 : 1);
int jobcount = OAIPMHImporter.runningJobs.size() + OAIPMHImporter.startedJobs.size() + OAIPMHImporter.finishedJobs.size();
prop.put("iframetype", (jobcount == 0) ? 2 : 1);
prop.put("optiongetlist", (jobcount == 0) ? 0 : 1);
if (post != null) {
if (post.containsKey("urlstartone")) {
String oaipmhurl = post.get("urlstartone");
@ -89,6 +90,8 @@ public class IndexImportOAIPMH_p {
OAIPMHImporter job = new OAIPMHImporter(sb.loader, url);
job.start();
prop.put("status", 1);
prop.put("optiongetlist", 1);
prop.put("iframetype", 1);
} catch (MalformedURLException e) {
e.printStackTrace();
prop.put("status", 2);

@ -196,7 +196,9 @@ public class OAIPMHImporter extends Thread implements Importer, Comparable<OAIPM
try {
List<String[]> table = parser.getTable(roarSource, "", "UTF-8", new FileInputStream(roar));
for (String[] row: table) {
list.add(row[2]);
if (row.length > 2 && (row[2].startsWith("http://") || row[2].startsWith("https://"))) {
list.add(row[2]);
}
}
} catch (FileNotFoundException e) {
e.printStackTrace();

@ -134,6 +134,8 @@ public class csvParser extends AbstractParser implements Idiom {
if (cols.length >= colc.length && cols.length >= colt.length) separator = ";";
if (colt.length >= cols.length && colt.length >= colc.length) separator = "\t";
}
row = stripQuotes(row, '\"', separator.charAt(0), ' ');
row = stripQuotes(row, '\'', separator.charAt(0), ' ');
String[] cols = row.split(separator);
if (columns == -1) columns = cols.length;
//if (cols.length != columns) continue; // skip lines that have the wrong number of columns
@ -144,6 +146,28 @@ public class csvParser extends AbstractParser implements Idiom {
return rows;
}
/**
 * Strips quote characters from a line and neutralizes the separator
 * characters enclosed by each pair of quotes, so that the result can be
 * split with the String.split method without tearing quoted fields apart.
 * <p>
 * Quotes are consumed pairwise from left to right. An opening quote that
 * has no closing partner is treated as malformed input: that single quote
 * is dropped and the remainder of the line is left as it is.
 *
 * @param line the raw line to clean up
 * @param quote the character that opens and closes a quoted section
 * @param separator the column separator that must not survive inside a quoted section
 * @param replacement the character written in place of each enclosed separator
 * @return the line with the quotes removed and the enclosed separators replaced
 */
public static String stripQuotes(String line, char quote, char separator, char replacement) {
    int open = line.indexOf(quote);
    if (open < 0) {
        // nothing quoted at all: hand the input back unchanged
        return line;
    }
    final String quoteText = String.valueOf(quote);
    final StringBuilder buffer = new StringBuilder(line);
    while (open >= 0) {
        final int close = buffer.indexOf(quoteText, open + 1);
        if (close < 0) {
            // dangling opening quote: not well-formed, drop it and stop
            buffer.deleteCharAt(open);
            break;
        }
        // mask the separators that sit between the two quotes
        for (int i = open + 1; i < close; i++) {
            if (buffer.charAt(i) == separator) {
                buffer.setCharAt(i, replacement);
            }
        }
        // remove the closing quote first so that 'open' stays a valid index
        buffer.deleteCharAt(close);
        buffer.deleteCharAt(open);
        // everything left of 'open' is already free of quotes, so continue from there
        open = buffer.indexOf(quoteText, open);
    }
    return buffer.toString();
}
}

Loading…
Cancel
Save