|
|
@ -29,16 +29,20 @@ public class ParserTest {
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
for (final String[] testFile : testFiles) {
|
|
|
|
for (final String[] testFile : testFiles) {
|
|
|
|
try {
|
|
|
|
FileInputStream inStream = null;
|
|
|
|
final String filename = "test/parsertest/" + testFile[0];
|
|
|
|
final String filename = "test/parsertest/" + testFile[0];
|
|
|
|
|
|
|
|
try {
|
|
|
|
final File file = new File(filename);
|
|
|
|
final File file = new File(filename);
|
|
|
|
final String mimetype = testFile[1];
|
|
|
|
final String mimetype = testFile[1];
|
|
|
|
final AnchorURL url = new AnchorURL("http://localhost/"+filename);
|
|
|
|
final AnchorURL url = new AnchorURL("http://localhost/"+filename);
|
|
|
|
|
|
|
|
|
|
|
|
AbstractParser p = new ooxmlParser();
|
|
|
|
AbstractParser p = new ooxmlParser();
|
|
|
|
final Document[] docs = p.parse(url, mimetype, null, new VocabularyScraper(), 0, new FileInputStream(file));
|
|
|
|
inStream = new FileInputStream(file);
|
|
|
|
|
|
|
|
final Document[] docs = p.parse(url, mimetype, null, new VocabularyScraper(), 0, inStream);
|
|
|
|
for (final Document doc: docs) {
|
|
|
|
for (final Document doc: docs) {
|
|
|
|
final Reader content = new InputStreamReader(doc.getTextStream(), doc.getCharset());
|
|
|
|
Reader content = null;
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
content = new InputStreamReader(doc.getTextStream(), doc.getCharset());
|
|
|
|
final StringBuilder str = new StringBuilder();
|
|
|
|
final StringBuilder str = new StringBuilder();
|
|
|
|
int c;
|
|
|
|
int c;
|
|
|
|
while( (c = content.read()) != -1 )
|
|
|
|
while( (c = content.read()) != -1 )
|
|
|
@ -49,8 +53,27 @@ public class ParserTest {
|
|
|
|
assertThat(doc.dc_title(), containsString(testFile[2]));
|
|
|
|
assertThat(doc.dc_title(), containsString(testFile[2]));
|
|
|
|
assertThat(doc.dc_creator(), containsString(testFile[3]));
|
|
|
|
assertThat(doc.dc_creator(), containsString(testFile[3]));
|
|
|
|
if (testFile[4].length() > 0) assertThat(doc.dc_description()[0], containsString(testFile[4]));
|
|
|
|
if (testFile[4].length() > 0) assertThat(doc.dc_description()[0], containsString(testFile[4]));
|
|
|
|
|
|
|
|
} finally {
|
|
|
|
|
|
|
|
if(content != null) {
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
content.close();
|
|
|
|
|
|
|
|
} catch(IOException ioe) {
|
|
|
|
|
|
|
|
System.out.println("Could not close text input stream");
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
} catch (final InterruptedException ex) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} finally {
|
|
|
|
|
|
|
|
if(inStream != null) {
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
inStream.close();
|
|
|
|
|
|
|
|
} catch(IOException ioe) {
|
|
|
|
|
|
|
|
System.out.println("Could not close input stream on file " + filename);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} catch (final InterruptedException ex) {}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
@ -63,16 +86,20 @@ public class ParserTest {
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
for (final String[] testFile : testFiles) {
|
|
|
|
for (final String[] testFile : testFiles) {
|
|
|
|
try {
|
|
|
|
FileInputStream inStream = null;
|
|
|
|
final String filename = "test/parsertest/" + testFile[0];
|
|
|
|
final String filename = "test/parsertest/" + testFile[0];
|
|
|
|
|
|
|
|
try {
|
|
|
|
final File file = new File(filename);
|
|
|
|
final File file = new File(filename);
|
|
|
|
final String mimetype = testFile[1];
|
|
|
|
final String mimetype = testFile[1];
|
|
|
|
final AnchorURL url = new AnchorURL("http://localhost/"+filename);
|
|
|
|
final AnchorURL url = new AnchorURL("http://localhost/"+filename);
|
|
|
|
|
|
|
|
|
|
|
|
AbstractParser p = new odtParser();
|
|
|
|
AbstractParser p = new odtParser();
|
|
|
|
final Document[] docs = p.parse(url, mimetype, null, new VocabularyScraper(), 0, new FileInputStream(file));
|
|
|
|
inStream = new FileInputStream(file);
|
|
|
|
|
|
|
|
final Document[] docs = p.parse(url, mimetype, null, new VocabularyScraper(), 0, inStream);
|
|
|
|
for (final Document doc: docs) {
|
|
|
|
for (final Document doc: docs) {
|
|
|
|
final Reader content = new InputStreamReader(doc.getTextStream(), doc.getCharset());
|
|
|
|
Reader content = null;
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
content = new InputStreamReader(doc.getTextStream(), doc.getCharset());
|
|
|
|
final StringBuilder str = new StringBuilder();
|
|
|
|
final StringBuilder str = new StringBuilder();
|
|
|
|
int c;
|
|
|
|
int c;
|
|
|
|
while( (c = content.read()) != -1 )
|
|
|
|
while( (c = content.read()) != -1 )
|
|
|
@ -83,8 +110,26 @@ public class ParserTest {
|
|
|
|
assertThat(doc.dc_title(), containsString(testFile[2]));
|
|
|
|
assertThat(doc.dc_title(), containsString(testFile[2]));
|
|
|
|
assertThat(doc.dc_creator(), containsString(testFile[3]));
|
|
|
|
assertThat(doc.dc_creator(), containsString(testFile[3]));
|
|
|
|
if (testFile[4].length() > 0) assertThat(doc.dc_description()[0], containsString(testFile[4]));
|
|
|
|
if (testFile[4].length() > 0) assertThat(doc.dc_description()[0], containsString(testFile[4]));
|
|
|
|
|
|
|
|
} finally {
|
|
|
|
|
|
|
|
if(content != null) {
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
content.close();
|
|
|
|
|
|
|
|
} catch(IOException ioe) {
|
|
|
|
|
|
|
|
System.out.println("Could not close text input stream");
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
} catch (final InterruptedException ex) {
|
|
|
|
|
|
|
|
} finally {
|
|
|
|
|
|
|
|
if(inStream != null) {
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
inStream.close();
|
|
|
|
|
|
|
|
} catch(IOException ioe) {
|
|
|
|
|
|
|
|
System.out.println("Could not close input stream on file " + filename);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} catch (final InterruptedException ex) {}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
@ -95,16 +140,20 @@ public class ParserTest {
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
for (final String[] testFile : testFiles) {
|
|
|
|
for (final String[] testFile : testFiles) {
|
|
|
|
try {
|
|
|
|
|
|
|
|
final String filename = "test/parsertest/" + testFile[0];
|
|
|
|
final String filename = "test/parsertest/" + testFile[0];
|
|
|
|
|
|
|
|
FileInputStream inStream = null;
|
|
|
|
|
|
|
|
try {
|
|
|
|
final File file = new File(filename);
|
|
|
|
final File file = new File(filename);
|
|
|
|
final String mimetype = testFile[1];
|
|
|
|
final String mimetype = testFile[1];
|
|
|
|
final AnchorURL url = new AnchorURL("http://localhost/"+filename);
|
|
|
|
final AnchorURL url = new AnchorURL("http://localhost/"+filename);
|
|
|
|
|
|
|
|
|
|
|
|
AbstractParser p = new pdfParser();
|
|
|
|
AbstractParser p = new pdfParser();
|
|
|
|
final Document[] docs = p.parse(url, mimetype, null, new VocabularyScraper(), 0, new FileInputStream(file));
|
|
|
|
inStream = new FileInputStream(file);
|
|
|
|
|
|
|
|
final Document[] docs = p.parse(url, mimetype, null, new VocabularyScraper(), 0, inStream);
|
|
|
|
for (final Document doc: docs) {
|
|
|
|
for (final Document doc: docs) {
|
|
|
|
final Reader content = new InputStreamReader(doc.getTextStream(), doc.getCharset());
|
|
|
|
Reader content = null;
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
content = new InputStreamReader(doc.getTextStream(), doc.getCharset());
|
|
|
|
final StringBuilder str = new StringBuilder();
|
|
|
|
final StringBuilder str = new StringBuilder();
|
|
|
|
int c;
|
|
|
|
int c;
|
|
|
|
while( (c = content.read()) != -1 )
|
|
|
|
while( (c = content.read()) != -1 )
|
|
|
@ -115,8 +164,26 @@ public class ParserTest {
|
|
|
|
assertThat(doc.dc_title(), containsString(testFile[2]));
|
|
|
|
assertThat(doc.dc_title(), containsString(testFile[2]));
|
|
|
|
assertThat(doc.dc_creator(), containsString(testFile[3]));
|
|
|
|
assertThat(doc.dc_creator(), containsString(testFile[3]));
|
|
|
|
if (testFile[4].length() > 0) assertThat(doc.dc_description()[0], containsString(testFile[4]));
|
|
|
|
if (testFile[4].length() > 0) assertThat(doc.dc_description()[0], containsString(testFile[4]));
|
|
|
|
|
|
|
|
} finally {
|
|
|
|
|
|
|
|
if(content != null) {
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
content.close();
|
|
|
|
|
|
|
|
} catch(IOException ioe) {
|
|
|
|
|
|
|
|
System.out.println("Could not close text input stream");
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
} catch (final InterruptedException ex) {
|
|
|
|
|
|
|
|
} finally {
|
|
|
|
|
|
|
|
if(inStream != null) {
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
inStream.close();
|
|
|
|
|
|
|
|
} catch(IOException ioe) {
|
|
|
|
|
|
|
|
System.out.println("Could not close input stream on file " + filename);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} catch (final InterruptedException ex) {}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
@ -127,16 +194,20 @@ public class ParserTest {
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
for (final String[] testFile : testFiles) {
|
|
|
|
for (final String[] testFile : testFiles) {
|
|
|
|
try {
|
|
|
|
|
|
|
|
final String filename = "test/parsertest/" + testFile[0];
|
|
|
|
final String filename = "test/parsertest/" + testFile[0];
|
|
|
|
|
|
|
|
FileInputStream inStream = null;
|
|
|
|
|
|
|
|
try {
|
|
|
|
final File file = new File(filename);
|
|
|
|
final File file = new File(filename);
|
|
|
|
final String mimetype = testFile[1];
|
|
|
|
final String mimetype = testFile[1];
|
|
|
|
final AnchorURL url = new AnchorURL("http://localhost/"+filename);
|
|
|
|
final AnchorURL url = new AnchorURL("http://localhost/"+filename);
|
|
|
|
|
|
|
|
|
|
|
|
AbstractParser p = new docParser();
|
|
|
|
AbstractParser p = new docParser();
|
|
|
|
final Document[] docs = p.parse(url, mimetype, null, new VocabularyScraper(), 0, new FileInputStream(file));
|
|
|
|
inStream = new FileInputStream(file);
|
|
|
|
|
|
|
|
final Document[] docs = p.parse(url, mimetype, null, new VocabularyScraper(), 0, inStream);
|
|
|
|
for (final Document doc: docs) {
|
|
|
|
for (final Document doc: docs) {
|
|
|
|
final Reader content = new InputStreamReader(doc.getTextStream(), doc.getCharset());
|
|
|
|
Reader content = null;
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
content = new InputStreamReader(doc.getTextStream(), doc.getCharset());
|
|
|
|
final StringBuilder str = new StringBuilder();
|
|
|
|
final StringBuilder str = new StringBuilder();
|
|
|
|
int c;
|
|
|
|
int c;
|
|
|
|
while( (c = content.read()) != -1 )
|
|
|
|
while( (c = content.read()) != -1 )
|
|
|
@ -147,8 +218,26 @@ public class ParserTest {
|
|
|
|
assertThat(doc.dc_title(), containsString(testFile[2]));
|
|
|
|
assertThat(doc.dc_title(), containsString(testFile[2]));
|
|
|
|
assertThat(doc.dc_creator(), containsString(testFile[3]));
|
|
|
|
assertThat(doc.dc_creator(), containsString(testFile[3]));
|
|
|
|
if (testFile[4].length() > 0) assertThat(doc.dc_description()[0], containsString(testFile[4]));
|
|
|
|
if (testFile[4].length() > 0) assertThat(doc.dc_description()[0], containsString(testFile[4]));
|
|
|
|
|
|
|
|
} finally {
|
|
|
|
|
|
|
|
if(content != null) {
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
content.close();
|
|
|
|
|
|
|
|
} catch(IOException ioe) {
|
|
|
|
|
|
|
|
System.out.println("Could not close text input stream");
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
} catch (final InterruptedException ex) {
|
|
|
|
|
|
|
|
} finally {
|
|
|
|
|
|
|
|
if(inStream != null) {
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
inStream.close();
|
|
|
|
|
|
|
|
} catch(IOException ioe) {
|
|
|
|
|
|
|
|
System.out.println("Could not close input stream on file " + filename);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} catch (final InterruptedException ex) {}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
@ -172,9 +261,14 @@ public class ParserTest {
|
|
|
|
final AnchorURL url = new AnchorURL("http://localhost/" + filename);
|
|
|
|
final AnchorURL url = new AnchorURL("http://localhost/" + filename);
|
|
|
|
|
|
|
|
|
|
|
|
AbstractParser p = new pptParser();
|
|
|
|
AbstractParser p = new pptParser();
|
|
|
|
final Document[] docs = p.parse(url, mimetype, null, new VocabularyScraper(), 0, new FileInputStream(file));
|
|
|
|
FileInputStream inStream = null;
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
inStream = new FileInputStream(file);
|
|
|
|
|
|
|
|
final Document[] docs = p.parse(url, mimetype, null, new VocabularyScraper(), 0, inStream);
|
|
|
|
for (final Document doc : docs) {
|
|
|
|
for (final Document doc : docs) {
|
|
|
|
final Reader content = new InputStreamReader(doc.getTextStream(), doc.getCharset());
|
|
|
|
Reader content = null;
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
content = new InputStreamReader(doc.getTextStream(), doc.getCharset());
|
|
|
|
final StringBuilder str = new StringBuilder();
|
|
|
|
final StringBuilder str = new StringBuilder();
|
|
|
|
int c;
|
|
|
|
int c;
|
|
|
|
while ((c = content.read()) != -1) {
|
|
|
|
while ((c = content.read()) != -1) {
|
|
|
@ -188,6 +282,25 @@ public class ParserTest {
|
|
|
|
if (testFile[4].length() > 0) {
|
|
|
|
if (testFile[4].length() > 0) {
|
|
|
|
assertThat(doc.dc_description()[0], containsString(testFile[4]));
|
|
|
|
assertThat(doc.dc_description()[0], containsString(testFile[4]));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
} finally {
|
|
|
|
|
|
|
|
if(content != null) {
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
content.close();
|
|
|
|
|
|
|
|
} catch(IOException ioe) {
|
|
|
|
|
|
|
|
System.out.println("Could not close text input stream");
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
} finally {
|
|
|
|
|
|
|
|
if(inStream != null) {
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
inStream.close();
|
|
|
|
|
|
|
|
} catch(IOException ioe) {
|
|
|
|
|
|
|
|
System.out.println("Could not close input stream on file " + filename);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|