Some updates to Dublin Core, metadata browsing, file indexing, and parser stability

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6342 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 62a7341c4d
commit 031e6eefbd

@ -19,7 +19,7 @@
<fieldset><legend>View URL Content</legend>
<dl>
<dt>URL:</dt> <dd><a href="#[url]#">#[url]#</a></dd>
<dt>Hash:</dt> <dd>#[hash]#</dd>
<dt>Hash:</dt> <dd><a href="/api/yacydoc.html?urlhash=#[hash]#">#[hash]#</a></dd>
<dt>Word Count:</dt> <dd>#[wordCount]#</dd>
<dt>Description:</dt><dd>#[desc]#</dd>
<dt>Size:</dt> <dd>#[size]# Bytes</dd>#(mimeTypeAvailable)#::

@ -7,18 +7,18 @@ you can validate it with http://www.stg.brown.edu/service/xmlvalid/
-->
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:yacy="http://yacy.net/">
<dc:Title LANG="#[dc_language]#">#[dc_title]#</dc:Title>
<dc:Creator>#[dc_creator]#</dc:Creator>
<dc:Contributor>#[dc_contributor]#</dc:Contributor>
<dc:Subject SCHEME="item-list">#[dc_subject]#</dc:Subject>
<dc:Description LANG="#[dc_language]#">#[dc_description]#</dc:Description>
<dc:Publisher.URL>#[dc_publisher]#</dc:Publisher.URL>
<dc:Date>#[dc_date]#</dc:Date>
<dc:Type>yacy:doctype:#[dc_type]#</dc:Type>
<dc:Identifier SCHEME="yacy:urlhash">yacy:urlhash:#[dc_identifier]#</dc:Identifier>
<dc:Identifier SCHEME="URL">#[dc_publisher]#</dc:Identifier>
<dc:Format.Extent>#[yacy_size]#</dc:Format.Extent>
<dc:Language SCHEME="ISO639-2">#[dc_language]#</dc:Language>
<dc:title LANG="#[dc_language]#">#[dc_title]#</dc:title>
<dc:creator>#[dc_creator]#</dc:creator>
<dc:contributor>#[dc_contributor]#</dc:contributor>
<dc:subject SCHEME="item-list">#[dc_subject]#</dc:subject>
<dc:description LANG="#[dc_language]#">#[dc_description]#</dc:description>
<dc:publisher.URL>#[dc_publisher]#</dc:publisher.URL>
<dc:date>#[dc_date]#</dc:date>
<dc:type>yacy:doctype:#[dc_type]#</dc:type>
<dc:identifier SCHEME="yacy:urlhash">yacy:urlhash:#[dc_identifier]#</dc:identifier>
<dc:identifier SCHEME="URL">#[dc_publisher]#</dc:identifier>
<dc:format.extent>#[yacy_size]#</dc:format.extent>
<dc:language SCHEME="ISO639-2">#[dc_language]#</dc:language>
<yacy:loaddate>#[yacy_loaddate]#</yacy:loaddate>
<yacy:referrer.hash>yacy:urlhash:#[yacy_referrer_hash]#</yacy:referrer.hash>
<yacy:referrer.url>#[yacy_referrer_url]#</yacy:referrer.url>

@ -50,6 +50,7 @@ import de.anomic.document.Document;
import de.anomic.kelondro.util.FileUtils;
import de.anomic.server.serverCharBuffer;
import de.anomic.yacy.yacyURL;
import de.anomic.yacy.logging.Log;
public class pdfParser extends AbstractParser implements Idiom {
@ -134,8 +135,11 @@ public class pdfParser extends AbstractParser implements Idiom {
} else {
writer = new serverCharBuffer();
}
stripper.writeText(theDocument, writer );
try {
stripper.writeText(theDocument, writer ); // may throw a NPE
} catch (Exception e) {
Log.logWarning("pdfParser", e.getMessage());
}
theDocument.close(); theDocument = null;
writer.close();

@ -39,6 +39,8 @@ import de.anomic.yacy.yacyURL;
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hpsf.SummaryInformation;
import de.anomic.yacy.logging.Log;
public class vsdParser extends AbstractParser implements Idiom {
/**
@ -91,7 +93,7 @@ public class vsdParser extends AbstractParser implements Idiom {
contents = extractor.getText();
summary = extractor.getSummaryInformation();
} catch (Exception e) {
e.printStackTrace();
Log.logWarning("vsdParser", e.getMessage());
}
String author = null;

@ -124,7 +124,7 @@ public class DocumentIndex extends Segment {
*/
public void addAll(File start) {
assert (start != null);
assert (start.canRead());
assert (start.canRead()) : start.toString();
if (!start.isDirectory()) {
try {
this.queue.put(start);
@ -192,7 +192,7 @@ public class DocumentIndex extends Segment {
* @return a list of files that contain the word
*/
public ArrayList<File> find(String querystring) {
return find(querystring, 0, 20);
return find(querystring, 0, 100);
}
/**

@ -188,8 +188,6 @@ public class Segment {
wordCount++;
}
return wordCount;
}

@ -78,6 +78,10 @@ public class migration {
if(file.exists())
delete(file);
}
/*
* copy skins from the release to DATA/SKINS.
*/
public static void installSkins(final Switchboard sb){
final File skinsPath = sb.getConfigPath("skinPath", "DATA/SKINS");
final File defaultSkinsPath = new File(sb.getRootPath(), "skins");
@ -86,8 +90,10 @@ public class migration {
mkdirs(skinsPath);
for (String skinFile : skinFiles){
if (skinFile.endsWith(".css")){
try{
FileUtils.copy(new File(defaultSkinsPath, skinFile), new File(skinsPath, skinFile));
File from = new File(defaultSkinsPath, skinFile);
File to = new File(skinsPath, skinFile);
if (from.lastModified() > to.lastModified()) try {
FileUtils.copy(from, to);
} catch (final IOException e) {}
}
}

Loading…
Cancel
Save