some updates to Dublin Core, metadata browsing, file indexing and parser stability

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6342 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 62a7341c4d
commit 031e6eefbd

@ -19,7 +19,7 @@
<fieldset><legend>View URL Content</legend> <fieldset><legend>View URL Content</legend>
<dl> <dl>
<dt>URL:</dt> <dd><a href="#[url]#">#[url]#</a></dd> <dt>URL:</dt> <dd><a href="#[url]#">#[url]#</a></dd>
<dt>Hash:</dt> <dd>#[hash]#</dd> <dt>Hash:</dt> <dd><a href="/api/yacydoc.html?urlhash=#[hash]#">#[hash]#</a></dd>
<dt>Word Count:</dt> <dd>#[wordCount]#</dd> <dt>Word Count:</dt> <dd>#[wordCount]#</dd>
<dt>Description:</dt><dd>#[desc]#</dd> <dt>Description:</dt><dd>#[desc]#</dd>
<dt>Size:</dt> <dd>#[size]# Bytes</dd>#(mimeTypeAvailable)#:: <dt>Size:</dt> <dd>#[size]# Bytes</dd>#(mimeTypeAvailable)#::

@ -7,18 +7,18 @@ you can validate it with http://www.stg.brown.edu/service/xmlvalid/
--> -->
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:yacy="http://yacy.net/"> <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:yacy="http://yacy.net/">
<dc:Title LANG="#[dc_language]#">#[dc_title]#</dc:Title> <dc:title LANG="#[dc_language]#">#[dc_title]#</dc:title>
<dc:Creator>#[dc_creator]#</dc:Creator> <dc:creator>#[dc_creator]#</dc:creator>
<dc:Contributor>#[dc_contributor]#</dc:Contributor> <dc:contributor>#[dc_contributor]#</dc:contributor>
<dc:Subject SCHEME="item-list">#[dc_subject]#</dc:Subject> <dc:subject SCHEME="item-list">#[dc_subject]#</dc:subject>
<dc:Description LANG="#[dc_language]#">#[dc_description]#</dc:Description> <dc:description LANG="#[dc_language]#">#[dc_description]#</dc:description>
<dc:Publisher.URL>#[dc_publisher]#</dc:Publisher.URL> <dc:publisher.URL>#[dc_publisher]#</dc:publisher.URL>
<dc:Date>#[dc_date]#</dc:Date> <dc:date>#[dc_date]#</dc:date>
<dc:Type>yacy:doctype:#[dc_type]#</dc:Type> <dc:type>yacy:doctype:#[dc_type]#</dc:type>
<dc:Identifier SCHEME="yacy:urlhash">yacy:urlhash:#[dc_identifier]#</dc:Identifier> <dc:identifier SCHEME="yacy:urlhash">yacy:urlhash:#[dc_identifier]#</dc:identifier>
<dc:Identifier SCHEME="URL">#[dc_publisher]#</dc:Identifier> <dc:identifier SCHEME="URL">#[dc_publisher]#</dc:identifier>
<dc:Format.Extent>#[yacy_size]#</dc:Format.Extent> <dc:format.extent>#[yacy_size]#</dc:format.extent>
<dc:Language SCHEME="ISO639-2">#[dc_language]#</dc:Language> <dc:language SCHEME="ISO639-2">#[dc_language]#</dc:language>
<yacy:loaddate>#[yacy_loaddate]#</yacy:loaddate> <yacy:loaddate>#[yacy_loaddate]#</yacy:loaddate>
<yacy:referrer.hash>yacy:urlhash:#[yacy_referrer_hash]#</yacy:referrer.hash> <yacy:referrer.hash>yacy:urlhash:#[yacy_referrer_hash]#</yacy:referrer.hash>
<yacy:referrer.url>#[yacy_referrer_url]#</yacy:referrer.url> <yacy:referrer.url>#[yacy_referrer_url]#</yacy:referrer.url>

@ -50,6 +50,7 @@ import de.anomic.document.Document;
import de.anomic.kelondro.util.FileUtils; import de.anomic.kelondro.util.FileUtils;
import de.anomic.server.serverCharBuffer; import de.anomic.server.serverCharBuffer;
import de.anomic.yacy.yacyURL; import de.anomic.yacy.yacyURL;
import de.anomic.yacy.logging.Log;
public class pdfParser extends AbstractParser implements Idiom { public class pdfParser extends AbstractParser implements Idiom {
@ -134,8 +135,11 @@ public class pdfParser extends AbstractParser implements Idiom {
} else { } else {
writer = new serverCharBuffer(); writer = new serverCharBuffer();
} }
try {
stripper.writeText(theDocument, writer ); stripper.writeText(theDocument, writer ); // may throw a NPE
} catch (Exception e) {
Log.logWarning("pdfParser", e.getMessage());
}
theDocument.close(); theDocument = null; theDocument.close(); theDocument = null;
writer.close(); writer.close();

@ -39,6 +39,8 @@ import de.anomic.yacy.yacyURL;
import org.apache.poi.hdgf.extractor.VisioTextExtractor; import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hpsf.SummaryInformation; import org.apache.poi.hpsf.SummaryInformation;
import de.anomic.yacy.logging.Log;
public class vsdParser extends AbstractParser implements Idiom { public class vsdParser extends AbstractParser implements Idiom {
/** /**
@ -91,7 +93,7 @@ public class vsdParser extends AbstractParser implements Idiom {
contents = extractor.getText(); contents = extractor.getText();
summary = extractor.getSummaryInformation(); summary = extractor.getSummaryInformation();
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); Log.logWarning("vsdParser", e.getMessage());
} }
String author = null; String author = null;

@ -124,7 +124,7 @@ public class DocumentIndex extends Segment {
*/ */
public void addAll(File start) { public void addAll(File start) {
assert (start != null); assert (start != null);
assert (start.canRead()); assert (start.canRead()) : start.toString();
if (!start.isDirectory()) { if (!start.isDirectory()) {
try { try {
this.queue.put(start); this.queue.put(start);
@ -192,7 +192,7 @@ public class DocumentIndex extends Segment {
* @return a list of files that contain the word * @return a list of files that contain the word
*/ */
public ArrayList<File> find(String querystring) { public ArrayList<File> find(String querystring) {
return find(querystring, 0, 20); return find(querystring, 0, 100);
} }
/** /**

@ -188,8 +188,6 @@ public class Segment {
wordCount++; wordCount++;
} }
return wordCount; return wordCount;
} }

@ -78,17 +78,23 @@ public class migration {
if(file.exists()) if(file.exists())
delete(file); delete(file);
} }
/*
* copy skins from the release to DATA/SKINS.
*/
public static void installSkins(final Switchboard sb){ public static void installSkins(final Switchboard sb){
final File skinsPath = sb.getConfigPath("skinPath", "DATA/SKINS"); final File skinsPath = sb.getConfigPath("skinPath", "DATA/SKINS");
final File defaultSkinsPath = new File(sb.getRootPath(), "skins"); final File defaultSkinsPath = new File(sb.getRootPath(), "skins");
if(defaultSkinsPath.exists()){ if (defaultSkinsPath.exists()) {
final List<String> skinFiles = listManager.getDirListing(defaultSkinsPath.getAbsolutePath()); final List<String> skinFiles = listManager.getDirListing(defaultSkinsPath.getAbsolutePath());
mkdirs(skinsPath); mkdirs(skinsPath);
for(String skinFile : skinFiles){ for (String skinFile : skinFiles){
if(skinFile.endsWith(".css")){ if (skinFile.endsWith(".css")){
try{ File from = new File(defaultSkinsPath, skinFile);
FileUtils.copy(new File(defaultSkinsPath, skinFile), new File(skinsPath, skinFile)); File to = new File(skinsPath, skinFile);
}catch(final IOException e){} if (from.lastModified() > to.lastModified()) try {
FileUtils.copy(from, to);
} catch (final IOException e) {}
} }
} }
} }

Loading…
Cancel
Save