- fixes to the doc, ppt and xls parsers: better title generation

- fixes to the httpd server response header generation
- fix for a server date computation bug
- new button in indexControl to view the content of a URL in ViewFile


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5576 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 7936e58fe7
commit b57c9da1f8

@ -123,6 +123,12 @@
Click the API icon to see an example call to the search rss API.
To see a list of all APIs, please visit the <a href="http://www.yacy-websuche.de/wiki/index.php/Dev:API">API wiki page</a>.</span>
</div>
<form action="ViewFile.html" method="get">
<input type="hidden" name="viewMode" value="parsed" />
<input type="hidden" name="show" value="Show" />
<input type="hidden" name="urlHash" value="#[urlhash]#" />
<input type="submit" value="Show Content" name="showcontent" /><br />
</form>
<form action="IndexControlURLs_p.html" method="post" enctype="multipart/form-data">
<input type="hidden" name="keystring" value="" />
<input type="hidden" name="keyhash" value="" />

@ -102,7 +102,7 @@ public class ViewFile {
return prop;
}
// gettin the url that belongs to the entry
// getting the url that belongs to the entry
final indexURLReference.Components comp = urlEntry.comp();
if ((comp == null) || (comp.url() == null)) {
prop.put("error", "3");

@ -84,7 +84,7 @@ public class yacydoc {
prop.putXML("dc_publisher", comp.url().toNormalform(false, true));
prop.putXML("dc_contributor", "");
prop.putXML("dc_date", entry.moddate().toString());
prop.put("dc_type", entry.doctype());
prop.putXML("dc_type", "" + entry.doctype());
prop.putXML("dc_identifier", urlhash);
prop.putXML("dc_language", entry.language());

@ -73,7 +73,6 @@ public class httpHeader extends TreeMap<String, String> implements Map<String, S
* ============================================================= */
// TODO: sort these header properties into request and response properties (some are both)
public static final String HOST = "Host";
public static final String USER_AGENT = "User-Agent";
@ -93,6 +92,7 @@ public class httpHeader extends TreeMap<String, String> implements Map<String, S
public static final String CACHE_CONTROL = "Cache-Control";
public static final String DATE = "Date";
public static final String LAST_MODIFIED = "Last-modified";
public static final String SERVER = "Server";
public static final String ACCEPT_RANGES = "Accept-Ranges";
@ -105,7 +105,15 @@ public class httpHeader extends TreeMap<String, String> implements Map<String, S
public static final String X_FORWARDED_FOR = "X-Forwarded-For";
public static final String X_YACY_INDEX_CONTROL = "X-YACY-Index-Control";
public static final String X_YACY_PREVIOUS_REQUEST_LINE = "X-Previous-Request-Line";
public static final String X_YACY_KEEP_ALIVE_REQUEST_COUNT = "X-Keep-Alive-Request-Count";
public static final String X_YACY_ORIGINAL_REQUEST_LINE = "X-Original-Request-Line";
public static final String SET_COOKIE = "Set-Cookie";
public static final String SET_COOKIE2 = "Set-Cookie2";
public static final String EXPIRES = "Expires";
/* =============================================================
* Constants for content-encodings

@ -34,16 +34,7 @@ import de.anomic.kelondro.util.Log;
public class httpResponseHeader extends httpHeader {
// response header properties
public static final String X_YACY_PREVIOUS_REQUEST_LINE = "X-Previous-Request-Line";
public static final String X_YACY_KEEP_ALIVE_REQUEST_COUNT = "X-Keep-Alive-Request-Count";
public static final String X_YACY_ORIGINAL_REQUEST_LINE = "X-Original-Request-Line";
public static final String SET_COOKIE = "Set-Cookie";
public static final String SET_COOKIE2 = "Set-Cookie2";
public static final String EXPIRES = "Expires";
public static final String LAST_MODIFIED = "Last-modified";
private static final long serialVersionUID = 0L;
public httpResponseHeader() {
@ -57,11 +48,10 @@ public class httpResponseHeader extends httpHeader {
public httpResponseHeader(final HashMap<String, String> reverseMappingCache, final Map<String, String> othermap) {
super(reverseMappingCache, othermap);
}
/**
 * Returns the value of this header's {@code Date} field.
 *
 * <p>When {@code headerDate} yields {@code null} for that field, the current
 * time is returned instead, so this method never returns {@code null}.
 *
 * @return the header date, or a new {@link Date} set to now as fallback
 */
public Date date() {
    final Date d = headerDate(httpHeader.DATE);
    // Fall back to "now" rather than handing null to the caller.
    return (d == null) ? new Date() : d;
}
public Date expires() {

@ -1236,34 +1236,6 @@ public final class httpd implements serverHandler, Cloneable {
if (o != null) try { o.close(); } catch (final Exception e) { e.printStackTrace(); }
}
}
/**
 * Sends a response header built only from the status information and the
 * content length.
 *
 * <p>Delegates to the twelve-argument overload, passing {@code null} for
 * every argument this overload does not take.
 *
 * @param conProp        connection properties, passed through unchanged
 * @param respond        stream the header is written to
 * @param httpVersion    HTTP version string, passed through unchanged
 * @param httpStatusCode numeric HTTP status code
 * @param httpStatusText status text that accompanies the status code
 * @param contentLength  content length, passed through unchanged
 * @throws IOException if the delegated call fails with an I/O error
 */
public static final void sendRespondHeader(
        final Properties conProp,
        final OutputStream respond,
        final String httpVersion,
        final int httpStatusCode,
        final String httpStatusText,
        final long contentLength
) throws IOException {
    sendRespondHeader(
            conProp,
            respond,
            httpVersion,
            httpStatusCode,
            httpStatusText,
            null,           // contentType
            contentLength,
            null,           // moddate
            null,           // expires
            null,           // headers
            null,           // contentEnc
            null);          // transferEnc
}
/**
 * Sends a response header with explicit content type, dates, additional
 * headers and encodings.
 *
 * <p>Delegates to the thirteen-argument overload, forwarding every argument
 * unchanged and appending {@code true} as the final argument.
 *
 * @param conProp        connection properties, passed through unchanged
 * @param respond        stream the header is written to
 * @param httpVersion    HTTP version string, passed through unchanged
 * @param httpStatusCode numeric HTTP status code
 * @param httpStatusText status text that accompanies the status code
 * @param contentType    content type, passed through unchanged
 * @param contentLength  content length, passed through unchanged
 * @param moddate        modification date, passed through unchanged
 * @param expires        expiry date, passed through unchanged
 * @param headers        additional response headers, passed through unchanged
 * @param contentEnc     content encoding, passed through unchanged
 * @param transferEnc    transfer encoding, passed through unchanged
 * @throws IOException if the delegated call fails with an I/O error
 */
public static final void sendRespondHeader(
        final Properties conProp,
        final OutputStream respond,
        final String httpVersion,
        final int httpStatusCode,
        final String httpStatusText,
        final String contentType,
        final long contentLength,
        final Date moddate,
        final Date expires,
        final httpResponseHeader headers,
        final String contentEnc,
        final String transferEnc
) throws IOException {
    // NOTE(review): the trailing boolean is presumably the "nocache" flag of the
    // thirteen-argument overload; its signature is not visible here — confirm.
    sendRespondHeader(
            conProp,
            respond,
            httpVersion,
            httpStatusCode,
            httpStatusText,
            contentType,
            contentLength,
            moddate,
            expires,
            headers,
            contentEnc,
            transferEnc,
            true);
}
public static final void sendRespondHeader(
final Properties conProp,
@ -1303,7 +1275,10 @@ public final class httpd implements serverHandler, Cloneable {
headers.put(httpResponseHeader.SERVER, "AnomicHTTPD (www.anomic.de)");
headers.put(httpResponseHeader.DATE, DateFormatter.formatRFC1123(now));
if (moddate.after(now)) moddate = now;
if (moddate.after(now)) {
System.out.println("*** DEBUG: correcting moddate = " + moddate.toString() + " to now = " + now.toString());
moddate = now;
}
headers.put(httpResponseHeader.LAST_MODIFIED, DateFormatter.formatRFC1123(moddate));
if (nocache) {

@ -742,9 +742,7 @@ public final class httpdFileHandler {
// call rewrite-class
if (targetClass == null) {
targetDate = new Date(targetFile.lastModified());
} else {
if (targetClass != null) {
// CGI-class: call the class to create a property for rewriting
try {
requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP, conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP));
@ -804,9 +802,10 @@ public final class httpdFileHandler {
targetClass = null;
throw e;
}
targetDate = new Date(System.currentTimeMillis());
nocache = true;
}
targetDate = new Date(targetFile.lastModified());
// rewrite the file
InputStream fis = null;

@ -147,7 +147,7 @@ public final class DateFormatter {
public static final String formatRFC1123(final Date date) {
if (date == null) return "";
if (date.getTime() - lastRFC1123long < 1000) {
if (Math.abs(date.getTime() - lastRFC1123long) < 1000) {
//System.out.println("date cache hit - " + lastRFC1123string);
return lastRFC1123string;
}

@ -65,19 +65,22 @@ public class docParser extends AbstractParser implements Parser {
try {
final WordTextExtractorFactory extractorFactory = new WordTextExtractorFactory();
final TextExtractor extractor = extractorFactory.textExtractor(source);
final String contents = extractor.getText();
final String contents = extractor.getText().trim();
String title = contents.replaceAll("\r"," ").replaceAll("\n"," ").replaceAll("\t"," ").trim();
if (title.length() > 80) title = title.substring(0, 80);
int l = title.length();
while (true) {
title = title.replaceAll(" ", " ");
if (title.length() == l) break;
l = title.length();
}
final plasmaParserDocument theDoc = new plasmaParserDocument(
location,
mimeType,
"UTF-8",
null,
null,
((contents.length() > 80)? contents.substring(0, 80):contents.trim()).
replaceAll("\r\n"," ").
replaceAll("\n"," ").
replaceAll("\r"," ").
replaceAll("\t"," "),
title,
"", // TODO: AUTHOR
null,
null,

@ -77,7 +77,15 @@ public class pptParser extends AbstractParser implements Parser {
* of the document
*/
final PowerPointExtractor pptExtractor = new PowerPointExtractor(new BufferedInputStream(source));
final String contents = pptExtractor.getText(true, true);
final String contents = pptExtractor.getText(true, true).trim();
String title = contents.replaceAll("\r"," ").replaceAll("\n"," ").replaceAll("\t"," ").trim();
if (title.length() > 80) title = title.substring(0, 80);
int l = title.length();
while (true) {
title = title.replaceAll(" ", " ");
if (title.length() == l) break;
l = title.length();
}
/*
* create the plasmaParserDocument for the database
@ -89,11 +97,7 @@ public class pptParser extends AbstractParser implements Parser {
"UTF-8",
null,
null,
((contents.length() > 80) ? contents.substring(0, 80) : contents.trim()).
replaceAll("\r\n"," ").
replaceAll("\n"," ").
replaceAll("\r"," ").
replaceAll("\t"," "),
title,
"", // TODO: AUTHOR
null,
null,

@ -105,7 +105,7 @@ public class xlsParser extends AbstractParser implements Parser, HSSFListener {
din.close();
//now the parsed strings are in the StringBuilder, now convert them to a String
final String contents = sbFoundStrings.toString();
final String contents = sbFoundStrings.toString().trim();
/*
* create the plasmaParserDocument for the database
@ -117,11 +117,7 @@ public class xlsParser extends AbstractParser implements Parser, HSSFListener {
"UTF-8",
null,
null,
((contents.length() > 80) ? contents.substring(0, 80) : contents.trim()).
replaceAll("\r\n"," ").
replaceAll("\n"," ").
replaceAll("\r"," ").
replaceAll("\t"," "),
location.getFile(),
"", // TODO: AUTHOR
null,
null,

Loading…
Cancel
Save