From 5aee19daa4bd11d79723cd84d631e608d9f20aa3 Mon Sep 17 00:00:00 2001
From: Michael Peter Christen
Date: Mon, 4 Jun 2012 23:44:26 +0200
Subject: [PATCH] added show from cache in search results (not yet finished)
---
defaults/yacy.init | 5 +-
htroot/ConfigPortal.html | 5 +-
htroot/ConfigPortal.java | 3 +
htroot/yacy/search.java | 6 +-
htroot/yacysearch.java | 9 ++-
htroot/yacysearchitem.html | 1 +
htroot/yacysearchitem.java | 3 +-
htroot/yacysearchtrailer.java | 4 +-
source/net/yacy/cora/storage/Files.java | 79 ++++++++++++++++++++++++-
9 files changed, 99 insertions(+), 16 deletions(-)
diff --git a/defaults/yacy.init b/defaults/yacy.init
index 3859653e1..81c1a1cae 100644
--- a/defaults/yacy.init
+++ b/defaults/yacy.init
@@ -744,9 +744,10 @@ search.target.special.pattern =
# these information pieces may be switched on or off
search.result.show.date = true
search.result.show.size = false
-search.result.show.metadata = true
-search.result.show.parser = true
+search.result.show.metadata = false
+search.result.show.parser = false
search.result.show.pictures = false
+search.result.show.cache = true
# search navigators: comma-separated list of default values for search navigation.
# can be temporary different if search string is given with differen navigation values
diff --git a/htroot/ConfigPortal.html b/htroot/ConfigPortal.html
index 208f7a0fb..4d1a80758 100644
--- a/htroot/ConfigPortal.html
+++ b/htroot/ConfigPortal.html
@@ -62,7 +62,7 @@
Speed up search results with this option! (use CACHEONLY or FALSE to switch off verification)
NOCACHE: no use of web cache, load all snippets online
IFFRESH: use the cache if the cache exists and is fresh otherwise load online
- IFEXIST: use the cache if the cache exist or load online
+ IFEXIST: use the cache if the cache exist or load online
If verification fails, delete index reference
CACHEONLY: never go online, use all content from cache. If no cache entry exist, consider content nevertheless as available and show result without snippet
FALSE: no link verification and not snippet generation: all search results are valid without verification
@@ -74,7 +74,8 @@
Size
Metadata
Parser
- Pictures
+ Pictures
+ Cache
Show Navigation on Side-Bar
diff --git a/htroot/ConfigPortal.java b/htroot/ConfigPortal.java
index c6b82a3b0..7411d4a6a 100644
--- a/htroot/ConfigPortal.java
+++ b/htroot/ConfigPortal.java
@@ -92,6 +92,7 @@ public class ConfigPortal {
sb.setConfig("search.result.show.metadata", post.getBoolean("search.result.show.metadata", false));
sb.setConfig("search.result.show.parser", post.getBoolean("search.result.show.parser", false));
sb.setConfig("search.result.show.pictures", post.getBoolean("search.result.show.pictures", false));
+ sb.setConfig("search.result.show.cache", post.getBoolean("search.result.show.cache", false));
sb.setConfig(SwitchboardConstants.SEARCH_VERIFY, post.get("search.verify", "ifexist"));
sb.setConfig(SwitchboardConstants.SEARCH_VERIFY_DELETE, post.getBoolean("search.verify.delete", false));
@@ -136,6 +137,7 @@ public class ConfigPortal {
sb.setConfig("search.result.show.metadata", true);
sb.setConfig("search.result.show.parser", true);
sb.setConfig("search.result.show.pictures", false);
+ sb.setConfig("search.result.show.cache", false);
sb.setConfig(SwitchboardConstants.SEARCH_VERIFY, "iffresh");
sb.setConfig(SwitchboardConstants.SEARCH_VERIFY_DELETE, "true");
sb.setConfig("about.headline", "");
@@ -165,6 +167,7 @@ public class ConfigPortal {
prop.put("search.result.show.metadata", sb.getConfigBool("search.result.show.metadata", false) ? 1 : 0);
prop.put("search.result.show.parser", sb.getConfigBool("search.result.show.parser", false) ? 1 : 0);
prop.put("search.result.show.pictures", sb.getConfigBool("search.result.show.pictures", false) ? 1 : 0);
+ prop.put("search.result.show.cache", sb.getConfigBool("search.result.show.cache", false) ? 1 : 0);
prop.put("search.navigation.hosts", sb.getConfig("search.navigation", "").indexOf("hosts",0) >= 0 ? 1 : 0);
prop.put("search.navigation.authors", sb.getConfig("search.navigation", "").indexOf("authors",0) >= 0 ? 1 : 0);
diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java
index d056b81c9..f20f0d583 100644
--- a/htroot/yacy/search.java
+++ b/htroot/yacy/search.java
@@ -41,12 +41,12 @@ import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.RSSMessage;
+import net.yacy.cora.lod.SimpleVocabulary;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.sorting.ScoreMap;
import net.yacy.cora.sorting.WeakPriorityBlockingQueue;
-import net.yacy.document.Autotagging.Metatag;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceFactory;
@@ -236,7 +236,7 @@ public final class search {
prefer,
ContentDomain.contentdomParser(contentdom),
language,
- new HashSet(),
+ new HashSet(),
"", // no navigation
null, // no snippet computation
count,
@@ -299,7 +299,7 @@ public final class search {
prefer,
ContentDomain.contentdomParser(contentdom),
language,
- new HashSet(),
+ new HashSet(),
"", // no navigation
null, // no snippet computation
count,
diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java
index 6a4e6da26..e5b86d0cd 100644
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@@ -45,13 +45,12 @@ import net.yacy.cora.document.Classification;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.document.UTF8;
+import net.yacy.cora.lod.SimpleVocabulary;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.services.federated.yacy.CacheStrategy;
-import net.yacy.document.Autotagging.Metatag;
-import net.yacy.document.Autotagging.Vocabulary;
import net.yacy.document.Condenser;
import net.yacy.document.Document;
import net.yacy.document.LibraryProvider;
@@ -125,9 +124,9 @@ public class yacysearch {
prop.put("focus", ((post == null) ? true : post.get("focus", "1").equals("1")) ? 1 : 0);
// produce vocabulary navigation sidebars
- Collection vocabularies = LibraryProvider.autotagging.getVocabularies();
+ Collection vocabularies = LibraryProvider.autotagging.getVocabularies();
int j = 0;
- for (Vocabulary v: vocabularies) {
+ for (SimpleVocabulary v: vocabularies) {
prop.put("sidebarVocabulary_" + j + "_vocabulary", v.getName());
j++;
}
@@ -464,7 +463,7 @@ public class yacysearch {
}
int voc = 0;
- Collection metatags = new ArrayList(1);
+ Collection metatags = new ArrayList(1);
while ((voc = querystring.indexOf("/vocabulary/", 0)) >= 0) {
String vocabulary = "";
int ve = querystring.indexOf(' ', voc + 12);
diff --git a/htroot/yacysearchitem.html b/htroot/yacysearchitem.html
index 0789fb3b7..de0086832 100644
--- a/htroot/yacysearchitem.html
+++ b/htroot/yacysearchitem.html
@@ -29,6 +29,7 @@
#(showMetadata)#:: | Metadata #(/showMetadata)#
#(showParser)#:: | Parser #(/showParser)#
#(showPictures)#:: | Pictures
#(/showPictures)#
+ #(showCache)#:: | Cache #(/showCache)#
::
#(item)#::
diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java
index c5f823473..9604d2a5e 100644
--- a/htroot/yacysearchitem.java
+++ b/htroot/yacysearchitem.java
@@ -128,6 +128,7 @@ public class yacysearchitem {
prop.put("content_showMetadata", sb.getConfigBool("search.result.show.metadata", true) ? 1 : 0);
prop.put("content_showParser", sb.getConfigBool("search.result.show.parser", true) ? 1 : 0);
prop.put("content_showPictures", sb.getConfigBool("search.result.show.pictures", true) ? 1 : 0);
+ prop.put("content_showCache", sb.getConfigBool("search.result.show.cache", true) ? 1 : 0);
prop.put("content_authorized", authenticated ? "1" : "0");
final String urlhash = ASCII.String(result.hash());
prop.put("content_authorized_bookmark", sb.tables.bookmarks.hasBookmark("admin", urlhash) ? "0" : "1");
@@ -198,7 +199,7 @@ public class yacysearchitem {
} else {
prop.put("content_code", "");
}
- if (result.lat() == 0.0f || result.lon() == 0.0f) {
+ if (result.lat() == 0.0d || result.lon() == 0.0d) {
prop.put("content_loc", 0);
} else {
prop.put("content_loc", 1);
diff --git a/htroot/yacysearchtrailer.java b/htroot/yacysearchtrailer.java
index 934632d6e..a7c55ff68 100644
--- a/htroot/yacysearchtrailer.java
+++ b/htroot/yacysearchtrailer.java
@@ -28,9 +28,9 @@ import java.util.Iterator;
import java.util.Map;
import net.yacy.cora.document.MultiProtocolURI;
+import net.yacy.cora.lod.SimpleVocabulary;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.sorting.ScoreMap;
-import net.yacy.document.Autotagging;
import net.yacy.document.LibraryProvider;
import net.yacy.kelondro.util.Formatter;
import net.yacy.peers.graphics.ProfilingGraph;
@@ -307,7 +307,7 @@ public class yacysearchtrailer {
while (i < 20 && navigatorIterator.hasNext()) {
name = navigatorIterator.next();
count = ve.getValue().get(name);
- nav = "%2Fvocabulary%2F" + navname + "%2F" + MultiProtocolURI.escape(Autotagging.encodePrintname(name)).toString();
+ nav = "%2Fvocabulary%2F" + navname + "%2F" + MultiProtocolURI.escape(SimpleVocabulary.Metatag.encodePrintname(name)).toString();
queryStringForUrl = theQuery.queryStringForUrl();
p = queryStringForUrl.indexOf(nav);
if (p < 0) {
diff --git a/source/net/yacy/cora/storage/Files.java b/source/net/yacy/cora/storage/Files.java
index 7fab34611..017b6fe6a 100644
--- a/source/net/yacy/cora/storage/Files.java
+++ b/source/net/yacy/cora/storage/Files.java
@@ -1,5 +1,5 @@
/**
- * ConfigurationSet
+ * Files
* Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt a. M., Germany
* First released 29.06.2011 at http://yacy.net
*
@@ -26,15 +26,92 @@ package net.yacy.cora.storage;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.io.InputStreamReader;
import java.io.OutputStream;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.BlockingQueue;
+import java.util.zip.GZIPInputStream;
+
+import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
public class Files {
+    /**
+     * Open a file for reading. If the file name ends with ".bz2" or ".gz" the
+     * returned stream decompresses the content on the fly; any other file is
+     * returned as a plain buffered stream.
+     * If the decompressor cannot be set up, the underlying file stream is
+     * closed before the exception is passed on, so no file handle is leaked.
+     * @param f the file to open
+     * @return the (possibly decompressing) input stream; the caller must close it
+     * @throws IOException if the file cannot be opened or the decompressor cannot be initialised
+     */
+    public static InputStream read(File f) throws IOException {
+        final InputStream raw = new BufferedInputStream(new FileInputStream(f));
+        final String name = f.toString();
+        try {
+            // the decompressor constructors already read the stream header and may fail
+            if (name.endsWith(".bz2")) return new BZip2CompressorInputStream(raw);
+            if (name.endsWith(".gz")) return new GZIPInputStream(raw);
+            return raw;
+        } catch (IOException e) {
+            try {
+                raw.close();
+            } catch (IOException ignored) {
+                // the initial failure is the one worth reporting
+            }
+            throw e;
+        }
+    }
+
+
+ /**
+ * reading a file line by line should be done with two concurrent processes
+ * - one reading the file and doing IO operations
+ * - one processing the result
+ * This method makes is easy to create concurrent file readers by providing
+ * a process that fills a blocking queue with lines from a file.
+ * After the method is called, it returns immediately a blocking queue which is
+ * filled concurrently with the lines of the file. When the reading is finished,
+ * this is signalled with a poison entry, the POISON_LINE String which can be
+ * compared with an "==" operation.
+ * @param f the file to read
+ * @param maxQueueSize
+ * @return a blocking queue which is filled with the lines, terminated by POISON_LINE
+ * @throws IOException
+ */
+ public final static String POISON_LINE = "__@POISON__";
+ public static BlockingQueue concurentLineReader(final File f, final int maxQueueSize) throws IOException {
+ final BlockingQueue q = new ArrayBlockingQueue(maxQueueSize);
+ final InputStream is = read(f);
+ final BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
+ Thread t = new Thread() {
+ public void run() {
+ String line;
+ try {
+ while ((line = br.readLine()) != null) {
+ q.put(line);
+ }
+ } catch (IOException e) {
+ } catch (InterruptedException e) {
+ } finally {
+ try {
+ q.put(POISON_LINE);
+ try {
+ br.close();
+ is.close();
+ } catch (IOException ee) {
+ }
+ } catch (InterruptedException e) {
+ // last try
+ q.add(POISON_LINE);
+ try {
+ br.close();
+ is.close();
+ } catch (IOException ee) {
+ }
+ }
+ }
+ }
+ };
+ t.start();
+ return q;
+ }
+
/**
* copy a file or a complete directory
* @param from the source file or directory