From 5aee19daa4bd11d79723cd84d631e608d9f20aa3 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Mon, 4 Jun 2012 23:44:26 +0200 Subject: [PATCH] added show from cache in search results (not yet finished) --- defaults/yacy.init | 5 +- htroot/ConfigPortal.html | 5 +- htroot/ConfigPortal.java | 3 + htroot/yacy/search.java | 6 +- htroot/yacysearch.java | 9 ++- htroot/yacysearchitem.html | 1 + htroot/yacysearchitem.java | 3 +- htroot/yacysearchtrailer.java | 4 +- source/net/yacy/cora/storage/Files.java | 79 ++++++++++++++++++++++++- 9 files changed, 99 insertions(+), 16 deletions(-) diff --git a/defaults/yacy.init b/defaults/yacy.init index 3859653e1..81c1a1cae 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -744,9 +744,10 @@ search.target.special.pattern = # these information pieces may be switched on or off search.result.show.date = true search.result.show.size = false -search.result.show.metadata = true -search.result.show.parser = true +search.result.show.metadata = false +search.result.show.parser = false search.result.show.pictures = false +search.result.show.cache = true # search navigators: comma-separated list of default values for search navigation. # can be temporary different if search string is given with differen navigation values diff --git a/htroot/ConfigPortal.html b/htroot/ConfigPortal.html index 208f7a0fb..4d1a80758 100644 --- a/htroot/ConfigPortal.html +++ b/htroot/ConfigPortal.html @@ -62,7 +62,7 @@ ideaSpeed up search results with this option! (use CACHEONLY or FALSE to switch off verification)
NOCACHE: no use of web cache, load all snippets online
IFFRESH: use the cache if the cache exists and is fresh otherwise load online
- IFEXIST: use the cache if the cache exist or load online
+ IFEXIST: use the cache if the cache exists or load online
If verification fails, delete index reference

CACHEONLY: never go online, use all content from cache. If no cache entry exist, consider content nevertheless as available and show result without snippet
FALSE: no link verification and not snippet generation: all search results are valid without verification @@ -74,7 +74,8 @@ Size  Metadata  Parser  - Pictures + Pictures  + Cache
Show Navigation on Side-Bar
diff --git a/htroot/ConfigPortal.java b/htroot/ConfigPortal.java index c6b82a3b0..7411d4a6a 100644 --- a/htroot/ConfigPortal.java +++ b/htroot/ConfigPortal.java @@ -92,6 +92,7 @@ public class ConfigPortal { sb.setConfig("search.result.show.metadata", post.getBoolean("search.result.show.metadata", false)); sb.setConfig("search.result.show.parser", post.getBoolean("search.result.show.parser", false)); sb.setConfig("search.result.show.pictures", post.getBoolean("search.result.show.pictures", false)); + sb.setConfig("search.result.show.cache", post.getBoolean("search.result.show.cache", false)); sb.setConfig(SwitchboardConstants.SEARCH_VERIFY, post.get("search.verify", "ifexist")); sb.setConfig(SwitchboardConstants.SEARCH_VERIFY_DELETE, post.getBoolean("search.verify.delete", false)); @@ -136,6 +137,7 @@ public class ConfigPortal { sb.setConfig("search.result.show.metadata", true); sb.setConfig("search.result.show.parser", true); sb.setConfig("search.result.show.pictures", false); + sb.setConfig("search.result.show.cache", false); sb.setConfig(SwitchboardConstants.SEARCH_VERIFY, "iffresh"); sb.setConfig(SwitchboardConstants.SEARCH_VERIFY_DELETE, "true"); sb.setConfig("about.headline", ""); @@ -165,6 +167,7 @@ public class ConfigPortal { prop.put("search.result.show.metadata", sb.getConfigBool("search.result.show.metadata", false) ? 1 : 0); prop.put("search.result.show.parser", sb.getConfigBool("search.result.show.parser", false) ? 1 : 0); prop.put("search.result.show.pictures", sb.getConfigBool("search.result.show.pictures", false) ? 1 : 0); + prop.put("search.result.show.cache", sb.getConfigBool("search.result.show.cache", false) ? 1 : 0); prop.put("search.navigation.hosts", sb.getConfig("search.navigation", "").indexOf("hosts",0) >= 0 ? 1 : 0); prop.put("search.navigation.authors", sb.getConfig("search.navigation", "").indexOf("authors",0) >= 0 ? 
1 : 0); diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index d056b81c9..f20f0d583 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -41,12 +41,12 @@ import net.yacy.cora.document.ASCII; import net.yacy.cora.document.Classification; import net.yacy.cora.document.Classification.ContentDomain; import net.yacy.cora.document.RSSMessage; +import net.yacy.cora.lod.SimpleVocabulary; import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.sorting.ScoreMap; import net.yacy.cora.sorting.WeakPriorityBlockingQueue; -import net.yacy.document.Autotagging.Metatag; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.word.WordReference; import net.yacy.kelondro.data.word.WordReferenceFactory; @@ -236,7 +236,7 @@ public final class search { prefer, ContentDomain.contentdomParser(contentdom), language, - new HashSet(), + new HashSet(), "", // no navigation null, // no snippet computation count, @@ -299,7 +299,7 @@ public final class search { prefer, ContentDomain.contentdomParser(contentdom), language, - new HashSet(), + new HashSet(), "", // no navigation null, // no snippet computation count, diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 6a4e6da26..e5b86d0cd 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -45,13 +45,12 @@ import net.yacy.cora.document.Classification; import net.yacy.cora.document.Classification.ContentDomain; import net.yacy.cora.document.RSSMessage; import net.yacy.cora.document.UTF8; +import net.yacy.cora.lod.SimpleVocabulary; import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.ResponseHeader; import net.yacy.cora.services.federated.yacy.CacheStrategy; -import net.yacy.document.Autotagging.Metatag; -import net.yacy.document.Autotagging.Vocabulary; import 
net.yacy.document.Condenser; import net.yacy.document.Document; import net.yacy.document.LibraryProvider; @@ -125,9 +124,9 @@ public class yacysearch { prop.put("focus", ((post == null) ? true : post.get("focus", "1").equals("1")) ? 1 : 0); // produce vocabulary navigation sidebars - Collection vocabularies = LibraryProvider.autotagging.getVocabularies(); + Collection vocabularies = LibraryProvider.autotagging.getVocabularies(); int j = 0; - for (Vocabulary v: vocabularies) { + for (SimpleVocabulary v: vocabularies) { prop.put("sidebarVocabulary_" + j + "_vocabulary", v.getName()); j++; } @@ -464,7 +463,7 @@ public class yacysearch { } int voc = 0; - Collection metatags = new ArrayList(1); + Collection metatags = new ArrayList(1); while ((voc = querystring.indexOf("/vocabulary/", 0)) >= 0) { String vocabulary = ""; int ve = querystring.indexOf(' ', voc + 12); diff --git a/htroot/yacysearchitem.html b/htroot/yacysearchitem.html index 0789fb3b7..de0086832 100644 --- a/htroot/yacysearchitem.html +++ b/htroot/yacysearchitem.html @@ -29,6 +29,7 @@ #(showMetadata)#:: | Metadata#(/showMetadata)# #(showParser)#:: | Parser#(/showParser)# #(showPictures)#:: | Pictures

#(/showPictures)# + #(showCache)#:: | Cache

#(/showCache)# :: #(item)#::
diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java index c5f823473..9604d2a5e 100644 --- a/htroot/yacysearchitem.java +++ b/htroot/yacysearchitem.java @@ -128,6 +128,7 @@ public class yacysearchitem { prop.put("content_showMetadata", sb.getConfigBool("search.result.show.metadata", true) ? 1 : 0); prop.put("content_showParser", sb.getConfigBool("search.result.show.parser", true) ? 1 : 0); prop.put("content_showPictures", sb.getConfigBool("search.result.show.pictures", true) ? 1 : 0); + prop.put("content_showCache", sb.getConfigBool("search.result.show.cache", true) ? 1 : 0); prop.put("content_authorized", authenticated ? "1" : "0"); final String urlhash = ASCII.String(result.hash()); prop.put("content_authorized_bookmark", sb.tables.bookmarks.hasBookmark("admin", urlhash) ? "0" : "1"); @@ -198,7 +199,7 @@ public class yacysearchitem { } else { prop.put("content_code", ""); } - if (result.lat() == 0.0f || result.lon() == 0.0f) { + if (result.lat() == 0.0d || result.lon() == 0.0d) { prop.put("content_loc", 0); } else { prop.put("content_loc", 1); diff --git a/htroot/yacysearchtrailer.java b/htroot/yacysearchtrailer.java index 934632d6e..a7c55ff68 100644 --- a/htroot/yacysearchtrailer.java +++ b/htroot/yacysearchtrailer.java @@ -28,9 +28,9 @@ import java.util.Iterator; import java.util.Map; import net.yacy.cora.document.MultiProtocolURI; +import net.yacy.cora.lod.SimpleVocabulary; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.sorting.ScoreMap; -import net.yacy.document.Autotagging; import net.yacy.document.LibraryProvider; import net.yacy.kelondro.util.Formatter; import net.yacy.peers.graphics.ProfilingGraph; @@ -307,7 +307,7 @@ public class yacysearchtrailer { while (i < 20 && navigatorIterator.hasNext()) { name = navigatorIterator.next(); count = ve.getValue().get(name); - nav = "%2Fvocabulary%2F" + navname + "%2F" + MultiProtocolURI.escape(Autotagging.encodePrintname(name)).toString(); + nav = "%2Fvocabulary%2F" + navname + 
"%2F" + MultiProtocolURI.escape(SimpleVocabulary.Metatag.encodePrintname(name)).toString(); queryStringForUrl = theQuery.queryStringForUrl(); p = queryStringForUrl.indexOf(nav); if (p < 0) { diff --git a/source/net/yacy/cora/storage/Files.java b/source/net/yacy/cora/storage/Files.java index 7fab34611..017b6fe6a 100644 --- a/source/net/yacy/cora/storage/Files.java +++ b/source/net/yacy/cora/storage/Files.java @@ -1,5 +1,5 @@ /** - * ConfigurationSet + * Files * Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt a. M., Germany * First released 29.06.2011 at http://yacy.net * @@ -26,15 +26,92 @@ package net.yacy.cora.storage; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; +import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.io.InputStreamReader; import java.io.OutputStream; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.BlockingQueue; +import java.util.zip.GZIPInputStream; + +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; public class Files { + /** + * open text files for reading. 
If the files are compressed, choose the + * appropriate decompression method automatically + * @param f + * @return the input stream for the file + * @throws IOException + */ + public static InputStream read(File f) throws IOException { + + // make input stream + InputStream is = new BufferedInputStream(new FileInputStream(f)); + if (f.toString().endsWith(".bz2")) is = new BZip2CompressorInputStream(is); + if (f.toString().endsWith(".gz")) is = new GZIPInputStream(is); + + return is; + } + + /** + * reading a file line by line should be done with two concurrent processes + * - one reading the file and doing IO operations + * - one processing the result + * This method makes it easy to create concurrent file readers by providing + * a process that fills a blocking queue with lines from a file. + * After the method is called, it immediately returns a blocking queue which is + * filled concurrently with the lines of the file. When the reading is finished, + * this is signalled with a poison entry, the POISON_LINE String which can be + * compared with an "==" operation. 
+ * @param f the file to read + * @param maxQueueSize + * @return a blocking queue which is filled with the lines, terminated by POISON_LINE + * @throws IOException + */ + public final static String POISON_LINE = "__@POISON__"; + public static BlockingQueue concurentLineReader(final File f, final int maxQueueSize) throws IOException { + final BlockingQueue q = new ArrayBlockingQueue(maxQueueSize); + final InputStream is = read(f); + final BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8")); + Thread t = new Thread() { + public void run() { + String line; + try { + while ((line = br.readLine()) != null) { + q.put(line); + } + } catch (IOException e) { + } catch (InterruptedException e) { + } finally { + try { + q.put(POISON_LINE); + try { + br.close(); + is.close(); + } catch (IOException ee) { + } + } catch (InterruptedException e) { + // last try + q.add(POISON_LINE); + try { + br.close(); + is.close(); + } catch (IOException ee) { + } + } + } + } + }; + t.start(); + return q; + } + /** * copy a file or a complete directory * @param from the source file or directory