From 10d888e70c3ddf35805bb9a2c7dec17452c53e85 Mon Sep 17 00:00:00 2001
From: orbiter
Date: Thu, 7 Dec 2006 02:40:57 +0000
Subject: [PATCH] - added a media search for images, audio, video and
applications - new search options on search page - new option in ViewFile to
display all links of a file - enhanced collection data structure
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3054 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
build.properties | 2 +-
htroot/ViewFile.html | 16 ++-
htroot/ViewFile.java | 108 +++++++++++++-----
htroot/index.html | 9 +-
htroot/index.java | 17 ++-
htroot/yacysearch.html | 20 +++-
htroot/yacysearch.java | 26 ++++-
source/de/anomic/index/indexCachedRI.java | 8 +-
source/de/anomic/index/indexCollectionRI.java | 8 +-
source/de/anomic/index/indexContainer.java | 90 ++++++++-------
source/de/anomic/index/indexRAMRI.java | 15 +--
.../kelondro/kelondroCollectionIndex.java | 3 +-
.../kelondro/kelondroRowCollection.java | 17 ++-
source/de/anomic/kelondro/kelondroRowSet.java | 1 +
.../de/anomic/plasma/plasmaSearchEvent.java | 6 +-
source/de/anomic/plasma/plasmaWordIndex.java | 10 +-
.../plasma/plasmaWordIndexAssortment.java | 3 +-
source/de/anomic/yacy/yacyClient.java | 4 +-
source/yacy.java | 2 +-
19 files changed, 246 insertions(+), 119 deletions(-)
diff --git a/build.properties b/build.properties
index 8cd3e44f4..7ef576e44 100644
--- a/build.properties
+++ b/build.properties
@@ -3,7 +3,7 @@ javacSource=1.4
javacTarget=1.4
# Release Configuration
-releaseVersion=0.491
+releaseVersion=0.492
releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
#releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}
diff --git a/htroot/ViewFile.html b/htroot/ViewFile.html
index 9cccb1fdc..bd2edda3b 100644
--- a/htroot/ViewFile.html
+++ b/htroot/ViewFile.html
@@ -42,7 +42,8 @@
Original |
Plain Text |
Parsed Text |
- Parsed Sentences
+ Parsed Sentences |
+ Link List
@@ -85,6 +86,19 @@ Unsupported protocol.
Original Resource Content
+::
+ Link List
+
+ #{links}#
+
+ #[nr]# |
+ #[type]# |
+ #[text]# |
+ #[link]# |
+ #[attr]# |
+
+ #{/links}#
+
#(/viewMode)#
diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java
index ad20fdc50..6bc426371 100644
--- a/htroot/ViewFile.java
+++ b/htroot/ViewFile.java
@@ -51,8 +51,12 @@ import java.net.MalformedURLException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.Enumeration;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.TreeSet;
import de.anomic.data.wikiCode;
+import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
import de.anomic.index.indexURLEntry;
@@ -75,6 +79,7 @@ public class ViewFile {
public static final int VIEW_MODE_AS_PARSED_TEXT = 2;
public static final int VIEW_MODE_AS_PARSED_SENTENCES = 3;
public static final int VIEW_MODE_AS_IFRAME = 4;
+ public static final int VIEW_MODE_AS_LINKLIST = 5;
public static final String[] highlightingColors = new String[] {
"255,255,100",
@@ -271,7 +276,7 @@ public class ViewFile {
} else if (viewMode.equals("iframe")) {
prop.put("viewMode", VIEW_MODE_AS_IFRAME);
prop.put("viewMode_url", url.toNormalform());
- } else if (viewMode.equals("parsed") || viewMode.equals("sentences")) {
+ } else if (viewMode.equals("parsed") || viewMode.equals("sentences") || viewMode.equals("links")) {
// parsing the resource content
plasmaParserDocument document = null;
try {
@@ -305,45 +310,52 @@ public class ViewFile {
prop.put("viewMode", VIEW_MODE_AS_PARSED_TEXT);
prop.put("viewMode_parsedText", content);
- } else {
+ } else if (viewMode.equals("sentences")) {
prop.put("viewMode", VIEW_MODE_AS_PARSED_SENTENCES);
final Enumeration sentences = document.getSentences(pre);
boolean dark = true;
int i = 0;
- if (sentences != null)
+ if (sentences != null) {
+ String[] wordArray = wordArray(post.get("words", null));
+
+ // Search word highlighting
while (sentences.hasMoreElements()) {
- String currentSentence = wikiCode.replaceHTML((String) sentences.nextElement());
-
- // Search word highlighting
- String words = post.get("words", null);
- if (words != null) {
- try {
- words = URLDecoder.decode(words, "UTF-8");
- } catch (UnsupportedEncodingException e) {
- }
-
- String[] wordArray = words.substring(1,
- words.length() - 1).split(",");
- for (int j = 0; j < wordArray.length; j++) {
- String currentWord = wordArray[j].trim();
- currentSentence = currentSentence.replaceAll(
- currentWord,
- "" + currentWord
- + "");
- }
- }
-
prop.put("viewMode_sentences_" + i + "_nr", Integer.toString(i + 1));
- prop.put("viewMode_sentences_" + i + "_text", currentSentence);
+ prop.put("viewMode_sentences_" + i + "_text", markup(wordArray, (String) sentences.nextElement()));
prop.put("viewMode_sentences_" + i + "_dark", ((dark) ? 1 : 0));
dark = !dark;
i++;
}
+ }
prop.put("viewMode_sentences", i);
+ } else if (viewMode.equals("links")) {
+ prop.put("viewMode", VIEW_MODE_AS_LINKLIST);
+ String[] wordArray = wordArray(post.get("words", null));
+ boolean dark = true;
+ int i = 0;
+ i += putMediaInfo(prop, wordArray, i, document.getVideolinks(), "video", (i % 2 == 0));
+ i += putMediaInfo(prop, wordArray, i, document.getAudiolinks(), "audio", (i % 2 == 0));
+ i += putMediaInfo(prop, wordArray, i, document.getApplinks(), "app", (i % 2 == 0));
+ dark = (i % 2 == 0);
+
+ TreeSet ts = document.getImages();
+ Iterator tsi = ts.iterator();
+ htmlFilterImageEntry entry;
+ while (tsi.hasNext()) {
+ entry = (htmlFilterImageEntry) tsi.next();
+ prop.put("viewMode_links_" + i + "_nr", i);
+ prop.put("viewMode_links_" + i + "_dark", ((dark) ? 1 : 0));
+ prop.put("viewMode_links_" + i + "_type", "image");
+ prop.put("viewMode_links_" + i + "_text", markup(wordArray, entry.alt()));
+ prop.put("viewMode_links_" + i + "_link", "" + markup(wordArray, (String) entry.url().toNormalform()) + "");
+ prop.put("viewMode_links_" + i + "_attr", entry.width() + " x " + entry.height());
+ dark = !dark;
+ i++;
+ }
+ prop.put("viewMode_links", i);
+
}
if (document != null) document.close();
}
@@ -358,4 +370,46 @@ public class ViewFile {
return prop;
}
+ private static final String[] wordArray(String words) { // converts the raw "words" request value into an array of search words; returns null when words is null, cannot be decoded, or splits into no tokens
+ String[] w = null;
+ if (words != null) try {
+ words = URLDecoder.decode(words, "UTF-8");
+ w = words.substring(1, words.length() - 1).split(","); // drops the first and last character (presumably enclosing brackets of a list string -- TODO confirm against the caller) and splits on commas; NOTE(review): substring throws StringIndexOutOfBoundsException when the decoded value has fewer than 2 characters
+ if (w.length == 0) return null; // no tokens left after the split (e.g. the remainder consisted only of commas)
+ } catch (UnsupportedEncodingException e) {} // exception is swallowed; w is still null at this point, so the method returns null
+ return w;
+ }
+
+ private static final String markup(String[] wordArray, String message) { // passes message through wikiCode.replaceHTML, then wraps every occurrence of each search word in a prefix/suffix string; wordArray may be null, in which case no words are wrapped
+ message = wikiCode.replaceHTML(message); // presumably escapes HTML in the text (wikiCode is defined outside this patch) -- TODO confirm
+ if (wordArray != null) for (int j = 0; j < wordArray.length; j++) {
+ String currentWord = wordArray[j].trim();
+ message = message.replaceAll(currentWord, // NOTE(review): replaceAll interprets currentWord as a regular expression and the second argument as a replacement pattern, so words containing regex metacharacters, a dollar sign or a backslash are not handled literally
+ "" + currentWord // NOTE(review): both wrapper literals are empty in this copy of the patch; presumably the highlighting markup was lost in extraction -- verify against the repository
+ + "");
+ }
+ return message;
+ }
+
+ private static int putMediaInfo(serverObjects prop, String[] wordArray, int c, Map media, String name, boolean dark) { // writes one "viewMode_links_<index>_*" row into prop for every entry of media, starting at row index c; returns the number of rows written
+ Iterator mi = media.entrySet().iterator(); // NOTE(review): media is dereferenced without a null check
+ Map.Entry entry;
+ int i = 0; // counts the rows written by this call
+ while (mi.hasNext()) {
+ entry = (Map.Entry) mi.next();
+ prop.put("viewMode_links_" + c + "_nr", c); // row number is the running index c as passed in by the caller
+ prop.put("viewMode_links_" + c + "_dark", ((dark) ? 1 : 0));
+ prop.put("viewMode_links_" + c + "_type", name); // name is stored unchanged as the row type
+ prop.put("viewMode_links_" + c + "_text", markup(wordArray, (String) entry.getValue())); // the map value is cast to String and used as the row text
+ prop.put("viewMode_links_" + c + "_link", "" + markup(wordArray, (String) entry.getKey()) + ""); // the map key is cast to String and used as the link
+ prop.put("viewMode_links_" + c + "_attr", ""); // attribute column is always written as an empty string here
+ dark = !dark; // alternate the dark flag from row to row
+ c++; // advances only the local copy of the index; the caller adds the return value to its own counter
+ i++;
+ }
+ return i;
+ }
+
}
diff --git a/htroot/index.html b/htroot/index.html
index 30d0a9df9..2fffd0b97 100644
--- a/htroot/index.html
+++ b/htroot/index.html
@@ -21,8 +21,13 @@
-
- #(searchoptions)#
+
+ Text
+ Images
+ Audio
+ Video
+ Applications
+ #(searchoptions)#
diff --git a/htroot/index.java b/htroot/index.java
index 69615c7c6..0bc75fb1e 100644
--- a/htroot/index.java
+++ b/htroot/index.java
@@ -83,6 +83,15 @@ public class index {
}
}
+ // search domain
+ int contentdom = plasmaSearchQuery.CONTENTDOM_TEXT;
+ String cds = (post == null) ? "text" : post.get("contentdom", "text");
+ if (cds.equals("text")) contentdom = plasmaSearchQuery.CONTENTDOM_TEXT;
+ if (cds.equals("audio")) contentdom = plasmaSearchQuery.CONTENTDOM_AUDIO;
+ if (cds.equals("video")) contentdom = plasmaSearchQuery.CONTENTDOM_VIDEO;
+ if (cds.equals("image")) contentdom = plasmaSearchQuery.CONTENTDOM_IMAGE;
+ if (cds.equals("app")) contentdom = plasmaSearchQuery.CONTENTDOM_APP;
+
// we create empty entries for template strings
String promoteSearchPageGreeting = env.getConfig("promoteSearchPageGreeting", "");
if (promoteSearchPageGreeting.length() == 0) promoteSearchPageGreeting = "P2P WEB SEARCH";
@@ -123,8 +132,12 @@ public class index {
prop.put("display", display);
prop.put("constraint", constraint);
prop.put("searchoptions_display", display);
-
-
+ prop.put("contentdomCheckText", (contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) ? 1 : 0);
+ prop.put("contentdomCheckAudio", (contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) ? 1 : 0);
+ prop.put("contentdomCheckVideo", (contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) ? 1 : 0);
+ prop.put("contentdomCheckImage", (contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) ? 1 : 0);
+ prop.put("contentdomCheckApp", (contentdom == plasmaSearchQuery.CONTENTDOM_APP) ? 1 : 0);
+
return prop;
}
diff --git a/htroot/yacysearch.html b/htroot/yacysearch.html
index 1910de6f2..75ee4ddd2 100644
--- a/htroot/yacysearch.html
+++ b/htroot/yacysearch.html
@@ -22,12 +22,20 @@

#[promoteSearchPageGreeting]#