- added a media search for images, audio, video and applications

- new search options on search page
- new option in ViewFile to display all links of a file
- enhanced collection data structure

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3054 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 18 years ago
parent 25a64fe3da
commit 10d888e70c

@ -3,7 +3,7 @@ javacSource=1.4
javacTarget=1.4 javacTarget=1.4
# Release Configuration # Release Configuration
releaseVersion=0.491 releaseVersion=0.492
releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
#releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz #releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr} releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}

@ -42,7 +42,8 @@
<a href="?urlHash=#[hash]#&viewMode=iframe&words=#[words]#">Original</a> | <a href="?urlHash=#[hash]#&viewMode=iframe&words=#[words]#">Original</a> |
<a href="?urlHash=#[hash]#&viewMode=plain&words=#[words]#">Plain Text</a> | <a href="?urlHash=#[hash]#&viewMode=plain&words=#[words]#">Plain Text</a> |
<a href="?urlHash=#[hash]#&viewMode=parsed&words=#[words]#">Parsed Text</a> | <a href="?urlHash=#[hash]#&viewMode=parsed&words=#[words]#">Parsed Text</a> |
<a href="?urlHash=#[hash]#&viewMode=sentences&words=#[words]#">Parsed Sentences</a> <a href="?urlHash=#[hash]#&viewMode=sentences&words=#[words]#">Parsed Sentences</a> |
<a href="?urlHash=#[hash]#&viewMode=links&words=#[words]#">Link List</a>
</td> </td>
</tr> </tr>
</table> </table>
@ -85,6 +86,19 @@ Unsupported protocol.
<h3>Original Resource Content</h3><br> <h3>Original Resource Content</h3><br>
<iframe src="#[url]#" width="800" height="400"> <iframe src="#[url]#" width="800" height="400">
</iframe> </iframe>
:: <!-- 5 -->
<h3>Link List</h3><br>
<table border="0" cellpadding="2" cellspacing="1">
#{links}#
<tr class="TableCell#(dark)#Light::Dark::Summary#(/dark)#">
<td>#[nr]#</td>
<td><tt>#[type]#</tt></td>
<td><tt>#[text]#</tt></td>
<td><tt>#[link]#</tt></td>
<td><tt>#[attr]#</tt></td>
</tr>
#{/links}#
</table>
#(/viewMode)# #(/viewMode)#
</p> </p>

@ -51,8 +51,12 @@ import java.net.MalformedURLException;
import java.net.URLDecoder; import java.net.URLDecoder;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.util.Enumeration; import java.util.Enumeration;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeSet;
import de.anomic.data.wikiCode; import de.anomic.data.wikiCode;
import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.http.httpHeader; import de.anomic.http.httpHeader;
import de.anomic.http.httpc; import de.anomic.http.httpc;
import de.anomic.index.indexURLEntry; import de.anomic.index.indexURLEntry;
@ -75,6 +79,7 @@ public class ViewFile {
public static final int VIEW_MODE_AS_PARSED_TEXT = 2; public static final int VIEW_MODE_AS_PARSED_TEXT = 2;
public static final int VIEW_MODE_AS_PARSED_SENTENCES = 3; public static final int VIEW_MODE_AS_PARSED_SENTENCES = 3;
public static final int VIEW_MODE_AS_IFRAME = 4; public static final int VIEW_MODE_AS_IFRAME = 4;
public static final int VIEW_MODE_AS_LINKLIST = 5;
public static final String[] highlightingColors = new String[] { public static final String[] highlightingColors = new String[] {
"255,255,100", "255,255,100",
@ -271,7 +276,7 @@ public class ViewFile {
} else if (viewMode.equals("iframe")) { } else if (viewMode.equals("iframe")) {
prop.put("viewMode", VIEW_MODE_AS_IFRAME); prop.put("viewMode", VIEW_MODE_AS_IFRAME);
prop.put("viewMode_url", url.toNormalform()); prop.put("viewMode_url", url.toNormalform());
} else if (viewMode.equals("parsed") || viewMode.equals("sentences")) { } else if (viewMode.equals("parsed") || viewMode.equals("sentences") || viewMode.equals("links")) {
// parsing the resource content // parsing the resource content
plasmaParserDocument document = null; plasmaParserDocument document = null;
try { try {
@ -305,45 +310,52 @@ public class ViewFile {
prop.put("viewMode", VIEW_MODE_AS_PARSED_TEXT); prop.put("viewMode", VIEW_MODE_AS_PARSED_TEXT);
prop.put("viewMode_parsedText", content); prop.put("viewMode_parsedText", content);
} else { } else if (viewMode.equals("sentences")) {
prop.put("viewMode", VIEW_MODE_AS_PARSED_SENTENCES); prop.put("viewMode", VIEW_MODE_AS_PARSED_SENTENCES);
final Enumeration sentences = document.getSentences(pre); final Enumeration sentences = document.getSentences(pre);
boolean dark = true; boolean dark = true;
int i = 0; int i = 0;
if (sentences != null) if (sentences != null) {
String[] wordArray = wordArray(post.get("words", null));
// Search word highlighting
while (sentences.hasMoreElements()) { while (sentences.hasMoreElements()) {
String currentSentence = wikiCode.replaceHTML((String) sentences.nextElement());
// Search word highlighting
String words = post.get("words", null);
if (words != null) {
try {
words = URLDecoder.decode(words, "UTF-8");
} catch (UnsupportedEncodingException e) {
}
String[] wordArray = words.substring(1,
words.length() - 1).split(",");
for (int j = 0; j < wordArray.length; j++) {
String currentWord = wordArray[j].trim();
currentSentence = currentSentence.replaceAll(
currentWord,
"<b style=\"color: black; background-color: rgb("
+ highlightingColors[j % 6]
+ ");\">" + currentWord
+ "</b>");
}
}
prop.put("viewMode_sentences_" + i + "_nr", Integer.toString(i + 1)); prop.put("viewMode_sentences_" + i + "_nr", Integer.toString(i + 1));
prop.put("viewMode_sentences_" + i + "_text", currentSentence); prop.put("viewMode_sentences_" + i + "_text", markup(wordArray, (String) sentences.nextElement()));
prop.put("viewMode_sentences_" + i + "_dark", ((dark) ? 1 : 0)); prop.put("viewMode_sentences_" + i + "_dark", ((dark) ? 1 : 0));
dark = !dark; dark = !dark;
i++; i++;
} }
}
prop.put("viewMode_sentences", i); prop.put("viewMode_sentences", i);
} else if (viewMode.equals("links")) {
prop.put("viewMode", VIEW_MODE_AS_LINKLIST);
String[] wordArray = wordArray(post.get("words", null));
boolean dark = true;
int i = 0;
i += putMediaInfo(prop, wordArray, i, document.getVideolinks(), "video", (i % 2 == 0));
i += putMediaInfo(prop, wordArray, i, document.getAudiolinks(), "audio", (i % 2 == 0));
i += putMediaInfo(prop, wordArray, i, document.getApplinks(), "app", (i % 2 == 0));
dark = (i % 2 == 0);
TreeSet ts = document.getImages();
Iterator tsi = ts.iterator();
htmlFilterImageEntry entry;
while (tsi.hasNext()) {
entry = (htmlFilterImageEntry) tsi.next();
prop.put("viewMode_links_" + i + "_nr", i);
prop.put("viewMode_links_" + i + "_dark", ((dark) ? 1 : 0));
prop.put("viewMode_links_" + i + "_type", "image");
prop.put("viewMode_links_" + i + "_text", markup(wordArray, entry.alt()));
prop.put("viewMode_links_" + i + "_link", "<a href=\"" + (String) entry.url().toNormalform() + "\">" + markup(wordArray, (String) entry.url().toNormalform()) + "</a>");
prop.put("viewMode_links_" + i + "_attr", entry.width() + "&nbsp;x&nbsp;" + entry.height());
dark = !dark;
i++;
}
prop.put("viewMode_links", i);
} }
if (document != null) document.close(); if (document != null) document.close();
} }
@ -358,4 +370,46 @@ public class ViewFile {
return prop; return prop;
} }
/**
 * Decodes the URL-encoded search word list and splits it into single words.
 *
 * The first and the last character of the decoded value are cut off before the
 * rest is split at ','. (Presumably the value is a collection's toString(), such
 * as "[foo, bar]" -- TODO confirm against the code that builds the "words" parameter.)
 *
 * @param words the raw "words" request value, may be null
 * @return the non-blank words in their original order (not trimmed), or null if
 *         there is nothing usable to highlight
 */
private static final String[] wordArray(String words) {
    if (words == null) return null;

    final String decoded;
    try {
        decoded = URLDecoder.decode(words, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        // cannot decode: behave as if no words were given
        return null;
    } catch (IllegalArgumentException e) {
        // malformed %-escape in the request value: highlighting is optional, so skip it
        return null;
    }

    // need at least the two enclosing characters, otherwise substring() would throw
    if (decoded.length() < 2) return null;
    final String[] parts = decoded.substring(1, decoded.length() - 1).split(",");

    // drop blank entries; an empty search word would match at every position
    int usable = 0;
    for (int i = 0; i < parts.length; i++) {
        if (parts[i].trim().length() > 0) usable++;
    }
    if (usable == 0) return null;
    if (usable == parts.length) return parts;

    final String[] w = new String[usable];
    int k = 0;
    for (int i = 0; i < parts.length; i++) {
        if (parts[i].trim().length() > 0) w[k++] = parts[i];
    }
    return w;
}
/**
 * Prepares a text for display and highlights every occurrence of the search words.
 *
 * The message is first passed through wikiCode.replaceHTML (presumably HTML
 * escaping -- TODO confirm). Each word is then wrapped in a coloured
 * &lt;b&gt; element. Words are matched literally, not as regular expressions, so
 * characters like '+', '(' or '$' in a search word can neither break the page nor
 * change what is matched, and only text that already occurs in the prepared
 * message is re-inserted.
 *
 * NOTE(review): a later word can still match inside the markup inserted for an
 * earlier word (e.g. the word "color"); a single-pass highlighter would avoid that.
 *
 * @param wordArray the words to highlight, may be null (no highlighting)
 * @param message   the text to display
 * @return the prepared text with highlighting markup
 */
private static final String markup(String[] wordArray, String message) {
    message = wikiCode.replaceHTML(message);
    if (wordArray == null || message == null) return message;

    for (int j = 0; j < wordArray.length; j++) {
        if (wordArray[j] == null) continue;
        final String currentWord = wordArray[j].trim();
        // an empty word would match at every position
        if (currentWord.length() == 0) continue;

        int hit = message.indexOf(currentWord);
        if (hit < 0) continue;

        // cycle through the available colours instead of assuming there are six
        final String highlighted =
                "<b style=\"color: black; background-color: rgb("
                + highlightingColors[j % highlightingColors.length]
                + ");\">" + currentWord
                + "</b>";

        // literal replace-all (Pattern.quote is not available on the 1.4 target)
        final StringBuffer sb = new StringBuffer(message.length() + highlighted.length());
        int from = 0;
        while (hit >= 0) {
            sb.append(message.substring(from, hit)).append(highlighted);
            from = hit + currentWord.length();
            hit = message.indexOf(currentWord, from);
        }
        sb.append(message.substring(from));
        message = sb.toString();
    }
    return message;
}
/**
 * Writes one "viewMode_links_&lt;n&gt;_*" row per entry of a media map into the
 * template properties.
 *
 * For each entry the key is used as link target and (highlighted) link text, and
 * the value as the (highlighted) description. The "attr" column is left empty.
 *
 * NOTE(review): the key is placed into the href attribute as-is; if keys can
 * contain '"' or other markup characters this needs escaping -- confirm what the
 * parser guarantees.
 *
 * @param prop      template properties to write into
 * @param wordArray search words for highlighting, may be null
 * @param c         index of the first row to write
 * @param media     map of link target to link text; null is treated as empty
 * @param name      value for the "type" column of every row written
 * @param dark      colour flag of the first row; alternates with every row
 * @return the number of rows written
 */
private static int putMediaInfo(serverObjects prop, String[] wordArray, int c, Map media, String name, boolean dark) {
    // the document may not provide this kind of link at all
    if (media == null) return 0;

    int written = 0;
    for (Iterator mi = media.entrySet().iterator(); mi.hasNext(); written++) {
        final Map.Entry entry = (Map.Entry) mi.next();
        final int row = c + written;
        final String prefix = "viewMode_links_" + row + "_";
        final String target = (String) entry.getKey();

        prop.put(prefix + "nr", row);
        prop.put(prefix + "dark", ((dark) ? 1 : 0));
        prop.put(prefix + "type", name);
        prop.put(prefix + "text", markup(wordArray, (String) entry.getValue()));
        prop.put(prefix + "link", "<a href=\"" + target + "\">" + markup(wordArray, target) + "</a>");
        prop.put(prefix + "attr", "");
        dark = !dark;
    }
    return written;
}
} }

@ -21,8 +21,13 @@
<input type="hidden" name="display" value="#[display]#" /> <input type="hidden" name="display" value="#[display]#" />
<input name="search" type="text" size="52" maxlength="80" value="#[former]#" /> <input name="search" type="text" size="52" maxlength="80" value="#[former]#" />
<input type="submit" name="Enter" value="Search" /> <input type="submit" name="Enter" value="Search" />
<input type="hidden" name="former" value="#[former]#" /> <input type="hidden" name="former" value="#[former]#" /><br />
#(searchoptions)# <input type="radio" name="contentdom" value="text" #(contentdomCheckText)#::checked="checked"#(/contentdomCheckText)# />Text&nbsp;&nbsp;
<input type="radio" name="contentdom" value="image" #(contentdomCheckImage)#::checked="checked"#(/contentdomCheckImage)# />Images&nbsp;&nbsp;
<input type="radio" name="contentdom" value="audio" #(contentdomCheckAudio)#::checked="checked"#(/contentdomCheckAudio)# />Audio&nbsp;&nbsp;
<input type="radio" name="contentdom" value="video" #(contentdomCheckVideo)#::checked="checked"#(/contentdomCheckVideo)# />Video&nbsp;&nbsp;
<input type="radio" name="contentdom" value="app" #(contentdomCheckApp)#::checked="checked"#(/contentdomCheckApp)# />Applications&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
#(searchoptions)#
<input type="hidden" name="count" value="10" /> <input type="hidden" name="count" value="10" />
<input type="hidden" name="order" value="Date-YBR-Quality" /> <input type="hidden" name="order" value="Date-YBR-Quality" />
<input type="hidden" name="resource" value="global" /> <input type="hidden" name="resource" value="global" />

@ -83,6 +83,15 @@ public class index {
} }
} }
// search domain
int contentdom = plasmaSearchQuery.CONTENTDOM_TEXT;
String cds = (post == null) ? "text" : post.get("contentdom", "text");
if (cds.equals("text")) contentdom = plasmaSearchQuery.CONTENTDOM_TEXT;
if (cds.equals("audio")) contentdom = plasmaSearchQuery.CONTENTDOM_AUDIO;
if (cds.equals("video")) contentdom = plasmaSearchQuery.CONTENTDOM_VIDEO;
if (cds.equals("image")) contentdom = plasmaSearchQuery.CONTENTDOM_IMAGE;
if (cds.equals("app")) contentdom = plasmaSearchQuery.CONTENTDOM_APP;
// we create empty entries for template strings // we create empty entries for template strings
String promoteSearchPageGreeting = env.getConfig("promoteSearchPageGreeting", ""); String promoteSearchPageGreeting = env.getConfig("promoteSearchPageGreeting", "");
if (promoteSearchPageGreeting.length() == 0) promoteSearchPageGreeting = "P2P WEB SEARCH"; if (promoteSearchPageGreeting.length() == 0) promoteSearchPageGreeting = "P2P WEB SEARCH";
@ -123,8 +132,12 @@ public class index {
prop.put("display", display); prop.put("display", display);
prop.put("constraint", constraint); prop.put("constraint", constraint);
prop.put("searchoptions_display", display); prop.put("searchoptions_display", display);
prop.put("contentdomCheckText", (contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) ? 1 : 0);
prop.put("contentdomCheckAudio", (contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) ? 1 : 0);
prop.put("contentdomCheckVideo", (contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) ? 1 : 0);
prop.put("contentdomCheckImage", (contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) ? 1 : 0);
prop.put("contentdomCheckApp", (contentdom == plasmaSearchQuery.CONTENTDOM_APP) ? 1 : 0);
return prop; return prop;
} }

@ -22,12 +22,20 @@
<p class="yacylogo"><a href="http://yacy.net/yacy/" class="yacylogo"><img src="/env/grafics/yacy.png" alt="yacy" /></a></p> <p class="yacylogo"><a href="http://yacy.net/yacy/" class="yacylogo"><img src="/env/grafics/yacy.png" alt="yacy" /></a></p>
<h2>#[promoteSearchPageGreeting]#</h2> <h2>#[promoteSearchPageGreeting]#</h2>
<fieldset class="maininput"> <fieldset class="maininput">
<table width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td width="80%"> <table width="100%" border="0" cellspacing="0" cellpadding="0">
<input name="search" type="text" size="50" maxlength="80" value="#[former]#" onClick='document.searchform.Enter.value = "Search"' /> <tr><td width="80%">
<input type="submit" name="Enter" value="Search" /> <input name="search" type="text" size="50" maxlength="80" value="#[former]#" onClick='document.searchform.Enter.value = "Search"' />
</td><td width="20%"> <input type="submit" name="Enter" value="Search" />
<a href="index.html?display=#[display]#&searchoptions=1&count=#[count]#&order=#[order]#&resource=#[resource]#&time=#[time]#&urlmaskfilter=#[urlmaskfilter]#&prefermaskfilter=#[prefermaskfilter]#&cat=#[cat]#&type=#[type]#&constraint=#[constraint]#&former=#[former]#">more options<a> </td><td width="20%">
</td></tr> <a href="index.html?display=#[display]#&searchoptions=1&count=#[count]#&order=#[order]#&resource=#[resource]#&time=#[time]#&urlmaskfilter=#[urlmaskfilter]#&prefermaskfilter=#[prefermaskfilter]#&cat=#[cat]#&type=#[type]#&constraint=#[constraint]#&contentdom=#[contentdom]#&former=#[former]#">more options<a>
</td></tr>
<tr><td width="100%" colspan="2">
<input type="radio" name="contentdom" value="text" #(contentdomCheckText)#::checked="checked"#(/contentdomCheckText)# />Text&nbsp;&nbsp;
<input type="radio" name="contentdom" value="image" #(contentdomCheckImage)#::checked="checked"#(/contentdomCheckImage)# />Images&nbsp;&nbsp;
<input type="radio" name="contentdom" value="audio" #(contentdomCheckAudio)#::checked="checked"#(/contentdomCheckAudio)# />Audio&nbsp;&nbsp;
<input type="radio" name="contentdom" value="video" #(contentdomCheckVideo)#::checked="checked"#(/contentdomCheckVideo)# />Video&nbsp;&nbsp;
<input type="radio" name="contentdom" value="app" #(contentdomCheckApp)#::checked="checked"#(/contentdomCheckApp)# />Applications
</td></tr>
</table> </table>
<input type="hidden" name="former" value="#[former]#" /> <input type="hidden" name="former" value="#[former]#" />
<input type="hidden" name="count" value="#[count]#" /> <input type="hidden" name="count" value="#[count]#" />

@ -126,6 +126,12 @@ public class yacysearch {
prop.put("type_resultbottomline", 0); prop.put("type_resultbottomline", 0);
prop.put("type_results", ""); prop.put("type_results", "");
prop.put("display", display); prop.put("display", display);
prop.put("contentdom", "text");
prop.put("contentdomCheckText", 1);
prop.put("contentdomCheckAudio", 0);
prop.put("contentdomCheckVideo", 0);
prop.put("contentdomCheckImage", 0);
prop.put("contentdomCheckApp", 0);
return prop; return prop;
} }
@ -163,8 +169,16 @@ public class yacysearch {
final boolean indexReceiveGranted = sb.getConfig("allowReceiveIndex", "true").equals("true"); final boolean indexReceiveGranted = sb.getConfig("allowReceiveIndex", "true").equals("true");
if (!indexDistributeGranted || !indexReceiveGranted) { global = false; } if (!indexDistributeGranted || !indexReceiveGranted) { global = false; }
// find search domain
int contentdom = plasmaSearchQuery.CONTENTDOM_TEXT;
String cds = post.get("contentdom", "text");
if (cds.equals("text")) contentdom = plasmaSearchQuery.CONTENTDOM_TEXT;
if (cds.equals("audio")) contentdom = plasmaSearchQuery.CONTENTDOM_AUDIO;
if (cds.equals("video")) contentdom = plasmaSearchQuery.CONTENTDOM_VIDEO;
if (cds.equals("image")) contentdom = plasmaSearchQuery.CONTENTDOM_IMAGE;
if (cds.equals("app")) contentdom = plasmaSearchQuery.CONTENTDOM_APP;
serverObjects prop = new serverObjects(); serverObjects prop = new serverObjects();
if (post.get("cat", "href").equals("href")) { if (post.get("cat", "href").equals("href")) {
final TreeSet query = plasmaSearchQuery.cleanQuery(querystring); final TreeSet query = plasmaSearchQuery.cleanQuery(querystring);
@ -234,13 +248,13 @@ public class yacysearch {
if (order.endsWith("YBR")) order3 = plasmaSearchRankingProfile.ORDER_YBR; if (order.endsWith("YBR")) order3 = plasmaSearchRankingProfile.ORDER_YBR;
if (order.endsWith("Date")) order3 = plasmaSearchRankingProfile.ORDER_DATE; if (order.endsWith("Date")) order3 = plasmaSearchRankingProfile.ORDER_DATE;
if (order.endsWith("Quality")) order3 = plasmaSearchRankingProfile.ORDER_QUALITY; if (order.endsWith("Quality")) order3 = plasmaSearchRankingProfile.ORDER_QUALITY;
// do the search // do the search
plasmaSearchQuery thisSearch = new plasmaSearchQuery( plasmaSearchQuery thisSearch = new plasmaSearchQuery(
query, query,
maxDistance, maxDistance,
prefermask, prefermask,
plasmaSearchQuery.CONTENTDOM_TEXT, contentdom,
count, count,
searchtime, searchtime,
urlmask, urlmask,
@ -408,6 +422,12 @@ public class yacysearch {
prop.put("display", display); prop.put("display", display);
prop.put("indexof", (indexof) ? "on" : "off"); prop.put("indexof", (indexof) ? "on" : "off");
prop.put("constraint", constraint.exportB64()); prop.put("constraint", constraint.exportB64());
prop.put("contentdom", cds);
prop.put("contentdomCheckText", (contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) ? 1 : 0);
prop.put("contentdomCheckAudio", (contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) ? 1 : 0);
prop.put("contentdomCheckVideo", (contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) ? 1 : 0);
prop.put("contentdomCheckImage", (contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) ? 1 : 0);
prop.put("contentdomCheckApp", (contentdom == plasmaSearchQuery.CONTENTDOM_APP) ? 1 : 0);
// return rewrite properties // return rewrite properties
return prop; return prop;

@ -145,14 +145,14 @@ public class indexCachedRI implements indexRI {
if (container == null) { if (container == null) {
container = riIntern.getContainer(wordHash, urlselection, maxTime); container = riIntern.getContainer(wordHash, urlselection, maxTime);
} else { } else {
container.add(riIntern.getContainer(wordHash, urlselection, maxTime), maxTime); container.addAllUnique(riIntern.getContainer(wordHash, urlselection, maxTime));
} }
// get from collection index // get from collection index
if (container == null) { if (container == null) {
container = backend.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime); container = backend.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime);
} else { } else {
container.add(backend.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime), maxTime); container.addAllUnique(backend.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime));
} }
return container; return container;
} }
@ -208,8 +208,8 @@ public class indexCachedRI implements indexRI {
public indexContainer deleteContainer(String wordHash) { public indexContainer deleteContainer(String wordHash) {
indexContainer c = riIntern.deleteContainer(wordHash); indexContainer c = riIntern.deleteContainer(wordHash);
if (c == null) c = riExtern.deleteContainer(wordHash); else c.add(riExtern.deleteContainer(wordHash), -1); if (c == null) c = riExtern.deleteContainer(wordHash); else c.addAllUnique(riExtern.deleteContainer(wordHash));
if (c == null) c = backend.deleteContainer(wordHash); else c.add(backend.deleteContainer(wordHash), -1); if (c == null) c = backend.deleteContainer(wordHash); else c.addAllUnique(backend.deleteContainer(wordHash));
return c; return c;
} }

@ -155,7 +155,13 @@ public class indexCollectionRI implements indexRI {
public synchronized void addEntry(String wordHash, indexRWIEntry newEntry, long updateTime, boolean dhtCase) { public synchronized void addEntry(String wordHash, indexRWIEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = new indexContainer(wordHash, collectionIndex.payloadRow()); indexContainer container = new indexContainer(wordHash, collectionIndex.payloadRow());
container.add(newEntry); container.add(newEntry);
addEntries(container, updateTime, dhtCase); try {
collectionIndex.merge(wordHash.getBytes(), (kelondroRowCollection) container);
} catch (kelondroOutOfLimitsException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
} }
public synchronized void addEntries(indexContainer newEntries, long creationTime, boolean dhtCase) { public synchronized void addEntries(indexContainer newEntries, long creationTime, boolean dhtCase) {

@ -34,7 +34,6 @@ import java.util.Set;
import java.util.TreeMap; import java.util.TreeMap;
import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroRowSet; import de.anomic.kelondro.kelondroRowSet;
@ -55,7 +54,7 @@ public class indexContainer extends kelondroRowSet {
public indexContainer topLevelClone() { public indexContainer topLevelClone() {
indexContainer newContainer = new indexContainer(this.wordHash, this.rowdef); indexContainer newContainer = new indexContainer(this.wordHash, this.rowdef);
newContainer.add(this, -1); newContainer.addAllUnique(this);
return newContainer; return newContainer;
} }
@ -70,60 +69,53 @@ public class indexContainer extends kelondroRowSet {
public String getWordHash() { public String getWordHash() {
return wordHash; return wordHash;
} }
public int add(indexRWIEntry entry) { public void add(indexRWIEntry entry) {
// add without double-occurrence test
assert entry.toKelondroEntry().objectsize() == super.rowdef.objectsize(); assert entry.toKelondroEntry().objectsize() == super.rowdef.objectsize();
this.addUnique(entry.toKelondroEntry()); this.addUnique(entry.toKelondroEntry());
return 1;
} }
public int add(indexRWIEntry entry, long updateTime) { public void add(indexRWIEntry entry, long updateTime) {
// add without double-occurrence test
assert entry.toKelondroEntry().objectsize() == super.rowdef.objectsize(); assert entry.toKelondroEntry().objectsize() == super.rowdef.objectsize();
this.add(entry); this.add(entry);
this.lastTimeWrote = updateTime; this.lastTimeWrote = updateTime;
return 1;
} }
public int add(indexRWIEntry[] entries, long updateTime) { /*
for (int i = 0; i < entries.length; i++) this.add(entries[i], updateTime); public void addAllUnique(indexContainer c) {
return entries.length; // this method can be called if all entries in c are known to be unique with reference to
// the entries in this container; that means: there are no double occurrences anywhere
// in/and between c and this.
super.addAllUnique((kelondroRowCollection) c);
} }
public int add(indexContainer c, long maxTime) { public static final indexContainer mergeUnique(indexContainer a, boolean aIsClone, indexContainer b, boolean bIsClone) {
// returns the number of new elements if ((aIsClone) && (bIsClone)) {
long timeout = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime; if (a.size() > b.size()) return mergeUnique(a, b); else return mergeUnique(b, a);
if (c == null) return 0;
int x = 0;
synchronized (c) {
Iterator i = c.entries();
while (i.hasNext()) {
try {
if (addi((indexRWIEntry) i.next())) x++;
} catch (ConcurrentModificationException e) {
e.printStackTrace();
}
if (System.currentTimeMillis() > timeout) break;
}
} }
this.lastTimeWrote = java.lang.Math.max(this.lastTimeWrote, c.updated()); if (aIsClone) return mergeUnique(a, b);
return x; if (bIsClone) return mergeUnique(b, a);
if (a.size() > b.size()) return mergeUnique(a, b); else return mergeUnique(b, a);
} }
*/
private boolean addi(indexRWIEntry entry) { public indexRWIEntry put(indexRWIEntry entry) {
assert entry.toKelondroEntry().objectsize() == super.rowdef.objectsize();
kelondroRow.Entry r = super.put(entry.toKelondroEntry());
if (r == null) return null;
return new indexRWIEntryNew(r);
}
public boolean putRecent(indexRWIEntry entry) {
assert entry.toKelondroEntry().objectsize() == super.rowdef.objectsize(); assert entry.toKelondroEntry().objectsize() == super.rowdef.objectsize();
// returns true if the new entry was added, false if it already existed // returns true if the new entry was added, false if it already existed
kelondroRow.Entry oldEntryRow = this.put(entry.toKelondroEntry()); kelondroRow.Entry oldEntryRow = this.put(entry.toKelondroEntry());
if (oldEntryRow == null) { if (oldEntryRow == null) {
return true; return true;
} else { } else {
indexRWIEntry oldEntry; indexRWIEntry oldEntry = new indexRWIEntryNew(oldEntryRow);
if (entry instanceof indexRWIEntryNew)
oldEntry = new indexRWIEntryNew(oldEntryRow);
else try {
oldEntry = new indexRWIEntryNew(new indexRWIEntryOld(oldEntryRow));
} catch (kelondroException e) {
return false;
}
if (entry.isOlder(oldEntry)) { // A more recent Entry is already in this container if (entry.isOlder(oldEntry)) { // A more recent Entry is already in this container
this.put(oldEntry.toKelondroEntry()); // put it back this.put(oldEntry.toKelondroEntry()); // put it back
return false; return false;
@ -133,6 +125,25 @@ public class indexContainer extends kelondroRowSet {
} }
} }
public int putAllRecent(indexContainer c) {
    // Merges all entries of c into this container using putRecent for each entry
    // and returns the number of entries for which putRecent reported true.
    // A null container is treated as empty. Afterwards the last-write time of
    // this container is raised to c.updated() if that is later.
    if (c == null) return 0;
    int x = 0;
    synchronized (c) {
        Iterator i = c.entries();
        try {
            while (i.hasNext()) {
                if (putRecent((indexRWIEntry) i.next())) x++;
            }
        } catch (ConcurrentModificationException e) {
            // Stop iterating: once an iterator has detected a concurrent
            // modification, further next() calls may fail in the same way while
            // hasNext() keeps returning true, so continuing could loop forever.
            // The entries merged so far are kept.
            e.printStackTrace();
        }
    }
    this.lastTimeWrote = java.lang.Math.max(this.lastTimeWrote, c.updated());
    return x;
}
public indexRWIEntry get(String urlHash) { public indexRWIEntry get(String urlHash) {
kelondroRow.Entry entry = this.get(urlHash.getBytes()); kelondroRow.Entry entry = this.get(urlHash.getBytes());
if (entry == null) return null; if (entry == null) return null;
@ -204,12 +215,13 @@ public class indexContainer extends kelondroRowSet {
} }
} }
/*
public static Object containerMerge(Object a, Object b) { public static Object containerMerge(Object a, Object b) {
indexContainer c = (indexContainer) a; indexContainer c = (indexContainer) a;
c.add((indexContainer) b, -1); c.add((indexContainer) b, -1);
return c; return c;
} }
*/
public static indexContainer joinContainer(Collection containers, long time, int maxDistance) { public static indexContainer joinContainer(Collection containers, long time, int maxDistance) {
long stamp = System.currentTimeMillis(); long stamp = System.currentTimeMillis();

@ -432,7 +432,7 @@ public final class indexRAMRI implements indexRI {
entries = container.topLevelClone(); entries = container.topLevelClone();
added = entries.size(); added = entries.size();
} else { } else {
added = entries.add(container, -1); added = entries.putAllRecent(container);
} }
if (added > 0) { if (added > 0) {
cache.put(wordHash, entries); cache.put(wordHash, entries);
@ -445,15 +445,10 @@ public final class indexRAMRI implements indexRI {
public synchronized void addEntry(String wordHash, indexRWIEntry newEntry, long updateTime, boolean dhtCase) { public synchronized void addEntry(String wordHash, indexRWIEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = (indexContainer) cache.get(wordHash); indexContainer container = (indexContainer) cache.get(wordHash);
if (container == null) container = new indexContainer(wordHash, this.payloadrow); if (container == null) container = new indexContainer(wordHash, this.payloadrow);
indexRWIEntry[] entries = new indexRWIEntry[] { newEntry }; container.put(newEntry);
if (container.add(entries, updateTime) > 0) { cache.put(wordHash, container);
cache.put(wordHash, container); hashScore.incScore(wordHash);
hashScore.incScore(wordHash); hashDate.setScore(wordHash, intTime(updateTime));
hashDate.setScore(wordHash, intTime(updateTime));
return;
}
container = null;
entries = null;
} }
public synchronized void close() { public synchronized void close() {

@ -315,8 +315,9 @@ public class kelondroCollectionIndex {
kelondroRowSet oldcollection = getwithparams(indexrow, oldchunksize, oldchunkcount, oldPartitionNumber, oldrownumber, oldSerialNumber, false); kelondroRowSet oldcollection = getwithparams(indexrow, oldchunksize, oldchunkcount, oldPartitionNumber, oldrownumber, oldSerialNumber, false);
// join with new collection // join with new collection
oldcollection.addAll(collection); oldcollection.addAllUnique(collection);
oldcollection.shape(); oldcollection.shape();
oldcollection.uniq(); // FIXME: not clear if it would be better to insert the collection with put to avoid double-entries
oldcollection.trim(); oldcollection.trim();
collection = oldcollection; collection = oldcollection;
} }

@ -251,20 +251,17 @@ public class kelondroRowCollection {
} }
return false; return false;
} }
public final void addAll(kelondroRowCollection c) { public final void addAllUnique(kelondroRowCollection c) {
assert(rowdef.objectsize() >= c.rowdef.objectsize()); if (c == null) return;
assert(rowdef.objectsize() == c.rowdef.objectsize());
synchronized(chunkcache) { synchronized(chunkcache) {
ensureSize(chunkcount + c.size()); ensureSize(chunkcount + c.size());
} System.arraycopy(c.chunkcache, 0, chunkcache, rowdef.objectsize() * chunkcount, rowdef.objectsize() * c.size());
Iterator i = c.rows(); chunkcount += c.size();
kelondroRow.Entry entry;
while (i.hasNext()) {
entry = (kelondroRow.Entry) i.next();
addUnique(entry);
} }
} }
protected final void removeShift(int pos, int dist, int upBound) { protected final void removeShift(int pos, int dist, int upBound) {
assert ((pos + dist) * rowdef.objectsize() >= 0) : "pos = " + pos + ", dist = " + dist + ", rowdef.objectsize() = " + rowdef.objectsize; assert ((pos + dist) * rowdef.objectsize() >= 0) : "pos = " + pos + ", dist = " + dist + ", rowdef.objectsize() = " + rowdef.objectsize;
assert (pos * rowdef.objectsize() >= 0) : "pos = " + pos + ", rowdef.objectsize() = " + rowdef.objectsize; assert (pos * rowdef.objectsize() >= 0) : "pos = " + pos + ", rowdef.objectsize() = " + rowdef.objectsize;

@ -79,6 +79,7 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
} }
public void addUnique(kelondroRow.Entry row) { public void addUnique(kelondroRow.Entry row) {
// add an entry without doing a double-occurrence test
if (removeMarker.size() == 0) { if (removeMarker.size() == 0) {
super.addUnique(row); super.addUnique(row);
} else { } else {

@ -361,8 +361,10 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
profileLocal.startTimer(); profileLocal.startTimer();
long pst = System.currentTimeMillis(); long pst = System.currentTimeMillis();
searchResult.add(rcLocal, preorderTime); searchResult.addAllUnique(rcLocal);
searchResult.add(rcContainers, preorderTime); searchResult.addAllUnique(rcContainers);
searchResult.shape();
searchResult.uniq();
preorderTime = preorderTime - (System.currentTimeMillis() - pst); preorderTime = preorderTime - (System.currentTimeMillis() - pst);
if (preorderTime < 0) preorderTime = 200; if (preorderTime < 0) preorderTime = 200;
plasmaSearchPreOrder preorder = new plasmaSearchPreOrder(query, ranking, searchResult, preorderTime); plasmaSearchPreOrder preorder = new plasmaSearchPreOrder(query, ranking, searchResult, preorderTime);

@ -298,14 +298,14 @@ public final class plasmaWordIndex implements indexRI {
if (container == null) { if (container == null) {
container = dhtInCache.getContainer(wordHash, urlselection, -1); container = dhtInCache.getContainer(wordHash, urlselection, -1);
} else { } else {
container.add(dhtInCache.getContainer(wordHash, urlselection, -1), -1); container.addAllUnique(dhtInCache.getContainer(wordHash, urlselection, -1));
} }
// get from collection index // get from collection index
if (container == null) { if (container == null) {
container = collections.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime); container = collections.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime);
} else { } else {
container.add(collections.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime), -1); container.addAllUnique(collections.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime));
} }
return container; return container;
} }
@ -362,9 +362,9 @@ public final class plasmaWordIndex implements indexRI {
public indexContainer deleteContainer(String wordHash) { public indexContainer deleteContainer(String wordHash) {
indexContainer c = new indexContainer(wordHash, indexRWIEntryNew.urlEntryRow); indexContainer c = new indexContainer(wordHash, indexRWIEntryNew.urlEntryRow);
c.add(dhtInCache.deleteContainer(wordHash), -1); c.addAllUnique(dhtInCache.deleteContainer(wordHash));
c.add(dhtOutCache.deleteContainer(wordHash), -1); c.addAllUnique(dhtOutCache.deleteContainer(wordHash));
c.add(collections.deleteContainer(wordHash), -1); c.addAllUnique(collections.deleteContainer(wordHash));
return c; return c;
} }

@ -57,7 +57,6 @@ import java.io.IOException;
import java.util.Iterator; import java.util.Iterator;
import de.anomic.index.indexContainer; import de.anomic.index.indexContainer;
import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexRWIEntryNew; import de.anomic.index.indexRWIEntryNew;
import de.anomic.index.indexRWIEntryOld; import de.anomic.index.indexRWIEntryOld;
import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroBase64Order;
@ -127,7 +126,7 @@ public final class plasmaWordIndexAssortment {
int al = assortmentCapacity(row.objectsize()); int al = assortmentCapacity(row.objectsize());
for (int i = 0; i < al; i++) try { for (int i = 0; i < al; i++) try {
// fill AND convert old entries to new entries // fill AND convert old entries to new entries
container.add(new indexRWIEntry[] { new indexRWIEntryNew(new indexRWIEntryOld(row.getColBytes(3 + i))) }, updateTime); container.add(new indexRWIEntryNew(new indexRWIEntryOld(row.getColBytes(3 + i))), updateTime);
} catch (kelondroException e) {} } catch (kelondroException e) {}
return container; return container;
} }

@ -533,14 +533,14 @@ public final class yacyClient {
// add the url entry to the word indexes // add the url entry to the word indexes
for (int m = 0; m < words; m++) { for (int m = 0; m < words; m++) {
assert (entry instanceof indexRWIEntryNew); assert (entry instanceof indexRWIEntryNew);
container[m].add(new indexRWIEntry[]{entry}, System.currentTimeMillis()); container[m].add(entry, System.currentTimeMillis());
} }
// store url hash for statistics // store url hash for statistics
urls[n] = urlEntry.hash(); urls[n] = urlEntry.hash();
} }
// insert the containers to the index // insert the containers to the index
for (int m = 0; m < words; m++) { containerCache.add(container[m], -1); } for (int m = 0; m < words; m++) { containerCache.addAllUnique(container[m]); }
// read index abstract // read index abstract
if (abstractCache != null) { if (abstractCache != null) {

@ -707,7 +707,7 @@ public final class yacy {
while (entries.hasNext()) { while (entries.hasNext()) {
entry = (indexRWIEntry) entries.next(); entry = (indexRWIEntry) entries.next();
// System.out.println("ENTRY = " + entry.getUrlHash()); // System.out.println("ENTRY = " + entry.getUrlHash());
container.add(new indexRWIEntry[] { entry }, System.currentTimeMillis()); container.add(entry, System.currentTimeMillis());
} }
// we have read all elements, now delete the entity // we have read all elements, now delete the entity
entity.deleteComplete(); entity.deleteComplete();

Loading…
Cancel
Save