- added a media search for images, audio, video and applications

- new search options on search page
- new option in ViewInfo to display all links of a file
- enhanced collection data structure

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3054 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 18 years ago
parent 25a64fe3da
commit 10d888e70c

@ -3,7 +3,7 @@ javacSource=1.4
javacTarget=1.4
# Release Configuration
releaseVersion=0.491
releaseVersion=0.492
releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
#releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}

@ -42,7 +42,8 @@
<a href="?urlHash=#[hash]#&viewMode=iframe&words=#[words]#">Original</a> |
<a href="?urlHash=#[hash]#&viewMode=plain&words=#[words]#">Plain Text</a> |
<a href="?urlHash=#[hash]#&viewMode=parsed&words=#[words]#">Parsed Text</a> |
<a href="?urlHash=#[hash]#&viewMode=sentences&words=#[words]#">Parsed Sentences</a>
<a href="?urlHash=#[hash]#&viewMode=sentences&words=#[words]#">Parsed Sentences</a> |
<a href="?urlHash=#[hash]#&viewMode=links&words=#[words]#">Link List</a>
</td>
</tr>
</table>
@ -85,6 +86,19 @@ Unsupported protocol.
<h3>Original Resource Content</h3><br>
<iframe src="#[url]#" width="800" height="400">
</iframe>
:: <!-- 5 -->
<h3>Link List</h3><br>
<table border="0" cellpadding="2" cellspacing="1">
#{links}#
<tr class="TableCell#(dark)#Light::Dark::Summary#(/dark)#">
<td>#[nr]#</td>
<td><tt>#[type]#</tt></td>
<td><tt>#[text]#</tt></td>
<td><tt>#[link]#</tt></td>
<td><tt>#[attr]#</tt></td>
</tr>
#{/links}#
</table>
#(/viewMode)#
</p>

@ -51,8 +51,12 @@ import java.net.MalformedURLException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeSet;
import de.anomic.data.wikiCode;
import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
import de.anomic.index.indexURLEntry;
@ -75,6 +79,7 @@ public class ViewFile {
public static final int VIEW_MODE_AS_PARSED_TEXT = 2;
public static final int VIEW_MODE_AS_PARSED_SENTENCES = 3;
public static final int VIEW_MODE_AS_IFRAME = 4;
public static final int VIEW_MODE_AS_LINKLIST = 5;
public static final String[] highlightingColors = new String[] {
"255,255,100",
@ -271,7 +276,7 @@ public class ViewFile {
} else if (viewMode.equals("iframe")) {
prop.put("viewMode", VIEW_MODE_AS_IFRAME);
prop.put("viewMode_url", url.toNormalform());
} else if (viewMode.equals("parsed") || viewMode.equals("sentences")) {
} else if (viewMode.equals("parsed") || viewMode.equals("sentences") || viewMode.equals("links")) {
// parsing the resource content
plasmaParserDocument document = null;
try {
@ -305,45 +310,52 @@ public class ViewFile {
prop.put("viewMode", VIEW_MODE_AS_PARSED_TEXT);
prop.put("viewMode_parsedText", content);
} else {
} else if (viewMode.equals("sentences")) {
prop.put("viewMode", VIEW_MODE_AS_PARSED_SENTENCES);
final Enumeration sentences = document.getSentences(pre);
boolean dark = true;
int i = 0;
if (sentences != null)
if (sentences != null) {
String[] wordArray = wordArray(post.get("words", null));
// Search word highlighting
while (sentences.hasMoreElements()) {
String currentSentence = wikiCode.replaceHTML((String) sentences.nextElement());
// Search word highlighting
String words = post.get("words", null);
if (words != null) {
try {
words = URLDecoder.decode(words, "UTF-8");
} catch (UnsupportedEncodingException e) {
}
String[] wordArray = words.substring(1,
words.length() - 1).split(",");
for (int j = 0; j < wordArray.length; j++) {
String currentWord = wordArray[j].trim();
currentSentence = currentSentence.replaceAll(
currentWord,
"<b style=\"color: black; background-color: rgb("
+ highlightingColors[j % 6]
+ ");\">" + currentWord
+ "</b>");
}
}
prop.put("viewMode_sentences_" + i + "_nr", Integer.toString(i + 1));
prop.put("viewMode_sentences_" + i + "_text", currentSentence);
prop.put("viewMode_sentences_" + i + "_text", markup(wordArray, (String) sentences.nextElement()));
prop.put("viewMode_sentences_" + i + "_dark", ((dark) ? 1 : 0));
dark = !dark;
i++;
}
}
prop.put("viewMode_sentences", i);
} else if (viewMode.equals("links")) {
prop.put("viewMode", VIEW_MODE_AS_LINKLIST);
String[] wordArray = wordArray(post.get("words", null));
boolean dark = true;
int i = 0;
i += putMediaInfo(prop, wordArray, i, document.getVideolinks(), "video", (i % 2 == 0));
i += putMediaInfo(prop, wordArray, i, document.getAudiolinks(), "audio", (i % 2 == 0));
i += putMediaInfo(prop, wordArray, i, document.getApplinks(), "app", (i % 2 == 0));
dark = (i % 2 == 0);
TreeSet ts = document.getImages();
Iterator tsi = ts.iterator();
htmlFilterImageEntry entry;
while (tsi.hasNext()) {
entry = (htmlFilterImageEntry) tsi.next();
prop.put("viewMode_links_" + i + "_nr", i);
prop.put("viewMode_links_" + i + "_dark", ((dark) ? 1 : 0));
prop.put("viewMode_links_" + i + "_type", "image");
prop.put("viewMode_links_" + i + "_text", markup(wordArray, entry.alt()));
prop.put("viewMode_links_" + i + "_link", "<a href=\"" + (String) entry.url().toNormalform() + "\">" + markup(wordArray, (String) entry.url().toNormalform()) + "</a>");
prop.put("viewMode_links_" + i + "_attr", entry.width() + "&nbsp;x&nbsp;" + entry.height());
dark = !dark;
i++;
}
prop.put("viewMode_links", i);
}
if (document != null) document.close();
}
@ -358,4 +370,46 @@ public class ViewFile {
return prop;
}
/**
 * Decodes the URL-encoded "words" request parameter into the single search words.
 * The decoded value is expected to be a comma-separated list enclosed by one leading
 * and one trailing character (e.g. "[word1, word2]"); these two characters are
 * stripped before the list is split at ','. The words are returned untrimmed.
 *
 * @param words the raw, still URL-encoded parameter value; may be null
 * @return the search words, or null if there is nothing usable to highlight
 *         (null, undecodable, too short or empty input)
 */
private static final String[] wordArray(String words) {
    if (words == null) return null;
    try {
        words = URLDecoder.decode(words, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        // cannot decode the parameter: behave as if no words were given
        return null;
    } catch (IllegalArgumentException e) {
        // malformed %-escape in the request parameter: highlighting is optional,
        // so do not let a broken parameter fail the whole page
        return null;
    }
    // need at least the enclosing pair of characters, otherwise substring() would throw
    if (words.length() < 2) return null;
    String list = words.substring(1, words.length() - 1);
    // "".split(",") yields one empty string, not an empty array, so test for this explicitly
    if (list.trim().length() == 0) return null;
    String[] w = list.split(",");
    if (w.length == 0) return null;
    return w;
}
/**
 * Prepares a text for display and highlights the search words in it.
 * The message is first passed through wikiCode.replaceHTML (presumably HTML escaping -
 * confirm against wikiCode), then every literal occurrence of each trimmed word is
 * wrapped in a bold tag whose background colour is taken from highlightingColors,
 * cycling through the colours by word position.
 *
 * @param wordArray the search words as delivered by wordArray(); may be null
 * @param message   the text to mark up
 * @return the converted text with highlighting markup inserted
 */
private static final String markup(String[] wordArray, String message) {
    message = wikiCode.replaceHTML(message);
    if (wordArray == null) return message;
    for (int j = 0; j < wordArray.length; j++) {
        if (wordArray[j] == null) continue;
        String currentWord = wordArray[j].trim();
        // an empty word would match at every position of the text
        if (currentWord.length() == 0) continue;
        String highlighted = "<b style=\"color: black; background-color: rgb("
                + highlightingColors[j % highlightingColors.length]
                + ");\">" + currentWord
                + "</b>";
        // the word comes from the request and must be matched literally, not as a regex
        // NOTE(review): a later word can still match inside markup inserted for an earlier one
        message = replaceLiteral(message, currentWord, highlighted);
    }
    return message;
}

/**
 * Replaces every occurrence of word in text by replacement without any regular
 * expression or back-reference interpretation.
 */
private static String replaceLiteral(String text, String word, String replacement) {
    int hit = text.indexOf(word);
    if (hit < 0) return text;
    StringBuffer result = new StringBuffer(text.length() + replacement.length());
    int from = 0;
    while (hit >= 0) {
        result.append(text.substring(from, hit)).append(replacement);
        from = hit + word.length();
        hit = text.indexOf(word, from);
    }
    result.append(text.substring(from));
    return result.toString();
}
/**
 * Writes one "viewMode_links_<row>_*" property group per entry of a media map.
 * The map key is used as link target and link label, the map value as link text.
 *
 * @param prop      the property object that receives the rows
 * @param wordArray search words handed to markup(); may be null
 * @param c         index of the first row to write
 * @param media     map of link (key) to descriptive text (value)
 * @param name      value written into the "_type" property of every row
 * @param dark      colour flag of the first row; alternates with every row
 * @return the number of rows written
 */
private static int putMediaInfo(serverObjects prop, String[] wordArray, int c, Map media, String name, boolean dark) {
    int rows = 0;
    for (Iterator links = media.entrySet().iterator(); links.hasNext(); rows++) {
        final Map.Entry link = (Map.Entry) links.next();
        final int row = c + rows;
        final String key = "viewMode_links_" + row;
        prop.put(key + "_nr", row);
        prop.put(key + "_dark", dark ? 1 : 0);
        prop.put(key + "_type", name);
        prop.put(key + "_text", markup(wordArray, (String) link.getValue()));
        final String target = (String) link.getKey();
        prop.put(key + "_link", "<a href=\"" + target + "\">" + markup(wordArray, target) + "</a>");
        prop.put(key + "_attr", "");
        dark = !dark;
    }
    return rows;
}
}

@ -21,8 +21,13 @@
<input type="hidden" name="display" value="#[display]#" />
<input name="search" type="text" size="52" maxlength="80" value="#[former]#" />
<input type="submit" name="Enter" value="Search" />
<input type="hidden" name="former" value="#[former]#" />
#(searchoptions)#
<input type="hidden" name="former" value="#[former]#" /><br />
<input type="radio" name="contentdom" value="text" #(contentdomCheckText)#::checked="checked"#(/contentdomCheckText)# />Text&nbsp;&nbsp;
<input type="radio" name="contentdom" value="image" #(contentdomCheckImage)#::checked="checked"#(/contentdomCheckImage)# />Images&nbsp;&nbsp;
<input type="radio" name="contentdom" value="audio" #(contentdomCheckAudio)#::checked="checked"#(/contentdomCheckAudio)# />Audio&nbsp;&nbsp;
<input type="radio" name="contentdom" value="video" #(contentdomCheckVideo)#::checked="checked"#(/contentdomCheckVideo)# />Video&nbsp;&nbsp;
<input type="radio" name="contentdom" value="app" #(contentdomCheckApp)#::checked="checked"#(/contentdomCheckApp)# />Applications&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
#(searchoptions)#
<input type="hidden" name="count" value="10" />
<input type="hidden" name="order" value="Date-YBR-Quality" />
<input type="hidden" name="resource" value="global" />

@ -83,6 +83,15 @@ public class index {
}
}
// search domain
int contentdom = plasmaSearchQuery.CONTENTDOM_TEXT;
String cds = (post == null) ? "text" : post.get("contentdom", "text");
if (cds.equals("text")) contentdom = plasmaSearchQuery.CONTENTDOM_TEXT;
if (cds.equals("audio")) contentdom = plasmaSearchQuery.CONTENTDOM_AUDIO;
if (cds.equals("video")) contentdom = plasmaSearchQuery.CONTENTDOM_VIDEO;
if (cds.equals("image")) contentdom = plasmaSearchQuery.CONTENTDOM_IMAGE;
if (cds.equals("app")) contentdom = plasmaSearchQuery.CONTENTDOM_APP;
// we create empty entries for template strings
String promoteSearchPageGreeting = env.getConfig("promoteSearchPageGreeting", "");
if (promoteSearchPageGreeting.length() == 0) promoteSearchPageGreeting = "P2P WEB SEARCH";
@ -123,8 +132,12 @@ public class index {
prop.put("display", display);
prop.put("constraint", constraint);
prop.put("searchoptions_display", display);
prop.put("contentdomCheckText", (contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) ? 1 : 0);
prop.put("contentdomCheckAudio", (contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) ? 1 : 0);
prop.put("contentdomCheckVideo", (contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) ? 1 : 0);
prop.put("contentdomCheckImage", (contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) ? 1 : 0);
prop.put("contentdomCheckApp", (contentdom == plasmaSearchQuery.CONTENTDOM_APP) ? 1 : 0);
return prop;
}

@ -22,12 +22,20 @@
<p class="yacylogo"><a href="http://yacy.net/yacy/" class="yacylogo"><img src="/env/grafics/yacy.png" alt="yacy" /></a></p>
<h2>#[promoteSearchPageGreeting]#</h2>
<fieldset class="maininput">
<table width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td width="80%">
<input name="search" type="text" size="50" maxlength="80" value="#[former]#" onClick='document.searchform.Enter.value = "Search"' />
<input type="submit" name="Enter" value="Search" />
</td><td width="20%">
<a href="index.html?display=#[display]#&searchoptions=1&count=#[count]#&order=#[order]#&resource=#[resource]#&time=#[time]#&urlmaskfilter=#[urlmaskfilter]#&prefermaskfilter=#[prefermaskfilter]#&cat=#[cat]#&type=#[type]#&constraint=#[constraint]#&former=#[former]#">more options<a>
</td></tr>
<table width="100%" border="0" cellspacing="0" cellpadding="0">
<tr><td width="80%">
<input name="search" type="text" size="50" maxlength="80" value="#[former]#" onClick='document.searchform.Enter.value = "Search"' />
<input type="submit" name="Enter" value="Search" />
</td><td width="20%">
<a href="index.html?display=#[display]#&searchoptions=1&count=#[count]#&order=#[order]#&resource=#[resource]#&time=#[time]#&urlmaskfilter=#[urlmaskfilter]#&prefermaskfilter=#[prefermaskfilter]#&cat=#[cat]#&type=#[type]#&constraint=#[constraint]#&contentdom=#[contentdom]#&former=#[former]#">more options</a>
</td></tr>
<tr><td width="100%" colspan="2">
<input type="radio" name="contentdom" value="text" #(contentdomCheckText)#::checked="checked"#(/contentdomCheckText)# />Text&nbsp;&nbsp;
<input type="radio" name="contentdom" value="image" #(contentdomCheckImage)#::checked="checked"#(/contentdomCheckImage)# />Images&nbsp;&nbsp;
<input type="radio" name="contentdom" value="audio" #(contentdomCheckAudio)#::checked="checked"#(/contentdomCheckAudio)# />Audio&nbsp;&nbsp;
<input type="radio" name="contentdom" value="video" #(contentdomCheckVideo)#::checked="checked"#(/contentdomCheckVideo)# />Video&nbsp;&nbsp;
<input type="radio" name="contentdom" value="app" #(contentdomCheckApp)#::checked="checked"#(/contentdomCheckApp)# />Applications
</td></tr>
</table>
<input type="hidden" name="former" value="#[former]#" />
<input type="hidden" name="count" value="#[count]#" />

@ -126,6 +126,12 @@ public class yacysearch {
prop.put("type_resultbottomline", 0);
prop.put("type_results", "");
prop.put("display", display);
prop.put("contentdom", "text");
prop.put("contentdomCheckText", 1);
prop.put("contentdomCheckAudio", 0);
prop.put("contentdomCheckVideo", 0);
prop.put("contentdomCheckImage", 0);
prop.put("contentdomCheckApp", 0);
return prop;
}
@ -163,8 +169,16 @@ public class yacysearch {
final boolean indexReceiveGranted = sb.getConfig("allowReceiveIndex", "true").equals("true");
if (!indexDistributeGranted || !indexReceiveGranted) { global = false; }
// find search domain
int contentdom = plasmaSearchQuery.CONTENTDOM_TEXT;
String cds = post.get("contentdom", "text");
if (cds.equals("text")) contentdom = plasmaSearchQuery.CONTENTDOM_TEXT;
if (cds.equals("audio")) contentdom = plasmaSearchQuery.CONTENTDOM_AUDIO;
if (cds.equals("video")) contentdom = plasmaSearchQuery.CONTENTDOM_VIDEO;
if (cds.equals("image")) contentdom = plasmaSearchQuery.CONTENTDOM_IMAGE;
if (cds.equals("app")) contentdom = plasmaSearchQuery.CONTENTDOM_APP;
serverObjects prop = new serverObjects();
if (post.get("cat", "href").equals("href")) {
final TreeSet query = plasmaSearchQuery.cleanQuery(querystring);
@ -234,13 +248,13 @@ public class yacysearch {
if (order.endsWith("YBR")) order3 = plasmaSearchRankingProfile.ORDER_YBR;
if (order.endsWith("Date")) order3 = plasmaSearchRankingProfile.ORDER_DATE;
if (order.endsWith("Quality")) order3 = plasmaSearchRankingProfile.ORDER_QUALITY;
// do the search
plasmaSearchQuery thisSearch = new plasmaSearchQuery(
query,
maxDistance,
prefermask,
plasmaSearchQuery.CONTENTDOM_TEXT,
contentdom,
count,
searchtime,
urlmask,
@ -408,6 +422,12 @@ public class yacysearch {
prop.put("display", display);
prop.put("indexof", (indexof) ? "on" : "off");
prop.put("constraint", constraint.exportB64());
prop.put("contentdom", cds);
prop.put("contentdomCheckText", (contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) ? 1 : 0);
prop.put("contentdomCheckAudio", (contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) ? 1 : 0);
prop.put("contentdomCheckVideo", (contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) ? 1 : 0);
prop.put("contentdomCheckImage", (contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) ? 1 : 0);
prop.put("contentdomCheckApp", (contentdom == plasmaSearchQuery.CONTENTDOM_APP) ? 1 : 0);
// return rewrite properties
return prop;

@ -145,14 +145,14 @@ public class indexCachedRI implements indexRI {
if (container == null) {
container = riIntern.getContainer(wordHash, urlselection, maxTime);
} else {
container.add(riIntern.getContainer(wordHash, urlselection, maxTime), maxTime);
container.addAllUnique(riIntern.getContainer(wordHash, urlselection, maxTime));
}
// get from collection index
if (container == null) {
container = backend.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime);
} else {
container.add(backend.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime), maxTime);
container.addAllUnique(backend.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime));
}
return container;
}
@ -208,8 +208,8 @@ public class indexCachedRI implements indexRI {
public indexContainer deleteContainer(String wordHash) {
indexContainer c = riIntern.deleteContainer(wordHash);
if (c == null) c = riExtern.deleteContainer(wordHash); else c.add(riExtern.deleteContainer(wordHash), -1);
if (c == null) c = backend.deleteContainer(wordHash); else c.add(backend.deleteContainer(wordHash), -1);
if (c == null) c = riExtern.deleteContainer(wordHash); else c.addAllUnique(riExtern.deleteContainer(wordHash));
if (c == null) c = backend.deleteContainer(wordHash); else c.addAllUnique(backend.deleteContainer(wordHash));
return c;
}

@ -155,7 +155,13 @@ public class indexCollectionRI implements indexRI {
public synchronized void addEntry(String wordHash, indexRWIEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = new indexContainer(wordHash, collectionIndex.payloadRow());
container.add(newEntry);
addEntries(container, updateTime, dhtCase);
try {
collectionIndex.merge(wordHash.getBytes(), (kelondroRowCollection) container);
} catch (kelondroOutOfLimitsException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
public synchronized void addEntries(indexContainer newEntries, long creationTime, boolean dhtCase) {

@ -34,7 +34,6 @@ import java.util.Set;
import java.util.TreeMap;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroRowSet;
@ -55,7 +54,7 @@ public class indexContainer extends kelondroRowSet {
public indexContainer topLevelClone() {
indexContainer newContainer = new indexContainer(this.wordHash, this.rowdef);
newContainer.add(this, -1);
newContainer.addAllUnique(this);
return newContainer;
}
@ -70,60 +69,53 @@ public class indexContainer extends kelondroRowSet {
public String getWordHash() {
return wordHash;
}
public int add(indexRWIEntry entry) {
public void add(indexRWIEntry entry) {
// add without double-occurrence test
assert entry.toKelondroEntry().objectsize() == super.rowdef.objectsize();
this.addUnique(entry.toKelondroEntry());
return 1;
}
public int add(indexRWIEntry entry, long updateTime) {
public void add(indexRWIEntry entry, long updateTime) {
// add without double-occurrence test
assert entry.toKelondroEntry().objectsize() == super.rowdef.objectsize();
this.add(entry);
this.lastTimeWrote = updateTime;
return 1;
}
public int add(indexRWIEntry[] entries, long updateTime) {
for (int i = 0; i < entries.length; i++) this.add(entries[i], updateTime);
return entries.length;
/*
public void addAllUnique(indexContainer c) {
// this method can be called if all entries in c are known to be unique with reference to
// the entries in this container; that means: there are no double occurrences anywhere
// in/and between c and this.
super.addAllUnique((kelondroRowCollection) c);
}
public int add(indexContainer c, long maxTime) {
// returns the number of new elements
long timeout = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
if (c == null) return 0;
int x = 0;
synchronized (c) {
Iterator i = c.entries();
while (i.hasNext()) {
try {
if (addi((indexRWIEntry) i.next())) x++;
} catch (ConcurrentModificationException e) {
e.printStackTrace();
}
if (System.currentTimeMillis() > timeout) break;
}
public static final indexContainer mergeUnique(indexContainer a, boolean aIsClone, indexContainer b, boolean bIsClone) {
if ((aIsClone) && (bIsClone)) {
if (a.size() > b.size()) return mergeUnique(a, b); else return mergeUnique(b, a);
}
this.lastTimeWrote = java.lang.Math.max(this.lastTimeWrote, c.updated());
return x;
if (aIsClone) return mergeUnique(a, b);
if (bIsClone) return mergeUnique(b, a);
if (a.size() > b.size()) return mergeUnique(a, b); else return mergeUnique(b, a);
}
*/
private boolean addi(indexRWIEntry entry) {
/**
 * Stores an entry in this container through the superclass put.
 *
 * @param entry the entry to store; its row size must equal this container's row size
 * @return the row handed back by the superclass put wrapped as indexRWIEntryNew
 *         (presumably the entry that was replaced - confirm), or null if it returned none
 */
public indexRWIEntry put(indexRWIEntry entry) {
    assert entry.toKelondroEntry().objectsize() == super.rowdef.objectsize();
    final kelondroRow.Entry previous = super.put(entry.toKelondroEntry());
    return (previous == null) ? null : new indexRWIEntryNew(previous);
}
public boolean putRecent(indexRWIEntry entry) {
assert entry.toKelondroEntry().objectsize() == super.rowdef.objectsize();
// returns true if the new entry was added, false if it already existed
kelondroRow.Entry oldEntryRow = this.put(entry.toKelondroEntry());
if (oldEntryRow == null) {
return true;
} else {
indexRWIEntry oldEntry;
if (entry instanceof indexRWIEntryNew)
oldEntry = new indexRWIEntryNew(oldEntryRow);
else try {
oldEntry = new indexRWIEntryNew(new indexRWIEntryOld(oldEntryRow));
} catch (kelondroException e) {
return false;
}
indexRWIEntry oldEntry = new indexRWIEntryNew(oldEntryRow);
if (entry.isOlder(oldEntry)) { // A more recent Entry is already in this container
this.put(oldEntry.toKelondroEntry()); // put it back
return false;
@ -133,6 +125,25 @@ public class indexContainer extends kelondroRowSet {
}
}
/**
 * Feeds every entry of another container into this one through putRecent, so each
 * entry is tested for double occurrence, and takes over the later update time.
 *
 * @param c the container to read from; null is accepted and adds nothing
 * @return the number of entries for which putRecent reported an addition
 */
public int putAllRecent(indexContainer c) {
    if (c == null) {
        return 0;
    }
    int added = 0;
    // hold the lock on the source container for the whole iteration
    synchronized (c) {
        for (Iterator it = c.entries(); it.hasNext();) {
            try {
                final indexRWIEntry candidate = (indexRWIEntry) it.next();
                if (putRecent(candidate)) {
                    added++;
                }
            } catch (ConcurrentModificationException e) {
                e.printStackTrace();
            }
        }
    }
    // keep the more recent of both write times
    this.lastTimeWrote = java.lang.Math.max(this.lastTimeWrote, c.updated());
    return added;
}
public indexRWIEntry get(String urlHash) {
kelondroRow.Entry entry = this.get(urlHash.getBytes());
if (entry == null) return null;
@ -204,12 +215,13 @@ public class indexContainer extends kelondroRowSet {
}
}
/*
public static Object containerMerge(Object a, Object b) {
indexContainer c = (indexContainer) a;
c.add((indexContainer) b, -1);
return c;
}
*/
public static indexContainer joinContainer(Collection containers, long time, int maxDistance) {
long stamp = System.currentTimeMillis();

@ -432,7 +432,7 @@ public final class indexRAMRI implements indexRI {
entries = container.topLevelClone();
added = entries.size();
} else {
added = entries.add(container, -1);
added = entries.putAllRecent(container);
}
if (added > 0) {
cache.put(wordHash, entries);
@ -445,15 +445,10 @@ public final class indexRAMRI implements indexRI {
public synchronized void addEntry(String wordHash, indexRWIEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = (indexContainer) cache.get(wordHash);
if (container == null) container = new indexContainer(wordHash, this.payloadrow);
indexRWIEntry[] entries = new indexRWIEntry[] { newEntry };
if (container.add(entries, updateTime) > 0) {
cache.put(wordHash, container);
hashScore.incScore(wordHash);
hashDate.setScore(wordHash, intTime(updateTime));
return;
}
container = null;
entries = null;
container.put(newEntry);
cache.put(wordHash, container);
hashScore.incScore(wordHash);
hashDate.setScore(wordHash, intTime(updateTime));
}
public synchronized void close() {

@ -315,8 +315,9 @@ public class kelondroCollectionIndex {
kelondroRowSet oldcollection = getwithparams(indexrow, oldchunksize, oldchunkcount, oldPartitionNumber, oldrownumber, oldSerialNumber, false);
// join with new collection
oldcollection.addAll(collection);
oldcollection.addAllUnique(collection);
oldcollection.shape();
oldcollection.uniq(); // FIXME: not clear if it would be better to insert the collection with put to avoid double-entries
oldcollection.trim();
collection = oldcollection;
}

@ -251,20 +251,17 @@ public class kelondroRowCollection {
}
return false;
}
public final void addAll(kelondroRowCollection c) {
assert(rowdef.objectsize() >= c.rowdef.objectsize());
public final void addAllUnique(kelondroRowCollection c) {
if (c == null) return;
assert(rowdef.objectsize() == c.rowdef.objectsize());
synchronized(chunkcache) {
ensureSize(chunkcount + c.size());
}
Iterator i = c.rows();
kelondroRow.Entry entry;
while (i.hasNext()) {
entry = (kelondroRow.Entry) i.next();
addUnique(entry);
System.arraycopy(c.chunkcache, 0, chunkcache, rowdef.objectsize() * chunkcount, rowdef.objectsize() * c.size());
chunkcount += c.size();
}
}
protected final void removeShift(int pos, int dist, int upBound) {
assert ((pos + dist) * rowdef.objectsize() >= 0) : "pos = " + pos + ", dist = " + dist + ", rowdef.objectsize() = " + rowdef.objectsize;
assert (pos * rowdef.objectsize() >= 0) : "pos = " + pos + ", rowdef.objectsize() = " + rowdef.objectsize;

@ -79,6 +79,7 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
}
public void addUnique(kelondroRow.Entry row) {
// add an entry without doing a double-occurrence test
if (removeMarker.size() == 0) {
super.addUnique(row);
} else {

@ -361,8 +361,10 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
profileLocal.startTimer();
long pst = System.currentTimeMillis();
searchResult.add(rcLocal, preorderTime);
searchResult.add(rcContainers, preorderTime);
searchResult.addAllUnique(rcLocal);
searchResult.addAllUnique(rcContainers);
searchResult.shape();
searchResult.uniq();
preorderTime = preorderTime - (System.currentTimeMillis() - pst);
if (preorderTime < 0) preorderTime = 200;
plasmaSearchPreOrder preorder = new plasmaSearchPreOrder(query, ranking, searchResult, preorderTime);

@ -298,14 +298,14 @@ public final class plasmaWordIndex implements indexRI {
if (container == null) {
container = dhtInCache.getContainer(wordHash, urlselection, -1);
} else {
container.add(dhtInCache.getContainer(wordHash, urlselection, -1), -1);
container.addAllUnique(dhtInCache.getContainer(wordHash, urlselection, -1));
}
// get from collection index
if (container == null) {
container = collections.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime);
} else {
container.add(collections.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime), -1);
container.addAllUnique(collections.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime));
}
return container;
}
@ -362,9 +362,9 @@ public final class plasmaWordIndex implements indexRI {
public indexContainer deleteContainer(String wordHash) {
indexContainer c = new indexContainer(wordHash, indexRWIEntryNew.urlEntryRow);
c.add(dhtInCache.deleteContainer(wordHash), -1);
c.add(dhtOutCache.deleteContainer(wordHash), -1);
c.add(collections.deleteContainer(wordHash), -1);
c.addAllUnique(dhtInCache.deleteContainer(wordHash));
c.addAllUnique(dhtOutCache.deleteContainer(wordHash));
c.addAllUnique(collections.deleteContainer(wordHash));
return c;
}

@ -57,7 +57,6 @@ import java.io.IOException;
import java.util.Iterator;
import de.anomic.index.indexContainer;
import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexRWIEntryNew;
import de.anomic.index.indexRWIEntryOld;
import de.anomic.kelondro.kelondroBase64Order;
@ -127,7 +126,7 @@ public final class plasmaWordIndexAssortment {
int al = assortmentCapacity(row.objectsize());
for (int i = 0; i < al; i++) try {
// fill AND convert old entries to new entries
container.add(new indexRWIEntry[] { new indexRWIEntryNew(new indexRWIEntryOld(row.getColBytes(3 + i))) }, updateTime);
container.add(new indexRWIEntryNew(new indexRWIEntryOld(row.getColBytes(3 + i))), updateTime);
} catch (kelondroException e) {}
return container;
}

@ -533,14 +533,14 @@ public final class yacyClient {
// add the url entry to the word indexes
for (int m = 0; m < words; m++) {
assert (entry instanceof indexRWIEntryNew);
container[m].add(new indexRWIEntry[]{entry}, System.currentTimeMillis());
container[m].add(entry, System.currentTimeMillis());
}
// store url hash for statistics
urls[n] = urlEntry.hash();
}
// insert the containers to the index
for (int m = 0; m < words; m++) { containerCache.add(container[m], -1); }
for (int m = 0; m < words; m++) { containerCache.addAllUnique(container[m]); }
// read index abstract
if (abstractCache != null) {

@ -707,7 +707,7 @@ public final class yacy {
while (entries.hasNext()) {
entry = (indexRWIEntry) entries.next();
// System.out.println("ENTRY = " + entry.getUrlHash());
container.add(new indexRWIEntry[] { entry }, System.currentTimeMillis());
container.add(entry, System.currentTimeMillis());
}
// we have read all elements, now delete the entity
entity.deleteComplete();

Loading…
Cancel
Save