From 10d888e70c3ddf35805bb9a2c7dec17452c53e85 Mon Sep 17 00:00:00 2001 From: orbiter Date: Thu, 7 Dec 2006 02:40:57 +0000 Subject: [PATCH] - added a media search for images, audio, video and applications - new search options on search page - new option in ViewInfo to display all links of a file - enhanced collection data structure git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3054 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- build.properties | 2 +- htroot/ViewFile.html | 16 ++- htroot/ViewFile.java | 108 +++++++++++++----- htroot/index.html | 9 +- htroot/index.java | 17 ++- htroot/yacysearch.html | 20 +++- htroot/yacysearch.java | 26 ++++- source/de/anomic/index/indexCachedRI.java | 8 +- source/de/anomic/index/indexCollectionRI.java | 8 +- source/de/anomic/index/indexContainer.java | 90 ++++++++------- source/de/anomic/index/indexRAMRI.java | 15 +-- .../kelondro/kelondroCollectionIndex.java | 3 +- .../kelondro/kelondroRowCollection.java | 17 ++- source/de/anomic/kelondro/kelondroRowSet.java | 1 + .../de/anomic/plasma/plasmaSearchEvent.java | 6 +- source/de/anomic/plasma/plasmaWordIndex.java | 10 +- .../plasma/plasmaWordIndexAssortment.java | 3 +- source/de/anomic/yacy/yacyClient.java | 4 +- source/yacy.java | 2 +- 19 files changed, 246 insertions(+), 119 deletions(-) diff --git a/build.properties b/build.properties index 8cd3e44f4..7ef576e44 100644 --- a/build.properties +++ b/build.properties @@ -3,7 +3,7 @@ javacSource=1.4 javacTarget=1.4 # Release Configuration -releaseVersion=0.491 +releaseVersion=0.492 releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz #releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr} diff --git a/htroot/ViewFile.html b/htroot/ViewFile.html index 9cccb1fdc..bd2edda3b 100644 --- a/htroot/ViewFile.html +++ b/htroot/ViewFile.html @@ -42,7 +42,8 @@ Original | Plain Text | Parsed Text | - Parsed Sentences + Parsed Sentences | + Link List @@ -85,6 +86,19 @@ Unsupported protocol.

Original Resource Content


+:: +

Link List


+ + #{links}# + + + + + + + + #{/links}# +
#[nr]##[type]##[text]##[link]##[attr]#
#(/viewMode)#

diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java index ad20fdc50..6bc426371 100644 --- a/htroot/ViewFile.java +++ b/htroot/ViewFile.java @@ -51,8 +51,12 @@ import java.net.MalformedURLException; import java.net.URLDecoder; import java.net.URLEncoder; import java.util.Enumeration; +import java.util.Iterator; +import java.util.Map; +import java.util.TreeSet; import de.anomic.data.wikiCode; +import de.anomic.htmlFilter.htmlFilterImageEntry; import de.anomic.http.httpHeader; import de.anomic.http.httpc; import de.anomic.index.indexURLEntry; @@ -75,6 +79,7 @@ public class ViewFile { public static final int VIEW_MODE_AS_PARSED_TEXT = 2; public static final int VIEW_MODE_AS_PARSED_SENTENCES = 3; public static final int VIEW_MODE_AS_IFRAME = 4; + public static final int VIEW_MODE_AS_LINKLIST = 5; public static final String[] highlightingColors = new String[] { "255,255,100", @@ -271,7 +276,7 @@ public class ViewFile { } else if (viewMode.equals("iframe")) { prop.put("viewMode", VIEW_MODE_AS_IFRAME); prop.put("viewMode_url", url.toNormalform()); - } else if (viewMode.equals("parsed") || viewMode.equals("sentences")) { + } else if (viewMode.equals("parsed") || viewMode.equals("sentences") || viewMode.equals("links")) { // parsing the resource content plasmaParserDocument document = null; try { @@ -305,45 +310,52 @@ public class ViewFile { prop.put("viewMode", VIEW_MODE_AS_PARSED_TEXT); prop.put("viewMode_parsedText", content); - } else { + } else if (viewMode.equals("sentences")) { prop.put("viewMode", VIEW_MODE_AS_PARSED_SENTENCES); final Enumeration sentences = document.getSentences(pre); boolean dark = true; int i = 0; - if (sentences != null) + if (sentences != null) { + String[] wordArray = wordArray(post.get("words", null)); + + // Search word highlighting while (sentences.hasMoreElements()) { - String currentSentence = wikiCode.replaceHTML((String) sentences.nextElement()); - - // Search word highlighting - String words = post.get("words", null); - if (words != null) { - try { - words = URLDecoder.decode(words, "UTF-8"); - } catch (UnsupportedEncodingException e) { - } - - String[] wordArray = words.substring(1, - words.length() - 1).split(","); - for (int j = 0; j < wordArray.length; j++) { - String currentWord = wordArray[j].trim(); - currentSentence = currentSentence.replaceAll( - currentWord, - "" + currentWord - + ""); - } - } - prop.put("viewMode_sentences_" + i + "_nr", Integer.toString(i + 1)); - prop.put("viewMode_sentences_" + i + "_text", currentSentence); + prop.put("viewMode_sentences_" + i + "_text", markup(wordArray, (String) sentences.nextElement())); prop.put("viewMode_sentences_" + i + "_dark", ((dark) ? 1 : 0)); dark = !dark; i++; } + } prop.put("viewMode_sentences", i); + } else if (viewMode.equals("links")) { + prop.put("viewMode", VIEW_MODE_AS_LINKLIST); + String[] wordArray = wordArray(post.get("words", null)); + boolean dark = true; + int i = 0; + i += putMediaInfo(prop, wordArray, i, document.getVideolinks(), "video", (i % 2 == 0)); + i += putMediaInfo(prop, wordArray, i, document.getAudiolinks(), "audio", (i % 2 == 0)); + i += putMediaInfo(prop, wordArray, i, document.getApplinks(), "app", (i % 2 == 0)); + dark = (i % 2 == 0); + + TreeSet ts = document.getImages(); + Iterator tsi = ts.iterator(); + htmlFilterImageEntry entry; + while (tsi.hasNext()) { + entry = (htmlFilterImageEntry) tsi.next(); + prop.put("viewMode_links_" + i + "_nr", i); + prop.put("viewMode_links_" + i + "_dark", ((dark) ? 1 : 0)); + prop.put("viewMode_links_" + i + "_type", "image"); + prop.put("viewMode_links_" + i + "_text", markup(wordArray, entry.alt())); + prop.put("viewMode_links_" + i + "_link", "" + markup(wordArray, (String) entry.url().toNormalform()) + ""); + prop.put("viewMode_links_" + i + "_attr", entry.width() + " x " + entry.height()); + dark = !dark; + i++; + } + prop.put("viewMode_links", i); + } if (document != null) document.close(); } @@ -358,4 +370,46 @@ public class ViewFile { return prop; } + private static final String[] wordArray(String words) { + String[] w = null; + if (words != null) try { + words = URLDecoder.decode(words, "UTF-8"); + w = words.substring(1, words.length() - 1).split(","); + if (w.length == 0) return null; + } catch (UnsupportedEncodingException e) {} + return w; + } + + private static final String markup(String[] wordArray, String message) { + message = wikiCode.replaceHTML(message); + if (wordArray != null) for (int j = 0; j < wordArray.length; j++) { + String currentWord = wordArray[j].trim(); + message = message.replaceAll(currentWord, + "" + currentWord + + ""); + } + return message; + } + + private static int putMediaInfo(serverObjects prop, String[] wordArray, int c, Map media, String name, boolean dark) { + Iterator mi = media.entrySet().iterator(); + Map.Entry entry; + int i = 0; + while (mi.hasNext()) { + entry = (Map.Entry) mi.next(); + prop.put("viewMode_links_" + c + "_nr", c); + prop.put("viewMode_links_" + c + "_dark", ((dark) ? 1 : 0)); + prop.put("viewMode_links_" + c + "_type", name); + prop.put("viewMode_links_" + c + "_text", markup(wordArray, (String) entry.getValue())); + prop.put("viewMode_links_" + c + "_link", "" + markup(wordArray, (String) entry.getKey()) + ""); + prop.put("viewMode_links_" + c + "_attr", ""); + dark = !dark; + c++; + i++; + } + return i; + } + } diff --git a/htroot/index.html b/htroot/index.html index 30d0a9df9..2fffd0b97 100644 --- a/htroot/index.html +++ b/htroot/index.html @@ -21,8 +21,13 @@ - - #(searchoptions)# +
+ Text   + Images   + Audio   + Video   + Applications       + #(searchoptions)# diff --git a/htroot/index.java b/htroot/index.java index 69615c7c6..0bc75fb1e 100644 --- a/htroot/index.java +++ b/htroot/index.java @@ -83,6 +83,15 @@ public class index { } } + // search domain + int contentdom = plasmaSearchQuery.CONTENTDOM_TEXT; + String cds = (post == null) ? "text" : post.get("contentdom", "text"); + if (cds.equals("text")) contentdom = plasmaSearchQuery.CONTENTDOM_TEXT; + if (cds.equals("audio")) contentdom = plasmaSearchQuery.CONTENTDOM_AUDIO; + if (cds.equals("video")) contentdom = plasmaSearchQuery.CONTENTDOM_VIDEO; + if (cds.equals("image")) contentdom = plasmaSearchQuery.CONTENTDOM_IMAGE; + if (cds.equals("app")) contentdom = plasmaSearchQuery.CONTENTDOM_APP; + // we create empty entries for template strings String promoteSearchPageGreeting = env.getConfig("promoteSearchPageGreeting", ""); if (promoteSearchPageGreeting.length() == 0) promoteSearchPageGreeting = "P2P WEB SEARCH"; @@ -123,8 +132,12 @@ public class index { prop.put("display", display); prop.put("constraint", constraint); prop.put("searchoptions_display", display); - - + prop.put("contentdomCheckText", (contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) ? 1 : 0); + prop.put("contentdomCheckAudio", (contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) ? 1 : 0); + prop.put("contentdomCheckVideo", (contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) ? 1 : 0); + prop.put("contentdomCheckImage", (contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) ? 1 : 0); + prop.put("contentdomCheckApp", (contentdom == plasmaSearchQuery.CONTENTDOM_APP) ? 1 : 0); + return prop; } diff --git a/htroot/yacysearch.html b/htroot/yacysearch.html index 1910de6f2..75ee4ddd2 100644 --- a/htroot/yacysearch.html +++ b/htroot/yacysearch.html @@ -22,12 +22,20 @@

#[promoteSearchPageGreeting]#

- +
- - - - more options -
+ +
+ + + + more options +
+ Text   + Images   + Audio   + Video   + Applications +
diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 327689068..209e51b79 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -126,6 +126,12 @@ public class yacysearch { prop.put("type_resultbottomline", 0); prop.put("type_results", ""); prop.put("display", display); + prop.put("contentdom", "text"); + prop.put("contentdomCheckText", 1); + prop.put("contentdomCheckAudio", 0); + prop.put("contentdomCheckVideo", 0); + prop.put("contentdomCheckImage", 0); + prop.put("contentdomCheckApp", 0); return prop; } @@ -163,8 +169,16 @@ public class yacysearch { final boolean indexReceiveGranted = sb.getConfig("allowReceiveIndex", "true").equals("true"); if (!indexDistributeGranted || !indexReceiveGranted) { global = false; } + // find search domain + int contentdom = plasmaSearchQuery.CONTENTDOM_TEXT; + String cds = post.get("contentdom", "text"); + if (cds.equals("text")) contentdom = plasmaSearchQuery.CONTENTDOM_TEXT; + if (cds.equals("audio")) contentdom = plasmaSearchQuery.CONTENTDOM_AUDIO; + if (cds.equals("video")) contentdom = plasmaSearchQuery.CONTENTDOM_VIDEO; + if (cds.equals("image")) contentdom = plasmaSearchQuery.CONTENTDOM_IMAGE; + if (cds.equals("app")) contentdom = plasmaSearchQuery.CONTENTDOM_APP; + serverObjects prop = new serverObjects(); - if (post.get("cat", "href").equals("href")) { final TreeSet query = plasmaSearchQuery.cleanQuery(querystring); @@ -234,13 +248,13 @@ public class yacysearch { if (order.endsWith("YBR")) order3 = plasmaSearchRankingProfile.ORDER_YBR; if (order.endsWith("Date")) order3 = plasmaSearchRankingProfile.ORDER_DATE; if (order.endsWith("Quality")) order3 = plasmaSearchRankingProfile.ORDER_QUALITY; - + // do the search plasmaSearchQuery thisSearch = new plasmaSearchQuery( query, maxDistance, prefermask, - plasmaSearchQuery.CONTENTDOM_TEXT, + contentdom, count, searchtime, urlmask, @@ -408,6 +422,12 @@ public class yacysearch { prop.put("display", display); prop.put("indexof", (indexof) ? "on" : "off"); prop.put("constraint", constraint.exportB64()); + prop.put("contentdom", cds); + prop.put("contentdomCheckText", (contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) ? 1 : 0); + prop.put("contentdomCheckAudio", (contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) ? 1 : 0); + prop.put("contentdomCheckVideo", (contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) ? 1 : 0); + prop.put("contentdomCheckImage", (contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) ? 1 : 0); + prop.put("contentdomCheckApp", (contentdom == plasmaSearchQuery.CONTENTDOM_APP) ? 1 : 0); // return rewrite properties return prop; diff --git a/source/de/anomic/index/indexCachedRI.java b/source/de/anomic/index/indexCachedRI.java index 3ecfd5bc7..ecf6eff23 100644 --- a/source/de/anomic/index/indexCachedRI.java +++ b/source/de/anomic/index/indexCachedRI.java @@ -145,14 +145,14 @@ public class indexCachedRI implements indexRI { if (container == null) { container = riIntern.getContainer(wordHash, urlselection, maxTime); } else { - container.add(riIntern.getContainer(wordHash, urlselection, maxTime), maxTime); + container.addAllUnique(riIntern.getContainer(wordHash, urlselection, maxTime)); } // get from collection index if (container == null) { container = backend.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime); } else { - container.add(backend.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime), maxTime); + container.addAllUnique(backend.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime)); } return container; } @@ -208,8 +208,8 @@ public class indexCachedRI implements indexRI { public indexContainer deleteContainer(String wordHash) { indexContainer c = riIntern.deleteContainer(wordHash); - if (c == null) c = riExtern.deleteContainer(wordHash); else c.add(riExtern.deleteContainer(wordHash), -1); - if (c == null) c = backend.deleteContainer(wordHash); else c.add(backend.deleteContainer(wordHash), -1); + if (c == null) c = riExtern.deleteContainer(wordHash); else c.addAllUnique(riExtern.deleteContainer(wordHash)); + if (c == null) c = backend.deleteContainer(wordHash); else c.addAllUnique(backend.deleteContainer(wordHash)); return c; } diff --git a/source/de/anomic/index/indexCollectionRI.java b/source/de/anomic/index/indexCollectionRI.java index ad140f00b..41dadcf42 100644 --- a/source/de/anomic/index/indexCollectionRI.java +++ b/source/de/anomic/index/indexCollectionRI.java @@ -155,7 +155,13 @@ public class indexCollectionRI implements indexRI { public synchronized void addEntry(String wordHash, indexRWIEntry newEntry, long updateTime, boolean dhtCase) { indexContainer container = new indexContainer(wordHash, collectionIndex.payloadRow()); container.add(newEntry); - addEntries(container, updateTime, dhtCase); + try { + collectionIndex.merge(wordHash.getBytes(), (kelondroRowCollection) container); + } catch (kelondroOutOfLimitsException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } } public synchronized void addEntries(indexContainer newEntries, long creationTime, boolean dhtCase) { diff --git a/source/de/anomic/index/indexContainer.java b/source/de/anomic/index/indexContainer.java index 172fadf41..cc698572b 100644 --- a/source/de/anomic/index/indexContainer.java +++ b/source/de/anomic/index/indexContainer.java @@ -34,7 +34,6 @@ import java.util.Set; import java.util.TreeMap; import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroRowSet; @@ -55,7 +54,7 @@ public class indexContainer extends kelondroRowSet { public indexContainer topLevelClone() { indexContainer newContainer = new indexContainer(this.wordHash, this.rowdef); - newContainer.add(this, -1); + newContainer.addAllUnique(this); return newContainer; } @@ -70,60 +69,53 @@ public class indexContainer extends kelondroRowSet { public String getWordHash() { return wordHash; } - - public int add(indexRWIEntry entry) { + + public void add(indexRWIEntry entry) { + // add without double-occurrence test assert entry.toKelondroEntry().objectsize() == super.rowdef.objectsize(); this.addUnique(entry.toKelondroEntry()); - return 1; } - - public int add(indexRWIEntry entry, long updateTime) { + + public void add(indexRWIEntry entry, long updateTime) { + // add without double-occurrence test assert entry.toKelondroEntry().objectsize() == super.rowdef.objectsize(); this.add(entry); this.lastTimeWrote = updateTime; - return 1; } - - public int add(indexRWIEntry[] entries, long updateTime) { - for (int i = 0; i < entries.length; i++) this.add(entries[i], updateTime); - return entries.length; + + /* + public void addAllUnique(indexContainer c) { + // this method can be called if all entries in c are known to be unique with reference to + // the entries in this container; that means: there are no double occurrences anywhere + // in/and between c and this. + super.addAllUnique((kelondroRowCollection) c); } - public int add(indexContainer c, long maxTime) { - // returns the number of new elements - long timeout = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime; - if (c == null) return 0; - int x = 0; - synchronized (c) { - Iterator i = c.entries(); - while (i.hasNext()) { - try { - if (addi((indexRWIEntry) i.next())) x++; - } catch (ConcurrentModificationException e) { - e.printStackTrace(); - } - if (System.currentTimeMillis() > timeout) break; - } + public static final indexContainer mergeUnique(indexContainer a, boolean aIsClone, indexContainer b, boolean bIsClone) { + if ((aIsClone) && (bIsClone)) { + if (a.size() > b.size()) return mergeUnique(a, b); else return mergeUnique(b, a); } - this.lastTimeWrote = java.lang.Math.max(this.lastTimeWrote, c.updated()); - return x; + if (aIsClone) return mergeUnique(a, b); + if (bIsClone) return mergeUnique(b, a); + if (a.size() > b.size()) return mergeUnique(a, b); else return mergeUnique(b, a); } + */ - private boolean addi(indexRWIEntry entry) { + public indexRWIEntry put(indexRWIEntry entry) { + assert entry.toKelondroEntry().objectsize() == super.rowdef.objectsize(); + kelondroRow.Entry r = super.put(entry.toKelondroEntry()); + if (r == null) return null; + return new indexRWIEntryNew(r); + } + + public boolean putRecent(indexRWIEntry entry) { assert entry.toKelondroEntry().objectsize() == super.rowdef.objectsize(); // returns true if the new entry was added, false if it already existed kelondroRow.Entry oldEntryRow = this.put(entry.toKelondroEntry()); if (oldEntryRow == null) { return true; } else { - indexRWIEntry oldEntry; - if (entry instanceof indexRWIEntryNew) - oldEntry = new indexRWIEntryNew(oldEntryRow); - else try { - oldEntry = new indexRWIEntryNew(new indexRWIEntryOld(oldEntryRow)); - } catch (kelondroException e) { - return false; - } + indexRWIEntry oldEntry = new indexRWIEntryNew(oldEntryRow); if (entry.isOlder(oldEntry)) { // A more recent Entry is already in this container this.put(oldEntry.toKelondroEntry()); // put it back return false; @@ -133,6 +125,25 @@ public class indexContainer extends kelondroRowSet { } } + public int putAllRecent(indexContainer c) { + // adds all entries in c and checks every entry for double-occurrence + // returns the number of new elements + if (c == null) return 0; + int x = 0; + synchronized (c) { + Iterator i = c.entries(); + while (i.hasNext()) { + try { + if (putRecent((indexRWIEntry) i.next())) x++; + } catch (ConcurrentModificationException e) { + e.printStackTrace(); + } + } + } + this.lastTimeWrote = java.lang.Math.max(this.lastTimeWrote, c.updated()); + return x; + } + public indexRWIEntry get(String urlHash) { kelondroRow.Entry entry = this.get(urlHash.getBytes()); if (entry == null) return null; @@ -204,12 +215,13 @@ public class indexContainer extends kelondroRowSet { } } + /* public static Object containerMerge(Object a, Object b) { indexContainer c = (indexContainer) a; c.add((indexContainer) b, -1); return c; } - + */ public static indexContainer joinContainer(Collection containers, long time, int maxDistance) { long stamp = System.currentTimeMillis(); diff --git a/source/de/anomic/index/indexRAMRI.java b/source/de/anomic/index/indexRAMRI.java index 3b21fe910..869071a70 100644 --- a/source/de/anomic/index/indexRAMRI.java +++ b/source/de/anomic/index/indexRAMRI.java @@ -432,7 +432,7 @@ public final class indexRAMRI implements indexRI { entries = container.topLevelClone(); added = entries.size(); } else { - added = entries.add(container, -1); + added = entries.putAllRecent(container); } if (added > 0) { cache.put(wordHash, entries); @@ -445,15 +445,10 @@ public final class indexRAMRI implements indexRI { public synchronized void addEntry(String wordHash, indexRWIEntry newEntry, long updateTime, boolean dhtCase) { indexContainer container = (indexContainer) cache.get(wordHash); if (container == null) container = new indexContainer(wordHash, this.payloadrow); - indexRWIEntry[] entries = new indexRWIEntry[] { newEntry }; - if (container.add(entries, updateTime) > 0) { - cache.put(wordHash, container); - hashScore.incScore(wordHash); - hashDate.setScore(wordHash, intTime(updateTime)); - return; - } - container = null; - entries = null; + container.put(newEntry); + cache.put(wordHash, container); + hashScore.incScore(wordHash); + hashDate.setScore(wordHash, intTime(updateTime)); } public synchronized void close() { diff --git a/source/de/anomic/kelondro/kelondroCollectionIndex.java b/source/de/anomic/kelondro/kelondroCollectionIndex.java index e0de0dcf8..3ca770532 100644 --- a/source/de/anomic/kelondro/kelondroCollectionIndex.java +++ b/source/de/anomic/kelondro/kelondroCollectionIndex.java @@ -315,8 +315,9 @@ public class kelondroCollectionIndex { kelondroRowSet oldcollection = getwithparams(indexrow, oldchunksize, oldchunkcount, oldPartitionNumber, oldrownumber, oldSerialNumber, false); // join with new collection - oldcollection.addAll(collection); + oldcollection.addAllUnique(collection); oldcollection.shape(); + oldcollection.uniq(); // FIXME: not clear if it would be better to insert the collection with put to avoid double-entries oldcollection.trim(); collection = oldcollection; } diff --git a/source/de/anomic/kelondro/kelondroRowCollection.java b/source/de/anomic/kelondro/kelondroRowCollection.java index 1b27eae9f..7e8963737 100644 --- a/source/de/anomic/kelondro/kelondroRowCollection.java +++ b/source/de/anomic/kelondro/kelondroRowCollection.java @@ -251,20 +251,17 @@ public class kelondroRowCollection { } return false; } - - public final void addAll(kelondroRowCollection c) { - assert(rowdef.objectsize() >= c.rowdef.objectsize()); + + public final void addAllUnique(kelondroRowCollection c) { + if (c == null) return; + assert(rowdef.objectsize() == c.rowdef.objectsize()); synchronized(chunkcache) { ensureSize(chunkcount + c.size()); - } - Iterator i = c.rows(); - kelondroRow.Entry entry; - while (i.hasNext()) { - entry = (kelondroRow.Entry) i.next(); - addUnique(entry); + System.arraycopy(c.chunkcache, 0, chunkcache, rowdef.objectsize() * chunkcount, rowdef.objectsize() * c.size()); + chunkcount += c.size(); } } - + protected final void removeShift(int pos, int dist, int upBound) { assert ((pos + dist) * rowdef.objectsize() >= 0) : "pos = " + pos + ", dist = " + dist + ", rowdef.objectsize() = " + rowdef.objectsize; assert (pos * rowdef.objectsize() >= 0) : "pos = " + pos + ", rowdef.objectsize() = " + rowdef.objectsize; diff --git a/source/de/anomic/kelondro/kelondroRowSet.java b/source/de/anomic/kelondro/kelondroRowSet.java index 131da80b7..978ee514e 100644 --- a/source/de/anomic/kelondro/kelondroRowSet.java +++ b/source/de/anomic/kelondro/kelondroRowSet.java @@ -79,6 +79,7 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd } public void addUnique(kelondroRow.Entry row) { + // add an entry without doing a double-occurrence test if (removeMarker.size() == 0) { super.addUnique(row); } else { diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java index 19b3ff0e7..8cfd1ebd1 100644 --- a/source/de/anomic/plasma/plasmaSearchEvent.java +++ b/source/de/anomic/plasma/plasmaSearchEvent.java @@ -361,8 +361,10 @@ public final class plasmaSearchEvent extends Thread implements Runnable { profileLocal.startTimer(); long pst = System.currentTimeMillis(); - searchResult.add(rcLocal, preorderTime); - searchResult.add(rcContainers, preorderTime); + searchResult.addAllUnique(rcLocal); + searchResult.addAllUnique(rcContainers); + searchResult.shape(); + searchResult.uniq(); preorderTime = preorderTime - (System.currentTimeMillis() - pst); if (preorderTime < 0) preorderTime = 200; plasmaSearchPreOrder preorder = new plasmaSearchPreOrder(query, ranking, searchResult, preorderTime); diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index f2ee11cbb..e126c51fe 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -298,14 +298,14 @@ public final class plasmaWordIndex implements indexRI { if (container == null) { container = dhtInCache.getContainer(wordHash, urlselection, -1); } else { - container.add(dhtInCache.getContainer(wordHash, urlselection, -1), -1); + container.addAllUnique(dhtInCache.getContainer(wordHash, urlselection, -1)); } // get from collection index if (container == null) { container = collections.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime); } else { - container.add(collections.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime), -1); + container.addAllUnique(collections.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime)); } return container; } @@ -362,9 +362,9 @@ public final class plasmaWordIndex implements indexRI { public indexContainer deleteContainer(String wordHash) { indexContainer c = new indexContainer(wordHash, indexRWIEntryNew.urlEntryRow); - c.add(dhtInCache.deleteContainer(wordHash), -1); - c.add(dhtOutCache.deleteContainer(wordHash), -1); - c.add(collections.deleteContainer(wordHash), -1); + c.addAllUnique(dhtInCache.deleteContainer(wordHash)); + c.addAllUnique(dhtOutCache.deleteContainer(wordHash)); + c.addAllUnique(collections.deleteContainer(wordHash)); return c; } diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortment.java b/source/de/anomic/plasma/plasmaWordIndexAssortment.java index d423ddc98..2c3cf870e 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortment.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortment.java @@ -57,7 +57,6 @@ import java.io.IOException; import java.util.Iterator; import de.anomic.index.indexContainer; -import de.anomic.index.indexRWIEntry; import de.anomic.index.indexRWIEntryNew; import de.anomic.index.indexRWIEntryOld; import de.anomic.kelondro.kelondroBase64Order; @@ -127,7 +126,7 @@ public final class plasmaWordIndexAssortment { int al = assortmentCapacity(row.objectsize()); for (int i = 0; i < al; i++) try { // fill AND convert old entries to new entries - container.add(new indexRWIEntry[] { new indexRWIEntryNew(new indexRWIEntryOld(row.getColBytes(3 + i))) }, updateTime); + container.add(new indexRWIEntryNew(new indexRWIEntryOld(row.getColBytes(3 + i))), updateTime); } catch (kelondroException e) {} return container; } diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index 61c000ddc..43e1ba895 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -533,14 +533,14 @@ public final class yacyClient { // add the url entry to the word indexes for (int m = 0; m < words; m++) { assert (entry instanceof indexRWIEntryNew); - container[m].add(new indexRWIEntry[]{entry}, System.currentTimeMillis()); + container[m].add(entry, System.currentTimeMillis()); } // store url hash for statistics urls[n] = urlEntry.hash(); } // insert the containers to the index - for (int m = 0; m < words; m++) { containerCache.add(container[m], -1); } + for (int m = 0; m < words; m++) { containerCache.addAllUnique(container[m]); } // read index abstract if (abstractCache != null) { diff --git a/source/yacy.java b/source/yacy.java index 48bc0eb6a..c8d00ba93 100644 --- a/source/yacy.java +++ b/source/yacy.java @@ -707,7 +707,7 @@ public final class yacy { while (entries.hasNext()) { entry = (indexRWIEntry) entries.next(); // System.out.println("ENTRY = " + entry.getUrlHash()); - container.add(new indexRWIEntry[] { entry }, System.currentTimeMillis()); + container.add(entry, System.currentTimeMillis()); } // we have read all elements, now delete the entity entity.deleteComplete();