diff --git a/htroot/api/timeline.java b/htroot/api/timeline.java
index a2f6c6a21..d5c6238a4 100644
--- a/htroot/api/timeline.java
+++ b/htroot/api/timeline.java
@@ -80,7 +80,7 @@ public final class timeline {
         //yacyCore.log.logInfo("INIT TIMELINE SEARCH: " + plasmaSearchQuery.anonymizedQueryHashes(query[0]) + " - " + count + " links");
         
         // get the index container with the result vector
-        HashMap<byte[], ReferenceContainer<WordReference>>[] localSearchContainerMaps = sb.webIndex.localSearchContainers(q, Word.words2hashes(query[1]), null);
+        HashMap<byte[], ReferenceContainer<WordReference>>[] localSearchContainerMaps = sb.webIndex.index().searchTerm(q, Word.words2hashes(query[1]), null);
         final ReferenceContainer<WordReference> index =
             ReferenceContainer.joinExcludeContainers(
                 plasmaWordIndex.wordReferenceFactory,
diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java
index 376cd7150..84bc74db1 100644
--- a/htroot/yacy/search.java
+++ b/htroot/yacy/search.java
@@ -209,7 +209,7 @@ public final class search {
             yacyCore.log.logInfo("INIT HASH SEARCH (abstracts only): " + plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links");
             
             final long timer = System.currentTimeMillis();
-            final Map<byte[], ReferenceContainer<WordReference>>[] containers = sb.webIndex.localSearchContainers(theQuery.queryHashes, theQuery.excludeHashes, plasmaSearchQuery.hashes2StringSet(urls));
+            final Map<byte[], ReferenceContainer<WordReference>>[] containers = sb.webIndex.index().searchTerm(theQuery.queryHashes, theQuery.excludeHashes, plasmaSearchQuery.hashes2StringSet(urls));
             serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(theQuery.id(true), plasmaSearchEvent.COLLECTION, containers[0].size(), System.currentTimeMillis() - timer), false);
             if (containers != null) {
diff --git a/source/de/anomic/content/dao/PhpBB3Dao.java b/source/de/anomic/content/dao/PhpBB3Dao.java
index 81289bfbc..771c51457 100644
--- a/source/de/anomic/content/dao/PhpBB3Dao.java
+++ b/source/de/anomic/content/dao/PhpBB3Dao.java
@@ -144,13 +144,11 @@ public class PhpBB3Dao implements Dao {
     }
     
     public int size() {
-        StringBuilder sql = new StringBuilder(256);
-        sql.append("select count(*) from phpbb_posts");
         Statement stmt = null;
         ResultSet rs = null;
         try {
             stmt = conn.createStatement();
-            rs = stmt.executeQuery(sql.toString());
+            rs = stmt.executeQuery("select count(*) from phpbb_posts");
             if (rs.next()) {
                 return rs.getInt(1);
             }
diff --git a/source/de/anomic/content/file/SurrogateReader.java b/source/de/anomic/content/file/SurrogateReader.java
index 54d84e3e1..9b9b7dd85 100644
--- a/source/de/anomic/content/file/SurrogateReader.java
+++ b/source/de/anomic/content/file/SurrogateReader.java
@@ -44,8 +44,6 @@ import de.anomic.content.DCEntry;
 
 public class SurrogateReader extends DefaultHandler implements Runnable {
 
-    public static final DCEntry poison = new DCEntry();
-    
     // class variables
     private final StringBuilder buffer;
     private boolean parsingValue;
@@ -83,7 +81,7 @@ public class SurrogateReader extends DefaultHandler implements Runnable {
             e.printStackTrace();
         } finally {
             try {
-                this.surrogates.put(poison);
+                this.surrogates.put(DCEntry.poison);
             } catch (InterruptedException e1) {
                 e1.printStackTrace();
             }
@@ -170,7 +168,7 @@ public class SurrogateReader extends DefaultHandler implements Runnable {
         t.start();
         DCEntry s;
         System.out.println("1");
-        while ((s = sr.take()) != SurrogateReader.poison) {
+        while ((s = sr.take()) != DCEntry.poison) {
            System.out.println("Title: " + s.title());
            System.out.println("Date: " + s.date());
            System.out.println("URL: " + s.url());
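
The SurrogateReader change above is worth a note: the consumer loop compares against the sentinel by identity (!=), so the producer and every consumer must share exactly one poison instance; moving the constant to DCEntry.poison gives both sides a single canonical object instead of a reader-local one. A minimal, self-contained sketch of this poison-pill pattern (the class and names below are illustrative, not YaCy API):

    import java.util.concurrent.ArrayBlockingQueue;
    import java.util.concurrent.BlockingQueue;

    public class PoisonPillDemo {
        // a unique sentinel instance; identity (==) matters here, not equals()
        static final String POISON = new String("POISON");

        public static void main(String[] args) throws InterruptedException {
            final BlockingQueue<String> queue = new ArrayBlockingQueue<String>(10);
            Thread producer = new Thread() {
                public void run() {
                    try {
                        queue.put("entry-1");
                        queue.put("entry-2");
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                    } finally {
                        // always terminate the consumer, even after a failure,
                        // mirroring the finally block in SurrogateReader.run()
                        try { queue.put(POISON); } catch (InterruptedException e) { Thread.currentThread().interrupt(); }
                    }
                }
            };
            producer.start();
            String s;
            while ((s = queue.take()) != POISON) {
                System.out.println("consumed: " + s);
            }
        }
    }
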
diff --git a/source/de/anomic/kelondro/text/AbstractIndex.java b/source/de/anomic/kelondro/text/AbstractIndex.java
index e8abe06e8..ddcc7295d 100644
--- a/source/de/anomic/kelondro/text/AbstractIndex.java
+++ b/source/de/anomic/kelondro/text/AbstractIndex.java
@@ -28,6 +28,7 @@ package de.anomic.kelondro.text;
 
 import java.io.IOException;
+import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Set;
 import java.util.TreeSet;
@@ -85,4 +86,69 @@ public abstract class AbstractIndex<ReferenceType extends Reference> implements
         }
         return containers; // this may return less containers as demanded
     }
+    
+    
+    // methods to search in the index
+    
+    /**
+     * collect containers for given word hashes. This collection stops if a single container does not contain any references.
+     * In that case only an empty result is returned.
+     * @param wordHashes
+     * @param urlselection
+     * @return map of wordhash:indexContainer
+     */
+    public HashMap<byte[], ReferenceContainer<ReferenceType>> searchConjunction(final TreeSet<byte[]> wordHashes, final Set<String> urlselection) {
+        // first check if there is any entry that has no match; this uses only operations in ram
+        /*
+        Iterator<byte[]> i = wordHashes.iterator();
+        while (i.hasNext()) {
+            if (!this.has(i.next())) return new HashMap<byte[], ReferenceContainer<ReferenceType>>(0);
+        }
+        */
+        // retrieve entities that belong to the hashes
+        final HashMap<byte[], ReferenceContainer<ReferenceType>> containers = new HashMap<byte[], ReferenceContainer<ReferenceType>>(wordHashes.size());
+        byte[] singleHash;
+        ReferenceContainer<ReferenceType> singleContainer;
+        Iterator<byte[]> i = wordHashes.iterator();
+        while (i.hasNext()) {
+        
+            // get next word hash:
+            singleHash = i.next();
+        
+            // retrieve index
+            try {
+                singleContainer = this.get(singleHash, urlselection);
+            } catch (IOException e) {
+                e.printStackTrace();
+                continue;
+            }
+        
+            // check result
+            if ((singleContainer == null || singleContainer.size() == 0)) return new HashMap<byte[], ReferenceContainer<ReferenceType>>(0);
+        
+            containers.put(singleHash, singleContainer);
+        }
+        return containers;
+    }
+    
+    @SuppressWarnings("unchecked")
+    public HashMap<byte[], ReferenceContainer<ReferenceType>>[] searchTerm(
+            final TreeSet<byte[]> queryHashes,
+            final TreeSet<byte[]> excludeHashes,
+            final Set<String> urlselection) {
+        // search for the set of hashes and return a map of wordhash:indexContainer containing the search result
+        
+        // retrieve entities that belong to the hashes
+        HashMap<byte[], ReferenceContainer<ReferenceType>> inclusionContainers =
+            (queryHashes.size() == 0) ?
+                new HashMap<byte[], ReferenceContainer<ReferenceType>>(0) :
+                this.searchConjunction(queryHashes, urlselection);
+        if ((inclusionContainers.size() != 0) && (inclusionContainers.size() < queryHashes.size())) inclusionContainers = new HashMap<byte[], ReferenceContainer<ReferenceType>>(0); // prevent that only a subset is returned
+        final HashMap<byte[], ReferenceContainer<ReferenceType>> exclusionContainers =
+            (inclusionContainers.size() == 0) ?
+                new HashMap<byte[], ReferenceContainer<ReferenceType>>(0) :
+                this.searchConjunction(excludeHashes, urlselection);
+        return new HashMap[]{inclusionContainers, exclusionContainers};
+    }
+    
 }
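
The new searchConjunction implements AND semantics: it aborts and returns an empty map as soon as one word hash has no references, because a conjunction with one empty operand is itself empty. A self-contained sketch of that control flow, with a plain Map<String, List<String>> standing in for the reference index (all names here are illustrative, not YaCy code):

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.TreeSet;

    public class ConjunctionDemo {
        static HashMap<String, List<String>> index = new HashMap<String, List<String>>();

        // one container per word, or an empty map as soon as any word has no hits
        static HashMap<String, List<String>> searchConjunction(TreeSet<String> words) {
            final HashMap<String, List<String>> containers = new HashMap<String, List<String>>(words.size());
            for (String word : words) {
                List<String> container = index.get(word); // stands in for this.get(wordHash, urlselection)
                if (container == null || container.isEmpty()) return new HashMap<String, List<String>>(0);
                containers.put(word, container);
            }
            return containers;
        }

        public static void main(String[] args) {
            index.put("red", Arrays.asList("url1", "url2"));
            index.put("fox", Arrays.asList("url2"));
            TreeSet<String> q = new TreeSet<String>(Arrays.asList("red", "fox"));
            System.out.println(searchConjunction(q)); // two containers, both words match
            q.add("dog");                             // "dog" has no references
            System.out.println(searchConjunction(q)); // {} - the whole conjunction is empty
        }
    }
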
diff --git a/source/de/anomic/plasma/plasmaSearchRankingProcess.java b/source/de/anomic/plasma/plasmaSearchRankingProcess.java
index 1bbf7fa3b..99b9a4fe0 100644
--- a/source/de/anomic/plasma/plasmaSearchRankingProcess.java
+++ b/source/de/anomic/plasma/plasmaSearchRankingProcess.java
@@ -119,7 +119,7 @@ public final class plasmaSearchRankingProcess {
     public void execQuery() {
         
         long timer = System.currentTimeMillis();
-        this.localSearchContainerMaps = wordIndex.localSearchContainers(query.queryHashes, query.excludeHashes, null);
+        this.localSearchContainerMaps = wordIndex.index().searchTerm(query.queryHashes, query.excludeHashes, null);
        serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), plasmaSearchEvent.COLLECTION, this.localSearchContainerMaps[0].size(), System.currentTimeMillis() - timer), false);
         
         // join and exclude the local result
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index 459492bc0..724c5dac0 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -1233,7 +1233,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch
diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java
--- a/source/de/anomic/plasma/plasmaWordIndex.java
+++ b/source/de/anomic/plasma/plasmaWordIndex.java
@@ ... @@
-    private final BufferedIndex<WordReference> index;
+    private final IndexCell<WordReference> index;
     private final Log log;
     private MetadataRepository metadata;
     private final yacySeedDB peers;
@@ -250,7 +250,7 @@ public final class plasmaWordIndex {
         return this.peers;
     }
     
-    public BufferedIndex<WordReference> index() {
+    public IndexCell<WordReference> index() {
         return this.index;
     }
@@ -561,60 +561,6 @@
         return newEntry;
     }
     
-    @SuppressWarnings("unchecked")
-    public HashMap<byte[], ReferenceContainer<WordReference>>[] localSearchContainers(
-            final TreeSet<byte[]> queryHashes,
-            final TreeSet<byte[]> excludeHashes,
-            final Set<String> urlselection) {
-        // search for the set of hashes and return a map of of wordhash:indexContainer containing the seach result
-        
-        // retrieve entities that belong to the hashes
-        HashMap<byte[], ReferenceContainer<WordReference>> inclusionContainers =
-            (queryHashes.size() == 0) ?
-                new HashMap<byte[], ReferenceContainer<WordReference>>(0) :
-                getContainers(queryHashes, urlselection);
-        if ((inclusionContainers.size() != 0) && (inclusionContainers.size() < queryHashes.size())) inclusionContainers = new HashMap<byte[], ReferenceContainer<WordReference>>(0); // prevent that only a subset is returned
-        final HashMap<byte[], ReferenceContainer<WordReference>> exclusionContainers =
-            (inclusionContainers.size() == 0) ?
-                new HashMap<byte[], ReferenceContainer<WordReference>>(0) :
-                getContainers(excludeHashes, urlselection);
-        return new HashMap[]{inclusionContainers, exclusionContainers};
-    }
-    
-    /**
-     * collect containers for given word hashes. This collection stops if a single container does not contain any references.
-     * In that case only a empty result is returned.
-     * @param wordHashes
-     * @param urlselection
-     * @return map of wordhash:indexContainer
-     */
-    private HashMap<byte[], ReferenceContainer<WordReference>> getContainers(final TreeSet<byte[]> wordHashes, final Set<String> urlselection) {
-        // retrieve entities that belong to the hashes
-        final HashMap<byte[], ReferenceContainer<WordReference>> containers = new HashMap<byte[], ReferenceContainer<WordReference>>(wordHashes.size());
-        byte[] singleHash;
-        ReferenceContainer<WordReference> singleContainer;
-        final Iterator<byte[]> i = wordHashes.iterator();
-        while (i.hasNext()) {
-        
-            // get next word hash:
-            singleHash = i.next();
-        
-            // retrieve index
-            try {
-                singleContainer = index.get(singleHash, urlselection);
-            } catch (IOException e) {
-                e.printStackTrace();
-                continue;
-            }
-        
-            // check result
-            if ((singleContainer == null || singleContainer.size() == 0)) return new HashMap<byte[], ReferenceContainer<WordReference>>(0);
-        
-            containers.put(singleHash, singleContainer);
-        }
-        return containers;
-    }
-    
     // The Cleaner class was provided as "UrldbCleaner" by Hydrox
     public synchronized ReferenceCleaner getReferenceCleaner(final byte[] startHash) {
         return new ReferenceCleaner(startHash);
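
For callers, the migration is mechanical: sb.webIndex.localSearchContainers(...) becomes sb.webIndex.index().searchTerm(...), with the same two-element result, inclusion containers at [0] and exclusion containers at [1]. One subtlety carried over from the removed code: searchConjunction skips a word (continue) when an IOException occurs, so the inclusion map can end up holding only a subset of the query words; the size check in searchTerm then discards such a partial result rather than serving it as a complete conjunction. A self-contained sketch of that contract, with String words and Integer counts standing in for byte[] hashes and ReferenceContainers (illustrative only, not YaCy code):

    import java.util.HashMap;
    import java.util.TreeSet;

    public class SearchTermDemo {
        // fake conjunction: every word has hits, but "!"-words simulate the
        // IOException path and are silently skipped, leaving a partial map
        static HashMap<String, Integer> conjunction(TreeSet<String> words) {
            HashMap<String, Integer> hits = new HashMap<String, Integer>(words.size());
            for (String w : words) {
                if (w.startsWith("!")) continue; // stands in for: catch (IOException e) { continue; }
                hits.put(w, 1);
            }
            return hits;
        }

        @SuppressWarnings("unchecked")
        static HashMap<String, Integer>[] searchTerm(TreeSet<String> query, TreeSet<String> exclude) {
            HashMap<String, Integer> inclusion = query.isEmpty() ? new HashMap<String, Integer>(0) : conjunction(query);
            // the guard from the patch: a partial conjunction is worthless, drop it
            if (inclusion.size() != 0 && inclusion.size() < query.size()) inclusion = new HashMap<String, Integer>(0);
            // exclusion words are only looked up when there is something to exclude from
            HashMap<String, Integer> exclusion = inclusion.isEmpty() ? new HashMap<String, Integer>(0) : conjunction(exclude);
            return new HashMap[]{inclusion, exclusion};
        }

        public static void main(String[] args) {
            TreeSet<String> q = new TreeSet<String>(); q.add("kelondro"); q.add("yacy");
            TreeSet<String> ex = new TreeSet<String>(); ex.add("spam");
            HashMap<String, Integer>[] maps = searchTerm(q, ex);
            System.out.println("inclusion=" + maps[0] + " exclusion=" + maps[1]);

            q.add("!broken"); // one word fails to load -> the partial result is discarded
            maps = searchTerm(q, ex);
            System.out.println("inclusion=" + maps[0] + " exclusion=" + maps[1]);
        }
    }
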