- fixed a problem causing a NullPointerException (NPE) in remote search

- more abstraction for search requests: query() on the term index now returns a TermSearch object (see the call-site sketch below)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6015 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 9e18abc2ac
commit cc49aedf12
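
The hunks below change termIndex().query(...) to return the new TermSearch wrapper instead of the joined ReferenceContainer. A minimal sketch of a migrated call site, assuming a Segment and the query/exclude hash sets are already at hand; the helper class and variable names are illustrative, not part of the commit:

import java.util.TreeSet;

import de.anomic.kelondro.text.ReferenceContainer;
import de.anomic.kelondro.text.Segment;
import de.anomic.kelondro.text.TermSearch;
import de.anomic.kelondro.text.referencePrototype.WordReference;

// Illustrative helper (not part of this commit): runs a term search on a segment's
// term index and returns the joined container that query() used to return directly.
final class QuerySketch {
    static ReferenceContainer<WordReference> joinedResult(
            final Segment indexSegment,
            final TreeSet<byte[]> queryHashes,
            final TreeSet<byte[]> excludeHashes,
            final int maxDistance) {
        final TermSearch<WordReference> search = indexSegment.termIndex().query(
                queryHashes,                  // word hashes that must all match
                excludeHashes,                // word hashes that must not match
                null,                         // no URL pre-selection
                Segment.wordReferenceFactory,
                maxDistance);
        return search.joined();               // the value callers received before this commit
    }
}

The per-term containers produced along the way stay accessible through the same TermSearch object; the AbstractIndex and plasmaSearchRankingProcess hunks below show how.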

@@ -31,6 +31,7 @@ import java.util.TreeSet;
import de.anomic.http.httpRequestHeader;
import de.anomic.kelondro.text.ReferenceContainer;
import de.anomic.kelondro.text.Segment;
import de.anomic.kelondro.text.TermSearch;
import de.anomic.kelondro.text.referencePrototype.WordReference;
import de.anomic.kelondro.util.DateFormatter;
import de.anomic.plasma.plasmaSearchQuery;
@@ -78,13 +79,14 @@ public final class timeline {
// retrieve index containers
//yacyCore.log.logInfo("INIT TIMELINE SEARCH: " + plasmaSearchQuery.anonymizedQueryHashes(query[0]) + " - " + count + " links");
// get the index container with the result vector
final ReferenceContainer<WordReference> index = sb.indexSegment.termIndex().query(
// get the index container with the result vector
final TermSearch<WordReference> search = sb.indexSegment.termIndex().query(
q,
Word.words2hashes(query[1]),
null,
Segment.wordReferenceFactory,
maxdist);
ReferenceContainer<WordReference> index = search.joined();
Iterator<WordReference> i = index.entries();
WordReference entry;

@@ -122,40 +122,13 @@ public abstract class AbstractIndex <ReferenceType extends Reference> implements
return containers;
}
@SuppressWarnings("unchecked")
public HashMap<byte[], ReferenceContainer<ReferenceType>>[] searchTerm(
final TreeSet<byte[]> queryHashes,
final TreeSet<byte[]> excludeHashes,
final Set<String> urlselection) {
// search for the set of hashes and return a map of wordhash:indexContainer containing the search result
// retrieve entities that belong to the hashes
HashMap<byte[], ReferenceContainer<ReferenceType>> inclusionContainers =
(queryHashes.size() == 0) ?
new HashMap<byte[], ReferenceContainer<ReferenceType>>(0) :
this.searchConjunction(queryHashes, urlselection);
if ((inclusionContainers.size() != 0) && (inclusionContainers.size() < queryHashes.size())) inclusionContainers = new HashMap<byte[], ReferenceContainer<ReferenceType>>(0); // prevent that only a subset is returned
final HashMap<byte[], ReferenceContainer<ReferenceType>> exclusionContainers =
(inclusionContainers.size() == 0) ?
new HashMap<byte[], ReferenceContainer<ReferenceType>>(0) :
this.searchConjunction(excludeHashes, urlselection);
return new HashMap[]{inclusionContainers, exclusionContainers};
}
public ReferenceContainer<ReferenceType> query(
public TermSearch<ReferenceType> query(
final TreeSet<byte[]> queryHashes,
final TreeSet<byte[]> excludeHashes,
final Set<String> urlselection,
ReferenceFactory<ReferenceType> termFactory,
int maxDistance) {
HashMap<byte[], ReferenceContainer<ReferenceType>>[] containerMaps = searchTerm(queryHashes, excludeHashes, urlselection);
// join and exclude the result
return ReferenceContainer.joinExcludeContainers(
termFactory,
containerMaps[0].values(),
containerMaps[1].values(),
maxDistance);
return new TermSearch<ReferenceType>(this, queryHashes, excludeHashes, urlselection, termFactory, maxDistance);
}
}
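
The searchTerm() method removed above returned a two-element array of maps (inclusion containers at index 0, exclusion containers at index 1), and query() then joined them; both steps are now encapsulated in TermSearch, whose source appears further down. A sketch of where that data went, again with illustrative names and a Segment assumed as the entry point:

import java.util.HashMap;
import java.util.TreeSet;

import de.anomic.kelondro.text.ReferenceContainer;
import de.anomic.kelondro.text.Segment;
import de.anomic.kelondro.text.TermSearch;
import de.anomic.kelondro.text.referencePrototype.WordReference;

// Illustrative only: maps the removed array-based access onto the new accessors.
final class AccessorSketch {
    static void showAccessors(
            final Segment indexSegment,
            final TreeSet<byte[]> queryHashes,
            final TreeSet<byte[]> excludeHashes,
            final int maxDistance) {
        final TermSearch<WordReference> search = indexSegment.termIndex().query(
                queryHashes, excludeHashes, null, Segment.wordReferenceFactory, maxDistance);
        // old: containerMaps[0] from searchTerm(...)
        final HashMap<byte[], ReferenceContainer<WordReference>> inclusion = search.inclusion();
        // old: containerMaps[1] from searchTerm(...)
        final HashMap<byte[], ReferenceContainer<WordReference>> exclusion = search.exclusion();
        // old: the return value of query(...) itself
        final ReferenceContainer<WordReference> joined = search.joined();
    }
}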

@@ -29,6 +29,7 @@
package de.anomic.kelondro.text;
import java.io.IOException;
import java.util.HashMap;
import java.util.Set;
import java.util.TreeSet;
@@ -131,7 +132,16 @@ public interface Index <ReferenceType extends Reference> {
boolean rot,
int count
) throws IOException;
/**
* collect containers for the given word hashes. The collection stops as soon as one container does not contain any references;
* in that case only an empty result is returned.
* @param wordHashes
* @param urlselection
* @return map of wordhash:indexContainer
*/
public HashMap<byte[], ReferenceContainer<ReferenceType>> searchConjunction(final TreeSet<byte[]> wordHashes, final Set<String> urlselection);
/**
* delete all references entries
* @throws IOException

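The searchConjunction contract documented in the Index.java hunk above (stop collecting as soon as one term has no references and return an empty map) could be met by a loop like the following sketch; fetchContainer() is a hypothetical per-term lookup introduced only for illustration, not a method of the real interface:

import java.util.HashMap;
import java.util.Set;
import java.util.TreeSet;

import de.anomic.kelondro.text.Reference;
import de.anomic.kelondro.text.ReferenceContainer;

// Sketch of the documented contract; fetchContainer() is hypothetical.
abstract class ConjunctionSketch<ReferenceType extends Reference> {

    // hypothetical single-term lookup, standing in for whatever the real implementation uses
    abstract ReferenceContainer<ReferenceType> fetchContainer(byte[] wordHash, Set<String> urlselection);

    HashMap<byte[], ReferenceContainer<ReferenceType>> searchConjunction(
            final TreeSet<byte[]> wordHashes,
            final Set<String> urlselection) {
        final HashMap<byte[], ReferenceContainer<ReferenceType>> containers =
                new HashMap<byte[], ReferenceContainer<ReferenceType>>(wordHashes.size());
        for (final byte[] wordHash : wordHashes) {
            final ReferenceContainer<ReferenceType> container = fetchContainer(wordHash, urlselection);
            if (container == null || container.size() == 0) {
                // one term without references means the conjunction cannot match anything:
                // stop early and return an empty map
                return new HashMap<byte[], ReferenceContainer<ReferenceType>>(0);
            }
            containers.put(wordHash, container);
        }
        return containers;
    }
}
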
@@ -0,0 +1,81 @@
// TermSearch.java
// ---------------
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 3.6.2009 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.kelondro.text;
import java.util.HashMap;
import java.util.Set;
import java.util.TreeSet;
public class TermSearch <ReferenceType extends Reference> {
private ReferenceContainer<ReferenceType> joinResult;
HashMap<byte[], ReferenceContainer<ReferenceType>> inclusionContainers, exclusionContainers;
public TermSearch(
Index<ReferenceType> base,
final TreeSet<byte[]> queryHashes,
final TreeSet<byte[]> excludeHashes,
final Set<String> urlselection,
ReferenceFactory<ReferenceType> termFactory,
int maxDistance) {
this.inclusionContainers =
(queryHashes.size() == 0) ?
new HashMap<byte[], ReferenceContainer<ReferenceType>>(0) :
base.searchConjunction(queryHashes, urlselection);
if ((inclusionContainers.size() != 0) &&
(inclusionContainers.size() < queryHashes.size()))
inclusionContainers = new HashMap<byte[], ReferenceContainer<ReferenceType>>(0); // prevent returning only a subset of the query terms
this.exclusionContainers =
(inclusionContainers.size() == 0) ?
new HashMap<byte[], ReferenceContainer<ReferenceType>>(0) :
base.searchConjunction(excludeHashes, urlselection);
// join and exclude the result
this.joinResult = ReferenceContainer.joinExcludeContainers(
termFactory,
inclusionContainers.values(),
exclusionContainers.values(),
maxDistance);
}
public ReferenceContainer<ReferenceType> joined() {
return this.joinResult;
}
public HashMap<byte[], ReferenceContainer<ReferenceType>> inclusion() {
return this.inclusionContainers;
}
public HashMap<byte[], ReferenceContainer<ReferenceType>> exclusion() {
return this.exclusionContainers;
}
}

@@ -187,9 +187,8 @@ public final class plasmaSearchEvent {
int maxcount = -1;
long mindhtdistance = Long.MAX_VALUE, l;
byte[] wordhash;
assert this.rankedCache.searchContainerMaps() != null;
assert this.rankedCache.searchContainerMaps()[0] != null;
for (Map.Entry<byte[], ReferenceContainer<WordReference>> entry : this.rankedCache.searchContainerMaps()[0].entrySet()) {
assert this.rankedCache.searchContainerMap() != null;
for (Map.Entry<byte[], ReferenceContainer<WordReference>> entry : this.rankedCache.searchContainerMap().entrySet()) {
wordhash = entry.getKey();
final ReferenceContainer container = entry.getValue();
assert (container.getTermHash().equals(wordhash));
@@ -206,7 +205,7 @@ public final class plasmaSearchEvent {
IACount.put(wordhash, Integer.valueOf(container.size()));
IAResults.put(wordhash, ReferenceContainer.compressIndex(container, null, 1000).toString());
}
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), "abstract generation", this.rankedCache.searchContainerMaps()[0].size(), System.currentTimeMillis() - timer), false);
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), "abstract generation", this.rankedCache.searchContainerMap().size(), System.currentTimeMillis() - timer), false);
}
}

@@ -45,6 +45,7 @@ import de.anomic.kelondro.text.Reference;
import de.anomic.kelondro.text.ReferenceContainer;
import de.anomic.kelondro.text.ReferenceOrder;
import de.anomic.kelondro.text.Segment;
import de.anomic.kelondro.text.TermSearch;
import de.anomic.kelondro.text.metadataPrototype.URLMetadataRow;
import de.anomic.kelondro.text.referencePrototype.WordReference;
import de.anomic.kelondro.text.referencePrototype.WordReferenceVars;
@@ -73,7 +74,7 @@ public final class plasmaSearchRankingProcess {
private final int[] flagcount; // flag counter
private final TreeSet<String> misses; // contains url-hashes that could not been found in the LURL-DB
private final Segment indexSegment;
private HashMap<byte[], ReferenceContainer<WordReference>>[] localSearchContainerMaps;
private HashMap<byte[], ReferenceContainer<WordReference>> localSearchInclusion;
private final int[] domZones;
private final ConcurrentHashMap<String, hoststat> hostNavigator;
private final ConcurrentHashMap<String, Integer> ref; // reference score computation for the commonSense heuristic
@@ -86,7 +87,7 @@ public final class plasmaSearchRankingProcess {
// we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime
// sortorder: 0 = hash, 1 = url, 2 = ranking
this.localSearchContainerMaps = null;
this.localSearchInclusion = null;
this.stack = new SortStack<WordReferenceVars>(maxentries);
this.doubleDomCache = new HashMap<String, SortStack<WordReferenceVars>>();
this.handover = new HashMap<String, String>();
@@ -119,12 +120,14 @@ public final class plasmaSearchRankingProcess {
public void execQuery() {
long timer = System.currentTimeMillis();
final ReferenceContainer<WordReference> index = this.indexSegment.termIndex().query(
final TermSearch<WordReference> search = this.indexSegment.termIndex().query(
query.queryHashes,
query.excludeHashes,
null,
Segment.wordReferenceFactory,
query.maxDistance);
this.localSearchInclusion = search.inclusion();
final ReferenceContainer<WordReference> index = search.joined();
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), plasmaSearchEvent.JOIN, index.size(), System.currentTimeMillis() - timer), false);
if (index.size() == 0) {
return;
@@ -248,10 +251,10 @@ public final class plasmaSearchRankingProcess {
return false;
}
public Map<byte[], ReferenceContainer<WordReference>>[] searchContainerMaps() {
public Map<byte[], ReferenceContainer<WordReference>> searchContainerMap() {
// direct access to the result maps is needed for abstract generation
// this is only available if execQuery() was called before
return localSearchContainerMaps;
return localSearchInclusion;
}
// todo:

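As the comment above notes, searchContainerMap() only carries data after execQuery() has run; it exposes the per-term inclusion map that abstract generation in plasmaSearchEvent iterates. A hedged usage sketch, assuming plasmaSearchRankingProcess lives in de.anomic.plasma (the import path and helper class are illustrative):

import java.util.Map;

import de.anomic.kelondro.text.ReferenceContainer;
import de.anomic.kelondro.text.referencePrototype.WordReference;
import de.anomic.plasma.plasmaSearchRankingProcess; // package path assumed

// Illustrative consumer of the renamed accessor.
final class AbstractGenerationSketch {
    static void reportPerTermCounts(final plasmaSearchRankingProcess rankedCache) {
        rankedCache.execQuery(); // fills the local inclusion map
        final Map<byte[], ReferenceContainer<WordReference>> inclusion = rankedCache.searchContainerMap();
        if (inclusion == null) return; // defensive: only null if execQuery() never ran
        for (final Map.Entry<byte[], ReferenceContainer<WordReference>> entry : inclusion.entrySet()) {
            final byte[] wordHash = entry.getKey();
            final ReferenceContainer<WordReference> container = entry.getValue();
            System.out.println(new String(wordHash) + ": " + container.size() + " references");
        }
    }
}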