enhanced did-you-mean

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7300 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent 43586a2ace
commit becc463d8a

@ -2,9 +2,9 @@
cd "`dirname $0`"
port=$(grep ^port= ../DATA/SETTINGS/yacy.conf |cut -d= -f2)
if which curl &>/dev/null; then
curl -s "http://localhost:$port/yacysearch.rss?resource=local&verify=false&query=$1" | awk '/^<link>/{ gsub("<link>","" );gsub("<\/link>","" ); print $0 }'
curl -s "http://localhost:$port/suggest.json?resource=local&verify=false&query=$1"
elif which wget &>/dev/null; then
wget -q -O - "http://localhost:$port/yacysearch.rss?resource=local&verify=false&query=$1" | awk '/^<link>/{ gsub("<link>","" );gsub("<\/link>","" ); print $0 }'
wget -q -O - "http://localhost:$port/suggest.json?resource=local&verify=false&query=$1"
else
echo "Neither curl nor wget installed!"
exit 1

@ -50,13 +50,13 @@ public class DidYouMean {
for (char a = '\u4e00'; a <= '\u4eff'; a++) ALPHABET_KANJI[0xff & (a - '\u4e00') + 256] = a;
}
private static final char[][] ALPHABETS = {ALPHABET_LATIN, ALPHABET_KANJI};
private static char[] alphabet = ALPHABET_LATIN;
private static final String POISON_STRING = "\n";
public static final int AVAILABLE_CPU = Runtime.getRuntime().availableProcessors();
private static final wordLengthComparator WORD_LENGTH_COMPARATOR = new wordLengthComparator();
private final IndexCell<WordReference> index;
private char[] alphabet;
private final String word;
private final int wordLen;
private final LinkedBlockingQueue<String> guessGen, guessLib;
@ -81,24 +81,23 @@ public class DidYouMean {
this.INDEX_SIZE_COMPARATOR = new indexSizeComparator();
// identify language
if (this.word.length() == 0) {
this.alphabet = ALPHABET_LATIN;
} else {
if (this.word.length() > 0) {
char testchar = this.word.charAt(0);
this.alphabet = null;
boolean alphafound = false;
alphatest: for (char[] alpha: ALPHABETS) {
if (isAlphabet(alpha, testchar)) {
this.alphabet = alpha;
alphabet = alpha;
alphafound = true;
break alphatest;
}
}
if (this.alphabet == null) {
if (!alphafound) {
// generate generic alphabet using simply a character block of 256 characters
char firstchar = (char) ((0xff & (testchar / 256)) * 256);
char lastchar = (char) (firstchar + 255);
this.alphabet = new char[256];
alphabet = new char[256];
for (char a = firstchar; a <= lastchar; a++) {
this.alphabet[0xff & (a - firstchar)] = a;
alphabet[0xff & (a - firstchar)] = a;
}
}
}
@ -129,7 +128,12 @@ public class DidYouMean {
long timelimit = startTime + timeout;
if (this.word.indexOf(' ') > 0) return getSuggestions(this.word.split(" "), timeout, preSortSelection, this.index);
SortedSet<String> preSorted = getSuggestions(timeout);
if (System.currentTimeMillis() > timelimit) return preSorted;
if (System.currentTimeMillis() > timelimit) {
Log.logInfo("DidYouMean", "found and returned " + preSorted.size() + " unsorted suggestions (1); execution time: "
+ (System.currentTimeMillis() - startTime) + "ms");
return preSorted;
}
DynamicScore<String> scored = new ScoreCluster<String>();
for (final String s: preSorted) {
if (System.currentTimeMillis() > timelimit) break;
@ -146,8 +150,13 @@ public class DidYouMean {
}
// finished
Log.logInfo("DidYouMean", "found " + preSorted.size() + " terms, returned " + countSorted.size() + " suggestions; execution time: "
+ (System.currentTimeMillis() - startTime) + "ms" + " - remaining queue size: " + guessLib.size());
if (countSorted.size() == 0) {
Log.logInfo("DidYouMean", "found and returned " + preSorted.size() + " unsorted suggestions (2); execution time: "
+ (System.currentTimeMillis() - startTime) + "ms");
return preSorted;
}
Log.logInfo("DidYouMean", "found " + preSorted.size() + " unsorted terms, returned " + countSorted.size() + " sorted suggestions; execution time: "
+ (System.currentTimeMillis() - startTime) + "ms");
return countSorted;
}

@ -32,6 +32,7 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ConcurrentModificationException;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
@ -143,10 +144,12 @@ public class DidYouMeanLibrary {
}
SortedMap<String, IntScore> u = this.commonWords.tailMap(string);
String vv;
for (final Map.Entry<String, IntScore> v: u.entrySet()) {
vv = v.getKey();
if (vv.startsWith(string) && vv.length() > string.length()) ret.add(vv); else break;
}
try {
for (final Map.Entry<String, IntScore> v: u.entrySet()) {
vv = v.getKey();
if (vv.startsWith(string) && vv.length() > string.length()) ret.add(vv); else break;
}
} catch (ConcurrentModificationException e) {}
string = reverse(string);
t = this.tcid.tailSet(string);
for (final String r: t) {

@ -522,14 +522,14 @@ public class ArrayStack implements BLOB {
* @return
* @throws IOException
*/
public synchronized boolean containsKey(byte[] key) {
public boolean containsKey(byte[] key) {
blobItem bi = keeperOf(key);
return bi != null;
//for (blobItem bi: blobs) if (bi.blob.has(key)) return true;
//return false;
}
public synchronized blobItem keeperOf(final byte[] key) {
public blobItem keeperOf(final byte[] key) {
// because the index is stored only in one table,
// and the index is completely in RAM, concurrent lookups
// will not cause concurrent file accesses
@ -538,41 +538,43 @@ public class ArrayStack implements BLOB {
// start a concurrent query to database tables
final CompletionService<blobItem> cs = new ExecutorCompletionService<blobItem>(executor);
int accepted = 0;
for (final blobItem bi : blobs) {
synchronized (this) {
for (final blobItem bi : blobs) {
try {
cs.submit(new Callable<blobItem>() {
public blobItem call() {
if (bi.blob.containsKey(key)) return bi;
return null;
}
});
accepted++;
} catch (final RejectedExecutionException e) {
// the executor is either shutting down or the blocking queue is full
// execute the search directly here, without concurrency
if (bi.blob.containsKey(key)) return bi;
}
}
// read the result
try {
cs.submit(new Callable<blobItem>() {
public blobItem call() {
if (bi.blob.containsKey(key)) return bi;
return null;
for (int i = 0; i < accepted; i++) {
final Future<blobItem> f = cs.take();
//hash(System.out.println("**********accepted = " + accepted + ", i =" + i);
if (f == null) continue;
final blobItem index = f.get();
if (index != null) {
//System.out.println("*DEBUG SplitTable success.time = " + (System.currentTimeMillis() - start) + " ms");
return index;
}
});
accepted++;
} catch (final RejectedExecutionException e) {
// the executor is either shutting down or the blocking queue is full
// execute the search directly here, without concurrency
if (bi.blob.containsKey(key)) return bi;
}
}
// read the result
try {
for (int i = 0; i < accepted; i++) {
final Future<blobItem> f = cs.take();
//hash(System.out.println("**********accepted = " + accepted + ", i =" + i);
if (f == null) continue;
final blobItem index = f.get();
if (index != null) {
//System.out.println("*DEBUG SplitTable success.time = " + (System.currentTimeMillis() - start) + " ms");
return index;
}
//System.out.println("*DEBUG SplitTable fail.time = " + (System.currentTimeMillis() - start) + " ms");
return null;
} catch (final InterruptedException e) {
Thread.currentThread().interrupt();
} catch (final ExecutionException e) {
Log.logSevere("ArrayStack", "", e);
throw new RuntimeException(e.getCause());
}
//System.out.println("*DEBUG SplitTable fail.time = " + (System.currentTimeMillis() - start) + " ms");
return null;
} catch (final InterruptedException e) {
Thread.currentThread().interrupt();
} catch (final ExecutionException e) {
Log.logSevere("ArrayStack", "", e);
throw new RuntimeException(e.getCause());
}
//System.out.println("*DEBUG SplitTable fail.time = " + (System.currentTimeMillis() - start) + " ms");
return null;

Loading…
Cancel
Save