Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

pull/1/head
Michael Peter Christen 11 years ago
commit 9e12fdff23

@ -218,7 +218,7 @@ public class yacysearchitem {
prop.putHTML("content_publisher", result.publisher());
prop.putHTML("content_creator", result.creator());// author
prop.putHTML("content_subject", result.subject());
final List<String> query = theSearch.query.getQueryGoal().getAllStrings();
final List<String> query = theSearch.query.getQueryGoal().getIncludeStrings();
final StringBuilder s = new StringBuilder(query.size() * 20);
for (final String t: query) {
s.append('+').append(t);

@ -49,18 +49,16 @@ public class QueryGoal {
private String query_original;
private HandleSet include_hashes, exclude_hashes;
private final ArrayList<String> include_words, exclude_words, all_words;
private final ArrayList<String> include_strings, exclude_strings, all_strings;
private final ArrayList<String> include_words, exclude_words;
private final ArrayList<String> include_strings, exclude_strings;
/**
 * Creates a query goal directly from include and exclude hash sets.
 * No query text is involved here: query_original is set to null and every
 * word and string list starts out empty.
 *
 * @param include_hashes hash set stored as this goal's include hashes (kept by reference, not copied)
 * @param exclude_hashes hash set stored as this goal's exclude hashes (kept by reference, not copied)
 */
public QueryGoal(HandleSet include_hashes, HandleSet exclude_hashes) {
// no original query string exists when the goal is built from hashes
this.query_original = null;
// all word/string lists are created empty
this.include_words = new ArrayList<String>();
this.exclude_words = new ArrayList<String>();
this.all_words = new ArrayList<String>();
this.include_strings = new ArrayList<String>();
this.exclude_strings = new ArrayList<String>();
this.all_strings = new ArrayList<String>();
// the given hash sets are taken over as-is
this.include_hashes = include_hashes;
this.exclude_hashes = exclude_hashes;
}
@ -71,10 +69,8 @@ public class QueryGoal {
this.query_original = query_original;
this.include_words = new ArrayList<String>();
this.exclude_words = new ArrayList<String>();
this.all_words = new ArrayList<String>();
this.include_strings = new ArrayList<String>();
this.exclude_strings = new ArrayList<String>();
this.all_strings = new ArrayList<String>();
// remove funny symbols
query_words = CharacterCoding.html2unicode(AbstractScraper.stripAllTags(query_words.toCharArray())).toLowerCase().trim();
@ -86,11 +82,11 @@ public class QueryGoal {
}
// parse first quoted strings
parseQuery(query_words, this.include_strings, this.exclude_strings, this.all_strings);
parseQuery(query_words, this.include_strings, this.exclude_strings);
// .. and then take these strings apart to generate word lists
for (String s: this.include_strings) parseQuery(s, this.include_words, this.include_words, this.all_words);
for (String s: this.exclude_strings) parseQuery(s, this.exclude_words, this.exclude_words, this.all_words);
for (String s: this.include_strings) parseQuery(s, this.include_words, this.include_words);
for (String s: this.exclude_strings) parseQuery(s, this.exclude_words, this.exclude_words);
WordCache.learn(this.include_strings);
WordCache.learn(this.exclude_strings);
@ -111,7 +107,7 @@ public class QueryGoal {
* sq = '\''
* dq = '"'
*/
private static void parseQuery(String s, ArrayList<String> include_string, ArrayList<String> exclude_string, ArrayList<String> all_string) {
private static void parseQuery(String s, ArrayList<String> include_string, ArrayList<String> exclude_string) {
while (s.length() > 0) {
// parse query
int p = 0;
@ -135,7 +131,6 @@ public class QueryGoal {
p++; // go behind the stop character (eats up space, sq and dq)
s = p < s.length() ? s.substring(p) : "";
if (string.length() > 0) {
if (!all_string.contains(string)) all_string.add(string);
if (inc) {
if (!include_string.contains(string)) include_string.add(string);
} else {
@ -195,11 +190,7 @@ public class QueryGoal {
for (String e: this.exclude_strings) if (t.indexOf(e.toLowerCase()) >= 0) return false;
return true;
}
/**
 * Returns the all_strings list of this query goal.
 * The internal list object itself is handed out, not a copy.
 *
 * @return the all_strings list
 */
public ArrayList<String> getAllStrings() {
return all_strings;
}
public void filterOut(final SortedSet<String> blueList) {
// filter out words that appear in this set
// this is applied to the queryHashes

Loading…
Cancel
Save