diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java index f1b709475..f7e58166f 100644 --- a/htroot/ViewFile.java +++ b/htroot/ViewFile.java @@ -449,7 +449,7 @@ public class ViewFile { words = words.substring(1, words.length() - 1); } words = UTF8.decodeURL(words); - if (words.indexOf(' ',0) >= 0) return CommonPattern.SPACE.split(words); + if (words.indexOf(' ',0) >= 0) return CommonPattern.SPACES.split(words); if (words.indexOf(',',0) >= 0) return CommonPattern.COMMA.split(words); if (words.indexOf('+',0) >= 0) return words.split("\\+"); w = new String[1]; diff --git a/htroot/Vocabulary_p.java b/htroot/Vocabulary_p.java index 14e04092a..3050ebde7 100644 --- a/htroot/Vocabulary_p.java +++ b/htroot/Vocabulary_p.java @@ -164,7 +164,7 @@ public class Vocabulary_p { t = t.replaceAll("_", " ").replaceAll("\"", " ").replaceAll("'", " ").replaceAll(",", " ").replaceAll(" ", " ").trim(); if (t.isEmpty()) continue; if (discoverFromTitleSplitted) { - String[] ts = CommonPattern.SPACE.split(t); + String[] ts = CommonPattern.SPACES.split(t); for (String s: ts) { if (s.isEmpty()) continue; if (s.endsWith(".jpg") || s.endsWith(".gif")) continue; diff --git a/htroot/yacysearch_location.java b/htroot/yacysearch_location.java index d1250dc9e..f4203772f 100644 --- a/htroot/yacysearch_location.java +++ b/htroot/yacysearch_location.java @@ -70,7 +70,7 @@ public class yacysearch_location { int placemarkCounter = 0; if (query.length() > 0 && search_query) { final Set locations = LibraryProvider.geoLoc.find(query, true); - for (final String qp: CommonPattern.SPACE.split(query)) { + for (final String qp: CommonPattern.SPACES.split(query)) { locations.addAll(LibraryProvider.geoLoc.find(qp, true)); } String ip = sb.peers.mySeed().getIP(); diff --git a/pom.xml b/pom.xml index b213b123f..07e035b38 100644 --- a/pom.xml +++ b/pom.xml @@ -34,7 +34,7 @@ 1.7 1.7 - 4.10.3 + 5.2.1 9.2.11.v20150529 diff --git a/source/net/yacy/cora/document/feed/RSSMessage.java b/source/net/yacy/cora/document/feed/RSSMessage.java index 340d01e99..4ea98cc6e 100644 --- a/source/net/yacy/cora/document/feed/RSSMessage.java +++ b/source/net/yacy/cora/document/feed/RSSMessage.java @@ -199,7 +199,7 @@ public class RSSMessage implements Hit, Comparable, Comparator= 0) return CommonPattern.COMMA.split(subject); if (subject.indexOf(';') >= 0) return CommonPattern.SEMICOLON.split(subject); - return CommonPattern.SPACE.split(subject); + return CommonPattern.SPACES.split(subject); } @Override diff --git a/source/net/yacy/cora/document/id/MultiProtocolURL.java b/source/net/yacy/cora/document/id/MultiProtocolURL.java index bbabed6b5..18758b72e 100644 --- a/source/net/yacy/cora/document/id/MultiProtocolURL.java +++ b/source/net/yacy/cora/document/id/MultiProtocolURL.java @@ -985,7 +985,7 @@ public class MultiProtocolURL implements Serializable, Comparable= 0) t = t.substring(0, p) + t.substring(p + 1); // split the string into tokens and add all camel-case splitting - final String[] u = CommonPattern.SPACE.split(t); + final String[] u = CommonPattern.SPACES.split(t); final Set token = new LinkedHashSet(); for (final String r: u) token.add(r); for (final String r: u) token.addAll(parseCamelCase(r)); diff --git a/source/net/yacy/cora/util/CommonPattern.java b/source/net/yacy/cora/util/CommonPattern.java index a31d0132a..04edb797e 100644 --- a/source/net/yacy/cora/util/CommonPattern.java +++ b/source/net/yacy/cora/util/CommonPattern.java @@ -39,6 +39,7 @@ import java.util.regex.Pattern; public class CommonPattern { public final static Pattern SPACE = Pattern.compile(" "); + public final static Pattern SPACES = Pattern.compile(" +"); // pattern for one or multiple spaces public final static Pattern COMMA = Pattern.compile(","); public final static Pattern SEMICOLON = Pattern.compile(";"); public final static Pattern DOUBLEPOINT = Pattern.compile(":"); diff --git a/source/net/yacy/data/DidYouMean.java b/source/net/yacy/data/DidYouMean.java index c33aecb34..eea995d9e 100644 --- a/source/net/yacy/data/DidYouMean.java +++ b/source/net/yacy/data/DidYouMean.java @@ -305,7 +305,7 @@ public class DidYouMean { s = snippet + (afterSnippet.length() > 0 ? " " + afterSnippet : ""); for (int i = 0; i < s.length(); i++) {char c = s.charAt(i); if (c < 'A') s = s.replace(c, ' ');} // remove funny symbols s = s.replaceAll("", " ").replaceAll("", " ").replaceAll(" ", " ").trim(); // wipe superfluous whitespace - String[] sx = CommonPattern.SPACE.split(s); + String[] sx = CommonPattern.SPACES.split(s); StringBuilder sb = new StringBuilder(s.length()); for (String x: sx) if (x.length() > 1 && sb.length() < 28) sb.append(x).append(' '); else break; s = sb.toString().trim(); diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java index 304d790c7..502555c13 100644 --- a/source/net/yacy/search/schema/CollectionConfiguration.java +++ b/source/net/yacy/search/schema/CollectionConfiguration.java @@ -284,7 +284,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri add(doc, CollectionSchema.title_chars_val, cv); } if (allAttr || contains(CollectionSchema.title_words_val)) { - Integer[] cv = new Integer[]{new Integer(CommonPattern.SPACE.split(title).length)}; + Integer[] cv = new Integer[]{new Integer(CommonPattern.SPACES.split(title).length)}; add(doc, CollectionSchema.title_words_val, cv); } @@ -297,7 +297,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri add(doc, CollectionSchema.description_chars_val, description_exist ? new Integer[]{new Integer(description.length())} : new Integer[0]); } if (allAttr || contains(CollectionSchema.description_words_val)) { - add(doc, CollectionSchema.description_words_val, description_exist ? new Integer[]{new Integer(description.length() == 0 ? 0 : CommonPattern.SPACE.split(description).length)} : new Integer[0]); + add(doc, CollectionSchema.description_words_val, description_exist ? new Integer[]{new Integer(description.length() == 0 ? 0 : CommonPattern.SPACES.split(description).length)} : new Integer[0]); } String keywords = md.dc_subject(); @@ -460,7 +460,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri } if (allAttr || contains(CollectionSchema.title_words_val)) { ArrayList cv = new ArrayList(titles.size()); - for (String s: titles) cv.add(new Integer(CommonPattern.SPACE.split(s).length)); + for (String s: titles) cv.add(new Integer(CommonPattern.SPACES.split(s).length)); add(doc, CollectionSchema.title_words_val, cv); } @@ -479,7 +479,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri } if (allAttr || contains(CollectionSchema.description_words_val)) { ArrayList cv = new ArrayList(descriptions.length); - for (String s: descriptions) cv.add(new Integer(CommonPattern.SPACE.split(s).length)); + for (String s: descriptions) cv.add(new Integer(CommonPattern.SPACES.split(s).length)); add(doc, CollectionSchema.description_words_val, cv); }