# Patch summary: split titles and URLs with a regex Pattern that is compiled once.
#
# DigestURI gains a public static final Pattern, splitpattern, built from the
# existing splitrex string. The call sites in RankingProcess, ResultFetcher,
# ContentScraper and DigestURI.urlComps change from
#     x.toLowerCase().split(DigestURI.splitrex)
# to
#     DigestURI.splitpattern.split(x.toLowerCase())
# The yacyClient hunk only corrects a comment typo ("countet" -> "counted").
#
# NOTE(review): splitrex changes from public to private; any user of
# DigestURI.splitrex outside the files touched here would stop compiling -
# confirm there are none.
# NOTE(review): this patch carries no import hunk for java.util.regex.Pattern
# in DigestURI.java; presumably it is already imported there - verify.
diff --git a/source/de/anomic/search/RankingProcess.java b/source/de/anomic/search/RankingProcess.java index 116f87dbc..c5ac42ae5 100644 --- a/source/de/anomic/search/RankingProcess.java +++ b/source/de/anomic/search/RankingProcess.java @@ -641,7 +641,7 @@ public final class RankingProcess extends Thread { // take out relevant information for reference computation if ((resultEntry.url() == null) || (resultEntry.title() == null)) return; //final String[] urlcomps = htmlFilterContentScraper.urlComps(resultEntry.url().toNormalform(true, true)); // word components of the url - final String[] descrcomps = resultEntry.title().toLowerCase().split(DigestURI.splitrex); // words in the description + final String[] descrcomps = DigestURI.splitpattern.split(resultEntry.title().toLowerCase()); // words in the description // add references //addTopic(urlcomps); diff --git a/source/de/anomic/search/ResultFetcher.java b/source/de/anomic/search/ResultFetcher.java index 74608aa7d..849602b2f 100644 --- a/source/de/anomic/search/ResultFetcher.java +++ b/source/de/anomic/search/ResultFetcher.java @@ -373,7 +373,7 @@ public class ResultFetcher { // apply 'common-sense' heuristic using references final String urlstring = rentry.url().toNormalform(true, true); final String[] urlcomps = DigestURI.urlComps(urlstring); - final String[] descrcomps = rentry.title().toLowerCase().split(DigestURI.splitrex); + final String[] descrcomps = DigestURI.splitpattern.split(rentry.title().toLowerCase()); Integer tc; for (int j = 0; j < urlcomps.length; j++) { tc = topwords.get(urlcomps[j]); diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index 299a96ee8..5c4fdc9c2 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -616,7 +616,7 @@ public final class yacyClient { final String references = result.get("references"); yacyCore.log.logInfo("remote search (client): peer " + target.getName() + " sent references " + 
references); if (references != null) { - // add references twice, so they can be countet (must have at least 2 entries) + // add references twice, so they can be counted (must have at least 2 entries) containerCache.addTopic(references.split(",")); containerCache.addTopic(references.split(",")); } diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java index 2d5e6faf3..63e3bb037 100644 --- a/source/net/yacy/document/parser/html/ContentScraper.java +++ b/source/net/yacy/document/parser/html/ContentScraper.java @@ -402,7 +402,7 @@ public class ContentScraper extends AbstractScraper implements Scraper { if (s == null) s = metas.get("dc.description"); if (s == null) s = ""; if (s.length() == 0) { - return getTitle().toLowerCase().split(DigestURI.splitrex); + return DigestURI.splitpattern.split(getTitle().toLowerCase()); } if (s.contains(",")) return s.split(" |,"); if (s.contains(";")) return s.split(" |;"); diff --git a/source/net/yacy/kelondro/data/meta/DigestURI.java b/source/net/yacy/kelondro/data/meta/DigestURI.java index fa778d05a..7f164e0ea 100644 --- a/source/net/yacy/kelondro/data/meta/DigestURI.java +++ b/source/net/yacy/kelondro/data/meta/DigestURI.java @@ -938,11 +938,12 @@ public class DigestURI implements Serializable { return language; } - public static final String splitrex = " |/|\\(|\\)|-|\\:|_|\\.|,|\\?|!|'|" + '"'; + private static final String splitrex = " |/|\\(|\\)|-|\\:|_|\\.|,|\\?|!|'|" + '"'; + public static final Pattern splitpattern = Pattern.compile(splitrex); public static String[] urlComps(String normalizedURL) { final int p = normalizedURL.indexOf("//"); if (p > 0) normalizedURL = normalizedURL.substring(p + 2); - return normalizedURL.toLowerCase().split(splitrex); // word components of the url + return splitpattern.split(normalizedURL.toLowerCase()); // word components of the url } public static void main(final String[] args) {