Use more pre-compiled patterns for split methods

pull/1/head
Michael Peter Christen 12 years ago
parent d48e9788d2
commit 8fc3679c66

@ -1,6 +1,7 @@
import java.util.Iterator;
import java.util.regex.Pattern;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.data.BookmarkHelper;
@ -51,12 +52,12 @@ public class get_folders {
// loop through folderList
it = BookmarkHelper.getFolderList(root, sb.bookmarksDB.getTagIterator(isAdmin));
int n = root.split("/").length;
int n = Pattern.compile("/").split(root, 0).length;
if (n == 0) n = 1;
int count = 0;
while (it.hasNext()) {
final String folder = it.next();
foldername = folder.split("/");
foldername = Pattern.compile("/").split(folder, 0);
if (foldername.length == n+1) {
prop.put("folders_"+count+"_foldername", foldername[n]);
prop.put("folders_"+count+"_expanded", "false");

@ -2,6 +2,7 @@ import java.io.IOException;
import java.net.MalformedURLException;
import java.util.EnumMap;
import java.util.Iterator;
import java.util.regex.Pattern;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.data.UserDB;
@ -91,7 +92,7 @@ public class get_metadata {
}
public static int putTags(final String tagString, final String var) {
final String list[] = tagString.split(YMarkUtil.TAGS_SEPARATOR);
final String list[] = Pattern.compile(YMarkUtil.TAGS_SEPARATOR).split(tagString, 0);
int count = 0;
for (final String element : list) {
final String tag = element;

@ -4,6 +4,7 @@ import java.util.Date;
import java.util.EnumMap;
import java.util.Iterator;
import java.util.TreeMap;
import java.util.regex.Pattern;
import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.UTF8;
@ -96,7 +97,7 @@ public class get_treeview {
} catch (final IOException e) {
Log.logException(e);
}
int n = root.split(YMarkUtil.FOLDERS_SEPARATOR).length;
int n = Pattern.compile(YMarkUtil.FOLDERS_SEPARATOR).split(root, 0).length;
if (n == 0) n = 1;
while (it.hasNext()) {
final String folder = it.next();

@ -53,6 +53,7 @@ import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.TimeoutRequest;
import net.yacy.cora.protocol.ftp.FTPClient;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.util.CommonPattern;
/**
* MultiProtocolURI provides a URL object for multiple protocols like http, https, ftp, smb and file
@ -729,7 +730,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
}
public String[] getPaths() {
return this.path == null ? null : this.path.charAt(0) == '/' ? this.path.substring(1).split("/") : this.path.split("/");
return this.path == null ? null : this.path.charAt(0) == '/' ? CommonPattern.SLASH.split(this.path.substring(1)) : CommonPattern.SLASH.split(this.path);
}
/**
@ -805,7 +806,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
public Map<String, String> getSearchpartMap() {
if (this.searchpart == null) return null;
this.searchpart = this.searchpart.replaceAll("&amp;", "&");
String[] parts = this.searchpart.split("&");
String[] parts = CommonPattern.AMP.split(this.searchpart);
Map<String, String> map = new LinkedHashMap<String, String>();
for (String part: parts) {
int p = part.indexOf('=');
@ -846,7 +847,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
while ((p = t.indexOf(" ",0)) >= 0) t = t.substring(0, p) + t.substring(p + 1);
// split the string into tokens and add all camel-case splitting
final String[] u = t.split(" ");
final String[] u = CommonPattern.SPACE.split(t);
final Map<String, Object> token = new LinkedHashMap<String, Object>();
for (final String r: u) {
token.putAll(parseCamelCase(r));

@ -38,6 +38,7 @@ import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.lod.vocabulary.DublinCore;
import net.yacy.cora.lod.vocabulary.Geo;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.util.CommonPattern;
public class RSSMessage implements Hit, Comparable<RSSMessage>, Comparator<RSSMessage> {
@ -179,9 +180,9 @@ public class RSSMessage implements Hit, Comparable<RSSMessage>, Comparator<RSSMe
@Override
public String[] getSubject() {
final String subject = Token.subject.valueFrom(this.map, "");
if (subject.indexOf(',') >= 0) return subject.split(",");
if (subject.indexOf(';') >= 0) return subject.split(";");
return subject.split(" ");
if (subject.indexOf(',') >= 0) return CommonPattern.COMMA.split(subject);
if (subject.indexOf(';') >= 0) return CommonPattern.SEMICOLON.split(subject);
return CommonPattern.SPACE.split(subject);
}
@Override

@ -25,6 +25,7 @@ import java.io.File;
import java.io.FileInputStream;
import java.util.HashSet;
import java.util.Map.Entry;
import java.util.regex.Pattern;
import java.util.Properties;
import java.util.Set;
@ -104,7 +105,7 @@ public class Classification {
private static void addSet(Set<String> set, final String extString) {
if ((extString == null) || (extString.isEmpty())) return;
for (String s: extString.split(",")) set.add(s.toLowerCase().trim());
for (String s: Pattern.compile(",").split(extString, 0)) set.add(s.toLowerCase().trim());
}
public static boolean isTextExtension(String textExt) {

@ -32,6 +32,7 @@ import java.util.regex.Pattern;
import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.util.CommonPattern;
import net.yacy.peers.operation.yacyVersion;
import net.yacy.search.Switchboard;
@ -105,7 +106,7 @@ public class GSAResponseWriter implements QueryResponseWriter {
public String sort = null, action = null, direction = null, mode = null, format = null;
public Sort(String d) {
this.sort = d;
String[] s = d.split(":");
String[] s = CommonPattern.DOUBLEPOINT.split(d);
if (s.length < 1) return;
this.action = s[0]; // date
this.direction = s.length > 1 ? s[1] : "D"; // A or D
@ -309,7 +310,7 @@ public class GSAResponseWriter implements QueryResponseWriter {
}
public static String highlight(String text, String query) {
String[] q = query.trim().toLowerCase().replaceAll(Pattern.quote("+"), " ").split(" ");
String[] q = CommonPattern.SPACE.split(CommonPattern.PLUS.matcher(query.trim().toLowerCase()).replaceAll(" "));
for (String s: q) {
int p = text.toLowerCase().indexOf(s.toLowerCase());
if (p < 0) continue;

@ -32,6 +32,7 @@ import java.util.concurrent.BlockingQueue;
import org.apache.log4j.Logger;
import net.yacy.cora.storage.Files;
import net.yacy.cora.util.CommonPattern;
/**
* Stemming library: reads stemming files and creates a mapping from words to synonyms
@ -59,7 +60,7 @@ public class SynonymLibrary {
if (line.length() == 0 || line.charAt(0) == '#') continue;
if (line.charAt(line.length() - 1) == '}') line = line.substring(0, line.length() - 1);
if (line.charAt(0) == '{') line = line.substring(1);
String[] words = line.split(",");
String[] words = CommonPattern.COMMA.split(line);
Set<String> synonyms = new HashSet<String>();
Set<String> keys = new HashSet<String>();
for (String s: words) {

@ -56,6 +56,7 @@ import net.yacy.cora.plugin.ClassProvider;
import net.yacy.cora.storage.ARC;
import net.yacy.cora.storage.ConcurrentARC;
import net.yacy.cora.storage.KeyList;
import net.yacy.cora.util.CommonPattern;
import com.google.common.net.InetAddresses;
import com.google.common.util.concurrent.SimpleTimeLimiter;
@ -661,7 +662,7 @@ public class Domains {
}
public static List<Pattern> makePatterns(final String patternList) throws PatternSyntaxException {
final String[] entries = (patternList != null) ? patternList.split(",") : new String[0];
final String[] entries = (patternList != null) ? CommonPattern.COMMA.split(patternList) : new String[0];
final List<Pattern> patterns = new ArrayList<Pattern>(entries.length);
for (final String entry : entries) {
patterns.add(Pattern.compile(entry.trim()));
@ -826,14 +827,13 @@ public class Domains {
try {globalHosts.clear();} catch (IOException e) {}
}
private final static Pattern dotPattern = Pattern.compile("\\.");
public static final InetAddress parseInetAddress(String ip) {
if (ip == null || ip.length() < 8) return null;
ip = ip.trim();
if (ip.charAt(0) == '[' && ip.charAt(ip.length() - 1) == ']') ip = ip.substring(1, ip.length() - 1);
if (isLocalhost(ip)) ip = "127.0.0.1"; // normalize to IPv4 here since that is the way to calculate the InetAddress
final String[] ips = dotPattern.split(ip);
final String[] ips = CommonPattern.DOT.split(ip);
if (ips.length != 4) return null;
final byte[] ipb = new byte[4];
try {

@ -42,6 +42,7 @@ import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.NumberTools;
@ -423,7 +424,7 @@ public class HeaderFramework extends TreeMap<String, String> implements Map<Stri
final String mimeType = mime();
if (mimeType == null) return null;
final String[] parts = mimeType.split(";");
final String[] parts = CommonPattern.SEMICOLON.split(mimeType);
if (parts == null || parts.length <= 1) return null;
for (int i=1; i < parts.length; i++) {

@ -0,0 +1,51 @@
/**
* CommonPattern
* Copyright 2012 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* First published 26.11.2012 on http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.util;
import java.util.regex.Pattern;
/**
 * Pre-compiled {@link Pattern} constants for the most common single-character
 * delimiters, intended to replace a regex in {@code s.split(regex)} method calls.
 *
 * <p>{@code s.split(regex)} is specified to yield the same result as
 * {@code Pattern.compile(regex).split(s, 0)}, so it is wise to compile each
 * regex only once and reuse the resulting pattern:
 * <pre>
 *   String[] parts = CommonPattern.COMMA.split(s);
 * </pre>
 *
 * <p>The same applies to {@code s.replaceAll(regex, replacement)}, which is
 * equivalent to {@code Pattern.compile(regex).matcher(s).replaceAll(replacement)}:
 * <pre>
 *   String t = CommonPattern.PLUS.matcher(s).replaceAll(" ");
 * </pre>
 *
 * <p>This class only holds constants and cannot be instantiated.
 */
public final class CommonPattern {

    /** Matches a single space character. */
    public static final Pattern SPACE = Pattern.compile(" ");

    /** Matches a single comma. */
    public static final Pattern COMMA = Pattern.compile(",");

    /** Matches a single semicolon. */
    public static final Pattern SEMICOLON = Pattern.compile(";");

    /** Matches a single colon (':'). */
    public static final Pattern DOUBLEPOINT = Pattern.compile(":");

    /** Matches a single forward slash. */
    public static final Pattern SLASH = Pattern.compile("/");

    /** Matches a single ampersand. */
    public static final Pattern AMP = Pattern.compile("&");

    /** Matches a literal plus sign; quoted because '+' is a regex quantifier. */
    public static final Pattern PLUS = Pattern.compile(Pattern.quote("+"));

    /** Matches a literal dot; escaped because '.' matches any character in a regex. */
    public static final Pattern DOT = Pattern.compile("\\.");

    /** Matches a single line-feed character ('\n'); does not match '\r'. */
    public static final Pattern NEWLINE = Pattern.compile("\n");

    /** Not instantiable: this class is a pure constant holder. */
    private CommonPattern() {
    }
}

@ -38,6 +38,7 @@ import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.order.Digest;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.util.CommonPattern;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.ByteArray;
@ -81,7 +82,7 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
* @return list of host hashes without separation
*/
public static String hosthashes(final String hostlist) {
String[] hs = hostlist.split(",");
String[] hs = CommonPattern.COMMA.split(hostlist);
StringBuilder sb = new StringBuilder(hostlist.length());
for (String h: hs) {
if (h == null) continue;

@ -48,6 +48,7 @@ import net.yacy.cora.federate.yacy.ConfigurationSet;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.util.CommonPattern;
import net.yacy.crawler.data.CrawlProfile;
import net.yacy.crawler.retrieval.Response;
import net.yacy.document.Condenser;
@ -223,7 +224,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
add(doc, YaCySchema.title_chars_val, cv);
}
if (allAttr || contains(YaCySchema.title_words_val)) {
Integer[] cv = new Integer[]{new Integer(title.split(" ").length)};
Integer[] cv = new Integer[]{new Integer(CommonPattern.SPACE.split(title).length)};
add(doc, YaCySchema.title_words_val, cv);
}
@ -235,7 +236,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
add(doc, YaCySchema.description_chars_val, cv);
}
if (allAttr || contains(YaCySchema.description_words_val)) {
Integer[] cv = new Integer[]{new Integer(description.split(" ").length)};
Integer[] cv = new Integer[]{new Integer(CommonPattern.SPACE.split(description).length)};
add(doc, YaCySchema.description_words_val, cv);
}
@ -353,13 +354,13 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
}
if (allAttr || contains(YaCySchema.title_words_val)) {
ArrayList<Integer> cv = new ArrayList<Integer>(titles.size());
for (String s: titles) cv.add(new Integer(s.split(" ").length));
for (String s: titles) cv.add(new Integer(CommonPattern.SPACE.split(s).length));
add(doc, YaCySchema.title_words_val, cv);
}
String description = document.dc_description();
List<String> descriptions = new ArrayList<String>();
for (String s: description.split("\n")) descriptions.add(s);
for (String s: CommonPattern.NEWLINE.split(description)) descriptions.add(s);
if (allAttr || contains(YaCySchema.description)) add(doc, YaCySchema.description, description);
if (allAttr || contains(YaCySchema.description_count_i)) add(doc, YaCySchema.description_count_i, descriptions.size());
if (allAttr || contains(YaCySchema.description_chars_val)) {
@ -369,7 +370,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
}
if (allAttr || contains(YaCySchema.description_words_val)) {
ArrayList<Integer> cv = new ArrayList<Integer>(descriptions.size());
for (String s: descriptions) cv.add(new Integer(s.split(" ").length));
for (String s: descriptions) cv.add(new Integer(CommonPattern.SPACE.split(s).length));
add(doc, YaCySchema.description_words_val, cv);
}
@ -680,7 +681,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
inboundlinksRel.add(rel.length() > 0 ? rel : "");
inboundlinksText.add(text.length() > 0 ? text : "");
inboundlinksTextChars.add(text.length() > 0 ? text.length() : 0);
inboundlinksTextWords.add(text.length() > 0 ? text.split(" ").length : 0);
inboundlinksTextWords.add(text.length() > 0 ? CommonPattern.SPACE.split(text).length : 0);
inboundlinksTag.add(
"<a href=\"" + url.toNormalform(false) + "\"" +
(rel.length() > 0 ? " rel=\"" + rel + "\"" : "") +
@ -728,7 +729,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
outboundlinksRel.add(rel.length() > 0 ? rel : "");
outboundlinksText.add(text.length() > 0 ? text : "");
outboundlinksTextChars.add(text.length() > 0 ? text.length() : 0);
outboundlinksTextWords.add(text.length() > 0 ? text.split(" ").length : 0);
outboundlinksTextWords.add(text.length() > 0 ? CommonPattern.SPACE.split(text).length : 0);
outboundlinksTag.add(
"<a href=\"" + url.toNormalform(false) + "\"" +
(rel.length() > 0 ? " rel=\"" + rel + "\"" : "") +

@ -48,6 +48,7 @@ import net.yacy.cora.sorting.ScoreMap;
import net.yacy.cora.sorting.WeakPriorityBlockingQueue;
import net.yacy.cora.sorting.WeakPriorityBlockingQueue.ReverseElement;
import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.document.Condenser;
import net.yacy.document.LibraryProvider;
@ -360,7 +361,7 @@ public final class RankingProcess extends Thread {
for (Map.Entry<String, String> v: this.taggingPredicates.entrySet()) {
Iterator<RDFNode> ni = JenaTripleStore.getObjects(resource, v.getValue());
while (ni.hasNext()) {
String[] tags = ni.next().toString().split(",");
String[] tags = CommonPattern.COMMA.split(ni.next().toString());
for (String tag: tags) {
ScoreMap<String> voc = this.vocabularyNavigator.get(v.getKey());
if (voc == null) {

@ -60,6 +60,7 @@ import net.yacy.cora.sorting.WeakPriorityBlockingQueue;
import net.yacy.cora.sorting.WeakPriorityBlockingQueue.Element;
import net.yacy.cora.sorting.WeakPriorityBlockingQueue.ReverseElement;
import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.data.WorkTables;
import net.yacy.document.Condenser;
@ -554,7 +555,7 @@ public final class SearchEvent {
for (Map.Entry<String, String> v: this.rankingProcess.taggingPredicates.entrySet()) {
Iterator<RDFNode> ni = JenaTripleStore.getObjects(resource, v.getValue());
while (ni.hasNext()) {
String[] tags = ni.next().toString().split(",");
String[] tags = CommonPattern.COMMA.split(ni.next().toString());
for (String tag: tags) {
ScoreMap<String> voc = this.rankingProcess.vocabularyNavigator.get(v.getKey());
if (voc == null) {

@ -101,6 +101,7 @@ import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.NumberTools;
import net.yacy.data.UserDB;
import net.yacy.document.parser.htmlParser;
@ -792,9 +793,7 @@ public final class HTTPDFileHandler {
int indexOfDelimiter = processOutput.indexOf("\n\n", 0);
final String[] cgiHeader;
if (indexOfDelimiter > -1) {
cgiHeader =
processOutput.substring(
0, indexOfDelimiter).split("\n");
cgiHeader = CommonPattern.NEWLINE.split(processOutput.substring(0, indexOfDelimiter));
} else {
cgiHeader = new String[0];
}
@ -1166,7 +1165,7 @@ public final class HTTPDFileHandler {
final String rangeHeaderVal = requestHeader.get(HeaderFramework.RANGE).trim();
if (rangeHeaderVal.startsWith("bytes=")) {
final String rangesVal = rangeHeaderVal.substring("bytes=".length());
final String[] ranges = rangesVal.split(",");
final String[] ranges = CommonPattern.COMMA.split(rangesVal);
if ((ranges.length == 1)&&(ranges[0].endsWith("-"))) {
rangeStartOffset = NumberTools.parseIntDecSubstring(ranges[0], 0, ranges[0].length() - 1);
statusCode = 206;
@ -1302,7 +1301,7 @@ public final class HTTPDFileHandler {
try {
line = br.readLine();
if (line.startsWith("#!")) {
ret.addAll(Arrays.asList(line.substring(2).split(" ")));
ret.addAll(Arrays.asList(CommonPattern.SPACE.split(line.substring(2))));
}
ret.add(targetFile.getAbsolutePath());
} catch (IOException e) {
@ -1457,7 +1456,7 @@ public final class HTTPDFileHandler {
boolean ret = false;
if (suffixList != null && name != null) {
final String[] suffixes = suffixList.split(",");
final String[] suffixes = CommonPattern.COMMA.split(suffixList);
find:
for (int i = 0; i < suffixes.length; i++) {
if (name.endsWith("." + suffixes[i].trim())) {

@ -28,6 +28,7 @@
package net.yacy.server.http;
import net.yacy.cora.util.CommonPattern;
import net.yacy.search.SwitchboardConstants;
import net.yacy.server.serverSwitch;
@ -86,7 +87,7 @@ public final class RobotsTxtConfig {
public static RobotsTxtConfig init(final serverSwitch env) {
final String cfg = env.getConfig(SwitchboardConstants.ROBOTS_TXT, SwitchboardConstants.ROBOTS_TXT_DEFAULT);
if (cfg == null) return new RobotsTxtConfig();
return new RobotsTxtConfig(cfg.split(","));
return new RobotsTxtConfig(CommonPattern.COMMA.split(cfg));
}
@Override

@ -58,6 +58,7 @@ import javax.crypto.spec.PBEParameterSpec;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.order.Digest;
import net.yacy.cora.util.CommonPattern;
import net.yacy.kelondro.logging.Log;
@ -187,7 +188,7 @@ public class cryptbig {
final Set<?> keys = provider.keySet();
for (Object name : keys) {
String key = (String) name;
key = key.split(" ")[0];
key = CommonPattern.SPACE.split(key)[0];
if (key.startsWith(serviceType + ".")) {
result.add(key.substring(serviceType.length() + 1));
} else if (key.startsWith("Alg.Alias." + serviceType + ".")) {

Loading…
Cancel
Save