Removed synchronization and concurrency in the Fulltext class; concurrent deletions are now handled in ConcurrentUpdateSolrConnector.
pull/1/head
Michael Peter Christen 12 years ago
parent f965d04496
commit b24d1d18e4

@ -124,7 +124,7 @@ public class CrawlResults {
if (post.containsKey("deletedomain")) {
final String domain = post.get("domain", null);
if (domain != null) {
sb.index.fulltext().deleteDomainHostname(domain, null, false);
sb.index.fulltext().deleteDomainHostname(domain, null);
ResultURLs.deleteDomain(tabletype, domain);
}
}

@ -301,7 +301,7 @@ public class Crawler_p {
siteFilter = CrawlProfile.siteFilter(rootURLs);
if (deleteold) {
for (DigestURI u: rootURLs) {
sb.index.fulltext().deleteDomainHashpart(u.hosthash(), deleteageDate, rootURLs.size() > 1);
sb.index.fulltext().deleteDomainHashpart(u.hosthash(), deleteageDate);
}
}
} else if (subPath) {
@ -310,7 +310,7 @@ public class Crawler_p {
for (DigestURI u: rootURLs) {
String basepath = u.toNormalform(true);
if (!basepath.endsWith("/")) {int p = basepath.lastIndexOf("/"); if (p > 0) basepath = basepath.substring(0, p + 1);}
int count = sb.index.fulltext().remove(basepath, deleteageDate, rootURLs.size() > 1);
int count = sb.index.fulltext().remove(basepath, deleteageDate);
if (count > 0) Log.logInfo("Crawler_p", "deleted " + count + " documents for host " + u.getHost());
}
}

@ -279,7 +279,7 @@ public class IndexControlURLs_p {
if (post.containsKey("deletedomain")) {
final String domain = post.get("domain");
segment.fulltext().deleteDomainHostname(domain, null, false);
segment.fulltext().deleteDomainHostname(domain, null);
// trigger the loading of the table
post.put("statistics", "");
}

@ -417,12 +417,7 @@ public final class Fulltext {
//Date sdDate = (Date) connector.getFieldById(id, CollectionSchema.last_modified.getSolrFieldName());
//Date docDate = null;
//if (sdDate == null || (docDate = SchemaConfiguration.getDate(doc, CollectionSchema.last_modified)) == null || sdDate.before(docDate)) {
if (this.collectionConfiguration.contains(CollectionSchema.ip_s)) {
// ip_s needs a dns lookup which causes blockings during search here
connector.add(doc);
} else synchronized (this.solrInstances) {
connector.add(doc);
}
//}
} catch (SolrException e) {
throw new IOException(e.getMessage(), e);
@ -546,7 +541,7 @@ public final class Fulltext {
* @param freshdate either NULL or a date in the past which is the limit for deletion. Only documents older than this date are deleted
* @throws IOException
*/
public void deleteDomainHashpart(final String hosthash, Date freshdate, boolean concurrent) {
public void deleteDomainHashpart(final String hosthash, Date freshdate) {
// first collect all url hashes that belong to the domain
assert hosthash.length() == 6;
final String collection1Query = CollectionSchema.host_id_s.getSolrFieldName() + ":\"" + hosthash + "\"" +
@ -559,18 +554,14 @@ public final class Fulltext {
(" AND " + WebgraphSchema.load_date_dt.getSolrFieldName() + ":[* TO " + ISO8601Formatter.FORMATTER.format(freshdate) + "]") :
""
);
Thread t = new Thread() {
public void run() {
// delete in solr
synchronized (Fulltext.this.solrInstances) {
try {Fulltext.this.getDefaultConnector().deleteByQuery(collection1Query);} catch (IOException e) {}
try {Fulltext.this.getWebgraphConnector().deleteByQuery(webgraphQuery);} catch (IOException e) {}
}
// delete in old metadata structure
if (Fulltext.this.urlIndexFile != null) {
final ArrayList<String> l = new ArrayList<String>();
synchronized (this) {
CloneableIterator<byte[]> i;
try {
i = Fulltext.this.urlIndexFile.keys(true, null);
@ -584,7 +575,6 @@ public final class Fulltext {
for (final String h: l) Fulltext.this.urlIndexFile.delete(ASCII.getBytes(h));
} catch (IOException e) {}
}
}
// finally remove the line with statistics
if (Fulltext.this.statsDump != null) {
@ -599,14 +589,8 @@ public final class Fulltext {
}
}
}
};
if (concurrent) t.start(); else {
t.run();
Fulltext.this.commit(true);
}
}
public void deleteDomainHostname(final String hostname, Date freshdate, boolean concurrent) {
public void deleteDomainHostname(final String hostname, Date freshdate) {
// first collect all url hashes that belong to the domain
final String collectionQuery =
CollectionSchema.host_s.getSolrFieldName() + ":\"" + hostname + "\"" +
@ -620,13 +604,11 @@ public final class Fulltext {
(" AND " + WebgraphSchema.load_date_dt.getSolrFieldName() + ":[* TO " + ISO8601Formatter.FORMATTER.format(freshdate) + "]") :
""
);
Thread t = new Thread() {
public void run() {
// delete in solr
synchronized (Fulltext.this.solrInstances) {
try {Fulltext.this.getDefaultConnector().deleteByQuery(collectionQuery);} catch (IOException e) {}
try {Fulltext.this.getWebgraphConnector().deleteByQuery(webgraphQuery);} catch (IOException e) {}
}
// finally remove the line with statistics
if (Fulltext.this.statsDump != null) {
final Iterator<HostStat> hsi = Fulltext.this.statsDump.iterator();
@ -640,12 +622,6 @@ public final class Fulltext {
}
}
}
};
if (concurrent) t.start(); else {
t.run();
Fulltext.this.commit(true);
}
}
/**
* remove a full subpath from the index
@ -653,15 +629,13 @@ public final class Fulltext {
* @param freshdate either NULL or a date in the past which is the limit for deletion. Only documents older than this date are deleted
* @param concurrently if true, then the method returns immediately and runs concurrently
*/
public int remove(final String basepath, Date freshdate, final boolean concurrently) {
public int remove(final String basepath, Date freshdate) {
DigestURI uri;
try {uri = new DigestURI(basepath);} catch (MalformedURLException e) {return 0;}
final String host = uri.getHost();
final String collectionQuery = CollectionSchema.host_s.getSolrFieldName() + ":\"" + host + "\"" +
((freshdate != null && freshdate.before(new Date())) ? (" AND " + CollectionSchema.load_date_dt.getSolrFieldName() + ":[* TO " + ISO8601Formatter.FORMATTER.format(freshdate) + "]") : "");
final AtomicInteger count = new AtomicInteger(0);
Thread t = new Thread(){
public void run() {
final BlockingQueue<SolrDocument> docs = Fulltext.this.getDefaultConnector().concurrentDocumentsByQuery(collectionQuery, 0, 1000000, 600000, -1, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName());
try {
SolrDocument doc;
@ -674,9 +648,6 @@ public final class Fulltext {
}
if (count.get() > 0) Fulltext.this.commit(true);
} catch (InterruptedException e) {}
}
};
if (concurrently) t.start(); else t.run();
return count.get();
}
@ -688,11 +659,8 @@ public final class Fulltext {
public void remove(final Collection<String> deleteIDs) {
if (deleteIDs == null || deleteIDs.size() == 0) return;
try {
synchronized (Fulltext.this.solrInstances) {
this.getDefaultConnector().deleteByIds(deleteIDs);
this.getWebgraphConnector().deleteByIds(deleteIDs);
this.commit(true);
}
} catch (final Throwable e) {
Log.logException(e);
}
@ -708,10 +676,8 @@ public final class Fulltext {
if (urlHash == null) return false;
try {
String id = ASCII.String(urlHash);
synchronized (this.solrInstances) {
this.getDefaultConnector().deleteById(id);
this.getWebgraphConnector().deleteById(id);
}
} catch (final Throwable e) {
Log.logException(e);
}

Loading…
Cancel
Save