Fixes deletion of index documents by ID during recrawl

pull/389/head
sgaebel 4 years ago
parent 64a17faca0
commit 9bc2297161

@@ -26,10 +26,8 @@ package net.yacy.crawler;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.time.LocalDateTime; import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.HashSet; import java.util.HashSet;
import java.util.List;
import java.util.Set; import java.util.Set;
import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocument;
@@ -300,7 +298,7 @@ public class RecrawlBusyThread extends AbstractBusyThread {
try { try {
// query all or only httpstatus=200 depending on includefailed flag // query all or only httpstatus=200 depending on includefailed flag
docList = solrConnector.getDocumentListByQuery(RecrawlBusyThread.buildSelectionQuery(this.currentQuery, this.includefailed), docList = solrConnector.getDocumentListByQuery(RecrawlBusyThread.buildSelectionQuery(this.currentQuery, this.includefailed),
this.solrSortBy, this.chunkstart, this.chunksize, CollectionSchema.sku.getSolrFieldName()); this.solrSortBy, this.chunkstart, this.chunksize, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName());
this.urlsToRecrawl = docList.getNumFound(); this.urlsToRecrawl = docList.getNumFound();
} catch (final Throwable e) { } catch (final Throwable e) {
this.urlsToRecrawl = 0; this.urlsToRecrawl = 0;
@@ -308,7 +306,7 @@ public class RecrawlBusyThread extends AbstractBusyThread {
} }
if (docList != null) { if (docList != null) {
List<String> tobedeletedIDs = new ArrayList<>(); Set<String> tobedeletedIDs = new HashSet<>();
for (final SolrDocument doc : docList) { for (final SolrDocument doc : docList) {
try { try {
this.urlstack.add(new DigestURL((String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName()))); this.urlstack.add(new DigestURL((String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName())));
@@ -324,6 +322,7 @@ public class RecrawlBusyThread extends AbstractBusyThread {
if (!tobedeletedIDs.isEmpty()) try { if (!tobedeletedIDs.isEmpty()) try {
solrConnector.deleteByIds(tobedeletedIDs); solrConnector.deleteByIds(tobedeletedIDs);
solrConnector.commit(false);
} catch (IOException e) { } catch (IOException e) {
ConcurrentLog.severe(THREAD_NAME, "error deleting IDs ", e); ConcurrentLog.severe(THREAD_NAME, "error deleting IDs ", e);
} }

Loading…
Cancel
Save