fixes for putDocument and putMetadata

pull/1/head
orbiter 13 years ago
parent cc47a0876e
commit ee01c12e56

@ -279,7 +279,7 @@ public class URIMetadataRow implements URIMetadata {
this.comp = null;
}
public static URIMetadata importEntry(final String propStr) {
public static URIMetadataRow importEntry(final String propStr) {
if (propStr == null || (!propStr.isEmpty() && propStr.charAt(0) != '{') || !propStr.endsWith("}")) {
return null;
}

@ -259,41 +259,81 @@ public final class Fulltext implements Iterable<byte[]> {
}
/**
 * Stores a Solr input document in whichever index backend is available.
 * <p>
 * If Solr is connected, an entry with the same id is first removed from the
 * local url index file (when one exists); the document is then added to Solr
 * only if Solr holds no document with that id, or the stored document's
 * {@code last_modified} date is before the new one.
 * <p>
 * If Solr is not connected but a url index file exists, the document is
 * converted into a {@link URIMetadataRow} and written to that file, provided
 * there is no existing entry or {@code oldEntry.isOlder(entry)} holds.
 * <p>
 * In every case the cached {@code statsDump} is reset afterwards and the cache
 * is cleared when {@code MemoryControl.shortStatus()} reports true.
 *
 * @param doc the document to store; its {@code id} field is used as the key
 * @throws IOException if Solr reports an error, the document cannot be
 *         converted into a metadata row, or the url index file runs out of space
 */
public void putDocument(final SolrInputDocument doc) throws IOException {
    String id = (String) doc.getFieldValue(YaCySchema.id.name());
    byte[] idb = ASCII.getBytes(id);
    if (this.connectedSolr()) {
        try {
            // the entry now lives in Solr; drop a copy in the local index, if any
            if (this.urlIndexFile != null) this.urlIndexFile.remove(idb);
            SolrDocument sd = this.solr.get(id);
            // only write when nothing is stored yet or the stored document is older
            if (sd == null || this.solrScheme.getDate(sd, YaCySchema.last_modified).before(this.solrScheme.getDate(doc, YaCySchema.last_modified))) {
                this.solr.add(doc);
            }
        } catch (SolrException e) {
            throw new IOException(e.getMessage(), e);
        }
    } else if (this.urlIndexFile != null) {
        URIMetadata oldEntry = null;
        try {
            final Row.Entry oe = this.urlIndexFile.get(idb, false);
            oldEntry = (oe == null) ? null : new URIMetadataRow(oe, null, 0);
        } catch (final Throwable e) {
            // a failed lookup is treated as "no previous entry"
            Log.logException(e);
            oldEntry = null;
        }
        URIMetadata entry = new URIMetadataNode(ClientUtils.toSolrDocument(doc));
        if (oldEntry == null || oldEntry.isOlder(entry)) {
            try {
                URIMetadataRow row = URIMetadataRow.importEntry(entry.toString());
                // importEntry returns null when the string is not a "{...}" property list
                if (row == null) {
                    throw new IOException("cannot convert document " + id + " into a metadata row");
                }
                this.urlIndexFile.put(row.toRowEntry());
            } catch (final SpaceExceededException e) {
                throw new IOException("RowSpaceExceededException in " + this.urlIndexFile.filename() + ": " + e.getMessage(), e);
            }
        }
    }
    this.statsDump = null;
    if (MemoryControl.shortStatus()) clearCache();
}
public void putMetadata(final URIMetadata entry) throws IOException {
if (entry instanceof URIMetadataNode) {
putDocument(ClientUtils.toSolrInputDocument(((URIMetadataNode) entry).getDocument()));
}
assert entry instanceof URIMetadataRow;
URIMetadataRow row = (URIMetadataRow) entry;
byte[] idb = row.hash();
String id = ASCII.String(idb);
if (this.connectedSolr()) {
try {
SolrDocument sd = getSolr().get(ASCII.String(entry.url().hash()));
if (sd == null || !entry.isOlder(new URIMetadataNode(sd))) {
getSolr().add(getSolrScheme().metadata2solr(entry));
if (this.urlIndexFile != null) this.urlIndexFile.remove(idb);
SolrDocument sd = this.solr.get(id);
if (sd == null || (new URIMetadataNode(sd)).isOlder(row)) {
this.solr.add(getSolrScheme().metadata2solr(row));
}
} catch (SolrException e) {
throw new IOException(e.getMessage(), e);
}
} else if (this.urlIndexFile != null && entry instanceof URIMetadataRow) {
} else if (this.urlIndexFile != null) {
URIMetadata oldEntry = null;
try {
final Row.Entry oe = this.urlIndexFile.get(entry.hash(), false);
final Row.Entry oe = this.urlIndexFile.get(idb, false);
oldEntry = (oe == null) ? null : new URIMetadataRow(oe, null, 0);
} catch (final Throwable e) {
Log.logException(e);
oldEntry = null;
}
if (oldEntry == null || !entry.isOlder(oldEntry)) {
if (oldEntry == null || oldEntry.isOlder(row)) {
try {
this.urlIndexFile.put(((URIMetadataRow) entry).toRowEntry());
this.urlIndexFile.put(row.toRowEntry());
} catch (final SpaceExceededException e) {
throw new IOException("RowSpaceExceededException in " + this.urlIndexFile.filename() + ": " + e.getMessage());
}
}
}
}
this.statsDump = null;
if (MemoryControl.shortStatus()) clearCache();
}
public boolean remove(final byte[] urlHash) {
if (urlHash == null) return false;
try {

@ -51,10 +51,13 @@ import net.yacy.document.parser.html.ContentScraper;
import net.yacy.document.parser.html.ImageEntry;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadata;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Bitfield;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
import de.anomic.crawler.retrieval.Response;
@ -150,6 +153,16 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
if (isEmpty() || contains(key)) solrdoc.addSolr(key, value);
}
/**
 * Reads a date-valued field from a Solr input document.
 *
 * @param doc the input document to read from
 * @param key the schema field whose {@code name()} is looked up
 * @return the field value cast to {@link Date}, or {@code new Date(0)} when
 *         the document has no value for that field
 */
public Date getDate(SolrInputDocument doc, final YaCySchema key) {
    final Date stored = (Date) doc.getFieldValue(key.name());
    if (stored != null) {
        return stored;
    }
    // missing field: fall back to the epoch
    return new Date(0);
}
/**
 * Reads a date-valued field from a Solr result document.
 *
 * @param doc the result document to read from
 * @param key the schema field whose {@code name()} is looked up
 * @return the field value cast to {@link Date}, or {@code new Date(0)} when
 *         the document has no value for that field
 */
public Date getDate(SolrDocument doc, final YaCySchema key) {
    final Date stored = (Date) doc.getFieldValue(key.name());
    if (stored != null) {
        return stored;
    }
    // missing field: fall back to the epoch
    return new Date(0);
}
/**
* save configuration to file and update enum SolrFields
* @throws IOException
@ -171,7 +184,11 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
} catch (final IOException e) {}
}
public SolrDoc metadata2solr(final URIMetadata md) {
public SolrInputDocument metadata2solr(final URIMetadata md) {
if (md instanceof URIMetadataNode) {
return ClientUtils.toSolrInputDocument(((URIMetadataNode) md).getDocument());
}
final SolrDoc solrdoc = new SolrDoc();
final DigestURI digestURI = new DigestURI(md.url());
boolean allAttr = this.isEmpty();

@ -211,7 +211,7 @@ public enum YaCySchema implements Schema {
/**
 * Returns the comment text held by this schema entry.
 *
 * @return the value of the {@code comment} field
 */
@Override
public final String getComment() {
    final String text = this.comment;
    return text;
}
}
}

Loading…
Cancel
Save