- added xml output in IndexControlURLs to get the storage page of index

dump commands
- adjusted the apicall.sh script to write the downloaded text to
stdout, which is necessary to parse the content out of it
- added indexdump.sh script which creates a solr dump and prints out the
storage path for the index dump
- added synchronization to the Fulltext class to prevent data from being
stored to a non-existing solr index while this index is disabled during
the storage of the dump
pull/1/head
Michael Peter Christen 12 years ago
parent 1b474139dd
commit 15ea053c3a

@ -4,9 +4,9 @@ port=$(grep ^port= ../DATA/SETTINGS/yacy.conf |cut -d= -f2)
pw=$(grep ^adminAccountBase64MD5= ../DATA/SETTINGS/yacy.conf |cut -d= -f2) pw=$(grep ^adminAccountBase64MD5= ../DATA/SETTINGS/yacy.conf |cut -d= -f2)
if which curl &>/dev/null; then if which curl &>/dev/null; then
curl -s --header "Authorization: realm=$pw" "http://127.0.0.1:$port/$1" > /dev/null curl -s --header "Authorization: realm=$pw" "http://127.0.0.1:$port/$1"
elif which wget &>/dev/null; then elif which wget &>/dev/null; then
wget -q -t 1 --timeout=5 --header "Authorization: realm=$pw" "http://127.0.0.1:$port/$1" -O /dev/null wget -q -t 1 --timeout=120 --header "Authorization: realm=$pw" "http://127.0.0.1:$port/$1" -O -
else else
exit 1 exit 1
fi fi

@ -0,0 +1,3 @@
#!/bin/bash
# Requests "/IndexControlURLs_p.xml?indexdump=" through the API helper script
# and prints the content of every <dumpfile> element line found in the
# response, reduced to its first whitespace-delimited field.

# Run from the script's own directory so the relative helper path resolves.
# "$0" is quoted so that install paths containing spaces work, and the script
# stops if the directory change fails instead of calling the helper elsewhere.
cd "$(dirname "$0")" || exit 1

# NOTE(review): the helper is referenced as apicall0.sh - confirm that this is
# the intended script name (the accompanying change notes mention apicall.sh).
# A single awk pass strips the opening and closing tags; modifying $0 re-splits
# the fields, so $1 is the first token of the remaining text.
./apicall0.sh "/IndexControlURLs_p.xml?indexdump=" | awk '/<dumpfile>/ { gsub("<dumpfile>", ""); gsub("</dumpfile>", ""); print $1 }'

@ -0,0 +1,25 @@
<?xml version="1.0"?>
<data>
#(statisticslines)#::
<domains>
#{domains}#
<domain>
<host>#[domain]#</host>
<count>#[count]#</count>
</domain>
#{/domains}#
</domains>
#(/statisticslines)#
#(indexdump)#::
<dumpfile>#[dumpfile]#</dumpfile>::
#(/indexdump)#
#(urlhashsimilar)#::
<urls>
#{rows}#
#{cols}#
<urlhash>#[urlHash]#</urlhash>
#{/cols}#
#{/rows}#
</urls>
#(/urlhashsimilar)#
</data>

@ -293,10 +293,12 @@ public final class Fulltext implements Iterable<byte[]> {
if (this.connectedSolr()) { if (this.connectedSolr()) {
try { try {
if (this.urlIndexFile != null) this.urlIndexFile.remove(idb); if (this.urlIndexFile != null) this.urlIndexFile.remove(idb);
SolrDocument sd = this.solr.get(id); synchronized (this.solr) {
if (sd == null || this.solrScheme.getDate(sd, YaCySchema.last_modified).before(this.solrScheme.getDate(doc, YaCySchema.last_modified))) { SolrDocument sd = this.solr.get(id);
this.solr.add(doc); if (sd == null || this.solrScheme.getDate(sd, YaCySchema.last_modified).before(this.solrScheme.getDate(doc, YaCySchema.last_modified))) {
} this.solr.add(doc);
}
}
} catch (SolrException e) { } catch (SolrException e) {
throw new IOException(e.getMessage(), e); throw new IOException(e.getMessage(), e);
} }
@ -334,10 +336,12 @@ public final class Fulltext implements Iterable<byte[]> {
if (this.connectedSolr()) { if (this.connectedSolr()) {
try { try {
if (this.urlIndexFile != null) this.urlIndexFile.remove(idb); if (this.urlIndexFile != null) this.urlIndexFile.remove(idb);
SolrDocument sd = this.solr.get(id); synchronized (this.solr) {
if (sd == null || (new URIMetadataNode(sd)).isOlder(row)) { SolrDocument sd = this.solr.get(id);
this.solr.add(getSolrScheme().metadata2solr(row)); if (sd == null || (new URIMetadataNode(sd)).isOlder(row)) {
} this.solr.add(getSolrScheme().metadata2solr(row));
}
}
} catch (SolrException e) { } catch (SolrException e) {
throw new IOException(e.getMessage(), e); throw new IOException(e.getMessage(), e);
} }
@ -365,7 +369,9 @@ public final class Fulltext implements Iterable<byte[]> {
public boolean remove(final byte[] urlHash) { public boolean remove(final byte[] urlHash) {
if (urlHash == null) return false; if (urlHash == null) return false;
try { try {
this.solr.delete(ASCII.String(urlHash)); synchronized (this.solr) {
this.solr.delete(ASCII.String(urlHash));
}
} catch (final Throwable e) { } catch (final Throwable e) {
Log.logException(e); Log.logException(e);
} }
@ -470,17 +476,19 @@ public final class Fulltext implements Iterable<byte[]> {
EmbeddedSolrConnector esc = (EmbeddedSolrConnector) this.solr.getSolr0(); EmbeddedSolrConnector esc = (EmbeddedSolrConnector) this.solr.getSolr0();
int commitWithin = esc.getCommitWithinMs(); int commitWithin = esc.getCommitWithinMs();
File storagePath = esc.getStoragePath(); File storagePath = esc.getStoragePath();
this.disconnectLocalSolr();
File zipOut = new File(storagePath.toString() + "_" + GenericFormatter.SHORT_DAY_FORMATTER.format() + ".zip"); File zipOut = new File(storagePath.toString() + "_" + GenericFormatter.SHORT_DAY_FORMATTER.format() + ".zip");
try { synchronized (this.solr) {
ZIPWriter.zip(storagePath, zipOut); this.disconnectLocalSolr();
} catch (IOException e) {
Log.logException(e);
} finally {
try { try {
this.connectLocalSolr(commitWithin); ZIPWriter.zip(storagePath, zipOut);
} catch (IOException e) { } catch (IOException e) {
Log.logException(e); Log.logException(e);
} finally {
try {
this.connectLocalSolr(commitWithin);
} catch (IOException e) {
Log.logException(e);
}
} }
} }
return zipOut; return zipOut;
@ -494,16 +502,18 @@ public final class Fulltext implements Iterable<byte[]> {
EmbeddedSolrConnector esc = (EmbeddedSolrConnector) this.solr.getSolr0(); EmbeddedSolrConnector esc = (EmbeddedSolrConnector) this.solr.getSolr0();
int commitWithin = esc.getCommitWithinMs(); int commitWithin = esc.getCommitWithinMs();
File storagePath = esc.getStoragePath(); File storagePath = esc.getStoragePath();
this.disconnectLocalSolr(); synchronized (this.solr) {
try { this.disconnectLocalSolr();
ZIPReader.unzip(solrDumpZipFile, storagePath);
} catch (IOException e) {
Log.logException(e);
} finally {
try { try {
this.connectLocalSolr(commitWithin); ZIPReader.unzip(solrDumpZipFile, storagePath);
} catch (IOException e) { } catch (IOException e) {
Log.logException(e); Log.logException(e);
} finally {
try {
this.connectLocalSolr(commitWithin);
} catch (IOException e) {
Log.logException(e);
}
} }
} }
} }
@ -783,7 +793,9 @@ public final class Fulltext implements Iterable<byte[]> {
// first collect all url hashes that belong to the domain // first collect all url hashes that belong to the domain
assert hosthash.length() == 6; assert hosthash.length() == 6;
// delete in solr // delete in solr
this.solr.deleteByQuery(YaCySchema.host_id_s.name() + ":\"" + hosthash + "\""); synchronized (this.solr) {
this.solr.deleteByQuery(YaCySchema.host_id_s.name() + ":\"" + hosthash + "\"");
}
// delete in old metadata structure // delete in old metadata structure
final ArrayList<String> l = new ArrayList<String>(); final ArrayList<String> l = new ArrayList<String>();

Loading…
Cancel
Save