- added XML output in IndexControlURLs to get the storage path of index

dump commands
- adjusted the apicall.sh script to write the downloaded text to stdout,
which is necessary to parse the content out of it
- added the indexdump.sh script, which creates a Solr dump and prints out
the storage path of the index dump
- added synchronization to the Fulltext class to prevent data from being
stored to a non-existing Solr index while this index is disabled during
the storage of the dump
pull/1/head
Michael Peter Christen 12 years ago
parent 1b474139dd
commit 15ea053c3a

@ -4,9 +4,9 @@ port=$(grep ^port= ../DATA/SETTINGS/yacy.conf |cut -d= -f2)
pw=$(grep ^adminAccountBase64MD5= ../DATA/SETTINGS/yacy.conf |cut -d= -f2)
if which curl &>/dev/null; then
curl -s --header "Authorization: realm=$pw" "http://127.0.0.1:$port/$1" > /dev/null
curl -s --header "Authorization: realm=$pw" "http://127.0.0.1:$port/$1"
elif which wget &>/dev/null; then
wget -q -t 1 --timeout=5 --header "Authorization: realm=$pw" "http://127.0.0.1:$port/$1" -O /dev/null
wget -q -t 1 --timeout=120 --header "Authorization: realm=$pw" "http://127.0.0.1:$port/$1" -O -
else
exit 1
fi

@ -0,0 +1,3 @@
#!/bin/bash
#
# Request an index dump through the IndexControlURLs_p.xml API and print the
# dump file path found in the XML response to stdout.
#
# Usage: indexdump.sh
#
# NOTE(review): the request is sent through ./apicall0.sh, resolved relative
# to this script's directory -- confirm that a helper with exactly this name
# exists there.

#######################################
# Extract the dump file path from an XML response.
# Reads XML from stdin. For every line containing a <dumpfile> tag, the
# opening and closing tags are removed and the first whitespace-separated
# field of the remainder is printed.
# Arguments: none
# Outputs:   one line on stdout per matching input line
#######################################
extract_dumpfile() {
  # A regex literal is used instead of a string: "<\/dumpfile>" inside an awk
  # string is an undefined escape sequence and makes gawk print a warning.
  awk '/<dumpfile>/ { gsub(/<\/?dumpfile>/, ""); print $1 }'
}

# Work from the script's own directory so the relative helper path resolves.
# $0 is quoted so that installation paths containing whitespace still work.
cd -- "$(dirname -- "$0")" || exit 1

./apicall0.sh "/IndexControlURLs_p.xml?indexdump=" | extract_dumpfile

@ -0,0 +1,25 @@
<?xml version="1.0"?>
<data>
#(statisticslines)#::
<domains>
#{domains}#
<domain>
<host>#[domain]#</host>
<count>#[count]#</count>
</domain>
#{/domains}#
</domains>
#(/statisticslines)#
#(indexdump)#::
<dumpfile>#[dumpfile]#</dumpfile>::
#(/indexdump)#
#(urlhashsimilar)#::
<urls>
#{rows}#
#{cols}#
<urlhash>#[urlHash]#</urlhash>
#{/cols}#
#{/rows}#
</urls>
#(/urlhashsimilar)#
</data>

@ -293,10 +293,12 @@ public final class Fulltext implements Iterable<byte[]> {
if (this.connectedSolr()) {
try {
if (this.urlIndexFile != null) this.urlIndexFile.remove(idb);
SolrDocument sd = this.solr.get(id);
if (sd == null || this.solrScheme.getDate(sd, YaCySchema.last_modified).before(this.solrScheme.getDate(doc, YaCySchema.last_modified))) {
this.solr.add(doc);
}
synchronized (this.solr) {
SolrDocument sd = this.solr.get(id);
if (sd == null || this.solrScheme.getDate(sd, YaCySchema.last_modified).before(this.solrScheme.getDate(doc, YaCySchema.last_modified))) {
this.solr.add(doc);
}
}
} catch (SolrException e) {
throw new IOException(e.getMessage(), e);
}
@ -334,10 +336,12 @@ public final class Fulltext implements Iterable<byte[]> {
if (this.connectedSolr()) {
try {
if (this.urlIndexFile != null) this.urlIndexFile.remove(idb);
SolrDocument sd = this.solr.get(id);
if (sd == null || (new URIMetadataNode(sd)).isOlder(row)) {
this.solr.add(getSolrScheme().metadata2solr(row));
}
synchronized (this.solr) {
SolrDocument sd = this.solr.get(id);
if (sd == null || (new URIMetadataNode(sd)).isOlder(row)) {
this.solr.add(getSolrScheme().metadata2solr(row));
}
}
} catch (SolrException e) {
throw new IOException(e.getMessage(), e);
}
@ -365,7 +369,9 @@ public final class Fulltext implements Iterable<byte[]> {
public boolean remove(final byte[] urlHash) {
if (urlHash == null) return false;
try {
this.solr.delete(ASCII.String(urlHash));
synchronized (this.solr) {
this.solr.delete(ASCII.String(urlHash));
}
} catch (final Throwable e) {
Log.logException(e);
}
@ -470,17 +476,19 @@ public final class Fulltext implements Iterable<byte[]> {
EmbeddedSolrConnector esc = (EmbeddedSolrConnector) this.solr.getSolr0();
int commitWithin = esc.getCommitWithinMs();
File storagePath = esc.getStoragePath();
this.disconnectLocalSolr();
File zipOut = new File(storagePath.toString() + "_" + GenericFormatter.SHORT_DAY_FORMATTER.format() + ".zip");
try {
ZIPWriter.zip(storagePath, zipOut);
} catch (IOException e) {
Log.logException(e);
} finally {
synchronized (this.solr) {
this.disconnectLocalSolr();
try {
this.connectLocalSolr(commitWithin);
ZIPWriter.zip(storagePath, zipOut);
} catch (IOException e) {
Log.logException(e);
} finally {
try {
this.connectLocalSolr(commitWithin);
} catch (IOException e) {
Log.logException(e);
}
}
}
return zipOut;
@ -494,16 +502,18 @@ public final class Fulltext implements Iterable<byte[]> {
EmbeddedSolrConnector esc = (EmbeddedSolrConnector) this.solr.getSolr0();
int commitWithin = esc.getCommitWithinMs();
File storagePath = esc.getStoragePath();
this.disconnectLocalSolr();
try {
ZIPReader.unzip(solrDumpZipFile, storagePath);
} catch (IOException e) {
Log.logException(e);
} finally {
synchronized (this.solr) {
this.disconnectLocalSolr();
try {
this.connectLocalSolr(commitWithin);
ZIPReader.unzip(solrDumpZipFile, storagePath);
} catch (IOException e) {
Log.logException(e);
} finally {
try {
this.connectLocalSolr(commitWithin);
} catch (IOException e) {
Log.logException(e);
}
}
}
}
@ -783,7 +793,9 @@ public final class Fulltext implements Iterable<byte[]> {
// first collect all url hashes that belong to the domain
assert hosthash.length() == 6;
// delete in solr
this.solr.deleteByQuery(YaCySchema.host_id_s.name() + ":\"" + hosthash + "\"");
synchronized (this.solr) {
this.solr.deleteByQuery(YaCySchema.host_id_s.name() + ":\"" + hosthash + "\"");
}
// delete in old metadata structure
final ArrayList<String> l = new ArrayList<String>();

Loading…
Cancel
Save