diff --git a/source/net/yacy/kelondro/data/meta/MetadataVocabulary.java b/source/net/yacy/kelondro/data/meta/MetadataVocabulary.java
new file mode 100644
index 000000000..bfdc8bac4
--- /dev/null
+++ b/source/net/yacy/kelondro/data/meta/MetadataVocabulary.java
@@ -0,0 +1,62 @@
+/**
+ * MetadataVocabulary
+ * Copyright 2012 by Michael Peter Christen
+ * First released 12.4.2012 at http://yacy.net
+ *
+ * This file is part of YaCy Content Integration
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file lgpl21.txt
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.kelondro.data.meta;
+
+import java.util.Set;
+
+import net.yacy.cora.lod.Literal;
+import net.yacy.cora.lod.Vocabulary;
+
+public enum MetadataVocabulary implements Vocabulary {
+    // vocabulary for the attributes of stored URI records; every constant is one predicate
+    moddate, url;
+
+    public final static String IDENTIFIER = "http://yacy.net/metadata"; // value of getIdentifier()
+    public final static String PREFIX = "ym"; // value of getPrefix() and leading part of each predicate
+
+    private final String predicate; // PREFIX + ":" + lower-cased constant name, e.g. "ym:url"
+    /** Computes the predicate once; Locale.ROOT keeps the lower-casing independent of the JVM default locale. */
+    private MetadataVocabulary() {
+        this.predicate = PREFIX + ":" + this.name().toLowerCase(java.util.Locale.ROOT);
+    }
+    /** @return the identifier of this vocabulary, {@link #IDENTIFIER} */
+    @Override
+    public String getIdentifier() {
+        return IDENTIFIER;
+    }
+    /** @return the prefix of this vocabulary, {@link #PREFIX} */
+    @Override
+    public String getPrefix() {
+        return PREFIX;
+    }
+    /** @return the predicate of this constant in the form prefix:name */
+    @Override
+    public String getPredicate() {
+        return this.predicate;
+    }
+    /** @return always null: no literals are defined for this vocabulary */
+    @Override
+    public Set<Literal> getLiterals() {
+        return null;
+    }
+}
diff --git a/source/net/yacy/kelondro/data/meta/URIMetadata.java b/source/net/yacy/kelondro/data/meta/URIMetadata.java
index fe1b41cfa..27a0a446c 100644
--- a/source/net/yacy/kelondro/data/meta/URIMetadata.java
+++ b/source/net/yacy/kelondro/data/meta/URIMetadata.java
@@ -1,55 +1,53 @@
-// URIMetadata.java
-// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
-// first published 03.04.2009 on http://yacy.net
-//
-// This is a part of YaCy, a peer-to-peer based web search engine
-//
-// $LastChangedDate$
-// $LastChangedRevision$
-// $LastChangedBy$
-//
-// LICENSE
-//
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+/**
+ * URIMetadata
+ * Copyright 2012 by Michael Peter Christen
+ * First released 3.4.2012 at http://yacy.net
+ *
+ * This file is part of YaCy Content Integration
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file lgpl21.txt
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
package net.yacy.kelondro.data.meta;
import java.util.Date;
-import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.order.Bitfield;
import net.yacy.kelondro.rwi.Reference;
-public interface URIMetadata {
+public interface URIMetadata extends URIReference {
+ public String dc_title();
- public Row.Entry toRowEntry();
-
- public byte[] hash();
-
- public long ranking();
+ public String dc_creator();
+
+ public String dc_publisher();
+
+ public String dc_subject();
+
+ public float lat();
- public Date moddate();
+ public float lon();
+
+ public long ranking();
public Date loaddate();
public Date freshdate();
- public byte[] referrerHash();
-
public String md5();
public char doctype();
@@ -79,10 +77,7 @@ public interface URIMetadata {
public Reference word();
public boolean isOlder(final URIMetadata other);
-
- public String toString(final String snippet);
- @Override
- public String toString();
+ public String toString(final String snippet);
}
diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java
index dfd2d66f1..e110337cb 100644
--- a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java
+++ b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java
@@ -1,3 +1,25 @@
+/**
+ * URIMetadataNode
+ * Copyright 2012 by Michael Peter Christen
+ * First released 3.4.2012 at http://yacy.net
+ *
+ * This file is part of YaCy Content Integration
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file lgpl21.txt
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
package net.yacy.kelondro.data.meta;
import net.yacy.cora.lod.Node;
@@ -5,7 +27,7 @@ import net.yacy.cora.lod.vocabulary.Rdf;
import net.yacy.kelondro.data.word.WordReferenceVars;
-public class URIMetadataNode /*implements URIMetadata*/ {
+public class URIMetadataNode /*extends URIReferenceNode implements URIMetadata*/ {
private final Node entry;
private final String snippet;
diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java
index dbef4fe98..3ab29c0d3 100644
--- a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java
+++ b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java
@@ -30,6 +30,7 @@ import java.net.MalformedURLException;
import java.text.ParseException;
import java.util.Date;
import java.util.List;
+import java.util.Map;
import java.util.Properties;
import java.util.regex.Pattern;
@@ -166,6 +167,12 @@ public class URIMetadataRow implements URIMetadata {
this.comp = null;
}
+ @Override
+ public Map toMap() {
+ // TODO to be implemented: no map is built from the row yet, so callers receive null
+ return null;
+ }
+
private void encodeDate(final int col, final Date d) {
// calculates the number of days since 1.1.1970 and returns this as 4-byte array
// 86400000 is the number of milliseconds in one day
@@ -642,4 +649,5 @@ public class URIMetadataRow implements URIMetadata {
return p < 0 ? 0.0f : Float.parseFloat(this.latlon.substring(p + 1));
}
}
+
}
diff --git a/source/net/yacy/kelondro/data/meta/URIReference.java b/source/net/yacy/kelondro/data/meta/URIReference.java
new file mode 100644
index 000000000..d2ba476a3
--- /dev/null
+++ b/source/net/yacy/kelondro/data/meta/URIReference.java
@@ -0,0 +1,71 @@
+/**
+ * URIReference
+ * Copyright 2012 by Michael Peter Christen
+ * First released 3.4.2012 at http://yacy.net
+ *
+ * This file is part of YaCy Content Integration
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file lgpl21.txt
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.kelondro.data.meta;
+
+import java.util.Date;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+public interface URIReference {
+
+    /**
+     * The hash of a URIReference is a unique key for the stored URL.
+     * It is in fact equal to url().hash()
+     * @return the hash of the stored url
+     */
+    public byte[] hash();
+
+    /**
+     * The modification date of the URIReference. When the record
+     * is first created this is the creation date; if the record
+     * is modified later, the date shall change.
+     * @return the modification date of this record
+     */
+    public Date moddate();
+
+    /**
+     * The DigestURI is the payload of the URIReference
+     * @return the url as DigestURI with assigned URL hash according to the record hash
+     */
+    public DigestURI url();
+
+    /**
+     * check if the url matches against a given pattern
+     * @param matcher the pattern to test the url against
+     * @return true if the url() matches
+     */
+    public boolean matches(final Pattern matcher);
+
+    /**
+     * transform the record into a map which can be stored
+     * @return the content of this record as a storable map
+     */
+    public Map<String, byte[]> toMap();
+
+    /**
+     * produce a visible representation of the record
+     * @return a string for the url()
+     */
+    @Override
+    public String toString();
+}
diff --git a/source/net/yacy/kelondro/data/meta/URIReferenceNode.java b/source/net/yacy/kelondro/data/meta/URIReferenceNode.java
new file mode 100644
index 000000000..bacdfe998
--- /dev/null
+++ b/source/net/yacy/kelondro/data/meta/URIReferenceNode.java
@@ -0,0 +1,84 @@
+/**
+ * URIReferenceNode
+ * Copyright 2012 by Michael Peter Christen
+ * First released 5.4.2012 at http://yacy.net
+ *
+ * This file is part of YaCy Content Integration
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file lgpl21.txt
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.kelondro.data.meta;
+
+import java.net.MalformedURLException;
+import java.text.ParseException;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import net.yacy.cora.date.ISO8601Formatter;
+import net.yacy.cora.document.ASCII;
+
+public class URIReferenceNode extends HashMap<String, byte[]> implements URIReference {
+    // the map content is the record: ASCII bytes of the url and of the formatted date, keyed by MetadataVocabulary names
+    private static final long serialVersionUID = -1580155759116466570L;
+
+    private final byte[] hash; // url hash taken from the DigestURI at construction; it is not put into the map
+    /** Creates the record for a url: remembers uri.hash() and stores the normalized url and the formatted date. */
+    public URIReferenceNode(DigestURI uri, Date date) {
+        this.hash = uri.hash();
+        this.put(MetadataVocabulary.url.name(), ASCII.getBytes(uri.toNormalform(true, false)));
+        this.put(MetadataVocabulary.moddate.name(), ASCII.getBytes(ISO8601Formatter.FORMATTER.format(date)));
+    }
+    /** @return the url hash that was handed over with the DigestURI at construction time */
+    @Override
+    public byte[] hash() {
+        return this.hash;
+    }
+    /** @return the parsed moddate entry, or null if the entry is missing or cannot be parsed */
+    @Override
+    public Date moddate() {
+        byte[] x = this.get(MetadataVocabulary.moddate.name());
+        try {
+            return x == null ? null : ISO8601Formatter.FORMATTER.parse(ASCII.String(x));
+        } catch (ParseException e) {
+            return null; // an unparseable date is reported like a missing one
+        }
+    }
+    /** @return the url entry as DigestURI carrying this record's hash, or null if the entry is missing or malformed */
+    @Override
+    public DigestURI url() {
+        byte[] x = this.get(MetadataVocabulary.url.name()); // must read the url entry; the moddate entry holds a date string
+        try {
+            return x == null ? null : new DigestURI(ASCII.String(x), this.hash);
+        } catch (MalformedURLException e) {
+            return null; // a malformed url is reported like a missing one
+        }
+    }
+    /** @return true only if a url entry exists and the given pattern matches the complete url string */
+    @Override
+    public boolean matches(Pattern matcher) {
+        byte[] x = this.get(MetadataVocabulary.url.name()); // the pattern is applied to the url, not to the date
+        if (x == null) return false;
+        return matcher.matcher(ASCII.String(x)).matches();
+    }
+    /** @return this instance itself, which is the map form of the record (no copy is made) */
+    @Override
+    public Map<String, byte[]> toMap() {
+        return this;
+    }
+
+}
diff --git a/source/net/yacy/search/index/DocumentMetadata.java b/source/net/yacy/search/index/DocumentMetadata.java
new file mode 100644
index 000000000..b3cce470b
--- /dev/null
+++ b/source/net/yacy/search/index/DocumentMetadata.java
@@ -0,0 +1,119 @@
+/**
+ * DocumentMetadata
+ * Copyright 2012 by Michael Peter Christen
+ * First released 3.4.2012 at http://yacy.net
+ *
+ * This file is part of YaCy Content Integration
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file lgpl21.txt
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.search.index;
+
+import java.io.IOException;
+
+import net.yacy.cora.order.CloneableIterator;
+import net.yacy.cora.sorting.WeakPriorityBlockingQueue.Element;
+import net.yacy.kelondro.data.meta.URIMetadata;
+import net.yacy.kelondro.data.word.WordReference;
+
+public class DocumentMetadata implements Metadata {
+    // skeleton implementation: every method below is still a stub that does nothing or returns a fixed default
+
+    @Override
+    public void clearCache() {
+        // not implemented yet:
+        // calling this method has no effect
+    }
+
+    @Override
+    public void clear() throws IOException {
+        // not implemented yet:
+        // calling this method has no effect
+    }
+
+    @Override
+    public int size() {
+        // not implemented yet: the size is always reported as 0
+        return 0;
+    }
+
+    @Override
+    public void close() {
+        // not implemented yet:
+        // calling this method has no effect
+    }
+
+    @Override
+    public int writeCacheSize() {
+        // not implemented yet: the cache size is always reported as 0
+        return 0;
+    }
+
+    @Override
+    public URIMetadata load(final Element obrwi) {
+        // not implemented yet: nothing is loaded, the result is always null
+        return null;
+    }
+
+    @Override
+    public URIMetadata load(final byte[] urlHash) {
+        // not implemented yet: nothing is loaded, the result is always null
+        return null;
+    }
+
+    @Override
+    public void store(final URIMetadata entry) throws IOException {
+        // not implemented yet:
+        // the entry is ignored
+    }
+
+    @Override
+    public boolean remove(final byte[] urlHashBytes) {
+        // not implemented yet: nothing is removed, the result is always false
+        return false;
+    }
+
+    @Override
+    public boolean exists(final byte[] urlHash) {
+        // not implemented yet: the result is always false
+        return false;
+    }
+
+    @Override
+    public CloneableIterator keys(final boolean up, final byte[] firstKey) {
+        // not implemented yet: no iterator is created, the result is always null
+        return null;
+    }
+
+    @Override
+    public CloneableIterator entries() throws IOException {
+        // not implemented yet: no iterator is created, the result is always null
+        return null;
+    }
+
+    @Override
+    public CloneableIterator entries(final boolean up, final String firstHash) throws IOException {
+        // not implemented yet: no iterator is created, the result is always null
+        return null;
+    }
+
+    @Override
+    public int deleteDomain(final String hosthash) throws IOException {
+        // not implemented yet: nothing is deleted, the result is always 0
+        return 0;
+    }
+
+}
diff --git a/source/net/yacy/search/index/DocumentReference.java b/source/net/yacy/search/index/DocumentReference.java
new file mode 100644
index 000000000..51906c65e
--- /dev/null
+++ b/source/net/yacy/search/index/DocumentReference.java
@@ -0,0 +1,94 @@
+/**
+ * DocumentReference
+ * Copyright 2012 by Michael Peter Christen
+ * First released 3.4.2012 at http://yacy.net
+ *
+ * This file is part of YaCy Content Integration
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file lgpl21.txt
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.search.index;
+
+import java.io.IOException;
+
+import net.yacy.cora.order.CloneableIterator;
+import net.yacy.cora.sorting.WeakPriorityBlockingQueue;
+import net.yacy.cora.storage.MapStore;
+import net.yacy.kelondro.data.meta.URIReference;
+import net.yacy.kelondro.data.word.WordReference;
+
+public class DocumentReference {
+
+    public MapStore data; // backing store; not assigned inside this class and reset to null by close()
+
+    /** Clears the backing store; does nothing while no store is attached (data == null). */
+    public void clear() throws IOException {
+        if (data != null) data.clear();
+    }
+    /** @return the size reported by the backing store, or 0 while no store is attached */
+    public int size() {
+        return data == null ? 0 : data.size();
+    }
+    /** Closes the backing store if one is attached and detaches it; may be called repeatedly. */
+    public void close() {
+        if (data != null) data.close();
+        data = null;
+    }
+    /** Puts entry.toMap() under entry.hash(); throws IOException instead of failing with a NullPointerException when no store is attached. */
+    public void store(final URIReference entry) throws IOException {
+        if (data == null) throw new IOException("document reference store is not open");
+        data.put(entry.hash(), entry.toMap());
+    }
+    /** Stub: loading by this kind of element is not implemented yet, the result is always null. */
+    public URIReference load(final WeakPriorityBlockingQueue.Element obrwi) {
+        return null;
+    }
+    /** Stub: loading by url hash is not implemented yet, the result is always null. */
+    public URIReference load(final byte[] urlHash){
+        return null;
+    }
+    /** Stub: nothing is removed yet, the result is always false. */
+    public boolean remove(final byte[] urlHashBytes) {
+        return false;
+    }
+    /** Stub: the existence check is not implemented yet, the result is always false. */
+    public boolean exists(final byte[] urlHash) {
+        return false;
+    }
+    /** Stub: no key iterator is created yet, the result is always null. */
+    public CloneableIterator keys(final boolean up, final byte[] firstKey) {
+        return null;
+    }
+    /** Stub: no entry iterator is created yet, the result is always null. */
+    public CloneableIterator entries() throws IOException {
+        return null;
+    }
+    /** Stub: no entry iterator is created yet, the result is always null. */
+    public CloneableIterator entries(final boolean up, final String firstHash) throws IOException {
+        return null;
+    }
+
+    /**
+     * using a fragment of the url hash (5 bytes: bytes 6 to 10) it is possible to address all urls from a specific domain
+     * here such a fragment can be used to delete all these domains at once
+     * @param hosthash the url hash fragment that addresses the host
+     * @return number of deleted domains; this stub deletes nothing and always returns -1
+     * @throws IOException
+     */
+    public int deleteDomain(final String hosthash) throws IOException {
+        return -1;
+    }
+}
diff --git a/source/net/yacy/search/index/Metadata.java b/source/net/yacy/search/index/Metadata.java
new file mode 100644
index 000000000..96263296f
--- /dev/null
+++ b/source/net/yacy/search/index/Metadata.java
@@ -0,0 +1,68 @@
+/**
+ * Metadata
+ * Copyright 2012 by Michael Peter Christen
+ * First released 3.4.2012 at http://yacy.net
+ *
+ * This file is part of YaCy Content Integration
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file lgpl21.txt
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.search.index;
+
+import java.io.IOException;
+
+import net.yacy.cora.order.CloneableIterator;
+import net.yacy.cora.sorting.WeakPriorityBlockingQueue;
+import net.yacy.kelondro.data.meta.URIMetadata;
+import net.yacy.kelondro.data.word.WordReference;
+
+public interface Metadata {
+    // operations of a metadata store; DocumentMetadata is declared as an implementation of this interface
+    void clearCache();
+
+    void clear() throws IOException;
+
+    int size();
+
+    void close();
+
+    int writeCacheSize();
+
+    URIMetadata load(WeakPriorityBlockingQueue.Element obrwi);
+
+    URIMetadata load(byte[] urlHash);
+
+    void store(URIMetadata entry) throws IOException;
+
+    boolean remove(byte[] urlHashBytes);
+
+    boolean exists(byte[] urlHash);
+
+    CloneableIterator keys(boolean up, byte[] firstKey);
+
+    CloneableIterator entries() throws IOException;
+
+    CloneableIterator entries(boolean up, String firstHash) throws IOException;
+
+    /**
+     * Deletion by host: a fragment of the url hash (5 bytes: bytes 6 to 10) addresses
+     * all urls from a specific domain, so such a fragment can be used to delete them at once.
+     * @param hosthash the url hash fragment that addresses the host
+     * @return number of deleted domains
+     * @throws IOException if the implementation fails with an I/O error
+     */
+    int deleteDomain(String hosthash) throws IOException;
+}
diff --git a/source/net/yacy/search/index/MetadataMigration.java b/source/net/yacy/search/index/MetadataMigration.java
new file mode 100644
index 000000000..21911762f
--- /dev/null
+++ b/source/net/yacy/search/index/MetadataMigration.java
@@ -0,0 +1,34 @@
+/**
+ * MetadataMigration
+ * Copyright 2012 by Michael Peter Christen
+ * First released 3.4.2012 at http://yacy.net
+ *
+ * This file is part of YaCy Content Integration
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file lgpl21.txt
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.search.index;
+
+/**
+ * Migration helper intended to combine the old MetadataRepository
+ * with the new DocumentMetadata; both are only declared here so far.
+ */
+public class MetadataMigration /*implements Metadata*/ {
+
+    private DocumentMetadata documentMetadata; // new store; not yet assigned or used in this class
+    private MetadataRepository metadataRepository; // old store; not yet assigned or used in this class
+
+}
diff --git a/source/net/yacy/search/index/MetadataRepository.java b/source/net/yacy/search/index/MetadataRepository.java
index 5f953dde4..6bf2f8225 100644
--- a/source/net/yacy/search/index/MetadataRepository.java
+++ b/source/net/yacy/search/index/MetadataRepository.java
@@ -62,7 +62,7 @@ import net.yacy.kelondro.util.MemoryControl;
import net.yacy.repository.Blacklist;
import de.anomic.crawler.CrawlStacker;
-public final class MetadataRepository implements Iterable {
+public final class MetadataRepository implements /*Metadata,*/ Iterable {
// class objects
protected Index urlIndexFile;