refactoring: more Dublin Core-compliant naming

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4354 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent efd0b8371a
commit 85dc62c16f

@ -39,6 +39,7 @@ import de.anomic.data.listManager;
import de.anomic.http.httpHeader;
import de.anomic.index.indexContainer;
import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexRWIRowEntry;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBitfield;
import de.anomic.plasma.plasmaCondenser;
@ -115,11 +116,11 @@ public class IndexControlRWIs_p {
// generate an urlx array
indexContainer index = null;
index = sb.wordIndex.getContainer(keyhash, null);
Iterator en = index.entries();
Iterator<indexRWIRowEntry> en = index.entries();
int i = 0;
urlx = new String[index.size()];
while (en.hasNext()) {
urlx[i++] = ((indexRWIEntry) en.next()).urlHash();
urlx[i++] = en.next().urlHash();
}
index = null;
}
@ -146,7 +147,7 @@ public class IndexControlRWIs_p {
sb.urlRemove(urlx[i]);
}
}
Set urlHashes = new HashSet();
Set<String> urlHashes = new HashSet<String>();
for (int i = 0; i < urlx.length; i++) urlHashes.add(urlx[i]);
sb.wordIndex.removeEntries(keyhash, urlHashes);
// this shall lead to a presentation of the list; so handle that the remaining program
@ -193,13 +194,13 @@ public class IndexControlRWIs_p {
long starttime = System.currentTimeMillis();
index = sb.wordIndex.getContainer(keyhash, null);
// built urlCache
Iterator urlIter = index.entries();
HashMap knownURLs = new HashMap();
HashSet unknownURLEntries = new HashSet();
Iterator<indexRWIRowEntry> urlIter = index.entries();
HashMap<String, indexURLEntry> knownURLs = new HashMap<String, indexURLEntry>();
HashSet<String> unknownURLEntries = new HashSet<String>();
indexRWIEntry iEntry;
indexURLEntry lurl;
while (urlIter.hasNext()) {
iEntry = (indexRWIEntry) urlIter.next();
iEntry = urlIter.next();
lurl = sb.wordIndex.loadedURL.load(iEntry.urlHash(), null, 0);
if (lurl == null) {
unknownURLEntries.add(iEntry.urlHash());
@ -212,7 +213,7 @@ public class IndexControlRWIs_p {
// transport to other peer
String gzipBody = sb.getConfig("indexControl.gzipBody","false");
int timeout = (int) sb.getConfigLong("indexControl.timeout",60000);
HashMap resultObj = yacyClient.transferIndex(
HashMap<String, Object> resultObj = yacyClient.transferIndex(
seed,
new indexContainer[]{index},
knownURLs,
@ -225,7 +226,7 @@ public class IndexControlRWIs_p {
// generate list
if (post.containsKey("keyhashsimilar")) {
final Iterator containerIt = sb.wordIndex.indexContainerSet(keyhash, false, true, 256).iterator();
final Iterator<indexContainer> containerIt = sb.wordIndex.indexContainerSet(keyhash, false, true, 256).iterator();
indexContainer container;
int i = 0;
int rows = 0, cols = 0;
@ -248,7 +249,7 @@ public class IndexControlRWIs_p {
if (post.containsKey("blacklist")) {
String blacklist = post.get("blacklist", "");
Set urlHashes = new HashSet();
Set<String> urlHashes = new HashSet<String>();
if (post.containsKey("blacklisturls")) {
PrintWriter pw;
try {
@ -323,11 +324,11 @@ public class IndexControlRWIs_p {
if (post.get("flags","").length() == 0) return null;
return new kelondroBitfield(4, (String) post.get("flags"));
}
if (post.get("reference", "").equals("on")) b.set(indexRWIEntry.flag_app_reference, true);
if (post.get("description", "").equals("on")) b.set(indexRWIEntry.flag_app_descr, true);
if (post.get("author", "").equals("on")) b.set(indexRWIEntry.flag_app_author, true);
if (post.get("tag", "").equals("on")) b.set(indexRWIEntry.flag_app_tags, true);
if (post.get("url", "").equals("on")) b.set(indexRWIEntry.flag_app_url, true);
if (post.get("reference", "").equals("on")) b.set(indexRWIEntry.flag_app_dc_description, true);
if (post.get("description", "").equals("on")) b.set(indexRWIEntry.flag_app_dc_title, true);
if (post.get("author", "").equals("on")) b.set(indexRWIEntry.flag_app_dc_creator, true);
if (post.get("tag", "").equals("on")) b.set(indexRWIEntry.flag_app_dc_subject, true);
if (post.get("url", "").equals("on")) b.set(indexRWIEntry.flag_app_dc_identifier, true);
if (post.get("emphasized", "").equals("on")) b.set(indexRWIEntry.flag_app_emphasized, true);
if (post.get("image", "").equals("on")) b.set(plasmaCondenser.flag_cat_hasimage, true);
if (post.get("audio", "").equals("on")) b.set(plasmaCondenser.flag_cat_hasaudio, true);
@ -343,7 +344,7 @@ public class IndexControlRWIs_p {
int hc = 0;
prop.put("searchresult_keyhash", startHash);
if (yacyCore.seedDB != null && yacyCore.seedDB.sizeConnected() > 0) {
Iterator e = yacyCore.dhtAgent.getAcceptRemoteIndexSeeds(startHash);
Iterator<yacySeed> e = yacyCore.dhtAgent.getAcceptRemoteIndexSeeds(startHash);
while (e.hasNext()) {
seed = (yacySeed) e.next();
if (seed != null) {
@ -369,11 +370,11 @@ public class IndexControlRWIs_p {
} else {
prop.put("searchresult", 3);
prop.put("searchresult_allurl", ranked.filteredCount());
prop.put("searchresult_reference", ranked.flagCount()[indexRWIEntry.flag_app_reference]);
prop.put("searchresult_description", ranked.flagCount()[indexRWIEntry.flag_app_descr]);
prop.put("searchresult_author", ranked.flagCount()[indexRWIEntry.flag_app_author]);
prop.put("searchresult_tag", ranked.flagCount()[indexRWIEntry.flag_app_tags]);
prop.put("searchresult_url", ranked.flagCount()[indexRWIEntry.flag_app_url]);
prop.put("searchresult_reference", ranked.flagCount()[indexRWIEntry.flag_app_dc_description]);
prop.put("searchresult_description", ranked.flagCount()[indexRWIEntry.flag_app_dc_title]);
prop.put("searchresult_author", ranked.flagCount()[indexRWIEntry.flag_app_dc_creator]);
prop.put("searchresult_tag", ranked.flagCount()[indexRWIEntry.flag_app_dc_subject]);
prop.put("searchresult_url", ranked.flagCount()[indexRWIEntry.flag_app_dc_identifier]);
prop.put("searchresult_emphasized", ranked.flagCount()[indexRWIEntry.flag_app_emphasized]);
prop.put("searchresult_image", ranked.flagCount()[plasmaCondenser.flag_cat_hasimage]);
prop.put("searchresult_audio", ranked.flagCount()[plasmaCondenser.flag_cat_hasaudio]);
@ -439,11 +440,11 @@ public class IndexControlRWIs_p {
((entry.word().flags().get(plasmaCondenser.flag_cat_hasaudio)) ? "contains audio, " : "") +
((entry.word().flags().get(plasmaCondenser.flag_cat_hasvideo)) ? "contains video, " : "") +
((entry.word().flags().get(plasmaCondenser.flag_cat_hasapp)) ? "contains applications, " : "") +
((entry.word().flags().get(indexRWIEntry.flag_app_url)) ? "appears in url, " : "") +
((entry.word().flags().get(indexRWIEntry.flag_app_descr)) ? "appears in description, " : "") +
((entry.word().flags().get(indexRWIEntry.flag_app_author)) ? "appears in author, " : "") +
((entry.word().flags().get(indexRWIEntry.flag_app_tags)) ? "appears in tags, " : "") +
((entry.word().flags().get(indexRWIEntry.flag_app_reference)) ? "appears in reference, " : "") +
((entry.word().flags().get(indexRWIEntry.flag_app_dc_identifier)) ? "appears in url, " : "") +
((entry.word().flags().get(indexRWIEntry.flag_app_dc_title)) ? "appears in description, " : "") +
((entry.word().flags().get(indexRWIEntry.flag_app_dc_creator)) ? "appears in author, " : "") +
((entry.word().flags().get(indexRWIEntry.flag_app_dc_subject)) ? "appears in tags, " : "") +
((entry.word().flags().get(indexRWIEntry.flag_app_dc_description)) ? "appears in reference, " : "") +
((entry.word().flags().get(indexRWIEntry.flag_app_emphasized)) ? "appears emphasized, " : "") +
((yacyURL.probablyRootURL(entry.word().urlHash())) ? "probably root url" : "")
);
@ -453,7 +454,7 @@ public class IndexControlRWIs_p {
i++;
if ((maxlines >= 0) && (i >= maxlines)) break;
}
Iterator iter = ranked.miss(); // iterates url hash strings
Iterator<String> iter = ranked.miss(); // iterates url hash strings
while (iter.hasNext()) {
us = (String) iter.next();
prop.put("genUrlList_urlList_"+i+"_urlExists", "0");

@ -32,12 +32,13 @@ import de.anomic.kelondro.kelondroRow.Entry;
public interface indexRWIEntry {
// appearance flags, used in RWI entry
// some names are derived from the Dublin Core Metadata tag set
// the flags 0..23 are identical to the category flags in plasmaCondenser
public static final int flag_app_reference = 24; // word appears in anchor description text (the reference to an url), or any alternative text field of a link
public static final int flag_app_descr = 25; // word appears in headline (or any description part)
public static final int flag_app_author = 26; // word appears in author
public static final int flag_app_tags = 27; // word appears in header tags
public static final int flag_app_url = 28; // word appears in url
public static final int flag_app_dc_description= 24; // word appears in anchor description text (the reference to an url), or any alternative text field of a link
public static final int flag_app_dc_title = 25; // word appears in title or headline or any description part
public static final int flag_app_dc_creator = 26; // word appears in author
public static final int flag_app_dc_subject = 27; // word appears in header tags or other descriptive part
public static final int flag_app_dc_identifier = 28; // word appears in url or document identifier
public static final int flag_app_emphasized = 29; // word is emphasized in text (i.e. bold, italics, special size)
public String toPropertyForm();

@ -135,11 +135,11 @@ public class indexRWIEntryOrder extends kelondroAbstractOrder<indexRWIEntry> imp
+ ( (((t.lother() - min.lother() ) << 8) / (1 + max.lother() - min.lother()) ) << ranking.coeff_lother)
+ ( (((t.hitcount() - min.hitcount() ) << 8) / (1 + max.hitcount() - min.hitcount()) ) << ranking.coeff_hitcount)
+ ( authority(t.urlHash()) << ranking.coeff_authority)
+ (((flags.get(indexRWIEntry.flag_app_url)) ? 255 << ranking.coeff_appurl : 0))
+ (((flags.get(indexRWIEntry.flag_app_descr)) ? 255 << ranking.coeff_appdescr : 0))
+ (((flags.get(indexRWIEntry.flag_app_author)) ? 255 << ranking.coeff_appauthor : 0))
+ (((flags.get(indexRWIEntry.flag_app_tags)) ? 255 << ranking.coeff_apptags : 0))
+ (((flags.get(indexRWIEntry.flag_app_reference)) ? 255 << ranking.coeff_appref : 0))
+ (((flags.get(indexRWIEntry.flag_app_dc_identifier)) ? 255 << ranking.coeff_appurl : 0))
+ (((flags.get(indexRWIEntry.flag_app_dc_title)) ? 255 << ranking.coeff_appdescr : 0))
+ (((flags.get(indexRWIEntry.flag_app_dc_creator)) ? 255 << ranking.coeff_appauthor : 0))
+ (((flags.get(indexRWIEntry.flag_app_dc_subject)) ? 255 << ranking.coeff_apptags : 0))
+ (((flags.get(indexRWIEntry.flag_app_dc_description)) ? 255 << ranking.coeff_appref : 0))
+ (((flags.get(indexRWIEntry.flag_app_emphasized)) ? 255 << ranking.coeff_appemph : 0))
+ (((flags.get(plasmaCondenser.flag_cat_indexof)) ? 255 << ranking.coeff_catindexof : 0))
+ (((flags.get(plasmaCondenser.flag_cat_hasimage)) ? 255 << ranking.coeff_cathasimage : 0))

@ -93,12 +93,12 @@ public final class plasmaCondenser {
public static final int flag_cat_entertainment = 10; // boulevard, entertainment, cultural content
public static final int flag_cat_knowledge = 11; // science, school stuff, help for homework
public static final int flag_cat_computer = 12; // any computer related stuff, networks, operation systems
public static final int flag_cat_p2p = 13; // p2p support, filesharing archives etc.
public static final int flag_cat_p2p = 13; // p2p support, file-sharing archives etc.
public static final int flag_cat_sex = 14; // sexual content
public static final int flag_cat_spam = 15; // pages that anybody would consider as not interesting
public static final int flag_cat_linux = 16; // pages about linux software
public static final int flag_cat_macos = 17; // pages about macintosh, apple computers and the mac os
public static final int flag_cat_windows = 18; // pages about windows os and softare
public static final int flag_cat_windows = 18; // pages about windows os and software
public static final int flag_cat_osreserve = 19; // reserve
public static final int flag_cat_hasimage = 20; // the page refers to (at least one) images
public static final int flag_cat_hasaudio = 21; // the page refers to (at least one) audio file
@ -131,7 +131,7 @@ public final class plasmaCondenser {
//System.out.println("DEBUG: condensing " + document.getMainLongTitle() + ", indexText=" + Boolean.toString(indexText) + ", indexMedia=" + Boolean.toString(indexMedia));
insertTextToWords(document.dc_source().toNormalform(false, true), 0, indexRWIEntry.flag_app_url, RESULT_FLAGS);
insertTextToWords(document.dc_source().toNormalform(false, true), 0, indexRWIEntry.flag_app_dc_identifier, RESULT_FLAGS);
Map.Entry<yacyURL, String> entry;
if (indexText) {
@ -148,9 +148,9 @@ public final class plasmaCondenser {
// phrase 99 is taken from the media Link url and anchor description
// phrase 100 and above are lines from the text
insertTextToWords(document.dc_title(), 1, indexRWIEntry.flag_app_descr, RESULT_FLAGS);
insertTextToWords(document.dc_description(), 3, indexRWIEntry.flag_app_descr, RESULT_FLAGS);
insertTextToWords(document.dc_creator(), 4, indexRWIEntry.flag_app_descr, RESULT_FLAGS);
insertTextToWords(document.dc_title(), 1, indexRWIEntry.flag_app_dc_title, RESULT_FLAGS);
insertTextToWords(document.dc_description(), 3, indexRWIEntry.flag_app_dc_description, RESULT_FLAGS);
insertTextToWords(document.dc_creator(), 4, indexRWIEntry.flag_app_dc_creator, RESULT_FLAGS);
// missing: tags!
String[] titles = document.getSectionTitles();
for (int i = 0; i < titles.length; i++) {
@ -161,8 +161,9 @@ public final class plasmaCondenser {
Iterator<Map.Entry<yacyURL, String>> i = document.getAnchors().entrySet().iterator();
while (i.hasNext()) {
entry = i.next();
insertTextToWords(entry.getKey().toNormalform(false, false), 98, indexRWIEntry.flag_app_reference, RESULT_FLAGS);
insertTextToWords((String) entry.getValue(), 98, indexRWIEntry.flag_app_reference, RESULT_FLAGS);
if ((entry == null) || (entry.getKey() == null)) continue;
insertTextToWords(entry.getKey().toNormalform(false, false), 98, indexRWIEntry.flag_app_dc_identifier, RESULT_FLAGS);
insertTextToWords((String) entry.getValue(), 98, indexRWIEntry.flag_app_dc_description, RESULT_FLAGS);
}
} else {
this.RESULT_NUMB_WORDS = 0;

Loading…
Cancel
Save