*) characters are saved in ASCII-safe notation in language data files now ('\u0061' instead of 'a'), which hopefully avoids lots of problems

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4019 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
low012 18 years ago
parent 0339c65ee2
commit ea05c7d463

@ -7,7 +7,7 @@
// //
// This File is contributed by Marc Nause // This File is contributed by Marc Nause
// //
// $LastChangedDate: 2007-07-29 $ // $LastChangedDate: 2007-07-31 $
// $LastChangedRevision: $ // $LastChangedRevision: $
// $LastChangedBy: low012 $ // $LastChangedBy: low012 $
// //
@ -52,7 +52,6 @@
// The program can be started with the following arguments: // The program can be started with the following arguments:
// //
// input=filename name of the file the text is stored in // input=filename name of the file the text is stored in
// output=filename name of the file the data will be stored in
// name=languagename name of the language the text is written in // name=languagename name of the language the text is written in
// name=code code of the language the text is written in (e.g. en-GB) // name=code code of the language the text is written in (e.g. en-GB)
@ -143,12 +142,21 @@ public class languageDataExtractor {
//Trying to read from input file and put quantity of letters into map. //Trying to read from input file and put quantity of letters into map.
try { try {
while ((line = inputFile.readLine()) != null) { while (inputFile.ready()) {
for(int i=0;i<line.length();i++){ key = (char)inputFile.read();
key = line.charAt(i);
if(Character.isLetter(key)){ if(Character.isLetter(key)){
key = Character.toLowerCase(key); key = Character.toLowerCase(key);
sKey = "" + key; sKey=Integer.toHexString((int)key);
switch (sKey.length()){
case 1: sKey = "\\u000"+sKey; break;
case 2: sKey = "\\u00"+sKey; break;
case 3: sKey = "\\u0"+sKey; break;
case 4: sKey = "\\u"+sKey; break;
default: throw new RuntimeException(key+" too long to be a character");
}
if (map.containsKey(sKey)) { if (map.containsKey(sKey)) {
fValue = new Float(Float.parseFloat(map.get(sKey).toString()) + 1); fValue = new Float(Float.parseFloat(map.get(sKey).toString()) + 1);
} }
@ -160,7 +168,6 @@ public class languageDataExtractor {
} }
} }
} }
}
catch (IOException e) { catch (IOException e) {
System.out.println("Error reading file "+input); System.out.println("Error reading file "+input);
System.out.println("Program aborted! No data has been written!"); System.out.println("Program aborted! No data has been written!");
@ -171,9 +178,9 @@ public class languageDataExtractor {
file = "<language name=\""+name+"\" code=\""+code+"\">\n"; file = "<language name=\""+name+"\" code=\""+code+"\">\n";
mapiter = map.keySet().iterator(); mapiter = map.keySet().iterator();
while(mapiter.hasNext()){ while(mapiter.hasNext()){
key = mapiter.next().toString().charAt(0);
sKey = "" + key; sKey = mapiter.next().toString();
file += "\n <letter>\n <name>"+key+"</name>\n <quantity>"+(Float.parseFloat(map.get(sKey).toString())/quantity*100)+"</quantity>\n </letter>\n"; file += "\n <letter>\n <name>"+sKey+"</name>\n <quantity>"+(Float.parseFloat(map.get(sKey).toString())/quantity*100)+"</quantity>\n </letter>\n";
} }
file += "\n</language>"; file += "\n</language>";

Loading…
Cancel
Save