diff --git a/check_postcsv.py b/check_postcsv.py
index 11a9f2b..814de1d 100755
--- a/check_postcsv.py
+++ b/check_postcsv.py
@@ -7,7 +7,7 @@ FNAME_CHECKED = 'checked_numbers.csv'
 checked_list = {}
 tr_list = []
 dub_tr = []
-dub_detected = 0
+new_dub_tr = []
 
 dirname = sys.argv[1]
 
@@ -26,9 +26,9 @@ dirlist = os.listdir(dirname)
 for fname in dirlist:
     fbase, fext = os.path.splitext(fname)
     fullfname = os.path.join(dirname, fname)
-    print fname, fbase, fext
+    #print fname, fbase, fext
     if fext != ".csv" or fname == FNAME_CHECKED:
-        print "Ueberspringe Datei", fname
+        #print "Ueberspringe Datei", fname
         continue
 
     print "Verarbeite Datei", fullfname
@@ -37,19 +37,21 @@ for fname in dirlist:
     # skip header
     d.next()
     for r in d:
-        #print r
-        tr_id = r[8].replace("=", "")
+        tr_id = int(r[8].replace("=", "").strip('"'))
         if tr_id in tr_list:
-            print "double detected"
-            dub_detected += 1
+            print "doublette detected", tr_id
             dub_tr.append(r)
-            print r[7], tr_id
+            #print r[7], tr_id
+            #print checked_list, tr_id
+            if not checked_list.has_key(tr_id):
+                new_dub_tr.append(r)
         else:
             tr_list.append(tr_id)
 
-    
+
+print
 print "***********************"
-print "Dubletten erkannt:", dub_detected
+print "Dubletten erkannt:", len(dub_tr)
 print "***********************"
 
 for dub in dub_tr:
@@ -59,3 +61,8 @@ for dub in dub_tr:
     else:
         print dub[0], dub[4], dub[5], dub[6], dub[7], int(dub[8].replace("=", "").strip('"')), "***** CHECK-IT! *****"
 
+print "***********************"
+print "Neue erkannt: %i / %i" %(len(new_dub_tr), len(dub_tr))
+print "***********************"
+for dub in new_dub_tr:
+    print dub[0], dub[4], dub[5], dub[6], dub[7], int(dub[8].replace("=", "").strip('"')), "***** CHECK-IT! *****"