#!/usr/bin/python
"""Report tracking numbers that occur more than once across CSV exports.

Usage: <script> DIRECTORY

DIRECTORY must contain ``checked_numbers.csv`` (comma separated; first
column is a tracking number, second column a free-text reason) plus any
number of ``*.csv`` export files (semicolon separated, one header line,
tracking number in column 8, optionally written as ``="12345"``).

Every row whose tracking number was already seen in an earlier row is
reported as a duplicate.  Duplicates whose number is listed in
``checked_numbers.csv`` are printed with the stored reason; all others are
flagged for manual checking and listed once more at the end.

The code runs unchanged on Python 2.7 and Python 3: every ``print`` call
takes exactly one pre-formatted string, so no ``__future__`` import is
needed.
"""

import csv
import glob
import os
import sys

FNAME_CHECKED = 'checked_numbers.csv'

# Index of the tracking-number column in the export files.
TRACKING_COLUMN = 8
# Columns echoed in front of the tracking number in the report.
REPORT_COLUMNS = (0, 4, 5, 6, 7)

SEPARATOR = "***********************"
UNCHECKED_MARK = "***** CHECK-IT! *****"


def open_csv(path):
    """Open *path* for reading in the mode the ``csv`` module expects.

    Python 3 wants a text file opened with ``newline=''``; Python 2 wants a
    binary file.
    """
    if sys.version_info[0] >= 3:
        return open(path, newline='')
    return open(path, 'rb')


def parse_tracking_id(field):
    """Return the integer tracking number stored in *field*.

    All ``=`` characters are removed and surrounding double quotes are
    stripped first, so ``="00123"`` and ``123`` both yield ``123``.

    Raises:
        ValueError: if the remaining text is not an integer.
    """
    return int(field.replace("=", "").strip('"'))


def read_checked(path):
    """Return ``{tracking number: reason}`` read from the CSV file *path*.

    Completely empty lines are skipped.
    """
    checked = {}
    with open_csv(path) as fobj:
        for row in csv.reader(fobj, delimiter=',', quoting=csv.QUOTE_ALL):
            if not row:
                continue
            checked[int(row[0])] = row[1]
    return checked


def iter_export_files(dirname):
    """Yield the full path of every export CSV in *dirname*.

    Names are sorted so that the decision which occurrence of a number is
    "the duplicate" does not depend on the directory listing order.  The
    checked-numbers file and files without a ``.csv`` extension are skipped.
    """
    for fname in sorted(os.listdir(dirname)):
        ext = os.path.splitext(fname)[1]
        if ext != ".csv" or fname == FNAME_CHECKED:
            continue
        yield os.path.join(dirname, fname)


def find_duplicates(rows, seen):
    """Yield ``(tracking_id, row)`` for each row whose id is already in *seen*.

    Ids met for the first time are added to *seen* (a set, mutated in
    place), which lets one set be shared across several files.  Completely
    empty rows are skipped.
    """
    for row in rows:
        if not row:
            continue
        tr_id = parse_tracking_id(row[TRACKING_COLUMN])
        if tr_id in seen:
            yield tr_id, row
        else:
            seen.add(tr_id)


def format_row(row, tr_id, note):
    """Return one space-separated report line for *row*."""
    fields = [row[index] for index in REPORT_COLUMNS]
    fields.append(str(tr_id))
    fields.append(note)
    return " ".join(fields)


def print_report(duplicates, checked):
    """Print the summary of all duplicates, then of the unchecked ones."""
    unchecked = [(tr_id, row) for tr_id, row in duplicates
                 if tr_id not in checked]

    print("")
    print(SEPARATOR)
    print("Dubletten erkannt: %i" % len(duplicates))
    print(SEPARATOR)
    for tr_id, row in duplicates:
        print(format_row(row, tr_id, checked.get(tr_id, UNCHECKED_MARK)))

    print(SEPARATOR)
    print("Neue erkannt: %i / %i" % (len(unchecked), len(duplicates)))
    print(SEPARATOR)
    for tr_id, row in unchecked:
        print(format_row(row, tr_id, UNCHECKED_MARK))


def main(argv=None):
    """Run the duplicate check for the directory named in ``argv[1]``.

    Returns the process exit status: 0 on success, 2 when the directory
    argument is missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.stderr.write("usage: %s DIRECTORY\n" % os.path.basename(argv[0]))
        return 2

    dirname = argv[1]
    print("Verwende Verzeichnis %s" % dirname)

    checked = read_checked(os.path.join(dirname, FNAME_CHECKED))

    seen = set()
    duplicates = []
    for path in iter_export_files(dirname):
        print("Verarbeite Datei %s" % path)
        with open_csv(path) as fobj:
            reader = csv.reader(fobj, delimiter=';', quoting=csv.QUOTE_ALL)
            # Skip the header line; tolerate a completely empty file.
            next(reader, None)
            for tr_id, row in find_duplicates(reader, seen):
                print("doublette detected %s" % tr_id)
                duplicates.append((tr_id, row))

    print_report(duplicates, checked)
    return 0


if __name__ == "__main__":
    sys.exit(main())