2017年9月27日水曜日

Python: 1) reading a CSV file, 2) regular expressions, 3) using multi-dimensional arrays to compare file contents.

#
#  USAGE:
# "py3 t4.py oldfilename newfilename "
# When more than one filename is given, this program compares the two files and outputs an indexed list of the newly added e-mail addresses.
#            OR
# "py3 t4.py filename"
# When only one filename is given, it outputs the e-mail address index list for that file.
#
# Import the required packages: csv for CSV parsing, re for regular expressions, sys for system access.
import csv
import re
import sys

# input CSV file format
# 0 for incident ID
# 12 for new alias
# 21 for assignee
# 40 for cc history to contain all related email addr.
#
# Global row buffers: 1) rows from the original file, 2) rows from the new
# file to compare against it, 3) the final output rows.
# Each buffer is a list of rows and starts with one empty placeholder row.
# There should be a better way, but this is enough for now.
origin_arr, new_arr, arr = [[]], [[]], [[]]

# Define the functions here; they must be defined before the main try block that calls them.
def make_arr(fname, buff):
    """Read the CSV file *fname* and append one row per cc address to *buff*.

    The file is opened as Shift_JIS and its first line is discarded.  For
    every remaining record, column 40 (cc history) has all spaces removed,
    loses one trailing semicolon, and is split on semicolons.  Each piece
    produces a four-item list appended to *buff*::

        [column 0 with spaces removed, column 12, column 21, piece]

    Nothing is returned; *buff* is modified in place.
    """
    with open(fname, 'r', encoding='shift_jis') as handle:  # file encoding = shift jis.
        records = csv.reader(handle, delimiter=',', quotechar='"')
        next(records)  # discard the first line.
        for record in records:
            # Remove every space, then drop the last semicolon; otherwise
            # the split below would yield a trailing blank entry.
            cc_field = re.sub(';$', "", re.sub(' ', "", record[40]))
            incident = re.sub(' ', "", record[0])
            buff.extend(
                [incident, record[12], record[21], re.sub(' ', "", piece)]
                for piece in cc_field.split(";")
            )

def comp_arr(origin, new):
    """Record every row of *new* whose (incident id, address) pair is not in *origin*.

    Both arguments are lists of ``[incident id, new alias, assignee, address]``
    rows.  The element at index 0 of each list is treated as a placeholder
    and skipped.  Rows whose address contains ``@kaspersky.co`` are never
    recorded.  Each remaining unmatched row is passed to ``const_array`` in
    the order it appears in *new*.

    Nothing is returned; results are collected through ``const_array``.
    """
    # The dot is escaped so that it only matches a literal "." in the domain.
    excluded_domain = re.compile(r"@kaspersky\.co")

    # Look up exact (incident id, address) pairs in a set.  Scanning origin
    # and stopping at the first row with the same address reported a row as
    # new whenever that first row belonged to another incident, even if an
    # exact match followed later; it also reported nothing at all when
    # origin held only the placeholder row.
    known_pairs = {(row[0], row[3]) for row in origin[1:]}

    for row in new[1:]:
        # 0 = incident id, 1 = new alias, 2 = assignee, 3 = addr
        inc, alias, assignee, addr = row[0], row[1], row[2], row[3]
        if (inc, addr) in known_pairs:
            continue
        if excluded_domain.search(addr):
            continue
        const_array(inc, alias, assignee, addr)

def const_array(inc, alias, assignee, addr):
    """Append one output row to the module-level ``arr`` list.

    The appended row is ``[assignee, addr, quoted alias, inc]``, where the
    quoted alias is *alias* wrapped in double quotes.  Nothing is returned.
    """
    # Plain concatenation rather than re.sub on "^" / "$": the "$" anchor
    # also matches just before a trailing newline, which would insert an
    # extra quote inside an alias that ends with a newline.
    quoted_alias = '"' + alias + '"'
    arr.append([assignee, addr, quoted_alias, inc])

def output_array(arr):
    """Print every row of *arr* except the first as one comma-joined line.

    The first row is skipped because it is the empty placeholder.  Each
    remaining row has its items converted with ``str`` and joined by commas;
    the joined text then goes through the cleanup substitutions below before
    being printed.  Note that the substitutions act on the whole line, so
    they also affect matching characters inside the field values.
    """
    cleanups = (
        (r"^\[", ""),   # remove a square bracket at the start of the line
        (r"\]$", ""),   # remove a square bracket at the end of the line
        (r"'", ""),     # remove single quotation marks
        (r", ", ","),   # remove the space after a comma so the line is CSV
    )
    for row in arr[1:]:
        line = ','.join(str(item) for item in row)
        for pattern, replacement in cleanups:
            line = re.sub(pattern, replacement, line)
        print(line)

# Script entry point: build the address table(s), then print the sorted result.
try:
    argvs = sys.argv
    if len(argvs) < 2:
        # A missing filename would otherwise surface as an uncaught IndexError.
        print('usage: py3 t4.py oldfilename [newfilename]', file=sys.stderr)
        sys.exit(1)
    fname = argvs[1]
    if len(argvs) > 2:
        # Two files given: compare the old file with the new one and output the delta.
        make_arr(fname, origin_arr)    # pick up inc id, addr and other info from the original file.
        fname = argvs[2]
        make_arr(fname, new_arr)       # same, from the new file.
        comp_arr(origin_arr, new_arr)  # compare the two tables; results are collected in arr.
    else:
        # One file given: index every row by address.  The file is parsed
        # only once, since no original table is needed in this mode.
        make_arr(fname, new_arr)
        for inc, alias, assignee, addr in new_arr[1:]:  # index 0 is the empty placeholder row.
            const_array(inc, alias, assignee, addr)
    arr.sort()  # in both cases, sort arr and output it with formatting.
    output_array(arr)
    sys.exit()
except FileNotFoundError as e:
    print(e)
except csv.Error as e:
    print(e)

0 件のコメント: