2017年1月18日水曜日

Edit CSV with Perl

Install the Text::CSV_XS module through CPAN
$ perl -MCPAN -e shell
cpan[3] > install Text::CSV_XS

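  1. read the CSV file.
  2. split each line into its columns.
  3. replace "," with "; " in the column at index 3 (the 4th column).
  4. replace "," with " " in the column at index 7 (the 8th column).
  5. rewrite the columns at indices 0, 1, 2, 3, 5, 6, 7 and 8 of the first line (the header) with English names.
  6. when the 1st column contains "#", suppress output of that line.
  7. append "," after every field, including the last one on each line.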


use strict;

use Text::CSV_XS;

# One CSV parser shared by both input files.
my $csv = Text::CSV_XS->new({ binary => 1 })
  or die "Cannot create CSV parser: " . Text::CSV_XS->error_diag();

# Input files. Stop right away if one cannot be opened; otherwise the
# read loops would simply see no rows and leave an empty output file.
open my $fh, "<", "emp_in.csv"
  or die "Cannot open emp_in.csv for reading: $!";

open my $os, "<", "os_in.csv"
  or die "Cannot open os_in.csv for reading: $!";

# Output files (created, or truncated if they already exist).
open my $fh_out, ">", "emp_out2.csv"
  or die "Cannot open emp_out2.csv for writing: $!";
open my $os_out, ">", "os_out2.csv"
  or die "Cannot open os_out2.csv for writing: $!";
# Convert the rows read from $fh and write them to $fh_out.
my $line_num = 1;    # 1-based count of parsed rows; row 1 is the header
while (my $columns = $csv->getline($fh)) {
  # Index 3 may hold several TFS Bug IDs separated by commas; turn those
  # commas into "; " because the output below is written without quoting.
  $columns->[3] =~ s/,/; /g;
  # Remove commas from the subject (index 7) for the same reason.
  $columns->[7] =~ s/,/ /g;

  # Replace the original header with the English version.
  # Index 4 is intentionally left as it is.
  if ($line_num == 1) {
    @{$columns}[0, 1, 2, 3, 5, 6, 7, 8] = (
      "RM ID",      "RM Priority", "RM Status",  "TFS Bug ID",
      "RM Project", "RM Filer",    "RM subject", "RM Component",
    );
  }

  # If the first column includes a pound sign the line might be a
  # duplicate: report it on STDOUT and do not write it to the output file.
  if ($columns->[0] =~ /#/) {
    print "mark found\n";
  } else {
    # Every field is followed by a comma, including the last one.
    print $fh_out map({ "$_," } @$columns), "\n";
  }
  $line_num++;
}

# getline() returns false at end of file and also on a parse error.
# Report the parser's diagnostic on STDERR if reading stopped early.
$csv->eof or $csv->error_diag();
# Convert the rows read from $os and write them to $os_out.
# Reset the existing line counter; declaring it again with "my" would
# mask the earlier declaration in the same scope.
$line_num = 1;
while (my $columns = $csv->getline($os)) {
  # Index 3: turn commas into "; " because the output is written unquoted.
  $columns->[3] =~ s/,/; /g;
  # Index 7: remove commas from the subject.
  $columns->[7] =~ s/,/ /g;

  # Replace the original header with the English version.
  # Index 4 is intentionally left as it is.
  if ($line_num == 1) {
    @{$columns}[0, 1, 2, 3, 5, 6, 7, 8] = (
      "RM ID",      "RM Priority", "RM Status",  "TFS Bug ID",
      "RM Project", "RM Filer",    "RM subject", "RM Component",
    );
  }

  # Rows whose first column contains "#" are dropped silently; all other
  # rows are written with a comma after every field, including the last.
  if ($columns->[0] !~ /#/) {
    print $os_out map({ "$_," } @$columns), "\n";
  }

  # Count every parsed row, skipped or not, so only row 1 is the header.
  $line_num++;
}

# The last getline() call (on $os) returns false at end of file and also
# on a parse error. eof tells the two apart; on an error, print the
# parser's diagnostic to STDERR instead of silently discarding it.
$csv->eof or $csv->error_diag();

# Buffered write errors only show up when the handle is closed, so check
# the two output files.
close $fh_out or die "Cannot close emp_out2.csv: $!";
close $os_out or die "Cannot close os_out2.csv: $!";
close $fh;
close $os;


0 件のコメント: