onom: perl

ラベル perl の投稿を表示しています。すべての投稿を表示

2017年9月12日火曜日

Perl multi-dimensional array

use strict;
use warnings;
use Text::CSV_XS;

# Compare two CSV exports and report, for every data row of the NEW file,
# whether its CC history equals the one recorded in the ORIGINAL file.
#
# Usage:  perl <this script> ORIGINAL.csv NEW.csv
#
# Output (STDOUT), one line per data row of NEW.csv:
#   ### matched ### ,assignee,incident,"alias",new_cc,=,orig_cc
#   ### unmatch ### ,assignee,incident,"alias",new_cc,=,orig_cc
#   ### __new__ ### ,assignee,incident,"alias",new_cc
#
# Each kept row is the array [assignee, incident, alias, cc], built from the
# CSV columns 21, 0, 12 and 40 respectively.

# Column positions inside the input CSV files.
my $ASSIGNEE_COL = 21;
my $INCIDENT_COL = 0;
my $ALIAS_COL    = 12;
my $CC_COL       = 40;

# Positions inside the rows kept in memory.
my $assign = 0;
my $inc    = 1;
my $alias  = 2;
my $cc     = 3;

@ARGV == 2 or die "usage: $0 ORIGINAL.csv NEW.csv\n";
my ($origin_file, $new_file) = @ARGV;

my $csv = Text::CSV_XS->new({ binary => 1 })    # binary => 1 accepts non-ASCII data
    or die "Cannot create CSV parser: " . Text::CSV_XS->error_diag() . "\n";

# Read one CSV file and return a reference to the list of kept rows.
# The first record is treated as the header and skipped.  Rows are pushed
# sequentially, so there is no empty slot for the header and a quoted field
# that contains a newline cannot leave gaps in the list.
sub read_rows {
    my ($parser, $path) = @_;

    open my $fh, '<', $path or die "Cannot open '$path': $!\n";

    my @rows;
    my $seen_header = 0;
    while (my $columns = $parser->getline($fh)) {
        if (!$seen_header) {
            $seen_header = 1;
            next;
        }

        my $history = defined $columns->[$CC_COL] ? $columns->[$CC_COL] : '';
        $history =~ s/,/;/g;    # commas inside the CC history become semicolons
        $history =~ s/ //g;     # drop all spaces so the comparison ignores spacing

        # Short rows yield undef columns; store '' so later string
        # comparison and printing behave the same without warnings.
        push @rows, [
            map { defined $_ ? $_ : '' }
                @{$columns}[ $ASSIGNEE_COL, $INCIDENT_COL, $ALIAS_COL ],
                $history
        ];
    }

    # getline() returns false both at end of file and on a parse error;
    # eof() tells the two apart.
    $parser->eof
        or die "CSV parse error in '$path': " . $parser->error_diag() . "\n";

    close $fh;
    return \@rows;
}

my $origin = read_rows($csv, $origin_file);
my $new    = read_rows($csv, $new_file);

print "new size is ", scalar(@$new), "\n";    # debug: number of data rows in NEW

# Index the original rows by incident ID.  Only the first occurrence of an
# ID is kept, matching a top-to-bottom scan that stops at the first hit.
my %origin_by_incident;
for my $row (@$origin) {
    my $id = $row->[$inc];
    $origin_by_incident{$id} = $row unless exists $origin_by_incident{$id};
}

print "############## start ####################\n";

for my $row (@$new) {
    my @fields = ($row->[$assign], $row->[$inc], qq{"$row->[$alias]"}, $row->[$cc]);
    my $known  = $origin_by_incident{ $row->[$inc] };

    if (!defined $known) {
        # Incident ID absent from ORIGINAL (also covers an empty ORIGINAL).
        print "### __new__ ### ,", join(',', @fields), "\n";
        next;
    }

    my $tag = $row->[$cc] ne $known->[$cc]
        ? "### unmatch ### ,"     # CC history changed
        : "### matched ### ,";    # CC history unchanged
    print $tag, join(',', @fields, '=', $known->[$cc]), "\n";
}

2017年2月25日土曜日

Get JGB historical data from MOF - Part 2.

Updated the previous version to use foreach, join, push and regular expressions; this is more like real Perl.
By using "foreach" instead of "for", the script no longer depends on a fixed-length list.
With "push" and "join", the code is more readable.
Also prepared the condition clause for the era after Heisei.

use strict;
use warnings;

# Download the JGB interest-rate CSV from the Ministry of Finance site and
# rewrite it as ./jgb_seireki.csv:
#   * the first field (Japanese era date such as "S49.10.3") becomes a
#     western-calendar date ("1974-10-3");
#   * every following numeric field is printed with three decimals.
# The downloaded file is removed at the end.

my $url     = 'http://www.mof.go.jp/jgbs/reference/interest_rate/data/jgbcm_all.csv';
my $file    = './jgbcm_all.csv';
my $outfile = './jgb_seireki.csv';

# Offset added to the era year to get the western year.
# Showa 1 = 1926, Heisei 1 = 1989, Reiwa 1 = 2019.
# NOTE(review): 'R' is assumed to be the letter the source file uses for
# Reiwa -- confirm against a current download.
my %era_offset = (
    S => 1925,
    H => 1988,
    R => 2018,
);

# List form avoids the shell; -O makes wget overwrite a stale copy instead
# of saving the download under a numbered name.
system('wget', '-O', $file, $url) == 0
    or die "wget failed for $url (status $?)\n";

open my $in,  '<', $file    or die "Cannot open $file: $!";
open my $out, '>', $outfile or die "Cannot open $outfile: $!";

print "# start\n";
print {$out} "Date,oneY,twoY,threeY,fourY,fiveY,sixY,sevenY,eightY,nineY\n";

LINE:
while (my $line = <$in>) {
    next LINE if $. < 3;            # the 1st and 2nd lines are headers
    $line =~ s/\r?\n\z//;           # strip LF or CRLF so the last field is clean
    next LINE if $line !~ /\S/;     # ignore blank lines

    my @buff = split /,/, $line;    # split the line by comma

    # The first field must look like <era letter><year>.<month>.<day>.
    my ($gengou, $era_year, $month, $day) =
        $buff[0] =~ /\A([A-Z])(\d+)\.(\d+)\.(\d+)\z/;
    if (!defined $gengou || !exists $era_offset{$gengou}) {
        warn "line $.: unrecognized date field, skipped\n";
        next LINE;
    }
    my $year = $era_year + $era_offset{$gengou};

    # Keep only the fields that contain a digit and do not start with a
    # capital letter; anything else is dropped.
    # NOTE(review): dropping a non-numeric field in the middle of a row
    # shifts the later values one column to the left -- confirm that only
    # trailing fields can be non-numeric in the source data.
    my @interest;
    for my $field (@buff[ 1 .. $#buff ]) {
        next if $field =~ /^[A-Z]/ or $field !~ /[0-9]/;
        push @interest, sprintf("%.3f", $field);
    }

    print {$out} "$year-$month-$day,", join(",", @interest), "\n";
}

close $in;
close $out or die "Cannot close $outfile: $!";   # buffered write errors surface here
unlink $file or warn "Cannot remove $file: $!\n";

2017年2月24日金曜日

Get JGB historical data from MOF

財務省のサイトより日本国債の金利データを取得し、XTS形式で扱えるように変換する。すなわち、和暦(Sで昭和を、Hで平成を表している)で記録された第一フィールドの日付データを西暦に、第二フィールド以降の金利データを小数点以下3桁固定のフォーマットに変換する。

data comes as csv format and filename is jgbcm_all.csv
execute wget command.
skip 1st and 2nd lines
convert data (see the sample line and field notes below).
execute system("perl jgb.pl");jgb_xts <- as.xts(read.zoo(read.csv("jgb_seireki.csv")));
don't forget to remove jgbcm_all.csv!
data comes as "S49.10.3,10.388,9.378,8.839,8.520,8.354,8.298,8.244,8.120,8.203"

1st field is date. S49.10.3 must be converted to 1974-10-3.
H for heisei, S for showa.
other fields contain interest data. must do zero padding.

use strict;
use warnings;

# Fetch jgbcm_all.csv from the Ministry of Finance site and write
# ./jgb_seireki.csv with a western-calendar date followed by the first nine
# interest-rate fields, each formatted with three decimals.
# Input rows look like:
#   S49.10.3,10.388,9.378,8.839,8.520,8.354,8.298,8.244,8.120,8.203
# and are written as:
#   1974-10-3,10.388,9.378,8.839,8.520,8.354,8.298,8.244,8.120,8.203

my $url     = 'http://www.mof.go.jp/jgbs/reference/interest_rate/data/jgbcm_all.csv';
my $file    = './jgbcm_all.csv';
my $outfile = './jgb_seireki.csv';

# Era letter => value added to the era year (Showa 1 = 1926, Heisei 1 = 1989,
# Reiwa 1 = 2019).
# NOTE(review): 'R' for Reiwa is an assumption about the source file -- verify.
my %offset_for = (S => 1925, H => 1988, R => 2018);

# Number of rate columns written; matches the header line below.
my $rate_columns = 9;

# List form bypasses the shell; -O overwrites any stale download.
system('wget', '-O', $file, $url) == 0
    or die "wget failed for $url (status $?)\n";

open my $in,  '<', $file    or die "Cannot open $file: $!";
open my $out, '>', $outfile or die "Cannot open $outfile: $!";

print "# start\n";
print {$out} "Date,oneY,twoY,threeY,fourY,fiveY,sixY,sevenY,eightY,nineY\n";    # output head line

my $line_num = 0;
while (my $line = <$in>) {
    $line_num++;
    next if $line_num <= 2;         # skip 1st and 2nd lines
    $line =~ s/\r?\n\z//;           # drop the line terminator (LF or CRLF)
    next if $line !~ /\S/;          # nothing to convert on a blank line

    my @buff = split /,/, $line;    # split the line by comma

    # Date field: era letter, era year, month, day separated by periods.
    my ($gengou, $era_year, $month, $day) =
        $buff[0] =~ /\A([A-Z])(\d+)\.(\d+)\.(\d+)\z/;
    if (!defined $gengou || !exists $offset_for{$gengou}) {
        # Do not guess the era: an unknown prefix would give a wrong year.
        warn "line $line_num: unrecognized date field, skipped\n";
        next;
    }
    my $year = $era_year + $offset_for{$gengou};

    # Format the rate fields.  A missing or non-numeric field is written as
    # an empty field rather than as a made-up 0.000.
    my @interest;
    for my $count (1 .. $rate_columns) {
        my $raw = $buff[$count];
        if (defined $raw && $raw =~ /\A\s*[-+]?(?:\d+\.?\d*|\.\d+)\s*\z/) {
            push @interest, sprintf("%.3f", $raw);
        }
        else {
            push @interest, '';
        }
    }

    print {$out} "$year-$month-$day,", join(",", @interest), "\n";
}

close $in;
close $out or die "Cannot close $outfile: $!";   # write errors may only show at close
unlink $file or warn "Cannot remove $file: $!\n";