onom: 11月 2019

2019年11月14日木曜日

序数の算出 how to find sequence number from data frame

wpm <- rbind(merge(wp,rep(1,length(index(wp))),suffixes =c('','p_or_m')),merge(wm,rep(-1,length(index(wm)))))
wpm
result period_length rate open_p close_p p_or_m
9 1970 0.9157072 9 -0.0097365802 92.06 84.30 -1
1 1973 1.3763938 28 0.0114748680 84.30 116.03 1
1 1975 0.6634491 24 -0.0169506551 116.03 76.98 -1
5 1976 1.3013769 16 0.0166002074 76.98 100.18 1
7 1977 0.9867239 14 -0.0009541901 100.18 98.85 -1
<skip>
4 2016 1.0031085 16 0.0001940004 2058.90 2065.30 -1
12 2017 1.2933674 20 0.0129455360 2067.17 2673.61 1
8 2019 1.0904450 19 0.0045675511 2683.73 2926.46 -1

# return 1.0 以上のマイナスフェーズを探す。
# return 1.0 以上のマイナスフェーズを探して、その日付を取得する。

index(wpm[,1])[wpm[,6] == -1 & wpm[,1] > 1]

[1] " 6 1980" "11 1984" " 7 1986" "10 1989" " 1 1991" "12 1991" "11 1992" " 6 1995" " 1 1996" "10 1998" " 5 2005" " 6 2006"
[13] " 7 2010" " 9 2012" " 8 2014" " 4 2016" " 8 2019"

# return 1.0 以上のマイナスフェーズを探して、その日付ではなく
# その序数を取得する。51が最終エントリを指す序数であることに注意。

index(as.vector(wpm[,1]))[wpm[,6] == -1 & wpm[,1] > 1]

[1] 7 11 13 17 19 21 23 25 27 29 35 37 41 45 47 49 51

# 取得した序数に+1し、その次のエントリの序数を得る。

index(as.vector(wpm[,1]))[wpm[,6] == -1 & wpm[,1] > 1]+1

[1] 8 12 14 18 20 22 24 26 28 30 36 38 42 46 48 50 52

#シーケンスの要素数を調べる。51が最終エントリなので、序数＝51+1に該当する要素は
# 元のデータフレームにはないことに注意。

length(index(as.vector(wpm[,1]))[wpm[,6] == -1 & wpm[,1] > 1]+1)
[1] 17

#[-17]をつけて該当する序数を削除の上、データを再構成する

wpm[(index(as.vector(wpm[,1]))[wpm[,6] == -1 & wpm[,1] > 1]+1)[-17],]

result period_length rate open_p close_p p_or_m
12 1980 1.1883753 6 0.029182211 114.24 135.76 1
2 1985 1.1075926 3 0.034649717 163.58 181.18 1
8 1987 1.3967474 13 0.026036742 236.12 329.80 1
2 1990 0.9751147 4 -0.006280247 340.36 331.89 1
8 1991 1.1498066 7 0.020142134 343.91 395.43 1
4 1992 0.9950124 4 -0.001249244 417.03 414.95 1
9 1994 1.0727019 22 0.003195123 431.35 462.71 1
10 1995 1.0674621 4 0.016454915 544.75 581.50 1
9 1997 1.4893871 20 0.020117927 636.02 947.28 1
3 2000 1.3639946 17 0.018427587 1098.67 1498.58 1
5 2006 1.0659588 12 0.005337085 1191.50 1270.09 1
6 2007 1.1836842 12 0.014151848 1270.06 1503.35 1
2 2011 1.1983603 7 0.026187620 1107.53 1327.22 1
3 2014 1.2994239 18 0.014657552 1440.90 1872.34 1
12 2014 1.0273593 4 0.006770750 2004.07 2058.90 1
12 2017 1.2933674 20 0.012945536 2067.17 2673.61 1

Download composite leading indicator full data and load them into the R environment CLI

# go to OECD, and download 'Full Indicator data(.csv)".
# change the filename to CLI3.csv upon download.
# go to the directory where the downloaded csv exists.
# run the script in git repo. for example as the below.

$~/R/R2/index/cli_download.sed

# there will be

$ls -l *.csv
-rw-r--r--@ 1 honomoto staff 1283086 11 14 09:50 CLI3.csv
-rw-r--r-- 1 honomoto staff 7105 11 14 09:50 chn.csv
-rw-r--r-- 1 honomoto staff 12848 11 14 09:50 ea19.csv
-rw-r--r-- 1 honomoto staff 13923 11 14 09:50 oecd.csv
-rw-r--r-- 1 honomoto staff 15465 11 14 09:50 usa.csv

# run cli_download.

cli_xts <- merge(as.xts(read.zoo(read.csv("~/Downloads/oecd.csv"))),
as.xts(read.zoo(read.csv("~/Downloads/usa.csv"))),
as.xts(read.zoo(read.csv("~/Downloads/chn.csv"))),
as.xts(read.zoo(read.csv("~/Downloads/ea19.csv"))),
suffixes = c("oecd","usa","china","ea19"))

# draw graphs

par(mfrow=c(4,1))
plot(diff(cli_xts$oecd)["2011::"],type='p',pwd=1,pch='+')
plot(diff(cli_xts$usa)["2011::"],type='p',pwd=1,pch='+')
plot(diff(cli_xts$ea19)["2011::"],type='p',pwd=1,pch='+')
plot(diff(cli_xts$china)["2011::"],type='p',pwd=1,pch='+')

par(mfrow=c(1,1))

2019年11月13日水曜日

Download Composite Leading Indicator from OECD site. Early Bird Version

# go to OECD site and download CSV from Export menu.
# assume download file name is "MEI_CLI.csv".
# pick up amplitute justfied OECD total data from CSV

~~$sed -n '/OECD\ -\ Total/p' MEI_CLI_13112019035737453.csv |grep LOLITOAA | grep Ampli | awk -F, 'BEGIN{ORS = ""}{print $(NF-2)","}'~~

the improved version is below to integrate all query string into one criteria.

sed -n '/LOLITOAA.*Ampli.*OECD\ -\ Total/p' MEI_CLI.csv | awk -F, 'BEGIN{ORS = "";print "c("}{print $(NF-2)","}END{print $(NF-2)")\n"}'

OR do as below.

awk -F, 'BEGIN{ORS = "";print "w <- c("}/LOLITOAA.*Ampli.*OECD\ -\ Total/{print $(NF-2)","}END{print $(NF-2)")\n length(w)\n"}' MEI_CLI.csv

output sample is like below. now output is in the from of R statement. Construct vector in c() and substitute to w().

100.7565,100.7652,100.7551,100.7274,100.6736,100.5974,100.512,100.4191,100.3163,100.2006,100.0703,99.93036,99.79047,99.6553,99.53469,99.43623,99.36298,99.30513,99.25108,99.20237,99.15969,99.12648,99.10692

w <- c(100.7565,100.7652,100.7551,100.7274,100.6736,100.5974,100.512,100.4191,100.3163,100.2006,100.0703,99.93036,99.79047,99.6553,99.53469,99.43623,99.36298,99.30513,99.25108,99.20237,99.15969,99.12648,99.10692)

length(w)
[1] 23

# The last month will be found in this way.

sed -n '/LOLITOAA.*Ampli.*OECD\ -\ Total/p' MEI_CLI.csv | awk -F, 'END{print $7}'
"2019-09"

# as the last entry is "2019-09-01" do as below

w <- as.xts(w,last(seq(as.Date("2010-01-01"),as.Date("2019-09-01"),by='months'),length(w))

2019年11月12日火曜日

CLI1ヶ月変化値、X軸はCLI5ヶ月変化値、月間騰落率、COV

# 0) Dropbox/R-script/monthreturn_delta5month_delta1monthcolor.r 参照。
# 1)Y軸はCLI1ヶ月変化値、X軸はCLI5ヶ月変化値、同じく月間騰落率を色と形で表す。
# 2)原点からの角度をX、月間騰落率をYに取ったグラフ。
# 3)同じく、2)原点からの角度をX、COVをYに取ったグラフ。
#

w <- (to.monthly(SP5)[,4]/to.monthly(SP5)[,1])["1970::2018"]
c <- (apply.monthly(SP5[,4],sd) / apply.monthly(SP5[,4],mean) )["1970::2018"]
w <- w-1
# w <- (apply.monthly(SP5[,4],sd)/apply.monthly(SP5[,4],mean))["1970::2018"]
d <- na.omit(diff(cli_xts$oecd,5))["1970::2018"]
func <- function(x){
if(x > 0.1){return("a")}
if(x > 0.025){return("b")}
if(x > 0){return("c")}
if(x > -0.025){return("d")}
if(x > -0.05){return("e")}
if(x < -0.05){return("f")}
}
# df <- data.frame(monthlyreturn=as.vector(w),delta=as.vector(d),sign=as.vector(apply(diff(cli_xts$oecd)["1970::2018"],1,func)))

df <- data.frame(monthlyreturn=as.vector(apply(w,1,func)),
delta=as.vector(d),sign=as.vector(diff(cli_xts$oecd)["1970::2018"]))
# as.vector(w),delta=as.vector(d),sign=as.vector(apply(diff(cli_xts$oecd)["1970::2018"],1,func)))
p <- ggplot(df, aes(x=delta,y=sign))
p <- p + theme(panel.background = element_rect(fill = "black",
colour = "lightblue"),
legend.key = element_rect(fill='black',colour='white'))
# legend.box.background=element_rect(fill='black',colour='lightblue'))

p <- p + geom_point(alpha=1,aes(color=monthlyreturn,shape=monthlyreturn))
p <- p + geom_vline(xintercept =as.vector(last(diff(cli_xts$oecd,5))),size=0.5,linetype=2,colour="white",alpha=0.5)
p <- p + geom_hline(yintercept =as.vector(last(diff(cli_xts$oecd))),size=0.5,linetype=2,colour="white",alpha=0.5)
p <- p + theme(axis.title.x=element_blank(),axis.title.y=element_blank())
# p <- p + geom_smooth(method = "auto")
# p <- p + scale_shape_manual(values=c(0,1,2,10,11,12))
# p <- p + scale_color_brewer(label=c("more than 0.1","more than 0.025","more than ZERO","more then -0.025","more than -0.1","less than -0.1")) #x-axis label
p <- p + scale_color_brewer(palette="Spectral", label=c("more than 0.1","more than 0.025","more than ZERO","more then -0.025","more than -0.05","less than -0.05")) #x-axis label
p <- p + scale_shape_manual(values=c(0,1,2,10,11,12),label=c("more than 0.1","more than 0.025","more than ZERO","more then -0.025","more than -0.05","less than -0.05"))
p <- p + stat_smooth(aes(x=delta,y=sign),method="loess",color='white',size=0.3,se=FALSE)
# p <- p + guides(shape = FALSE)
plot(p)

wdf <- data.frame(dist=sqrt(df$delta**2+df$sign**2),
angle=atan2(df$delta,df$sign),
return=w,
mon=seq(1,length(wdf[,1]),1),
cov=c)
colnames(wdf)[3] <- "return"
# wdf <- cbind(wdf,seq(1,length(wdf[,1]),1))
colnames(wdf) [4] <- "mon"
# wdf <- cbind(wdf,c)
# wdf
colnames(wdf)[5] <- "cov"

df <- data.frame(r<-wdf[,3],a<-wdf[,2],mon<-wdf[,4],cov <- wdf[,5])
colnames(df)[1] <- 'r'
colnames(df)[2] <- 'a'
colnames(df)[3] <- 'mon'
colnames(df)[4] <- 'c'

# df <- last(df,284)
p <- ggplot(df,aes(x=a,y=r))
p <- p + geom_point(alpha=1,aes(color=mon))
p <- p + geom_vline(xintercept =as.vector(atan2(last(diff(cli_xts$oecd,5),1),last(diff(cli_xts$oecd),1))),size=0.5,linetype=2,colour="red",alpha=0.5)
p <- p + geom_vline(xintercept =as.vector(seq(-0.9,0.9,0.1))*pi,size=0.5,linetype=2,colour="white",alpha=0.5)
# p <- p + scale_color_brewer(palette="Spectral")
p <- p + stat_smooth(aes(x=a,y=r),method="loess",color='white',size=0.3,se=FALSE)
p <- p + scale_color_gradient(low = "red", high = "green")
p <- p + theme(panel.background = element_rect(fill = "black",
colour = "lightblue"))
# panel.grid = element_blank())
plot(p)

p <- ggplot(df,aes(x=a,y=c))
p <- p + geom_point(alpha=1,aes(color=mon))
p <- p + geom_vline(xintercept =as.vector(atan2(last(diff(cli_xts$oecd,5),1),last(diff(cli_xts$oecd),1))),size=0.5,linetype=2,colour="red",alpha=0.5)
p <- p + geom_vline(xintercept =as.vector(seq(-0.9,0.9,0.1))*pi,size=0.5,linetype=2,colour="white",alpha=0.5)
# p <- p + scale_color_brewer(palette="Spectral")
p <- p + stat_smooth(aes(x=a,y=c),method="loess",color='white',size=0.3,se=FALSE)
p <- p + scale_color_gradient(low = "red", high = "green")
p <- p + theme(panel.background = element_rect(fill = "black",
colour = "lightblue"))
# panel.grid = element_blank())
plot(p)

2019年11月11日月曜日

EPS 2019NOV11

before

> eps_year_xts["2019::"]
[,1]
2019-01-01 134.39
2019-04-01 135.27
2019-07-01 135.99
2019-10-01 145.71
2020-01-01 148.71
2020-04-01 154.31
2020-07-01 159.83
2020-10-01 165.38

# Edit delete 2 lines in the middle

12/31/2020		$46.68	$42.96	17.40	18.68		$177.27	$165.15
9/30/2020		$45.92	$42.75	18.01	19.31		$171.35	$159.78
6/30/2020		$43.85	$40.74	18.62	20.29		$165.72	$152.08
3/31/2020		$40.82	$38.70	19.04	21.09		$162.01	$146.27
12/31/2019		$40.76	$37.59	19.38	21.64		$159.18	$142.58
9/30/2019 (89.2%)	2976.74	$40.29	$35.04	20.11	23.03		$153.45	$133.95
				19.40	22.22	(P/E on Sep,'19 price)
ACTUALS
6/30/2019	2941.76	$40.14	$34.93	19.04	21.75		$154.54	$135.27
3/31/2019	2834.40	$37.99	$35.02	18.52	21.09		$153.05	$134.39

# result is like this.

honomoto@~$cat eps.txt
12/31/2020 $46.68 $42.96 17.40 18.68 $177.27$165.15
9/30/2020 $45.92 $42.75 18.01 19.31 $171.35$159.78
6/30/2020 $43.85 $40.74 18.62 20.29 $165.72$152.08
3/31/2020 $40.82 $38.70 19.04 21.09 $162.01$146.27
12/31/2019 $40.76 $37.59 19.38 21.64 $159.18$142.58
9/30/2019 (89.2%) 2976.74 $40.29 $35.04 20.11 23.03 $153.45 $133.95
6/30/2019 2941.76 $40.14 $34.93 19.04 21.75 $154.54$135.27
3/31/2019 2834.40 $37.99 $35.02 18.52 21.09 $153.05$134.39

honomoto@~$tac eps.txt | awk '{gsub("\\$","",$NF);print "eps_year_xts[\"2019::\"]["NR"] <- "$NF}

eps_year_xts["2019::"][1] <- 134.39
eps_year_xts["2019::"][2] <- 135.27
eps_year_xts["2019::"][3] <- 133.95
eps_year_xts["2019::"][4] <- 142.58
eps_year_xts["2019::"][5] <- 146.27
eps_year_xts["2019::"][6] <- 152.08
eps_year_xts["2019::"][7] <- 159.78
eps_year_xts["2019::"][8] <- 165.15

2019年11月6日水曜日

確率密度の計算　dnorm() qnorm()

オブジェクト wdf はこちらから。

以下のようなデータの集合があったとする。また、そのヒストグラムを示す。

> wdf[,3][wdf[,2] > -0.5*pi & wdf[,2] < 1]

[17] 0.0711926070 -0.0854386059 0.0094247436 0.0030609850 0.0432560109 0.0165413246 0.0213577174 0.0673432082
[25] 0.0222028328 0.0003197991 -0.0197827495 0.0095648204 -0.0217844120 0.0101077662 0.0070690542 0.0104836274
[33] 0.0317760404 -0.0003202381 0.0400974724 0.0069337016 0.0079165671 0.0134314222 0.0591259963 0.0563752866
[41] 0.0410094415 0.0180991114 0.0751759043 0.0075738342 0.0810441069 0.0508987097 0.0113222145 -0.0001427142
[49] 0.0359682872 -0.0112221049 0.0936167706 0.0998248066 0.0531659206 0.0085731462 0.0425385266 -0.0199458599
[57] 0.0028183707 0.0069545273 -0.0158576835 0.0236426721 0.0244524103 0.0144061834 0.0023497169 0.0353730253

その時平均と分散はそれぞれmean()とsd()によって算出される。

> mean(wdf[,3][wdf[,2] > -0.5*pi & wdf[,2] < 1])
[1] 0.02359295
> sd(wdf[,3][wdf[,2] > -0.5*pi & wdf[,2] < 1])
[1] 0.03504609

念のため、シャピロウイルク検定を実施する。このテストの帰無仮説は「正規分布は成立している」だから注意すること。結果は9.5%の確率で正規分布が成立することになる。あまり良い値ではない。

> shapiro.test(wdf[,3][wdf[,2] > -0.5*pi & wdf[,2] < 1])

Shapiro-Wilk normality test

data: wdf[, 3][wdf[, 2] > -0.5 * pi & wdf[, 2] < 1]
W = 0.96799, p-value = 0.095

平均と分散が判明すればそれに応じて確率を計算することが可能になる。

下方累積確率 = pnorm(標本値、平均、分散)

> pnorm(0,mean(wdf[,3][wdf[,2] > -0.5*pi & wdf[,2] < 1]), sd(wdf[,3][wdf[,2] > -0.5*pi & wdf[,2] < 1]))
[1] 0.2504107

正規分布に従えば、平均と分散が上に示された集団で標本の値が0以下となる確率は25.04%となる。

> pnorm(0.02359295,mean(wdf[,3][wdf[,2] > -0.5*pi & wdf[,2] < 1]), sd(wdf[,3][wdf[,2] > -0.5*pi & wdf[,2] < 1]))
[1] 0.5

また、値が0.02359295以下となる確率は50％である。また逆に確率からありうべき標本の値を計算することも可能になる。確率50％を期待できるのは、無限小から0.02359295までの累積となる。

想定標本値= qnorm(下方累積確率、平均、分散)

> qnorm(0.5,mean(wdf[,3][wdf[,2] > -0.5*pi & wdf[,2] < 1]), sd(wdf[,3][wdf[,2] > -0.5*pi & wdf[,2] < 1]))
[1] 0.02359295

2019年11月5日火曜日

rolling COV , grit.ticks.on='months', sapply

#
# calculate OCV of each last 30 business days with given parameters.
#
func <- function(date,xts=SP5[,4],days=30){
hensa <- sd(last(xts[paste("::",date,sep="")],days))
heikin <- mean(last(xts[paste("::",date,sep="")],days))
return(hensa/heikin)
}
#
# construct XTS object
# use sapply() instead of for loop to calculate rolling COV
#
as.xts(sapply(last(index(SP5),365),func),last(index(SP5),365))
# plot!!
plot(as.xts(sapply(last(index(SP5),730),func),last(index(SP5),730)),main="moving COV",grid.ticks.on="months")
length(index(SP5["2012-01-01::"]))
#
# 1)automatically the number of business days from the specific date (2012/12/1).
# 2)calculate rolling COV from the date above.
# 3)plot graph with monthly grid.
#
plot(as.xts(sapply(last(index(SP5),length(index(SP5["2012-01-01::"]))),func),last(index(SP5),length(index(SP5["2012-01-01::"])))),main="moving COV",grid.ticks.on="months")

# 評価比較用 for を使用した場合
system.time(for( i in as.character(last(index(SP5),365))){cat(i); print(func(i))})

system.time(print(as.xts(sapply(last(index(SP5),365),func),last(index(SP5),365))))

登録: 投稿 (Atom)