2019年9月2日月曜日

ggplot() geom_histogram ggplotを使用したヒストグラム

 
!!!CAUTION!!!

!「行列はベクトルなので、すべての要素は同じ型でなければならない。numeric型と character型 のように、異なる型のデータを含む行列を使いたい場合は、データフレームを利用する」

Sample #1 binds a "numeric" vector (#1) and a "character" vector (#2) into one matrix, so the resulting matrix is forced to coerce all elements to a single data type. In this case the type becomes "character", because it is the more flexible of the two. The data is eventually converted to "factor" in the data frame, which causes a data-handling problem in geom_histogram(): it expects a continuous data type such as numeric for x, whereas "factor" is discrete. A discrete x requires stat="count", and that makes it impossible to display all the data on a single continuous axis that also includes the empty ranges.

## codes start here sample #1
# use sample #2 rather than #1.

# Monthly sd / mean (coefficient of variation) of column 4 of SP5.
w <- apply.monthly(SP5[, 4], sd) / apply.monthly(SP5[, 4], mean)
# DON'T DO THIS UNLESS THE DATA SHOULD BE TREATED AS DISCRETE. USE SAMPLE #2.
# cbind() of a numeric and a character vector yields a character matrix, so
# `data` ends up discrete in the data frame (character; factor before R 4.0).
# With stat = "count" nothing is binned, so a binwidth setting has no effect
# here; the digits given to round() set the bar granularity instead.
x <- local({
  cv <- w["1970::2018"]
  delta <- diff(cli_xts$oecd)["1970::2018"]
  rising <- round(as.double(cv[delta > 0]), digits = 3)
  falling <- round(as.double(cv[delta < 0]), digits = 3)
  rbind(
    cbind(rising, rep("p", length(rising)), deparse.level = 0),
    cbind(falling, rep("m", length(falling)), deparse.level = 0)
  )
})
y <- data.frame(x)
colnames(y) <- c("data", "sign")
# stat = "count" is required here: the default binning stat refuses a
# discrete x and stops with an error.
p <- ggplot(y, aes(x = data)) +
  geom_histogram(
    aes(fill = sign),
    stat = "count",
    position = "identity",
    alpha = 0.5
  )
plot(p)


## codes end here.

sample #1 a case of discrete??

## codes start here sample #2
# round() might not be necessary anymore. keep here as it is harmless.

# Monthly sd / mean (coefficient of variation) of column 4 of SP5.
w <- local({
  col4 <- SP5[, 4]
  apply.monthly(col4, sd) / apply.monthly(col4, mean)
})
# Label one change value: "p" when strictly positive, otherwise "m"
# (zero is labelled "m"). Expects a single non-missing number; an NA makes
# the `if` condition fail with an error.
func <- function(x) {
  if (x > 0) {
    return("p")
  }
  "m"
}
# To inspect only the labels:
# as.vector(apply(diff(cli_xts$oecd)["1970::2018"], 1, func))
# `data` stays numeric here, so the histogram can bin it (bins = 50).
y <- data.frame(
  data = round(as.vector(w["1970::2018"]), digits = 3),
  sign = as.vector(apply(diff(cli_xts$oecd)["1970::2018"], 1, func))
)
# Both aesthetics are declared once, in a single aes() call.
p <- ggplot(y, aes(x = data, fill = sign)) +
  geom_histogram(bins = 50, position = "identity", alpha = 0.5)
plot(p)

## codes end here

sample #2 a case of continuous

## code start here sample #3
#  position = "fill"
#  run sample #2 up to this point first (the code below reuses its data frame y)

# Uses the data frame `y`, which must already exist when this runs.
# Discrete alternative (needs stat = "count" to avoid the error message):
# p <- p + geom_histogram(stat = "count", position = "identity",
#                         alpha = 0.5, bins = 60)
p <- ggplot(y, aes(x = data, fill = sign)) +
  geom_histogram(bins = 50, position = "fill", alpha = 0.9)
plot(p)

## code end here.

sample #3 position="fill"

## codes start sample #4

# Monthly sd / mean of column 4 of SP5, windowed to 1970-2018.
w <- (
  apply.monthly(SP5[, 4], sd) / apply.monthly(SP5[, 4], mean)
)["1970::2018"]
# Lag-5 difference of the OECD series; NA rows are dropped before windowing.
d <- na.omit(diff(cli_xts$oecd, lag = 5))["1970::2018"]
# Classify one change value into four bands:
#   x >  0.1           -> "upper"
#   0    < x <=  0.1   -> "uppermiddle"
#   -0.1 < x <=  0     -> "lowermiddle"
#   x <= -0.1          -> "lower"
# Expects a single non-missing number; an NA makes the `if` condition fail
# with an error.
func <- function(x) {
  if (x > 0.1) {
    return("upper")
  }
  if (x > 0) {
    return("uppermiddle")
  }
  if (x > -0.1) {
    return("lowermiddle")
  }
  # Fall-through so that exactly -0.1 is labelled too. A strict `x < -0.1`
  # test would leave that value without a label (NULL), and apply() would
  # then return a list instead of a character vector.
  "lower"
}
# NOTE(review): `sign` is classified from the 1-step diff(cli_xts$oecd), not
# from the lag-5 change `d` that is stored in `delta` - confirm this is
# intended.
df <- data.frame(
  sd = as.vector(w),
  delta = as.vector(d),
  sign = as.vector(apply(diff(cli_xts$oecd)["1970::2018"], 1, func))
)
p <- ggplot(df, aes(x = sd)) +
  geom_histogram(
    aes(fill = sign),
    position = "identity",
    alpha = 0.3,
    bins = 120
  )
# Optional layers, left disabled:
# p <- p + geom_point(alpha = 0.5)
# p <- p + geom_point(alpha = 0.5, aes(color = sign))
# p <- p + geom_smooth(method = "lm")
plot(p)

## codes end

sample #4 coefficient of variation by cli 5month delta



sample #5 position="stack" and alpha=0.9



0 件のコメント: