# Resource description (stray page-heading text, kept as a comment so the script parses)
# Final-exam review
# Part 1: matrices and data frames
# 1. Building specific matrices and data frames
# Matrices
# Approach 1: array() with an explicit dim, then name rows and columns
a <- array(1:10, dim = c(2, 5))
rownames(a) <- 1:2
colnames(a) <- c("one", "two", "three", "four", "five")
a
# dimnames<- assigns both sets of names in a single step
dimnames(a) <- list(1:2, c("one", "two", "three", "four", "five"))
nrow <- nrow(a)
ncol <- ncol(a)
dim(a)
# Approach 2: matrix(), filled column by column
a <- matrix(1:10, nrow = 2, byrow = FALSE)
rownames(a) <- 1:2
colnames(a) <- c("one", "two", "three", "four", "five")
# Same matrix again, with the names supplied at construction time
a <- matrix(
  1:10,
  nrow = 2,
  byrow = FALSE,
  dimnames = list(1:2, c("one", "two", "three", "four", "five"))
)
# Creating data frames
df <- data.frame(
  Name = c("Alice", "Becka", "James", "Jeffrey", "John"),
  Sex = c("F", "F", "M", "M", "M"),
  Age = c(13, 13, 12, 13, 12),
  Height = c(56.5, 65.3, 57.3, 62.5, 59.0),
  Weight = c(84.0, 98.0, 83.0, 84.0, 99.5)
)
df
# The same columns held in a plain list
Lst <- list(
  Name = c("Alice", "Becka", "James", "Jeffrey", "John"),
  Sex = c("F", "F", "M", "M", "M"),
  Age = c(13, 13, 12, 13, 12),
  Height = c(56.5, 65.3, 57.3, 62.5, 59.0),
  Weight = c(84.0, 98.0, 83.0, 84.0, 99.5)
)
Lst
Lst[["Name"]]  # [[ ]] extracts the element itself
Lst["Name"]    # [ ] returns a one-element list
Lst[1]
Lst[[1]]
Lst$Name
# A list of equal-length vectors converts directly to a data frame
df <- as.data.frame(Lst)
df
x <- array(1:6, dim = c(2, 3))
as.data.frame(x)
# Referencing parts of a data frame
df[1:2, 3:5]
df[["Height"]]
df$Weight
names(df)  # this attribute is never empty
rownames(df) <- c("one", "two", "three", "four", "five")
df
# with() evaluates the expression inside df, so no attach()/detach() pair is
# needed and the search path is left untouched.
r <- with(df, Height / Weight)
r
df$r <- r
names(df)
# At top level the columns are not visible as bare names (the original
# Height/Weight after detach() stopped with "object not found"), so they are
# qualified with df$ here.
r <- df$Height / df$Weight
# 2. Matrix operations
a <- diag(1:3)
a[2, 1] <- 1  # linear index 2 of a 3x3 matrix is row 2, column 1
a
# (1) Transpose
t(a)
# (2) Determinant
det(a)
# (3) Inner product of two vectors (three equivalent spellings)
x <- 1:5
y <- 2 * 1:5
x %*% y
t(x) %*% y
crossprod(x, y)
# (4) Outer product of two vectors (four equivalent spellings)
x %*% t(y)
tcrossprod(x, y)
outer(x, y)
x %o% y
# Matrix multiplication
a <- array(1:9, dim = c(3, 3))
b <- array(9:1, dim = c(3, 3))
x <- 1:3
a * b             # element-wise product
a %*% b           # matrix product
x %*% a %*% x     # quadratic form
crossprod(a, b)   # t(a) %*% b
tcrossprod(a, b)  # a %*% t(b)
# Matrix inverse
# array(1:9, dim = c(3, 3)) is singular, so solve() would stop with an error.
# Changing one entry makes the matrix invertible (determinant -3).
a[3, 3] <- 10
solve(a)
b <- 1:3
solve(a, b)  # solution x of a %*% x = b
# Eigenvalues and eigenvectors
sm <- eigen(a)
sm
# Singular value decomposition
e <- diag(1:3)
svde <- svd(e)
svde
# Rebuild e from its factors; with() avoids leaving svde on the search path
with(svde, u %*% diag(d) %*% t(v))
# Functions related to matrix computations
# Dimensions
a <- diag(1:4)
nrow(a)
ncol(a)
# Combining matrices
x1 <- rbind(c(1, 2), c(3, 4))
x2 <- x1 + 10
x3 <- cbind(x1, x2)  # side by side
x3
x4 <- rbind(x1, x2)  # stacked
x4
cbind(1, x1)         # the scalar 1 is recycled into a full column
# Flattening a matrix into a vector (column-major order)
a <- matrix(
  1:6,
  ncol = 2,
  dimnames = list(c("one", "two", "three"), c("first", "second")),
  byrow = TRUE
)
as.vector(a)
# apply(): margin 1 works over rows, margin 2 over columns
apply(a, 1, mean)
apply(a, 2, sum)
# tapply(): mean of 1:5 within each factor level
tapply(1:5, factor(c("f", "f", "m", "m", "m")), mean)
# Question 2
# Generate random numbers
x <- rnorm(100, 0, 1)
x
# Histogram of the sample on the density scale
hist(x, freq = FALSE)
# Kernel density estimate, printed and then overlaid on the histogram
density(x)
lines(density(x), col = "blue")
# Overlay the normal density using the sample mean and sd
y <- seq(-4, 3, 0.2)
lines(y, dnorm(y, mean(x), sd(x)), col = "red")
# Empirical distribution function of a normal sample, with the fitted normal CDF
z <- rnorm(50, 0, 1)
plot(ecdf(z), do.points = FALSE, verticals = TRUE)
d <- seq(-3, 2, 0.2)
lines(d, pnorm(d, mean(z), sd(z)), col = "red")
# Same comparison for a Poisson sample
y <- rpois(100, 2)
plot(ecdf(y), col = "red", verticals = TRUE, do.points = FALSE)
x <- 0:8
lines(x, ppois(x, mean(y)), col = "blue")
# Same plots for a small fixed data set
w <- c(
  75, 64, 47.4, 66.9, 62.2, 62.2, 58.7, 63.5, 66.6, 64.0, 57.0, 69.0,
  56.9, 50.0, 72.0
)
hist(w, freq = FALSE)
lines(density(w), col = "blue")
x <- 44:76
lines(x, dnorm(x, mean(w), sd(w)), col = "red")
plot(ecdf(w), do.points = FALSE, verticals = TRUE)
lines(x, pnorm(x, mean(w), sd(w)), col = "red")
# Descriptive statistics of a numeric sample.
#
# Args:
#   x: numeric vector holding the sample.
# Returns:
#   A one-row data frame with columns N, Mean, Var, std_dev, Median,
#   std_mean, CV, CSS, USS, R, R1, Skewness and Kurtosis.
data_outline <- function(x) {
  n <- length(x)
  m <- mean(x)
  v <- var(x)
  s <- sd(x)
  me <- median(x)
  cv <- 100 * s / m        # coefficient of variation, in percent
  css <- sum((x - m)^2)    # corrected (centred) sum of squares
  uss <- sum(x^2)          # uncorrected sum of squares
  R <- max(x) - min(x)     # sample range
  R1 <- quantile(x, 3 / 4) - quantile(x, 1 / 4)  # interquartile range
  sm <- s / sqrt(n)        # standard error of the mean
  g1 <- n / (n - 1) / (n - 2) * sum((x - m)^3) / s^3
  # The parentheses keep both terms in one expression. Without them R ends the
  # assignment at the line break and silently discards the subtracted term.
  g2 <- (n * (n + 1) / (n - 1) / (n - 2) / (n - 3) * sum((x - m)^4) / s^4 -
    3 * (n - 1)^2 / (n - 2) / (n - 3))
  data.frame(
    N = n, Mean = m, Var = v, std_dev = s,
    Median = me, std_mean = sm, CV = cv, CSS = css, USS = uss,
    R = R, R1 = R1, Skewness = g1, Kurtosis = g2, row.names = 1
  )
}
# Summarise a standard-normal sample of size 100
x <- rnorm(100)
data_outline(x)
# Question 3
# The r / p / d / q prefixes: random draws, distribution function, density
# (or probability mass) and quantile function
# Normal
rnorm(100, 0, 1)
pnorm(1:5, 0, 1)
dnorm(-3:3, 0, 1)
qnorm(seq(0, 1, 0.25), 0, 1)
# Beta
rbeta(100, 2, 2)
# Binomial
rbinom(100, 100, 0.5)
pbinom(1:100, 100, 0.5)
dbinom(1:5, 100, 0.5)
qbinom(seq(0, 1, 0.1), 100, 0.5)
# Chi-squared (the original closing bracket was a full-width character, which
# made the line unparseable)
rchisq(100, 1)
qchisq(seq(0, 1, 0.2), 10)
pchisq(1:10, 10)
dchisq(1:10, 10)
# Exponential
rexp(100, 0.5)
# Poisson
rpois(100, 2)
ppois(1:1000, 2)
dpois(1:100, 2)
# Uniform
runif(100, 0, 1)
qunif(c(0, 0.2, 0.8), 0, 1)
punif(seq(0, 1, 0.2), 0, 1)
dunif(seq(0, 1, 0.01), 0, 1)
# Student t
rt(100, 2)
qt(0.8, 2)
pt(-3:3, 2)
dt(-3:3, 2)
# F
rf(100, 1, 2)
qf(0.8, 1, 2)
# Part 4: confidence intervals
# 1
# (1) sigma known
# Confidence interval for a mean when the standard deviation is known.
#
# Args:
#   x:     numeric sample.
#   side:  < 0 gives the interval (-Inf, b); > 0 gives (a, Inf);
#          0 gives the two-sided interval (a, b).
#   sigma: known standard deviation.
#   alpha: one minus the confidence level.
# Returns:
#   A one-row data frame with columns mean, a (lower bound), b (upper bound).
interval_estimate1 <- function(x, side = 0, sigma = 1, alpha = 0.05) {
  xb <- mean(x)
  n <- length(x)
  se <- sigma / sqrt(n)
  # R has no `elseif` keyword; the chain must be spelled `else if`.
  if (side < 0) {
    tmp <- se * qnorm(1 - alpha)
    a <- -Inf
    b <- xb + tmp
  } else if (side > 0) {
    tmp <- se * qnorm(1 - alpha)
    a <- xb - tmp
    b <- Inf
  } else {
    tmp <- se * qnorm(1 - alpha / 2)
    a <- xb - tmp
    b <- xb + tmp
  }
  data.frame(mean = xb, a = a, b = b)
}
# Two-sided, upper-bounded and lower-bounded intervals for a sample with sd 4
x <- rnorm(100, 0, 4)
interval_estimate1(x, sigma = 4, side = 0)
interval_estimate1(x, sigma = 4, side = -1)
interval_estimate1(x, sigma = 4, side = 1)
# (2) sigma unknown
# Confidence interval for a mean based on the t distribution with n - 1
# degrees of freedom and the sample standard deviation.
#
# Args:
#   x:     numeric sample.
#   side:  < 0 gives the interval (-Inf, b); > 0 gives (a, Inf);
#          0 gives the two-sided interval (a, b).
#   alpha: one minus the confidence level.
# Returns:
#   A one-row data frame with columns mean, a (lower bound), b (upper bound).
interval_estimate2 <- function(x, side = 0, alpha = 0.05) {
  xb <- mean(x)
  n <- length(x)
  se <- sd(x) / sqrt(n)
  # R has no `elseif` keyword; the chain must be spelled `else if`.
  if (side < 0) {
    tmp <- se * qt(1 - alpha, n - 1)
    a <- -Inf
    b <- xb + tmp
  } else if (side > 0) {
    tmp <- se * qt(1 - alpha, n - 1)
    a <- xb - tmp
    b <- Inf
  } else {
    tmp <- se * qt(1 - alpha / 2, n - 1)
    a <- xb - tmp
    b <- xb + tmp
  }
  data.frame(mean = xb, a = a, b = b)
}
x <- rnorm(100, 0, 1)
interval_estimate2(x, side = -1)
interval_estimate2(x, side = 0)
interval_estimate2(x, side = 1)
# Cross-check against t.test(). It has no `side` argument (an unknown argument
# is swallowed by `...`, so every call would be two-sided); the direction is
# chosen with `alternative`. "less" gives the interval (-Inf, b), matching
# side = -1, and "greater" gives (a, Inf), matching side = 1.
t.test(x, alternative = "less")
t.test(x, alternative = "two.sided")
t.test(x, alternative = "greater")
# Two populations, sigma1 = sigma2 but unknown
# Two-sided confidence interval for the difference of two means using the
# pooled variance and the t distribution with n1 + n2 - 2 degrees of freedom.
#
# Args:
#   x, y:  numeric samples from the two populations.
#   alpha: one minus the confidence level.
# Returns:
#   A one-row data frame with columns mean (difference of sample means),
#   a (lower bound) and b (upper bound).
interval_estimate3 <- function(x, y, alpha = 0.05) {
  xb <- mean(x)
  yb <- mean(y)
  n1 <- length(x)
  n2 <- length(y)
  dof <- n1 + n2 - 2
  # Pooled variance: the denominator is n1 + n2 - 2 (the original divided by
  # n1 + n1 - 2, which is only right when both samples have the same size).
  sw <- ((n1 - 1) * var(x) + (n2 - 1) * var(y)) / dof
  tmp <- sqrt((1 / n1 + 1 / n2) * sw) * qt(1 - alpha / 2, dof)
  a <- xb - yb - tmp
  b <- xb - yb + tmp
  data.frame(mean = xb - yb, a = a, b = b)
}
x <- rnorm(100, 0, 1)
y <- rnorm(100, 1, 1)
interval_estimate3(x, y)
# interval_estimate3() pools the two variances, so the matching reference is
# the equal-variance t test; the t.test() default is the Welch test.
t.test(x, y, var.equal = TRUE)
# NOTE(review): hard-coded numbers, apparently pasted from an earlier run; the
# value does not depend on the x and y drawn above -- confirm whether needed.
-0.03643479-0.98699097
# Question 5: hypothesis tests
# (1) sigma known, two-sided, H0: mu = mu0
# Z test for a population mean.
#
# Args:
#   x:     numeric sample.
#   mu:    hypothesised mean.
#   sigma: known standard deviation.
# Returns:
#   A one-row data frame with columns mean, Z and p_value (two-sided).
mean.test1 <- function(x, mu = 0, sigma = 1) {
  xb <- mean(x)
  n <- length(x)
  z <- (xb - mu) / sigma * sqrt(n)
  lower_tail <- pnorm(z)
  # Double whichever tail is the smaller one
  P <- if (lower_tail <= 1 / 2) 2 * lower_tail else 2 * (1 - lower_tail)
  data.frame(mean = xb, Z = z, p_value = P)
}
# Z test on a sample drawn with sd 2, tested against mu = 0
x <- rnorm(100, 0, 2)
mean.test1(x, mu = 0, sigma = 2)
# (2) sigma unknown, two-sided, H0: mu = mu0
# One-sample t test for a population mean.
#
# Args:
#   x:  numeric sample.
#   mu: hypothesised mean.
# Returns:
#   A one-row data frame with columns mean, Z (the t statistic) and
#   p_value (two-sided, from the t distribution with n - 1 df).
mean.test2 <- function(x, mu = 0) {
  xb <- mean(x)
  n <- length(x)
  z <- (xb - mu) / sd(x) * sqrt(n)
  lower_tail <- pt(z, n - 1)
  # Double whichever tail is the smaller one
  P <- if (lower_tail <= 1 / 2) 2 * lower_tail else 2 * (1 - lower_tail)
  data.frame(mean = xb, Z = z, p_value = P)
}
# Compare the hand-written t test with the built-in one
x <- rnorm(100)
mean.test2(x, mu = 0)
t.test(x, mu = 0, alternative = "two.sided")
# Two populations, sigma1 = sigma2 but unknown, H0: mu1 - mu2 = mu
# Two-sample pooled-variance t test.
#
# Args:
#   x, y: numeric samples from the two populations.
#   mu:   hypothesised difference of means (mu1 - mu2).
# Returns:
#   A one-row data frame with columns mean (difference of sample means),
#   T (the t statistic) and p_value (two-sided).
mean.test3 <- function(x, y, mu = 0) {
  xb <- mean(x)
  yb <- mean(y)
  n1 <- length(x)
  n2 <- length(y)
  dof <- n1 + n2 - 2
  sw <- ((n1 - 1) * var(x) + (n2 - 1) * var(y)) / dof
  t <- (xb - yb - mu) / sqrt(sw * (1 / n1 + 1 / n2))
  # The pooled statistic has n1 + n2 - 2 degrees of freedom (the original used
  # n1 + n2 - 1, which gives slightly wrong p-values).
  p <- pt(t, dof)
  P <- if (p <= 1 / 2) 2 * p else 2 * (1 - p)
  data.frame(mean = xb - yb, T = t, p_value = P)
}
# Means 0 and 2, tested against a difference of -2
x <- rnorm(100, 0, 1)
y <- rnorm(100, 2, 1)
mean.test3(x, y, mu = -2)
t.test(x, y, var.equal = TRUE, mu = -2)
# Equal means, samples drawn with sd 1 and sd 2
x <- rnorm(100, 0, 1)
y <- rnorm(100, 0, 2)
mean.test3(x, y)
t.test(x, y, var.equal = TRUE)
# Question 6: calling built-in R test functions
# Kolmogorov-Smirnov test: do two samples come from the same distribution?
x <- rnorm(100, 0, 1)
y <- rt(100, 5)
z <- rnorm(100, 0, 1)
# The alternative must be passed inside the call (the original closed the
# parenthesis too early, which is a syntax error).
ks.test(x, y, alternative = "less")
ks.test(x, z)
# Does one sample follow a specified distribution?
ks.test(x, "pnorm", 0, 2)
ks.test(x, "pt", 1)
# Sign test: do two samples differ?
x <- rbinom(100, 100, 0.5)
binom.test(sum(x >= 50), 100)
y <- rbinom(100, 100, 0.4)
binom.test(sum(x < y), length(x), alternative = "greater")
# Wilcoxon signed-rank / rank-sum test (exact or large-sample approximation)
# wilcox.test(x, y, alt, mu, paired = F, exact = NULL, correct = T, conf.int = F,
#             conf.level = 0.95)
r <- runif(100, 136, 145)
wilcox.test(r, mu = 140, alternative = "less", exact = FALSE,
            conf.int = TRUE, correct = FALSE)
x <- rnorm(100)
y <- rnorm(100)
wilcox.test(x, y, paired = TRUE, alternative = "greater")
wilcox.test(x - y, alternative = "greater")
binom.test(sum(x > y), length(x), alternative = "greater")
# Question 7
# Correlation tests
x <- 1:6
y <- 6:1
z <- 2:7
cor.test(x, y, alternative = "greater", method = "spearman")
cor.test(x, z, alternative = "greater", method = "spearman")
# Without ties
x <- c(2, 3, 1, 4, 5, 8, 6)
y <- 1:7
# cor.test() names its continuity-correction switch `continuity`; an argument
# called `correct` is swallowed by `...` and has no effect.
cor.test(x, y, alternative = "greater", method = "spearman", continuity = TRUE)
n <- length(x)
r <- rank(x)
r
R <- rank(y)
R
# With no ties, rho follows from the squared rank differences
s <- sum((r - R)^2)
rho <- 1 - 6 * s / n / (n^2 - 1)
rho
# With ties
x <- c(2, 3, 4, 4, 5, 8, 6)
y <- 1:7
# exact = FALSE belongs inside the call (the original placed it after the
# closing parenthesis, which is a syntax error).
cor.test(x, y, alternative = "greater", method = "spearman",
         exact = FALSE, continuity = TRUE)
n <- length(x)
r <- rank(x)
r
R <- rank(y)
R
# With ties, rho is the Pearson correlation of the two rank vectors
sxy <- sum(r * R)
sx <- sum(r^2)
sy <- sum(R^2)  # ranks of y, not the raw y values
t <- n * ((n + 1) / 2)^2
rho <- (sxy - t) / sqrt(sx - t) / sqrt(sy - t)
rho
# Question 8: regression
x <- c(seq(0.1, 0.18, 0.01), 0.20, 0.21, 0.23)
y <- c(42, 43.5, 45, 45.5, 45, 47.5, 49, 53, 50, 55, 55, 60)
# Scatter plot
plot(x, y)
# Fit the regression; the second formula spells out the intercept term
lm.sol <- lm(y ~ x)
lm.sol <- lm(y ~ 1 + x)
# Summary statistics
summary(lm.sol)
# Draw the fitted line on the scatter plot
abline(lm.sol)
# Interval estimates for the regression coefficients
# Two-sided t intervals built from the coefficient table of a fitted model.
#
# Args:
#   lm.sol: fitted model; summary(lm.sol)$coefficients and
#           lm.sol$df.residual are read from it.
#   alpha:  one minus the confidence level.
# Returns:
#   A numeric matrix with one row per coefficient and columns
#   "estimate", "left" and "right".
beta.int <- function(lm.sol, alpha = 0.05) {
  A <- summary(lm.sol)$coefficients
  resid_df <- lm.sol$df.residual
  # Column 1 holds the estimates, column 2 their standard errors
  margin <- A[, 2] * qt(1 - alpha / 2, resid_df)
  lower <- A[, 1] - margin
  upper <- A[, 1] + margin
  row_labels <- dimnames(A)[[1]]  # first element of the dimnames list
  col_labels <- c("estimate", "left", "right")
  matrix(
    c(A[, 1], lower, upper),
    ncol = 3,
    dimnames = list(row_labels, col_labels)
  )
}
beta.int(lm.sol)
# Predicted responses and intervals for new values of the predictor
# NOTE(review): 1.20 is far from the other new values; possibly 0.20 was
# meant -- confirm.
new <- data.frame(x = c(0.16, 0.19, 1.20))
lm.predict <- predict(lm.sol, new)
lm.predict
lm.predict <- predict(lm.sol, new, interval = "confidence", level = 0.95)
lm.predict <- predict(lm.sol, new, interval = "prediction", level = 0.95)
# Residual plots
resid <- lm.sol$residuals
plot(resid)
y.res <- resid(lm.sol)
y.fit <- predict(lm.sol)
plot(y.res ~ y.fit)
plot(y.res ~ x)
# Built-in diagnostic plots 1 to 3 of the fitted model
plot(lm.sol, 1)
plot(lm.sol, 2)
plot(lm.sol, 3)
# Expand to read the full text (stray page-footer text, kept as a comment so the script parses)