R 語言程式設計藝術筆記

1 quick begineer
source("z.R")

pdf("xh.pdf")
hist(rnorm(100))
dev.off()

$R CMD BATCH z.R #shell NO window
data()

oddcount<-function(x){
k<-0
for (n in x) {
if(n%%2==1) k<-k+1
}
return(k) ## 如果沒有詞句，則返回最後執行的一句
}

mode(z)
u<-paste("abc","de","f")
v<-strsplit(u,"")
m<-rbind(c(1,4),c(2,2))
x<-list(u=2,v="abc")
str(hn)
d<-data.frame(list(kids=c("Jack","Jill"),ages=c(12,10)))

examquiz<-read.table("ExamsQuiz.txt",header=FALSE)
class(examquiz)
head(examquiz)
lma<-lm(examquiz[,2]~examquiz[,1]
lma<-lm(examquiz$V2~examquiz$V1)
attribute(lma)
lma$coef
option(editor="/usr/bin/vim")
getwd()
setwd()
help()
?"for"
example(seq)
help.search("multivariate normal")
help(package=MASS)
R CMD INSTALL --help ## No Window

# 向量

first1<-function(x){
for(i in length(x)){ # 注意當x為NULL時，已經執行了一次， bug；
if((x[i]==1) break
}
return (i)
}

y<-vector(length=2)
y[1]<-5
y[2]<-12

c(1,2,4)+c(6,0,9,20,22) ##迴圈補齊

"+"(2,3)

x<-c(4,2,17,5)
y<-x[c(1,1,3)] ##索引是允許重複的

1:i-1 # this means(1:i)-1 # 冒號的優先順序高於減號

seq(from=12,to=30,by=3)
seq(from 1.1, to 2, length=10)

for( i in seq(x)) # 解決了注意當x為NULL時，已經執行了一次， bug；

rep(c(5,12,3),3)
rep(c(5,12,3),each=2)

all(x>8)
any(x>8)

findruns<-function(x,k){
n<-length(x)
runs<-NULL
for(i in 1:(n-k+1)){
if (all(x[i:(i+k-1)]==1)) runs<-c(runs,i)
}
}

改進，預先分配記憶體
findruns<-function(x,k){
n<-length(x)
runs<-vector(length(n))
count<-0
for(i in 1:(n-k+1)){
if (all(x[i:(i+k-1)]==1)) {
count<-count+1
runs[count]<-i
}
}
if(count>0){
runs<-runs[1:count]
} else runs<-NULL
return (runs）

}

向量

z<-c(5,2,-3,8)
z[z*z>8]

y<-1:10
y<-ifelse(y%%2==0,5,12)

x<-c(5,2,9,12)
ifelse(x>6,2*x,3*x)

findud<-function(v){
vud-v[-1]-v[length(v)]
return(ifelse(vud>0,1,-1))
}
udcorr<-function(x,y){
ud<-lappy(list(x,y),findud)
return(mean(ud[[1]]==ud[[2]]))
}

sign(c(-1,-2,-4,1,3,0) # 轉化為1，-1，0

grps<-list() #
for(gen in c("M","F","I")) grps[[gen]]<-which(g==gen)

identical(x,y)
typeof(y)

names(r)<-c("a","b","c","d")
rownames(t)[2]<-c("j")
矩陣
y[row(y)==col(y)]

對矩陣的行和列呼叫函式
apply(y,2,mean)
colMeans(y)

f<-function(x) x/c(2,8)
y<-apply(z,2,f) #c(2,8) 注意迴圈補齊

findols<-function(x){
findol<-function(xrow){
mdn<-median(xrow)
devs<-abs(xrow-mdn)
return(which.max(devs))
}
return(apply(x,1,findol))
} # 注意巢狀
矩陣插入 cbind，rbind，迴圈補齊

z[,2, drop=FALSE] drop 避免降維
"["(z,,2)

dimnames(f)<-list(c("a","b","c"),c("d","e"),c("f","g")) 陣列名字

z<-vector(mode="list")
z$b<NULL #刪除
unlist(z)
as.list(z) 相互轉換， list(z) 轉化成另外的形式

findwords<-function(tf){
txt<-scan(tf,"")
wl<-list()
for (i in 1:length(txt)){
wrd<-txt[i]
wl[[wrd]]<-c(wl[[wrd]],i)
}
return(wl)
}

lapply # list apply
lapply(f,median)
sapply # simplifed apply 結果轉化為矩陣或向量，可直接輸出矩陣

freqwl<-function(wrdlist){
freqs<-sapply(wrdlist,length)
return(wrdlst[order(freqs)])
}

c(list(a=1,b=2,c=list(d=4,e=9))) 列表遞迴
c(list(a=1,b=2,c=list(d=4,e=9)),recursive=T)

df5<-complete.cases(d4) ###去掉NA
assign(x,y，env 設定全域性變數賦值

makecorp<-function(corpname){
t<-all2006[all2006$Employer_Name==corpname,]
return (t)
}

f<-data.frame(a=c(1,2,3),b=c(4,5,6))
z<-data.frame(a=c(1,3,4),r=c(5,6,7))
merge(f,z)
merge(f,z, all=T) # 資料框合併 SQl join 合併？
all(count.fields("DA",sep='')>5)
table(count.fields("DA,sep=''))

aba<-read.csv("abaline.data",header=T)
lftn<-function(clmn){
glm(aba$Gender~clmn,family=binomial)$coef
}
loall<-sapply(aba[,-1],lftn) # sapply是lapply的友好簡潔版本，使用列

merge2fy<-function(fy1,fy2){
outdf<-merge(fy1,fy2)
for(fy in list(fy1,fy2)){
saplout<-sapply((fy[[2]]),sepsoundtone)
tmpdf<-data.frame(fy[,1],t(saplout),row.names=NULL,stringsAsFactors =F)
consname<-paste(names(fy)[[2]],"cons" sep="")
restname<-paste(names(fy)[[2]],"sound" sep="")
tonename<-paste(names(fy)[[2]],"tone"sep="")
names(tmpdf)<-c("Ch Char",consname,restname,tonename)
outdf<-merge(outdf,tmpdf)

}
return (outdf)
}

sepsoundtone<-function(pronun){
nchr<-nchar(pronun)
vowels<-c("a","e","i","o","u")
numcons<-0
for(i in 1:nchr) {
ltr<-substr(pronun,i,i)
if(!ltr %in% vowels) numcons<-numcons+1 else break
}
cons<-if(numcons>0) substr(pronun,1,numcons) else NA
tone<-substr(pronun,nchr,nchr)
numtones<-tone %in% letters
if(numtones==1) tone<-NA
therest<-substr(pronun,numcons+1,nchr-numtones)
return(c(cons,therest,tone))
}

split,分割
unsplit, 還原分割的資料，

因子和表
x<-c(5,12,13,12)
unclass(xf) # 已經把數值轉化為因子了，使用因子值參與計算
xff<-factor(x,levels=c(5,12,13,88)) 水平和值對應
xff[2]<-88 修改的是值，如果水平不存在，顯示插入非法水平

tapply(x,f,g) f 因子， tapply 用於因子函式　ｘ不可以是資料框
ages<-c(25,26,55,37,21,42)
affils<-c("R","D","D","R","U","D")
tapply(ages,affils,mean)

d<-data.frame(list(gender=c("M","M","F","M","F","F"),age=c(47,59,21,32,33,24),income=c(55000,88000,32450,76500,12300,45650)))
d$over25<-ifelse(d$age>25,1,0)
tapply(d$income,list(d$gender,d$over25),mean)
split（d$income,list(d$gender,d$over25))

findwords<-function(tf){
txt<-scan（tf,"")
words<-split(1:length(txt),txt)
return(words)
}

by函式，應用的物件不止向量，而tapply只能向量
aba<-read.csv("alaone.data",header=T)
by(aba,aba$Gender,function(m) lm(m[,2]~m[,3]))

u<-c(22,8,33,6,8,29,-2)
fl<-list(c(5,12,13,12,13,5,13),c("a","bc","a","a","bc","a","a"))
tapply(u,fl,length)

table(fl) contingency table
table(c(5,12,13,12,8,5))

class(cttab)
cttab[1,1] 可以和矩陣一樣的方式訪問 table
apply(cttab,1,sum)
addmargins(cttab) 變數的邊際值

subtable<-function(tbl,subnames){
tblarray<-unclass(tbl)
dcargs<-list(tblarray)
ndims<-length(subnames)
for(i in 1:ndims) {
dcargs[[i+1]]<-subnames[[i]]
}
subarray<-do.call("[",dcargs) ## 注意可變引數 do.call(f,argslist)
dims<-lapply(subnames,length)
subtbl<-array(subarray,dims,dimnames=subnames)
class(subtbl)<-"table" ######
return(subtbl)
}

tabdom<-function(tbl,k){
tbldf<-as.data.frame(tbl)
freqord<-order(tbldf$Freq,decreasing=T)
dom<-tbldf[freqord,][1:k,]
return(dom)
}

aggregate(aba[,-1],list(aba$Gender),median)
Z <- stats::rnorm(10000)
table(cut(Z, breaks = -6:6)) # cut 建立因子，分組
x <- 2:18
v <- c(5, 10, 15)
t<-cbind(x, findInterval(x, v)) # findInterval 建立因子，分組

迴圈控制
i<-1
while (i<10) i<-i+4

i<-1
while(TRUE){
i<-i+4
if (i>10) break
}

i<-1
repeat {
i<-i+4
if(i>10) break
}

sim<-function(nreps){
commdata<-list()
commdata$countabsamecomm<-0
for(rep in 1:nreps){
commdata$whosleft<-1:20
commdata$numabchosen<-0
commdata<-choosecomm(commdata,5)
if(commdata$numabchosen>0) next
commdata<-choosecomm(commdata,4)
if(commdata$numabchosen>0) next
cmmdata<-choosecomm(commdata,3)
}
print(commdata$countabsamecomm/nreps)
}

a<-matrix(1:4,2,2)
b<-matrix(1:6,3,2)
for(m in c("a","b")) {
z<-get(m)
print(lm(z[,2]~z[,1]))
} R不直接支援非向量的迴圈，可以使用lapply 和get 注意get的用法，獲得物件。才能進行迴圈

if(r==4){
x<-1}
else # 注意else寫的位置，前有{，或者else 寫到上一行上去？
{
x<-3
y<-4
}

x&&y 標量“與”
x&y 向量“與”
x||y 標量“或”
x|y 向量“或”

g<-function(x){
return(x+1)
}
formals(g)
bodys(g)

abline #檢視函式
page(abline)

f1<-function(a,b) return(a+b)
f2<-function(a,b) return(a-b)
f<-f1
f(3,2)

g<-function(h,a,b) h(a,b)
g(f1,3,2)

g1<-function(x) return(sin(x))
g2<-function(x) return(sqrt(x^2+1))
g3<-function(x) return(2*x-1)
plot(c(0,1),c(-1,1.5))
for(f in c(g1,g2,g3)) plot(f,0,1, add=T) # 函式物件迴圈

g<-function(h,a,b) h(a,b)
body(g)<-quote(2*x+3)

ls() 頂層環境物件
ls.str()
environment(f)

print(ls(enivr=parent.frame(n=1))) #函式內部
print(ls())

f<-function(){
a<-1
return(g(a)+1)
}
g<-function(aa){
b<-2
aab<-h(aa+b)
return(aab)
}

h<-function(aaa){
c<-3
return(aaa+c)
}

showframe<-function(upn) {
if(upn<0){
env<-.GlobalEnv
} else {
env<-parent.frame(n=upn+1)
}
vars<-ls(envir=env)
for(vr in vars){
vrg<-get(vr,envir=env)
if(!is.function(vrg)){
cat(vr,":\n", sep="")
print(vrg)
}
}
}

two<-function(u){
u<<-2*u ##使用超值運算子對上層變數進行建立並賦值，一層一層尋找
z<-z*z
}

two<-function(u) {
assign("u",2*u,pos=.GlabalEnv) ###對頂層賦值
z<-2*z
}

eventrow<-function(evnttm,evntty,appin=NULL){
rw<-c(list(evnttime=evnttm,evnttype=evntty),appin)
return(as.data.frame(rm))
}

schedevnt<-function(evnttm,evntty,appin=NULL) {
newevnt<-evntrow(evnttm,evntty,appin)
if(is.null(sim$evnts)){
sim$evnts<<-newevnt
return()
}

inspt<-binsearch((sim$evnts)$evnttime,evnttm)
before<-
if(inspt==1) NULL else sim$evnts[1:(inspt-1),]
nr<-nrow(sim$evnts)
after<-if(inspt<=nr)sim$events[inspt:nr,] else NULL
sim$evnts<<-rbind(before,newevnt,after)
}
binsearch<-function(x,y){
n<-length(x)
lo<-1
hi<-n
while(lo+1<hi){
mid<-floor((lo+hi)/2)
if(y==x[mid]) return(mid)
if(y<x[mid]) hi<-mid else lo<-mid
}
if(y<=x[lo]) return(lo)
if(y<x[hi]) return(hi)
return(hi+1)
}

counter<-function(){
ctr<-0
f<-function(){
ctr<<-ctr+1
cat("this count currently has value", ctr,"\n")
}
return(f)
} # 閉包
c1<-connter()
c2<-counter()

c1() #執行完畢後c1是f()的拷貝，同時變數ctr也存在

qs<-function(x){
if(length(x)<=1) return (x)
pivot<-x[1]
therest<-x[-1]
sv1<-therest[therest<pivot]
sv2<-therest[therest>=pivot]
sv1<-qs(sv1)
sv2<-qs(sv2)
return(c(sv1,pivot,sv2)
}

置換函式
"["(x,1)
x<-"names<-"(x,value=c("a","b")
"[<-"(x,2:3, value=55:56)

newbookvec<-function(x){
tmp<-list()
tmp$vec<-x
tmp$wrts<-rep(0,length(x))
class(tmp)<-"bookvec"
return(tmp)
}

"[.bookvec"<-function(bv,subs){
return(bv$vec[subs])
}

"[<-.bookvec"<-function(bv,subs,value){
bv$wrts[subs]<-bv$wrts[subs]+1
bv$vec[subs]<-value
return(bv)
}

函式程式碼編寫工具
edit()
source("zyx.R")

常見二元運算子
"%a2b%"<-function(a,b) return (a+b)
3 %a2b% 5
匿名函式

數值運算與模擬
pmax(c(1,2,3),c(4,1,3))
pmin(c(1,2,3),c(4,1,3))
prod(c(1,2,3,4))
cumsum(c(12,1,2))
cumprod(c(1,2,3))
nlm(function(x) return(x^2-sin(x)),8) 函式的最小值

排序
order(x) 索引
sort(x)
d[order(d$kids),]
crossprod(1:3,c(5,12,13)) # 計算向量內積
%*% 計算外積

a<-matrix(c(1,1,-1,1),nrow=2,ncol=2)
b<-c(2,4)
solve(a,b) 解方程

det(a)
eigen(a)
sweep(m,1,c(1,4,7),"+")
集合運算
union(x,y)
intersect(x,y)
setdiff(x,y)
choose(x,y)

物件導向的程式設計
daparse()
methods(print)
getAnywhere(print)
invisible(x)
getAnywhere(aspell)
utils:::print.aspell(word)
methods(class="default")

t <- c(if(is.matrix(x)) "mlm", "lm")
z <- c(if (is.matrix(x)) matrix(, 0, 3) else numeric())

j<-list(name="Joe",salary=55000,union=T)
class(j)<-"employee"
attributes(j)

print.employee<-function(wrkr){
cat(wrkr$name,"\n")
cat("salary",wrkr$salary,"\n")
cat("union memeber",wrkr$union,"\n")
}
methods(,"employee")
使用繼承

k<-list(name="Kate",salary=68000,union=F,hrsthismonth=2)
class(k)<-c("hrlyemplyee","employee")

矩陣壓縮案例
迴歸類
ployfit<-function(y,x,maxdeg){
pwrs<-powers(x,maxdeg)
lmout<-list()
class(lmout)<-"ployreg"
for(i in 1:maxdeg){
lmo<-lm(y~pwrs[,1:i])
lmo$fitted.cavvalues<-lvoneout(y,pwrs[,1:i,drop=F])
lmout[[i]]<-lmo
}
lmout$x<-x
lmout$y<-y
return(lmout)
}

print.polyreg<-function(fits){
maxdeg<-length(fits)-2
n<-length(fits$y)
tbl<-matrix(nrow=maxdeg,ncol=1)
colnames(tbl)<-"MSPE"
for(i in 1:maxdeg){
fi<-fits[[i]]
errs<-fits$y-fi$fitted.cvvalues
spe<-crossprod(errs,errs)
tbl[i,1]<-spe/n
}
cat("mean squared prediction errors, by degree\n")
print(tbl)
}

powers<-function(x,dg) {
pw<-matrix(x,nrow=length(x))
prod<-x
for(i in 2:dg){
prod<-prod*x
pw<-cbind(pw,prod)
}
return(pw)
}

lvoneout<-function(y,xmat){
n<-length(y)
predy<-vector(length=n)
for (i in 1:n) {
lmo<-lm(y[-i]~xmat[-i,])
betahat<-as.vector(lmo$coef)
predy[i]<-betahat %*% c(1,xmat[i,])
}
return(predy)
}

poly<-function(x,cfs){
val<-cfs[1]
prod<-1
dg<-length(cfs)-1
for(i in 1:dg){
prod<-prod*x
val<-val+cfs[i+1]*prod
}
}

S4類
setClass("employee",representation(name="character",salary="numeric",union="logical"))
joe<-new("employee",name="Joe",salary=55000,union=T)
joe@salary
joe@salary<-65000
slot(joe,"salary")<-88000
show(joe)
setMethod("show","employee",
function(object){
inorout<-ifelse(object@union,"is","is not")
cat(object@name,"has a salary of",object@salary,
"and",inorout,"in the union","\n")
}
)
ls(pattern="notebook")
page()
exists("j")
連線鍵盤與聯結器
scan("z4.txt",what="")
v<-scan("")
inits<-readline("type your initials:")
print()
cat("abc\n")
x<-c(5,12,13,8,88)
cat(x,sep=c(".",".",",","\n","\n"))
x<-matrix(scan("x"),nrow=5,byrow=T)
read.matrix<-function(filename){
as.matrix(read.table(filename))}
z1<-readlines("z1")
z<-file("z4.txt","r+")
readLines(z,n=1)

while(T){
rl<-readLines(c,n=1)
if(length(rl)==0){
print("reached the end")
break
} else print(rl)
}
seek(con=c,where=0) #從頭開始讀
close(c)

extractpums<-function(pf,flds){
dtf<-data.frame()
con<-file(pf,"r")
repeat{
hrec<-readLines(con,1)
if(length(hrec)==0) break
serno<-intextract(hrec,c(2,8))
npr<-intextract(hrec,c(106,107))
if(npr>0)
for(i in 1:npr) {
prec<-readLines(con,1)
person<-makerow(serno,prec,flds)
dtf<-rbind(dtf,person)
}
}
return(dtf)
}

makerow<-function(srn,pr,fl){
l<-list()
l[["serno"]]<-srn
for(nm in names(fl)) {
l[[nm]]<-intextract(pr,fl[[nm]])
}
return(l)
}

intextract<-function(s,rng){
fld<-substr(s,rng[1],rng[2])
return(as.integer(fld))
}

uci<-"http://archive.ics.uci.edu/ml/machine-learning-databases/"
uci<-paste(uci,"echocardiogram/echocardigram.data",sep="")
ecc<-read.csv(uci)

write.table(xc,"xcnew",row.names=F,col.names=F)
cat("abc\n",file="u")
cat("de\n",file="u",append=T)

c<-file("www","w")
writeLines(c("abc","de","f"),c)
close(c)

sumtree<-function(drtr){
tot<-0
fls<-dir(drtr,recursive=T)
for(f in fls) {
f<-file.path(drtr,f)
if(!file.info(f)$isdir){
tot<-tot+sum(scan(f,quiet=T))
}
}
return(tot)
}
訪問網際網路
cons<<-vector(mode="list",length=ncon)
option("timeout"=10000)
for(i in 1:ncon)
cons[[i]]<<-
socketConnection(port=port,server=T,blocking=T,open="a+b")
checkin<-unserialize(cons[[i]])
}
for(i in 1:ncon)
serialize(c(i,ncon),cons[[i]])
}
字串操作
grep("Pole",c("Equator","North Pole","South Pole"))
nchar("South Pole")
paste("North","Pole",sep="")
paste("North","Pole")
i<-8
s<-sprintf("the square of %d is %d",i,i^2)
substring("Equator",3,5)
strsplit("6-16-2011",split="-")
regexpr("uat","Equator")
gregexpr("iss","Mississippi")
正規表示式
grep(),grepl(),regexpr(),gregexpr(),sub(),gsub() strsplit()
grep("[au]",c("Equator","North Pole","South Pole"))
grep("o.e",c("Equator","North Pole","South Pole"))
grep(".",c("abc","de","f.g"))
grep("\\.",c("abc","de","f.g"))

testsuffix<-function(fn,suff){
parts<-strsplit(fn,".",fixed=T)
nparts<-length(parts[[1]])
return(parts[[1]][nparts]==suff)
}

for(i in 1:5) {
fname<-paste("q",i,"pdf")
pdf(fname)
hist(rnorm(100,sd=i))
dev.off()
}

for(i in 1:5) {
fname<-sprintf("q%d.pdf",i)
pdf(fname)
hist(rnorm(100,sd=i))
}

繪圖
plot(c(-3,3),c(-1,5),type="n",xlab="x",ylab="y")
plot(x,y)
lmout<-lm(y~x)
abline(lmout)
abline(c(2,1))
lines(c(1.5,2.5),c(3,3))
plot(x,y,type="l")
windows()

d1<-density(testscores$Exam1,from=0,to=100)
d2<-density(testscores$Exam2,from=0,to=100)
plot(d1,main="",xlab="")
lines(d2)

plot.ployreg<-function(fits){
plot(fit$x,fits$y,xlab="X",ylab="Y")
maxdg<-length(fits)-2
cols<-c("red","green","blue")
dg<-curecount<-1
while(dg<-maxdg){
prompt<-paste("Return for XV fit for degree".dg,"or type degree","or q for quit")
rl<-readline(prompt)
dg<-if(rl=="") dg else if(rl!="q") as.integer(rl) else break
lines(fits$x,fits[[dg]]$fitted.values,col=cols[curvecount%%3+1])
dg<-dg+1
cuvecount<-curvecount+1
}
}
points(testscores$Exam1,testscores$Exam3,pch="+")
points(c(1,2),c(2,3),pch="+")
legend()
text(12,23,"Exam1")
locator(1)
recordPlot()
replayPlot()
f<-function(x) return(1-exp(-x))
curve(f,0,2)
polygon(c(1.2,1.4,1.4,1.2),c(0,0,f(1.3),f(1.3)),col="gray")
g<-function(t){return(t^2+1)^0.5}
x<-seq(0,5,length=10000)
y<-g(x)
plot(x,y,type="l")

pdf("d12.pdf")
dev.list()
dev.set(2)
dev.copy(which=3)
dev.off()

library(lattice)
a<-1:10
b<-1:15
eg<-expand.grid(x=a,y=b)
eg$z<-eg$x^2+eg$x*eg$y
wireframe(z~x+y,eg)

除錯
debug(g(2,3))
stopifnot(x>0)

g<-function(x,y){
t<<-list()
stopifnot(x>0)
t[[1]]<-x+y
}
browser()
untrace(g)
setBreakpoint("x.R",28)
trace(rt,browser)
untrace(rt)
options(error=dump.frames)

findruns<-function(x,k){
n<-length(x)
runs<-NULL
for ( i in 1:(n-k)){
if(all(x[i:i+k-1]==1)) runs<-c(runs,i)
}
return(runs)
}
findruns(c(1,0,0,1,1,0,1,1,1),2)
debug(findruns)
速度與記憶體
x<-runif(100000)
y<-runif(100000)
z<-vector(length=100000)
system.time(z<-x+y)
system.time(for(i in 1:length(x)) z[i]<-x[i]+y[i])
":"(1,10)

sum<-0
nreps<-100000
for (i in 1:nreps) {
xy<-rnorm(2)
sum<-sum+max(xy)
}
print(sum/nreps)

nreps<-100000
xymat<-matrix(rnorm(2*nreps),ncol=2)
maxs<-pmax(xymat[,1],xymat[,2])
print(mean(maxs))
sim3<-function(nreps){
nb1<-10
nb2<-6
n1<-18
n2<-13
u<-matrix(c(runif(2*nreps)),nrow=nreps,ncol=2)
cndtn<-u[,1]<=nb1/n1&u[,2]<=(nb2+1)/n2|
u[,1]>nb1/n1&u[,2]<=nb2/n2
return(mean(cndtn))

}
outer(1:2,2:3,"*")

Rprof()
invisible(powers1(x,8)
Rprof(NULL)
summaryRprof()

R 與其它語言的介面
Rpy
R 平行計算
snow 包

library(snow)
dst<-function(x,y){
tmpmat<-matrix(abs(x-y),byrow=T,ncol=length(x))
rowSums(tmpmat)
}
findnewgrps<-function(currctrs){
ngrps<-nrow(currctrs)
spacedim<-ncol(currctrs)
sumcounts<-matrix(rep(0,ngrps*(spacedim+1)),nrow=ngrps)
for(i in 1:nrow(mchunk)){
dsts<-dst(mchunk[i,],t(currctrs))
j<-which.min(dsts)
sumcount[j,]<-sumcounts[j,]+c(mchunk[i,],1)
}
sumcounts
}

parkm<-function(cls,m,niters,initcenters){
n<-nrow(m)
spacedim<-ncol(m)
options(warn=-1)
ichunks<-split(1:n,1:length(cls))
options(warn=0)
mchunks<-lapply(ichunks,function(ichunk) m[ichunk,])
mcf<-function(mchunk)
mchunk<<-mchunk
invisible(clusterApply(cls,mchunks,mcf))
clusterExport(cls,"dst")
centers<-initcenters
for (i in 1:niters) {
sumcounts<-clusterCall(cls,findnewgrps,centers)
tmp<-Reduce("+",sumcounts)
centers<-tmp[,1:spacedim]/tmp[,spacedim+1]
center[is.nan(centers）】<-0
}
centers
}

R 語言程式設計藝術筆記

相關文章