Friday, May 14, 2010

My Database interface

How did I solve the integration between the R stat package and SQLite?

I simply used file input and output between the two. Windows is good enough at file system optimization that the data exchange stays in ram, not on disk. I didn't see any advantage to compiling the two together, and I just submit standard SQL queries from R to SQLite, and read back the results. I can add parameters to the SQLite queries automatically with a few lines of R code. All total so far? About 100 lines of R code. Set up for the system? None, zero, zip, nada. Just run R in the same directory as SQLite.

Simplicity always rules in software. What does this mean to the blogosphere? Well, one can down load the hundred lines of code and a binary copy of Sqlite, and have a natural SQL interface, no more difficult then R itself. The basic R macros are below.

For example, when the web says down load file, I can do save link as, and from then on, the data goes to SQL and both Sql and the data are controlled as R objects. The primitives are short and few, but we get all the plots, and statistical analysis, as well as freedom to associate ou4r own key words to new data.

I will post the latest beta version right here, wish me luck:
# R functions for using sqlite

# marshall some values into a macro replace @ @1 @2 @3 with user supplied strings.
varsSql <- function(s,query) {
y <- strsplit(query,"")[[1]]
x <- tokenize(s)
result <- ""
nvar <- 1
nprm <- 1
#print(length(y)); print(length(x))
while(nprm <= length(y) ) {
if(y[nprm] != "@")
result <-paste(result,y[nprm],sep="")
else {
ych <-y[nprm+1];
if( (nprm < length(y)) && ((ych == "1") || (ych == "2") || (ych == "3")) ){
ivar <- as.integer(ych)
result <- paste(result,x[ivar],sep="")
nprm <- nprm+1
} else {
if(nvar <= length(x) ) {
result <- paste(result,x[nvar],sep="")
nvar <- nvar + 1
} else
result <- paste(result,"NULL",sep="")
}
}
nprm <- nprm+1
}
result
}

# Execute Sqlite with an input file
SQL <- function(s,d,mode) {
x <- paste(".mode tabs\n.output \"r.dat\"\n",s,sep="")
writeLines(x,"test.sql")
y <- paste("sql ",d," < test.sql", sep="")
shell(y,intern=TRUE)

# Grab the result file from an sqlite execution
if(mode != "w")
read.csv("r.dat",header=FALSE,sep="\t",as.is=c(TRUE,TRUE,TRUE,TRUE))
}

# write a tab dilimited file from data.frame, no quotes hopefully
# read from Sqlite with: read.csv("r.dat",header=FALSE,sep="\t")
writeSql <- function(x) {
k <- data.frame(x)
print(class(k))
f <- file("sql.dat", "wb")
if(class(k) == "data.frame")
for(i in 1:dim(k)[1]) {
for(j in 1:dim(k)[2])
if(j < dim(k)[2])
writeChar(con=f,
sprintf("%s\t",k[i,j]),eos=NULL)
else
writeLines(con=f,
sprintf("%s",k[i,j]))
}


print("Closed");close(f)
}

helpSql <- function() {
print("qinit() get Sql constant")
print("qraw('sql text',database)")
print("qm(d) = get table names from d")
print("qrl(t,d) read 20 from table t in d")
print("qrb(t,d) read 200 from table t in d")
print("qw(x,t,d) - write x to table t to database")
print("qcreate(t,db,'createstring') inculud sql parenth, no spaces")
print("qinsert(t,db.'value string') ditto")

}
execSql <- function(s,d) {SQL(s,d,mode="r")}
qraw <- function(s,db) {SQL(s,db,mode = "?")}
qw <- function(x,t,db) { writeSql(x);
SQL(varsSql(t,".import sql.dat @\n"),db,mode = "w")}
qm <- function(d) {execSql(sql$master,d) }
qh <- function() {helpSql()}
qrb <- function(t,db) { execSql(varsSql(paste(t, "200"),sql$read),db) }
qrl <- function(t,db) { execSql(varsSql(paste(t, "20"),sql$read),db) }

qinsert <- function(t,db,v) {
x <- varsSql(paste(t,v),sql$insert)
SQL(x,db,mode = "w") }
qcreate <- function(t,db,s) {x <- varsSql(paste(t,s),sql$create);
SQL(x,db,mode="W")}

# tokenize a string
tokenize <- function(s) {
j <- 0; i <- 1; y <- ""
x <- strsplit(s,"")[[1]]
while(i < length(x) && x[i] == ' ') i <- i+1
while(i <= length(x) ) {
j <- j+1; y[j] =""
xc <- x[i]
y[j] <- paste(y[j],xc,sep="");
i <- i+1
if(xc == "'" || xc == "(") {
if(xc == '(') xc = ')'
while((i <= length(x)) && (x[i] != xc))
{y[j] <- paste(y[j],x[i],sep=""); i <- i+1; }
y[j] <- paste(y[j],xc,sep="")
i <- i+1
}
else while(i <= length(x) && x[i] != ' ') {
y[j] <-paste(y[j],x[i],sep="")
i <- i+1
}
while(i <= length(x) && x[i] == ' ') i <- i+1
}
y
}

### The rest are a few Sql query strings that are useful.
sql <<-list(

read = "select * from @ limit @;",
master = "select name from sqlite_master;",
offset = "select * from @ limit @ offset @;",
insert = "insert into @ values @;",
create = "drop table if exists @1; create table @1 @2 ;"
)

No comments: