# 一般化線形モデル用の関数の改変版(オリジナルは北海道大学の久保さん作) # http://hosho.ees.hokudai.ac.jp/~kubo/index-j.html 参照 # gtoolsを使うので、gregmiscが必要 # # grouping.R # KUBO Takuya (kubo@ees.hokudai.ac.jp) # August 2004 # # changed by MATSUMURA Toshikazu(http://kobe.cool.ne.jp/matsut/index.html) # Nobember 2004 library(gtools) # to use combinations() partition.int <- function(residual, imax = NA) { residual <- as.integer(residual) if (is.na(residual) == TRUE | residual < 1) { stop("the argument must be an integer and larger than zero") } if (is.na(imax) == TRUE) { imax <- residual } else { imax <- as.integer(imax) } tree <- c() for (i in rev(1:imax)) { branches <- c() if (i == residual) { branches <- c(i) } else if (i < residual) { branches <- lapply( Recall(residual - i, i), function(sb) c(i, unlist(sb)) ) } if (length(branches) > 0) { tree <- c(tree, branches) } } tree } matrix.to.list <- function(m) { set.list <- list() for (i in 1:nrow(m)) set.list <- c(set.list, list(m[i,])) set.list } list.map.combinations <- function(n, k, v.map) { lapply( matrix.to.list(combinations(as.integer(n), as.integer(k))), function(cmb) v.map[unlist(cmb)] ) } unique.group <- function(list.group) { list.return <- c() v.tag <- c() for (group in list.group) { tag <- get.group.tag(group) if (tag %in% v.tag == FALSE) list.return <- c(list.return, list(group)) v.tag <- c(v.tag, tag) } list.return } divide.set <- function(set.all, partition, depth = 0) { # check length if (length(set.all) != sum(partition)) { stop(sprintf( "Unbaralance: length(set.all) = %i, sum(partition) = %i\n", as.integer(length(set.all)), as.integer(sum(partition)) )) } # check partition type p.head <- partition[1] if (length(partition) < 2) { if (depth == 0 | p.head > 1) return(list(list(set.all))) else return(list(set.all)) } # recursively dividing p.tail <- partition[2:length(partition)] if (p.head == 1) # then partition must be c(1, 1, ..., 1) return(list(lapply(set.all, function(i) i))) list.set.head <- list.map.combinations(length(set.all), p.head, set.all) list.return <- c() for (set.head in list.set.head) { set.diff <- setdiff(set.all, set.head) for (list.set.tail in Recall(set.diff, p.tail, depth + 1)) { list.set <- list(set.head) for (set.tail in list.set.tail) { list.set <- c(list.set, list(set.tail)) } list.return <- c(list.return, list(list.set)) } } unique.group(list.return) } get.set.tag <- function(set) paste("(", paste(set, sep = "", collapse = "+"), ")", sep = "") get.group.tag <- function(list.set) { v.tag <- sapply(list.set, function(set) get.set.tag(set)) paste(sort(v.tag), sep ="", collapse = "") } map.list.set <- function(list.set) { v.names <- c() v.return <- c() for (set in list.set) { v.names<- c(v.names, sapply(set, function(s) as.character(s))) v.return <- c(v.return, rep(get.set.tag(set), length(set))) } names(v.return) <- v.names list( "tag" = get.group.tag(list.set), "map" = v.return, "number.groups" = as.integer(length(list.set)) ) } get.list.tag.map <- function(list.group) { lapply(list.group, function(group) map.list.set(group)) } generate.groups <- function(set.levels) { set.levels <- as.character(set.levels) partitions <- partition.int(length(set.levels)) list.return <- c() for (i in 1:length(partitions)) { partition <- partitions[[i]] list.return <- c( list.return, get.list.tag.map(divide.set(set.levels, partition)) ) } list.return } estimate.poisson <- function(samples) { samples <- samples[c(1,2)] # select colums(MT version) colnames(samples) <- c("treatment", "observed") # add colnames(MT version) # list.groups <- generate.groups(parameter$treatment)) # KT version list.groups <- generate.groups(unique(samples$treatment)) # MT version lapply( list.groups, function(group) { cat(sprintf("# evaluating Model%-12s", group$tag)) if (group$number.groups == 1) { glm.results <- glm( samples$observed ~ 1, family = poisson(link = log) ) } else { tr.mapped <- factor( group$map[as.character(samples$treatment)], ) glm.results <- glm( samples$observed ~ tr.mapped - 1, family = poisson(link = log) ) } group$glm <- glm.results cat(sprintf("AIC = %.1f ...\n", group$glm$aic)) group } ) } estimate.gaussian <- function(samples) { samples <- samples[c(1,2)] colnames(samples) <- c("treatment", "observed") list.groups <- generate.groups(unique(samples$treatment)) lapply( list.groups, function(group) { cat(sprintf("# evaluating Model%-12s", group$tag)) if (group$number.groups == 1) { glm.results <- glm( samples$observed ~ 1, family = gaussian(link = identity) # family = gaussian(link = "identity") ) } else { tr.mapped <- factor( group$map[as.character(samples$treatment)], ) glm.results <- glm( samples$observed ~ tr.mapped - 1, family = gaussian(link = identity) ) } group$glm <- glm.results cat(sprintf("AIC = %.1f ...\n", group$glm$aic)) group } ) } # 使用方法 # 解析データはデータフレームで、1列目と2列目に次の内容を入力しておく(3列目以降は無視される) # 1列目:区分したい項目名(factor) # 2列目:観測値(numeric) data.pois <- data.frame( c(rep("A",20), rep("B",10), rep("C",20)), c(rpois(20,2.5), rpois(10,3.5), rpois(20,3.5)) ) estimate.poisson(data.pois) data.gaus <- data.frame( c(rep("A",20), rep("B",10), rep("C",20)), c(rnorm(20,2.5), rnorm(10,3.5), rnorm(20,3.5)) ) estimate.gaussian(data.gaus)