# 一般化線形モデル用の関数の改変版(オリジナルは北海道大学の久保さん作)
# http://hosho.ees.hokudai.ac.jp/~kubo/index-j.html 参照
# gtoolsを使うので、gregmiscが必要
#
# grouping.R
# KUBO Takuya (kubo@ees.hokudai.ac.jp)
# August 2004
#
# changed by MATSUMURA Toshikazu(http://kobe.cool.ne.jp/matsut/index.html)
# Nobember 2004

library(gtools) # to use combinations()

partition.int <- function(residual, imax = NA)
{
	residual <- as.integer(residual)
	if (is.na(residual) == TRUE | residual < 1) {
		stop("the argument must be an integer and larger than zero")
	}
	if (is.na(imax) == TRUE) {
		imax <- residual
	}
	else {
		imax <- as.integer(imax)
	}
	tree <- c()
	for (i in rev(1:imax)) {
		branches <- c()
		if (i == residual) {
			branches <- c(i)
		}
		else if (i < residual) {
			branches <- lapply(
				Recall(residual - i, i),
				function(sb) c(i, unlist(sb))
			)
		}
		if (length(branches) > 0) {
			tree <- c(tree, branches)
		}
	}
	tree
}

matrix.to.list <- function(m)
{
	set.list <- list()
	for (i in 1:nrow(m))
		set.list <- c(set.list, list(m[i,]))
	set.list
}

list.map.combinations <- function(n, k, v.map)
{
	lapply(
		matrix.to.list(combinations(as.integer(n), as.integer(k))),
		function(cmb) v.map[unlist(cmb)]
	)
}

unique.group <- function(list.group)
{
	list.return <- c()
	v.tag <- c()
	for (group in list.group) {
		tag <- get.group.tag(group)
		if (tag %in% v.tag == FALSE)
			list.return <- c(list.return, list(group))
		v.tag <- c(v.tag, tag)
	}
	list.return
}

divide.set <- function(set.all, partition, depth = 0)
{
	# check length
	if (length(set.all) != sum(partition)) {
		stop(sprintf(
			"Unbaralance: length(set.all) = %i, sum(partition) = %i\n",
			as.integer(length(set.all)), as.integer(sum(partition))
		))
	}
	# check partition type
	p.head <- partition[1]
	if (length(partition) < 2) {
		if (depth == 0 | p.head > 1)
			return(list(list(set.all)))
		else 
			return(list(set.all))
	}
	# recursively dividing
	p.tail <- partition[2:length(partition)]
	if (p.head == 1) # then partition must be c(1, 1, ..., 1)
		return(list(lapply(set.all, function(i) i))) 
	list.set.head <- list.map.combinations(length(set.all), p.head, set.all)
	list.return <- c()
	for (set.head in list.set.head) {
		set.diff <- setdiff(set.all, set.head)
		for (list.set.tail in Recall(set.diff, p.tail, depth + 1)) {
			list.set <- list(set.head)
			for (set.tail in list.set.tail) {
				list.set <- c(list.set, list(set.tail))
			}
			list.return <- c(list.return, list(list.set))
		}
	}
	unique.group(list.return)
}

get.set.tag <- function(set)
	paste("(", paste(set, sep = "", collapse = "+"), ")", sep = "")

get.group.tag <- function(list.set)
{
	v.tag <- sapply(list.set, function(set) get.set.tag(set))
	paste(sort(v.tag), sep ="", collapse = "")
}

map.list.set <- function(list.set)
{
	v.names <- c()
	v.return <- c()
	for (set in list.set) {
		v.names<- c(v.names, sapply(set, function(s) as.character(s)))
		v.return <- c(v.return, rep(get.set.tag(set), length(set)))
	}
	names(v.return) <- v.names
	list(
		"tag" = get.group.tag(list.set),
		"map" = v.return,
		"number.groups" = as.integer(length(list.set))
	)
}

get.list.tag.map <- function(list.group)
{
	lapply(list.group, function(group) map.list.set(group))
}

generate.groups <- function(set.levels)
{
	set.levels <- as.character(set.levels)
	partitions <- partition.int(length(set.levels))
	list.return <- c()
	for (i in 1:length(partitions)) {
		partition <- partitions[[i]]
		list.return <- c(
			list.return,
			get.list.tag.map(divide.set(set.levels, partition))
		)
	}
	list.return
}

estimate.poisson <- function(samples)
{
	samples <- samples[c(1,2)]	# select colums(MT version)
	colnames(samples) <- c("treatment", "observed")		# add colnames(MT version)
#	list.groups <- generate.groups(parameter$treatment))	# KT version
	list.groups <- generate.groups(unique(samples$treatment))	# MT version
	lapply(
		list.groups,
		function(group) {
			cat(sprintf("# evaluating Model%-12s", group$tag))
			if (group$number.groups == 1) {
				glm.results <- glm(
					samples$observed ~ 1,
					family = poisson(link = log)
				)
			}
			else {
				tr.mapped <- factor(
					group$map[as.character(samples$treatment)],
				)
				glm.results <- glm(
					samples$observed ~ tr.mapped - 1,
					family = poisson(link = log)
				)
			}
			group$glm <- glm.results
			cat(sprintf("AIC = %.1f ...\n", group$glm$aic))
			group
		}
	)
}

estimate.gaussian <- function(samples)
{
	samples <- samples[c(1,2)]
	colnames(samples) <- c("treatment", "observed")
	list.groups <- generate.groups(unique(samples$treatment))
	lapply(
		list.groups,
		function(group) {
			cat(sprintf("# evaluating Model%-12s", group$tag))
			if (group$number.groups == 1) {
				glm.results <- glm(
					samples$observed ~ 1,
					family = gaussian(link = identity)
#					family = gaussian(link = "identity")
				)
			}
			else {
				tr.mapped <- factor(
					group$map[as.character(samples$treatment)],
				)
				glm.results <- glm(
					samples$observed ~ tr.mapped - 1,
					family = gaussian(link = identity)
				)
			}
			group$glm <- glm.results
			cat(sprintf("AIC = %.1f ...\n", group$glm$aic))
			group
		}
	)
}

# 使用方法
# 解析データはデータフレームで、1列目と2列目に次の内容を入力しておく(3列目以降は無視される)
# 1列目：区分したい項目名(factor)
# 2列目：観測値(numeric)
data.pois <- data.frame(
	c(rep("A",20), rep("B",10), rep("C",20)),
	c(rpois(20,2.5), rpois(10,3.5), rpois(20,3.5))
)
estimate.poisson(data.pois)

data.gaus <- data.frame(
	c(rep("A",20), rep("B",10), rep("C",20)),
	c(rnorm(20,2.5), rnorm(10,3.5), rnorm(20,3.5))
)
estimate.gaussian(data.gaus)