rm(list = ls())
library(dplyr)
library(broom)
library(ggplot2)
# 設定工作目錄
setwd("C:\\Users\\Administrator\\Desktop\\machine learning\\Multiple Linear Regression")
# 讀取多樣性資料
diversity_data <- read.table("alpha_diversity.txt", header = TRUE, sep = "\t")
# 讀取分組資料
group_data <- read.table("group.txt", header = TRUE, sep = "\t")
# 合併兩個資料集
merged_data <- merge(diversity_data, group_data, by.x = "SampleID", by.y = "SampleID")
# 確保因變數為數值型,自變數為因子型
merged_data$Gene <- as.factor(merged_data$Gene)
# 生成所有模型(包含互動項)
diversity_metrics <- c("Shannon", "Simpson", "Pielou")
results <- lapply(diversity_metrics, function(metric) {
# 構建迴歸模型公式,包含基因型和時間的互動項
formula <- as.formula(paste(metric, "~ Gene"))
# 擬合線性模型
model <- lm(formula, data = merged_data)
# 結果整理並加上指標名稱
tidy(model) %>%
mutate(Metric = metric) # 新增指標名稱
})
# 合併所有結果
final_table <- bind_rows(results)
# 篩選需要的列並重新命名
final_table <- final_table %>%
select(Metric, term, estimate, std.error, p.value) %>%
rename(Variable = term, β = estimate, SE = std.error, p = p.value)
# 輸出結果表格
print(final_table)
# 將結果儲存為CSV檔案
write.csv(final_table, "alpha_regression_results.csv", row.names = TRUE)
#########################################################
# 定義一個函式來提取模型評估引數
extract_metrics <- function(model, metric_name) {
model_summary <- summary(model)
data.frame(
Metric = metric_name,
Residual_SE = model_summary$sigma,
R_squared = model_summary$r.squared,
Adjusted_R_squared = model_summary$adj.r.squared,
F_statistic = model_summary$fstatistic[1],
F_p_value = pf(model_summary$fstatistic[1], model_summary$fstatistic[2], model_summary$fstatistic[3], lower.tail = FALSE)
)
}
# 對多個模型應用
metrics_list <- lapply(diversity_metrics, function(metric) {
formula <- as.formula(paste(metric, "~ Gene"))
model <- lm(formula, data = merged_data)
extract_metrics(model, metric)
})
# 合併結果到一個表格
metrics_table <- do.call(rbind, metrics_list)
# 檢視結果
print(metrics_table)
write.csv(metrics_table, "alpha_metrics_table.csv", row.names = FALSE)