R:alpha多樣性線性迴歸

王哲MGG_AI發表於2024-11-28
rm(list = ls())
library(dplyr)
library(broom)
library(ggplot2)

# 設定工作目錄
setwd("C:\\Users\\Administrator\\Desktop\\machine learning\\Multiple Linear Regression")

# 讀取多樣性資料
diversity_data <- read.table("alpha_diversity.txt", header = TRUE, sep = "\t")

# 讀取分組資料
group_data <- read.table("group.txt", header = TRUE, sep = "\t")

# 合併兩個資料集
merged_data <- merge(diversity_data, group_data, by.x = "SampleID", by.y = "SampleID")

# 確保因變數為數值型,自變數為因子型
merged_data$Gene <- as.factor(merged_data$Gene)

# 生成所有模型(包含互動項)
diversity_metrics <- c("Shannon", "Simpson", "Pielou")
results <- lapply(diversity_metrics, function(metric) {
  # 構建迴歸模型公式,包含基因型和時間的互動項
  formula <- as.formula(paste(metric, "~ Gene"))
  
  # 擬合線性模型
  model <- lm(formula, data = merged_data)
  
  # 結果整理並加上指標名稱
  tidy(model) %>%
    mutate(Metric = metric)  # 新增指標名稱
})

# 合併所有結果
final_table <- bind_rows(results)

# 篩選需要的列並重新命名
final_table <- final_table %>%
  select(Metric, term, estimate, std.error, p.value) %>%
  rename(Variable = term, β = estimate, SE = std.error, p = p.value)

# 輸出結果表格
print(final_table)

# 將結果儲存為CSV檔案
write.csv(final_table, "alpha_regression_results.csv", row.names = TRUE)
#########################################################
# 定義一個函式來提取模型評估引數
extract_metrics <- function(model, metric_name) {
  model_summary <- summary(model)
  data.frame(
    Metric = metric_name,
    Residual_SE = model_summary$sigma,
    R_squared = model_summary$r.squared,
    Adjusted_R_squared = model_summary$adj.r.squared,
    F_statistic = model_summary$fstatistic[1],
    F_p_value = pf(model_summary$fstatistic[1], model_summary$fstatistic[2], model_summary$fstatistic[3], lower.tail = FALSE)
  )
}

# 對多個模型應用
metrics_list <- lapply(diversity_metrics, function(metric) {
  formula <- as.formula(paste(metric, "~ Gene"))
  model <- lm(formula, data = merged_data)
  extract_metrics(model, metric)
})

# 合併結果到一個表格
metrics_table <- do.call(rbind, metrics_list)

# 檢視結果
print(metrics_table)

write.csv(metrics_table, "alpha_metrics_table.csv", row.names = FALSE)

相關文章