本文連結:https://www.cnblogs.com/snoopy1866/p/16069000.html
定性檢測的樣本量估算常用單組目標值法和抽樣誤差法,《體外診斷試劑臨床試驗技術指導原則》(2017年第72號)中提到:當評價指標P接近100%時,這兩種樣本量估算方法可能不適用,應考慮更加適宜的方法進行樣本量估算和統計學分析,如精確概率法。
1. PASS軟體估計樣本量
PASS 軟體提供的 Test for One Proportion 模組提供了精確概率法的選項,在 Power Calculation Method 中選擇 Binomial Enumeration 即可。SAS 軟體的 PROC POWER 過程則不支援精確概率法。
例如:某試劑的陽性符合率預期值為98%,目標值為95%,取顯著性水平α=0.05,檢驗效能1-β=0.8,試估計所需樣本量。
由於98%接近100%,因此採用精確概率法計算樣本量。在 PASS 軟體中設定相關引數,計算所需樣本量為312。
2. 功效曲線的"鋸齒狀"現象
需要注意的是:PASS軟體通過迭代尋找滿足檢驗效能高於0.8的樣本量,當找到一個滿足條件的樣本量時,PASS即中止迭代,然而此時的樣本量有可能並不是保守的。下面將展示這種“不保守”的現象。
在PASS軟體中,我們設定求解目標為Power,樣本量取值為區間[310, 370],繪製功效曲線如下:
可以發現:檢驗效能並非隨著樣本量增加而單調增加,而是顯示出“鋸齒狀”(saw-toothed),即使樣本量高於PASS軟體計算出的312,也存在檢驗效能低於0.8的情況,當且僅當樣本量≥338時,才能保證檢驗效能穩定在0.8以上。造成此現象的原因是二項分佈的離散性。
3. SAS 巨集程式
以下SAS巨集程式碼可用於計算給定引數下的精確概率法的最保守樣本量,供參考。
程式的基本思路如下:
Step1. 使用 PROC POWER 過程的近似正態法計算一個粗略的樣本量 n1
Step2. 在 n1 附近找一個區間,區間上下界通過引數 lbound_rate 和 ubound_rate 控制
Step3. 使用 PROC POWER 過程計算樣本量在區間 [lbound_rate * n1, ubound_rate * n1] 的檢驗效能
Step4. 判斷區間 [lbound_rate * n1, ubound_rate * n1] 內是否存在滿足任意 n>n0,使得 power(n) > 0.8 且 n0 之後的第一個波谷滿足 power > 0.8 的 n0
Step5. 如 Step 4 找到了滿足條件的n0,則輸出樣本量計算結果;否則,根據引數 expand_step 擴充套件區間上界,重複 Step1-Step4
/*
巨集程式功能:單組目標值-精確概率法,計算最保守樣本量,計算結果未考慮脫落率。
*/
%macro SampleSize_ExactBinomial(p0, p1, alpha = 0.05, power = 0.8, lbound_rate = 0.8, ubound_rate = 1.2, expand_step = 2
OutDataSet = SampleSize_ExactBinomial, DetailInfo = DetainInfo,
PowerPlot = Y);
/*
--------------巨集引數-----------------
p0: 目標值
p1: 預期值
alpha: 顯著性水平
power: 檢驗效能
lbound_rate: 尋值區間下界比例
ubound_rate: 尋值區間上界比例
expand_step: 擴充套件區間步長
OutDataSet: 輸出樣本量估算結果的資料集名稱
DetailInfo: 輸出樣本量估算細節的資料集名稱
PowerPlot: 是否繪製功效圖
----------------巨集變數---------------
ntotal_normal: 正態近似法估算的樣本量
ntotal_lbound: 尋值區間下界
ntotal_ubound: 尋值區間上界
IsLocalFindFirst: 是否找到首次滿足檢驗效能的不保守樣本量
IsGlobalFind: 是否找到穩定滿足檢驗效能的最保守樣本量
LooseMinSampleSize: 首次滿足檢驗效能的不保守樣本量
StrictMinSampleSize:穩定滿足檢驗效能的最保守樣本量
ActualPower: 最保守樣本量下的實際檢驗效能
*/
/*近似正態法求得一個粗略的樣本量*/
ods output output = output_normal;
proc power;
onesamplefreq test = z method = normal
alpha = &alpha
power = &power
nullproportion = &p0
proportion = &p1
ntotal = .;
run;
proc sql noprint;
select ntotal into: ntotal_normal from output_normal; /*提取正態近似樣本量*/
quit;
%let ntotal_lbound = %sysfunc(floor(%sysevalf(&lbound_rate*&ntotal_normal))); /*尋值區間下界*/
%let ntotal_ubound = %sysfunc(ceil(%sysevalf(&ubound_rate*&ntotal_normal))); /*尋值區間上界*/
/*在區間[&ntotal_lbound, &ntotal_ubound]內多次求Power*/
ods output output = output_exact;
proc power;
onesamplefreq test = exact
alpha = &alpha
power = .
nullproportion = &p0
proportion = &p1
ntotal = &ntotal_lbound to &ntotal_ubound by 1;
%if &PowerPlot = Y %then %do;
plot x = n min = &ntotal_lbound max = &ntotal_ubound step = 1
yopts = (ref = &power) xopts = (ref = &ntotal_normal);
%end;
run;
/*左鄰點*/
data power_exact_left;
if _n_ = 1 then do;
ntotal = &ntotal_lbound;
power_left = .;
output;
end;
set output_exact(rename = (power = power_left)
keep = ntotal power
firstobs = 1 obs = %eval(&ntotal_ubound - &ntotal_lbound));
ntotal = ntotal + 1;
label power_left = "左鄰點";
output;
run;
/*目標點*/
data power_exact_mid;
set output_exact(rename = (power = power_mid) keep = ntotal power);
label power_mid = "目標點";
run;
/*右鄰點*/
data power_exact_right;
set output_exact(rename = (power = power_right)
keep = ntotal power
firstobs = 2 obs = %eval(&ntotal_ubound - &ntotal_lbound + 1));
ntotal = ntotal - 1;
label power_right = "右鄰點";
output;
if _n_ = %eval(&ntotal_ubound - &ntotal_lbound) then do;
ntotal = &ntotal_ubound;
power_right = .;
output;
end;
run;
/*尋找最保守的樣本量*/
%let IsLocalFindFirst = 0;
%let IsGlobalFind = 0;
data &DetailInfo;
merge power_exact_left
power_exact_mid
power_exact_right;
label ntotal = "當前樣本量"
power_left = "左側點效能"
power_mid = "當前點效能"
power_right = "右側點效能"
min_sample_size = "已知最低樣本量"
is_local_find_first = "首次區域性最優解"
is_local_find = "區域性最優解"
is_global_find = "全域性最優解"
peak = "波峰"
trough = "波谷";
retain min_sample_size 0
is_local_find 0
is_local_find_first 0
is_global_find 0;
if ntotal > &ntotal_lbound and ntotal < &ntotal_ubound then do;
if power_left < power_mid and power_right < power_mid then peak = "Yes";
if power_left > power_mid and power_right > power_mid then trough = "Yes";
if power_mid > &power and is_local_find = 0 then do; /*區域性最優解,標記到達檢驗效能的樣本量*/
min_sample_size = ntotal;
is_local_find = 1;
if is_local_find_first = 0 then do; /*首次達到區域性最優解,可視為不保守的樣本量估算結果*/
is_local_find_first = 1;
call symput("LooseMinSampleSize", min_sample_size);
call symput("IsLocalFindFirst", is_local_find_first);
end;
end;
if power_mid < &power and is_local_find = 1 then do; /*區域性最優解的破壞,鋸齒狀的波谷導致此時的檢驗效能無法穩定在所需大小之上*/
min_sample_size = .;
is_local_find = 0;
end;
if power_mid > &power and is_local_find = 1 and is_global_find = 0 and trough = "Yes" then do; /*全域性最優解,此時即便是波谷也能達到所需的檢驗效能,可視為最保守的樣本量估算結果*/
is_global_find = 1;
call symput("StrictMinSampleSize", min_sample_size);
call symput("ActualPower", power_mid);
call symput("IsGlobalFind", is_global_find);
end;
end;
run;
%if &IsLocalFindFirst = 1 and &IsGlobalFind = 1 %then %do; /*當前區間內找到最保守的樣本量,輸出結果*/
/*輸出樣本量估算結果*/
data &OutDataSet;
length Exact1 $50 Exact2 $50;
label P0 = "目標值"
P1 = "預期值"
ALPHA = "顯著性水平"
POWER = "檢驗效能"
Normal = "正態近似"
Exact1 = "精確概率法(不保守)"
Exact2 = "精確概率法(最保守)";
P0 = &p0;
P1 = &p1;
ALPHA = α
POWER = &power;
Normal = &ntotal_normal;
%if &IsLocalFindFirst = 1 %then %do; Exact1 = put(&LooseMinSampleSize, best.); %end;
%else %do; Exact1 = "Outside the interval [&ntotal_lbound, &ntotal_ubound]"; %end;
%if &IsGlobalFind = 1 %then %do; Exact2 = put(&StrictMinSampleSize, best.); %end;
%else %do; Exact2 = "Outside the interval [&ntotal_lbound, &ntotal_ubound]"; %end;
run;
/*刪除資料集*/
proc delete data = output_exact
output_normal
power_exact_left
power_exact_mid
power_exact_right;
run;
/*輸出日誌*/
%put NOTE: 引數:&=p0, &=p1, &=alpha, &=power;
%put NOTE: 正態近似法求得最低樣本量為&ntotal_normal;
%if &IsLocalFindFirst = 1 %then %do;
%put NOTE: 精確概率法求得首次達到檢驗效能的最低樣本量為 %sysfunc(strip(&LooseMinSampleSize)) (不保守);
%end;
%if &IsGlobalFind = 1 %then %do;
%put NOTE: 精確概率法求得最保守的樣本量為 %sysfunc(strip(&StrictMinSampleSize)),實際檢驗效能為 &ActualPower.;
%end;
%else %do;
%put ERROR: 在樣本量區間[&ntotal_lbound, &ntotal_ubound]內沒有找到精確的樣本量!使用引數LBOUND_RATE、UBOUND_RATE調節區間大小;
%end;
%end;
%else %do; /*當前區間內未找到最保守的樣本量,擴大區間繼續尋找*/
%SampleSize_ExactBinomial(p0 = &p0, p1 = &p1, alpha = &alpha, power = &power, lbound_rate = &lbound_rate, ubound_rate = %sysevalf(&ubound_rate + &expand_step),
OutDataSet = &OutDataSet, DetailInfo = &DetailInfo, PowerPlot = &PowerPlot);
%end;
%mend;
/*Examples
%SampleSize_ExactBinomial(p0 = 0.94, p1 = 0.98);
%SampleSize_ExactBinomial(p0 = 0.94, p1 = 0.98, alpha = 0.1);
%SampleSize_ExactBinomial(p0 = 0.94, p1 = 0.98, alpha = 0.1, power = 0.9);
%SampleSize_ExactBinomial(p0 = 0.94, p1 = 0.98, alpha = 0.1, power = 0.9, lbound_rate = 0.8, ubound_rate = 1.3);
%SampleSize_ExactBinomial(p0 = 0.94, p1 = 0.98, OutDataSet = SS);
%SampleSize_ExactBinomial(p0 = 0.94, p1 = 0.98, OutDataSet = SS, DetailInfo = Info);
%SampleSize_ExactBinomial(p0 = 0.94, p1 = 0.98, OutDataSet = SS, DetailInfo = Info, PowerPlot = N);
data param;
n = 1;
do p1 = 0.940 to 0.980 by 0.002;
call execute('%nrstr(%SampleSize_ExactBinomial(p0 = 0.90, p1 = '||p1||', lbound_rate = 0.6, ubound_rate = 1.2, OutDataSet = SS'||strip(put(n, best.))||', PowerPlot = N))');
n = n + 1;
output;
end;
run;
data SS;
set SS1-SS21;
run;
*/
參考文獻:
- Vezzoli S, CROS NT V. Evaluation of Diagnostic Agents: a SAS Macro for Sample Size Estimation Using Exact Methods[C]//SAS Conference Proceedings: Pharmaceutical Users Software Exchange. 2008: 12-15.
- Chernick M R, Liu C Y. The saw-toothed behavior of power versus sample size and software solutions: single binomial proportion using exact methods[J]. The American Statistician, 2002, 56(2): 149-155.
- AKTAŞ ALTUNAY S. Effect Size For Saw Tooth Power Function in Binomial Trials[J]. 2015.