定性檢測的樣本量估算之精確概率法

Snoopy1866發表於2022-03-28

本文連結:https://www.cnblogs.com/snoopy1866/p/16069000.html

定性檢測的樣本量估算常用單組目標值法和抽樣誤差法,《體外診斷試劑臨床試驗技術指導原則》(2017年第72號)中提到:當評價指標P接近100%時,這兩種樣本量估算方法可能不適用,應考慮更加適宜的方法進行樣本量估算和統計學分析,如精確概率法

1. PASS軟體估計樣本量

PASS 軟體提供的 Test for One Proportion 模組提供了精確概率法的選項,在 Power Calculation Method 中選擇 Binomial Enumeration 即可。SAS 軟體的 PROC POWER 過程則不支援精確概率法。

例如:某試劑的陽性符合率預期值為98%,目標值為95%,取顯著性水平α=0.05,檢驗效能1-β=0.8,試估計所需樣本量。
由於98%接近100%,因此採用精確概率法計算樣本量。在 PASS 軟體中設定相關引數,計算所需樣本量為312。
定性檢測的樣本量估算之精確概率法

2. 功效曲線的"鋸齒狀"現象

需要注意的是:PASS軟體通過迭代尋找滿足檢驗效能高於0.8的樣本量,當找到一個滿足條件的樣本量時,PASS即中止迭代,然而此時的樣本量有可能並不是保守的。下面將展示這種“不保守”的現象。

在PASS軟體中,我們設定求解目標為Power,樣本量取值為區間[310, 370],繪製功效曲線如下:
定性檢測的樣本量估算之精確概率法
可以發現:檢驗效能並非隨著樣本量增加而單調增加,而是顯示出“鋸齒狀”(saw-toothed),即使樣本量高於PASS軟體計算出的312,也存在檢驗效能低於0.8的情況,當且僅當樣本量≥338時,才能保證檢驗效能穩定在0.8以上。造成此現象的原因是二項分佈的離散性。

3. SAS 巨集程式

以下SAS巨集程式碼可用於計算給定引數下的精確概率法的最保守樣本量,供參考。
程式的基本思路如下:
Step1. 使用 PROC POWER 過程的近似正態法計算一個粗略的樣本量 n1
Step2. 在 n1 附近找一個區間,區間上下界通過引數 lbound_rateubound_rate 控制
Step3. 使用 PROC POWER 過程計算樣本量在區間 [lbound_rate * n1, ubound_rate * n1] 的檢驗效能
Step4. 判斷區間 [lbound_rate * n1, ubound_rate * n1] 內是否存在滿足任意 n>n0,使得 power(n) > 0.8 且 n0 之後的第一個波谷滿足 power > 0.8 的 n0
Step5. 如 Step 4 找到了滿足條件的n0,則輸出樣本量計算結果;否則,根據引數 expand_step 擴充套件區間上界,重複 Step1-Step4

/*
巨集程式功能:單組目標值-精確概率法,計算最保守樣本量,計算結果未考慮脫落率。
*/
%macro SampleSize_ExactBinomial(p0, p1, alpha = 0.05, power = 0.8, lbound_rate = 0.8, ubound_rate = 1.2, expand_step = 2
                                OutDataSet = SampleSize_ExactBinomial, DetailInfo = DetainInfo,
                                PowerPlot = Y);
/*
--------------巨集引數-----------------
p0:             目標值
p1:             預期值
alpha:          顯著性水平
power:          檢驗效能
lbound_rate:    尋值區間下界比例
ubound_rate:    尋值區間上界比例
expand_step:    擴充套件區間步長
OutDataSet:     輸出樣本量估算結果的資料集名稱
DetailInfo:     輸出樣本量估算細節的資料集名稱
PowerPlot:      是否繪製功效圖
----------------巨集變數---------------
ntotal_normal:      正態近似法估算的樣本量
ntotal_lbound:      尋值區間下界
ntotal_ubound:      尋值區間上界
IsLocalFindFirst:   是否找到首次滿足檢驗效能的不保守樣本量
IsGlobalFind:       是否找到穩定滿足檢驗效能的最保守樣本量
LooseMinSampleSize: 首次滿足檢驗效能的不保守樣本量
StrictMinSampleSize:穩定滿足檢驗效能的最保守樣本量
ActualPower:        最保守樣本量下的實際檢驗效能
*/


    /*近似正態法求得一個粗略的樣本量*/
    ods output output = output_normal;
    proc power;
        onesamplefreq test = z method = normal
                      alpha = &alpha
                      power = &power
                      nullproportion = &p0
                      proportion = &p1
                      ntotal = .;
    run;
    proc sql noprint;
        select ntotal into: ntotal_normal from output_normal; /*提取正態近似樣本量*/
    quit;
    %let ntotal_lbound = %sysfunc(floor(%sysevalf(&lbound_rate*&ntotal_normal))); /*尋值區間下界*/
    %let ntotal_ubound = %sysfunc(ceil(%sysevalf(&ubound_rate*&ntotal_normal))); /*尋值區間上界*/


    /*在區間[&ntotal_lbound, &ntotal_ubound]內多次求Power*/
    ods output output = output_exact;
    proc power;
        onesamplefreq test = exact
                      alpha = &alpha
                      power = .
                      nullproportion = &p0
                      proportion = &p1
                      ntotal = &ntotal_lbound to &ntotal_ubound by 1;
        %if &PowerPlot = Y %then %do;
            plot x = n min = &ntotal_lbound max = &ntotal_ubound step = 1
                 yopts = (ref = &power) xopts = (ref = &ntotal_normal);
        %end;
    run;

    /*左鄰點*/
    data power_exact_left;
        if _n_ = 1 then do;
            ntotal = &ntotal_lbound;
            power_left = .;
            output;
        end;
        set output_exact(rename = (power = power_left)
                         keep = ntotal power
                         firstobs = 1 obs = %eval(&ntotal_ubound - &ntotal_lbound));
        ntotal = ntotal + 1;
        label power_left = "左鄰點";
        output;
    run;
    /*目標點*/
    data power_exact_mid;
        set output_exact(rename = (power = power_mid) keep = ntotal power);
        label power_mid = "目標點";
    run;
    /*右鄰點*/
    data power_exact_right;
        set output_exact(rename = (power = power_right)
                         keep = ntotal power
                         firstobs = 2 obs = %eval(&ntotal_ubound - &ntotal_lbound + 1));
        ntotal = ntotal - 1;
        label power_right = "右鄰點";
        output;
        if _n_ = %eval(&ntotal_ubound - &ntotal_lbound) then do;
            ntotal = &ntotal_ubound;
            power_right = .;
            output;
        end;
    run;

    /*尋找最保守的樣本量*/
    %let IsLocalFindFirst = 0;
    %let IsGlobalFind = 0;
    data &DetailInfo;
        merge power_exact_left
              power_exact_mid
              power_exact_right;
        label ntotal = "當前樣本量"
              power_left = "左側點效能"
              power_mid = "當前點效能"
              power_right = "右側點效能"
              min_sample_size = "已知最低樣本量"
              is_local_find_first = "首次區域性最優解"
              is_local_find = "區域性最優解"
              is_global_find = "全域性最優解"
              peak = "波峰"
              trough = "波谷";
        retain min_sample_size 0
               is_local_find 0
               is_local_find_first 0
               is_global_find 0;
        if ntotal > &ntotal_lbound and ntotal < &ntotal_ubound then do;
            if power_left < power_mid and power_right < power_mid then peak = "Yes";
            if power_left > power_mid and power_right > power_mid then trough = "Yes";

            if power_mid > &power and is_local_find = 0 then do; /*區域性最優解,標記到達檢驗效能的樣本量*/
                min_sample_size = ntotal;
                is_local_find = 1;
                if is_local_find_first = 0 then do; /*首次達到區域性最優解,可視為不保守的樣本量估算結果*/
                    is_local_find_first = 1;
                    call symput("LooseMinSampleSize", min_sample_size);
                    call symput("IsLocalFindFirst", is_local_find_first);
                end;
            end;
            if power_mid < &power and is_local_find = 1 then do; /*區域性最優解的破壞,鋸齒狀的波谷導致此時的檢驗效能無法穩定在所需大小之上*/
                min_sample_size = .;
                is_local_find = 0;
            end;
            if power_mid > &power and is_local_find = 1 and is_global_find = 0 and trough = "Yes" then do; /*全域性最優解,此時即便是波谷也能達到所需的檢驗效能,可視為最保守的樣本量估算結果*/
                is_global_find = 1;
                call symput("StrictMinSampleSize", min_sample_size);
                call symput("ActualPower", power_mid);
                call symput("IsGlobalFind", is_global_find);
            end;
        end;
    run;

    %if &IsLocalFindFirst = 1 and &IsGlobalFind = 1 %then %do; /*當前區間內找到最保守的樣本量,輸出結果*/
        /*輸出樣本量估算結果*/
        data &OutDataSet;
            length Exact1 $50 Exact2 $50;
            label P0 = "目標值"
                  P1 = "預期值"
                  ALPHA = "顯著性水平"
                  POWER = "檢驗效能"
                  Normal = "正態近似"
                  Exact1 = "精確概率法(不保守)"
                  Exact2 = "精確概率法(最保守)";
            P0 = &p0;
            P1 = &p1;
            ALPHA = &alpha;
            POWER = &power;
            Normal = &ntotal_normal;
            %if &IsLocalFindFirst = 1 %then %do; Exact1 = put(&LooseMinSampleSize, best.); %end;
            %else %do; Exact1 = "Outside the interval [&ntotal_lbound, &ntotal_ubound]"; %end;
            %if &IsGlobalFind = 1 %then %do; Exact2 = put(&StrictMinSampleSize, best.); %end;
            %else %do; Exact2 = "Outside the interval [&ntotal_lbound, &ntotal_ubound]"; %end;
        run;

        /*刪除資料集*/
        proc delete data = output_exact
                           output_normal
                           power_exact_left
                           power_exact_mid
                           power_exact_right;
        run;

        /*輸出日誌*/
        %put NOTE: 引數:&=p0, &=p1, &=alpha, &=power;
        %put NOTE: 正態近似法求得最低樣本量為&ntotal_normal;
        %if &IsLocalFindFirst = 1 %then %do;
            %put NOTE: 精確概率法求得首次達到檢驗效能的最低樣本量為 %sysfunc(strip(&LooseMinSampleSize)) (不保守);
        %end;
        %if &IsGlobalFind = 1 %then %do;
            %put NOTE: 精確概率法求得最保守的樣本量為 %sysfunc(strip(&StrictMinSampleSize)),實際檢驗效能為 &ActualPower.;
        %end;
        %else %do;
            %put ERROR: 在樣本量區間[&ntotal_lbound, &ntotal_ubound]內沒有找到精確的樣本量!使用引數LBOUND_RATE、UBOUND_RATE調節區間大小;
        %end;

    %end;
    %else %do; /*當前區間內未找到最保守的樣本量,擴大區間繼續尋找*/
        %SampleSize_ExactBinomial(p0 = &p0, p1 = &p1, alpha = &alpha, power = &power, lbound_rate = &lbound_rate, ubound_rate = %sysevalf(&ubound_rate + &expand_step),
                                  OutDataSet = &OutDataSet, DetailInfo = &DetailInfo, PowerPlot = &PowerPlot);
    %end;
%mend;

/*Examples
%SampleSize_ExactBinomial(p0 = 0.94, p1 = 0.98);
%SampleSize_ExactBinomial(p0 = 0.94, p1 = 0.98, alpha = 0.1);
%SampleSize_ExactBinomial(p0 = 0.94, p1 = 0.98, alpha = 0.1, power = 0.9);
%SampleSize_ExactBinomial(p0 = 0.94, p1 = 0.98, alpha = 0.1, power = 0.9, lbound_rate = 0.8, ubound_rate = 1.3);
%SampleSize_ExactBinomial(p0 = 0.94, p1 = 0.98, OutDataSet = SS);
%SampleSize_ExactBinomial(p0 = 0.94, p1 = 0.98, OutDataSet = SS, DetailInfo = Info);
%SampleSize_ExactBinomial(p0 = 0.94, p1 = 0.98, OutDataSet = SS, DetailInfo = Info, PowerPlot = N);



data param;
    n = 1;
    do p1 = 0.940 to 0.980 by 0.002;
        call execute('%nrstr(%SampleSize_ExactBinomial(p0 = 0.90, p1 = '||p1||', lbound_rate = 0.6, ubound_rate = 1.2, OutDataSet = SS'||strip(put(n, best.))||', PowerPlot = N))');
        n = n + 1;
        output;
    end;
run;
data SS;
    set SS1-SS21;
run;
*/

參考文獻:

  1. Vezzoli S, CROS NT V. Evaluation of Diagnostic Agents: a SAS Macro for Sample Size Estimation Using Exact Methods[C]//SAS Conference Proceedings: Pharmaceutical Users Software Exchange. 2008: 12-15.
  2. Chernick M R, Liu C Y. The saw-toothed behavior of power versus sample size and software solutions: single binomial proportion using exact methods[J]. The American Statistician, 2002, 56(2): 149-155.
  3. AKTAŞ ALTUNAY S. Effect Size For Saw Tooth Power Function in Binomial Trials[J]. 2015.

相關文章