用LoadRunner做一個網路爬蟲

TIB發表於2010-03-29

Kim在《LoadRunner as a WebCrawler》這篇文章中介紹了如何用LoadRunner實現一個簡單的網路爬蟲:

http://ptfrontline.wordpress.com/2008/04/07/loadrunner-as-a-webcrawler/

 

 

網路爬蟲在效能測試中可以用在給“快取暖身”上。

 

 

 

void Process_Level1()
{
        int i;
        char buf[2048];
        char buf2[2048];
        char *pos;
        int res;
        int count;
 
        count = atoi(lr_eval_string("{URL_LIST1_count}"));
 
        if (count > 0)
        for ( i=1; i 0) res++; 
 
               if (res == 0)
               {
                       lr_save_string( lr_eval_string(buf), "URL" );
 
                       // Replace & with & - NONSTANDARD FUNCTION
                       lr_replace( "URL", "&", "&" );
 
                       web_reg_save_param("URL_LIST2", // save all href="" URL's
                                                        "LB=href="",
                                                        "RB="",
                                                        "Ord=All",
                                                        "Search=Body",
                                                        "NotFound=Warning",
                                                        LAST );
 
                       web_url("URL",
                               "URL={BaseURL}{URL}",
                               "TargetFrame=",
                               "Resource=0",
                               "RecContentType=text/html",
                               "Mode=HTML",
                               LAST);
 
                       // Process all "URL_LIST2" entires
                       Process_Level2();
               }
        }
}

 

 

 

 

 

Vince Lozada把這個指令碼完善了一下(用遞迴的方式訪問每一個URL一次):

// Visited-URL set shared by Action()/Process_URLs(): a dynamically
// grown array of URL strings, so each URL is fetched at most once.
char **myList;

// Number of URLs currently stored in myList.
int numListElements = 0;

// Allocated capacity of myList (doubled by addItemToList on overflow).
int listSize = 1;

 

/*
 * Entry point: fetch the home page, capture every href target into
 * parameter list URL_LIST1, then crawl the whole site recursively
 * via Process_URLs(). Resets the visited-URL globals on exit so the
 * Action can be iterated.
 */
Action()
{
    // Capture all href="..." targets of the next request into URL_LIST1.
    // BUG FIX: the published listing escaped the inner quote as /" —
    // C requires \" here.
    web_reg_save_param("URL_LIST1",
                       "LB=href=\"",
                       "RB=\"",
                       "Ord=All",
                       "Search=Body",
                       "NotFound=Warning",
                       LAST);

    web_url("Home Page",
            "URL={BaseURL}",
            "TargetFrame=",
            "Resource=0",
            "RecContentType=text/html",
            "Referer=",
            "Snapshot=t1.inf",
            "Mode=HTML",
            LAST);

    // Crawl recursively, starting from the level-1 link list.
    Process_URLs(1);

    // Release the visited-URL array and reset the globals for the next
    // iteration. NOTE(review): the strings stored in myList are not
    // freed here because in the original script the entries pointed
    // into LoadRunner-owned buffers; freeing them would be unsafe.
    free(myList);
    myList = 0;
    numListElements = 0;
    listSize = 1;

    return 0;
}

 

/*
 * Recursively crawl every URL captured in parameter list
 * URL_LIST<index>, visiting each distinct URL exactly once (the
 * visited set is the global myList, queried via isItemInList()).
 * Each fetch registers URL_LIST<index+1> for the links it discovers,
 * then recurses one level deeper.
 */
void Process_URLs(int index)
{
    int i;
    int nextIndex;
    char listName[255];
    char listCountParamName[255];
    char listItemParamName[255];
    int count;
    char *url;
    char *str;
    char *resourceName;

    nextIndex = index + 1;

    // Number of links captured at this depth: {URL_LIST<index>_count}.
    sprintf(listCountParamName, "{URL_LIST%d_count}", index);
    count = atoi(lr_eval_string(listCountParamName));

    if (count > 0) {
        for (i = 1; i <= count; i++) {
            sprintf(listItemParamName, "{URL_LIST%d_%d}", index, i);
            lr_save_string(lr_eval_string(listItemParamName), "URL");

            if (isItemInList(lr_eval_string("{URL}")) == 0) {
                // Remember this URL so it is fetched only once.
                // BUG FIX: the original did malloc(sizeof(ptr)) — i.e.
                // pointer-size bytes — and then overwrote the pointer
                // with lr_eval_string's internal buffer, leaking the
                // allocation and making every list entry alias one
                // reused buffer. Take a private, correctly sized copy.
                url = lr_eval_string("{URL}");
                str = (char *)malloc(strlen(url) + 1);
                if (str == NULL) {
                    continue;   // out of memory: skip this URL
                }
                strcpy(str, url);
                addItemToList(str);  // the list owns str for the script's lifetime

                // Capture href="..." targets of the next fetch.
                sprintf(listName, "URL_LIST%d", nextIndex);
                web_reg_save_param(listName,
                                   "LB=href=\"",
                                   "RB=\"",
                                   "Ord=All",
                                   "Search=Body",
                                   "NotFound=Warning",
                                   LAST);

                // Step name = last path component of the URL.
                // BUG FIX: the listing used typographic quotes (‘/’),
                // which is not valid C, and never checked for NULL —
                // a URL without '/' would have crashed web_url.
                resourceName = (char *)strrchr(str, '/');
                if (resourceName == NULL) {
                    resourceName = str;
                }

                web_url(resourceName,
                        "URL={BaseURL}{URL}",
                        "TargetFrame=",
                        "Resource=0",
                        "RecContentType=text/html",
                        "Mode=HTML",
                        LAST);

                // Crawl the links discovered by the fetch above.
                Process_URLs(nextIndex);
            }
        }
    }
}

 

/*
 * Append item to the global visited-URL list, growing the backing
 * array (capacity doubling) as needed. Ownership of item transfers
 * to the list. On allocation failure the item is silently dropped
 * (the caller's copy leaks rather than the script crashing).
 *
 * BUG FIX: the original never checked malloc's result (CERT MEM32-C);
 * the manual allocate-copy-free growth is replaced with the checked
 * realloc idiom, which preserves the old contents automatically.
 */
void addItemToList(char *item) {

    if (myList == NULL || numListElements >= listSize) {
        // First call allocates listSize slots; later growth doubles.
        int newSize = (myList == NULL) ? listSize : listSize * 2;
        char **newList = (char **) realloc(myList, newSize * sizeof(char *));

        if (newList == NULL) {
            return;   // out of memory: leave the list unchanged
        }

        myList = newList;
        listSize = newSize;
    }

    myList[numListElements++] = item;
}

 

/*
 * Membership test against the global visited-URL list.
 * Returns 1 when item matches an entry exactly, 0 otherwise.
 */
int isItemInList(char *item) {

    int idx = 0;

    while (idx < numListElements) {
        if (strcmp(myList[idx], item) == 0) {
            return 1;
        }
        ++idx;
    }

    return 0;
}

 

/*
 * Dump every URL collected in the global visited list to the
 * replay log. Debug helper; not called by the crawl itself.
 */
void printList() {

    int i;

    for (i = 0; i < numListElements; ++i) {
        // BUG FIX: pass the URL as an argument, never as the format
        // string — a URL containing '%' would otherwise be parsed as
        // conversion specifiers (format-string crash/vulnerability).
        lr_output_message("%s", myList[i]);
    }
}

 

 

 

試了一下這個指令碼,發現還不夠完善,在處理連結的URL字串時還要考慮得更周全。

 

 

 

相關文章