用LoadRunner做一個網路爬蟲

TIB發表於2010-03-29

原文網址 : https://blog.csdn.net/testing_is_believing/article/details/5427074

Kim在《LoadRunner as a WebCrawler》這篇文章中介紹了如何用LoadRunner實現一個簡單的網路爬蟲：

http://ptfrontline.wordpress.com/2008/04/07/loadrunner-as-a-webcrawler/

網路爬蟲在效能測試中可以用在給“快取暖身”上。

// Walks the links captured in the URL_LIST1 parameter array. For each link
// that passes the filter (res == 0) it saves the link as {URL}, requests
// {BaseURL}{URL} while capturing that page's links into URL_LIST2, and then
// calls Process_Level2().
// NOTE(review): this listing lost text when the article was extracted from
// HTML: the braces, most of the for-loop and the backslash escapes inside the
// LB/RB boundary strings are missing, so it does not compile as shown.
void Process_Level1()

        int i;

        char buf[2048];

        char buf2[2048];

        char *pos;

        int res;

        // Number of links captured in URL_LIST1.
        int count;

        count = atoi(lr_eval_string("{URL_LIST1_count}"));

        if (count > 0)

        // NOTE(review): the loop header and the URL filter were truncated on
        // the next line. Presumably it read "for (i=1; i<=count; i++)" followed
        // by code that fills buf and bumps res for unwanted links - confirm
        // against the original article.
        for ( i=1; i 0) res++;

               // Only links that no filter rejected are followed.
               if (res == 0)

                       lr_save_string( lr_eval_string(buf), "URL" );

                       // Replace &amp; with & - NONSTANDARD FUNCTION
                       // NOTE(review): the first literal below was presumably
                       // "&amp;" before the HTML entity was decoded - confirm.

                       lr_replace( "URL", "&", "&" );

                       // NOTE(review): the LB/RB values presumably contained an
                       // escaped quote (\") that was stripped - confirm.
                       web_reg_save_param("URL_LIST2", // save all href="" URL's

                                                        "LB=href="",

                                                        "RB="",

                                                        "Ord=All",

                                                        "Search=Body",

                                                        "NotFound=Warning",

                                                        LAST );

                       web_url("URL",

                               "URL={BaseURL}{URL}",

                               "TargetFrame=",

                               "Resource=0",

                               "RecContentType=text/html",

                               "Mode=HTML",

                               LAST);

                       // Process all "URL_LIST2" entries

                       Process_Level2();

Vince Lozada把這個指令碼完善了一下（用遞迴的方式訪問每一個URL一次）：

/* Dynamically allocated array of the URL strings recorded by addItemToList(). */
char **myList;

/* Number of entries currently stored in myList. */
int numListElements = 0;

/* Allocated capacity of myList, in entries; doubled by addItemToList() when full. */
int listSize = 1;

/*
 * Vuser action: requests {BaseURL}, captures every href="..." value found in
 * the response body into the URL_LIST1 parameter array, lets Process_URLs()
 * follow those links recursively, and finally resets the visited-URL list so
 * the next iteration starts from an empty list.
 *
 * Returns 0.
 */
Action()
{
    /*
     * The boundaries must contain an escaped double quote. The published
     * listing showed "LB=href=/"" and "RB=/"", which the compiler reads as
     * the boundaries href=/ and / and therefore never matches href="...".
     */
    web_reg_save_param("URL_LIST1",
        "LB=href=\"",
        "RB=\"",
        "Ord=All",
        "Search=Body",
        "NotFound=Warning",
        LAST);

    web_url("Home Page",
        "URL={BaseURL}",
        "TargetFrame=",
        "Resource=0",
        "RecContentType=text/html",
        "Referer=",
        "Snapshot=t1.inf",
        "Mode=HTML",
        LAST);

    /* Level 1 is the list captured from the home page above. */
    Process_URLs(1);

    /* Release the array of visited URLs and restore the initial list state. */
    free(myList);
    myList = 0;
    numListElements = 0;
    listSize = 1;

    return 0;
}

/*
 * Recursively follows the links stored in the parameter array
 * URL_LIST<index> (entries URL_LIST<index>_1 .. URL_LIST<index>_<count>).
 *
 * Each link not yet present in the visited list is recorded there and
 * requested as {BaseURL}{URL}, while the links of the fetched page are
 * captured into URL_LIST<index + 1>. The function then recurses on that list.
 *
 * index: level of the URL list to process (1 = links of the home page).
 * Returns 0.
 */
int Process_URLs(int index)
{
    int i;
    int count;
    int nextIndex;
    char listName[255];
    char listCountParamName[255];
    char listItemParamName[255];
    char *url;
    char *resourceName;

    nextIndex = index + 1;

    sprintf(listCountParamName, "{URL_LIST%d_count}", index);
    count = atoi(lr_eval_string(listCountParamName));

    /* Every page fetched at this level feeds the same next-level list. */
    sprintf(listName, "URL_LIST%d", nextIndex);

    for (i = 1; i <= count; i++) {
        sprintf(listItemParamName, "{URL_LIST%d_%d}", index, i);
        lr_save_string(lr_eval_string(listItemParamName), "URL");
        url = lr_eval_string("{URL}");

        /* Visit every URL only once. */
        if (isItemInList(url) != 0) {
            continue;
        }

        /*
         * The list keeps the pointer returned by lr_eval_string, exactly as
         * the original did after overwriting (and leaking) its malloc result.
         * NOTE(review): this relies on LoadRunner keeping lr_eval_string
         * results alive until the end of the iteration - confirm against the
         * function reference.
         */
        addItemToList(url);

        /* Boundaries need an escaped quote to match href="...". */
        web_reg_save_param(listName,
            "LB=href=\"",
            "RB=\"",
            "Ord=All",
            "Search=Body",
            "NotFound=Warning",
            LAST);

        /* Use the last path segment as the step name, or the whole URL when
         * it contains no '/'. */
        resourceName = (char *) strrchr(url, '/');
        if (!resourceName) {
            resourceName = url;
        }

        web_url(resourceName,
            "URL={BaseURL}{URL}",
            "TargetFrame=",
            "Resource=0",
            "RecContentType=text/html",
            "Mode=HTML",
            LAST);

        Process_URLs(nextIndex);
    }

    return 0;
}

/*
 * Appends item to the global myList array, allocating the array on first use
 * and doubling its capacity (listSize) whenever it is full.
 *
 * Only the pointer is stored; the string itself is not copied.
 * If memory cannot be allocated the list is left unchanged and the item is
 * not recorded, so the element count never gets ahead of the storage.
 *
 * item: string pointer to record.
 */
void addItemToList(char *item) {
    char **grown;
    int i;

    if (!myList) {
        myList = (char **) malloc(listSize * sizeof(char *));
        if (!myList) {
            return;
        }
    }

    if (numListElements >= listSize) {
        grown = (char **) malloc(listSize * 2 * sizeof(char *));
        if (!grown) {
            return;
        }

        for (i = 0; i < numListElements; ++i) {
            grown[i] = myList[i];
        }

        free(myList);
        myList = grown;
        listSize *= 2;
    }

    myList[numListElements++] = item;
}

/*
 * Reports whether a string equal to item is already stored in myList.
 *
 * item: string to look for (compared with strcmp).
 * Returns 1 when an equal entry exists, 0 otherwise (including when the list
 * is empty).
 */
int isItemInList(char *item) {
    int i;

    for (i = 0; i < numListElements; ++i) {
        if (strcmp(item, myList[i]) == 0) {
            return 1;
        }
    }

    /* Only reached after every stored entry has been compared. */
    return 0;
}

/*
 * Writes every entry of myList to the LoadRunner output log, one message per
 * entry.
 */
void printList() {
    int i;

    for (i = 0; i < numListElements; ++i) {
        /* URLs may contain '%' (e.g. %20), so never use them as the format. */
        lr_output_message("%s", myList[i]);
    }
}

試了一下這個指令碼，發現還不夠完善，在處理連結的URL字串時還要考慮得更周全。

精通Scrapy網路爬蟲【一】第一個爬蟲專案
2021-06-19
爬蟲
網路爬蟲——爬蟲實戰（一）
2022-01-29
爬蟲
python網路爬蟲應用_python網路爬蟲應用實戰
2020-12-29
Python爬蟲
如何自己寫一個網路爬蟲
2020-02-27
爬蟲
python網路爬蟲_Python爬蟲：30個小時搞定Python網路爬蟲視訊教程
2020-10-21
Python爬蟲
nodeJS做一個簡單的爬蟲
2018-03-30
NodeJS爬蟲
網路爬蟲
2018-12-07
爬蟲
python網路爬蟲筆記（一）
2020-10-25
Python爬蟲筆記
網路爬蟲技術及應用
2022-11-03
爬蟲
什麼是網路爬蟲?為什麼用Python寫爬蟲?
2021-03-08
爬蟲Python
網路爬蟲示例
2018-10-30
爬蟲
網路爬蟲精要
2019-04-27
爬蟲
用PYTHON爬蟲簡單爬取網路小說
2021-09-11
Python爬蟲
python3網路爬蟲開發實戰_Python 3開發網路爬蟲(一)
2020-12-07
Python爬蟲
初探python之做一個簡單小爬蟲
2019-03-02
Python爬蟲
網路爬蟲的原理
2018-12-02
爬蟲
python DHT網路爬蟲
2019-02-14
Python爬蟲
網路爬蟲專案
2022-01-29
爬蟲
104個實用網路爬蟲專案資源整理（超全）
2019-04-16
爬蟲
[Python] 網路爬蟲與資訊提取（1）網路爬蟲之規則
2020-11-06
Python爬蟲
什麼是Python網路爬蟲?常見的網路爬蟲有哪些?
2020-11-27
Python爬蟲
python網路爬蟲（14）使用Scrapy搭建爬蟲框架
2019-07-27
Python爬蟲框架
Python網路爬蟲實戰專案大全 32個Python爬蟲專案demo
2019-04-24
Python爬蟲
python網路爬蟲（9）構建基礎爬蟲思路
2019-06-09
Python爬蟲
網路爬蟲（python專案）
2018-12-04
爬蟲Python
什麼是網路爬蟲
2018-12-02
爬蟲
網路爬蟲大型教程(二)
2018-05-14
爬蟲
專案－－python網路爬蟲
2020-08-15
Python爬蟲
網路爬蟲流程總結
2023-03-09
爬蟲
網路爬蟲如何運作？
2022-02-08
爬蟲
python網路爬蟲合法嗎
2021-09-11
Python爬蟲
網路爬蟲的反扒策略
2021-09-11
爬蟲
什麼是網路爬蟲？
2022-12-09
爬蟲
網路爬蟲是什麼？
2022-05-25
爬蟲
Python網路爬蟲實戰
2022-03-18
Python爬蟲
Python爬蟲小專案：爬一個圖書網站
2018-11-21
Python爬蟲網站
Python網路爬蟲實戰(一)快速入門
2019-09-16
Python爬蟲
匿名IP在網路爬蟲中的應用探索
2023-05-16
爬蟲
網路爬蟲在商業分析中的應用
2020-01-03
爬蟲

用LoadRunner做一個網路爬蟲

相關文章