【Azure Developer】使用 Python SDK連線Azure Storage Account, 計算Blob大小程式碼示例

路邊兩盞燈發表於2021-06-08

問題描述

在微軟雲環境中,如何使用 Python SDK 連線儲存賬號(Storage Account)並計算 Blob 的大小?雖然 Azure 提供了專用工具 Azure Storage Explorer,可以統計出 Blob 的大小:

【Azure Developer】使用 Python SDK連線Azure Storage Account, 計算Blob大小程式碼示例 

但是它也只能一個 Blob Container 一個 Blob Container 地統計,如果 Container 數量巨大,這將是一個繁瑣的工作。而作為開發者,應該讓程式碼來幫助完成。下文使用最快上手的 Python 程式碼來計算 Blob 容量的大小。

 

完整程式碼

 
import os, uuid, datetime, threading
import logging
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient, __version__


def calculateBlob(connect_string, count):
    """Add the blob sizes of every container in one storage account to the global totals.

    For each container listed by the account, the sizes of all blobs are
    summed and added, in MiB (bytes / (1024 * 1024)), to two module-level
    dictionaries keyed by container name:

    * ``blobSize_Total`` - all blobs in the container.
    * ``blobSize_Daily`` - only blobs whose creation date equals the
      module-level ``now_date`` (a local-time ``YYYYMMDD`` string).

    Args:
        connect_string: Connection string of the storage account.
        count: Caller identifier (for example a thread index); it is only
            used as a prefix of the error message.

    Returns:
        None. Results are written into the global dictionaries. If the
        service client cannot be created, the error is printed and logged
        and nothing is accumulated.
    """
    try:
        blob_service_client = BlobServiceClient.from_connection_string(connect_string)
    except Exception as e:
        messages = str(count) + "Connect_String Error, Messages:" + e.args.__str__()
        print(messages)
        # Log as an error: INFO records are discarded by the default logging
        # configuration, so the failure would otherwise never reach the log.
        logging.error(messages)
        return None

    # NOTE(review): the updates of the shared dictionaries below are not
    # synchronized; if several threads process accounts that share container
    # names, consider guarding them with a lock.
    for container in blob_service_client.list_containers():
        container_name = container.name
        print(container_name)
        blobSize_Total.setdefault(container_name, 0)
        blobSize_Daily.setdefault(container_name, 0)

        total_bytes = 0
        daily_bytes = 0
        container_client = blob_service_client.get_container_client(container_name)
        for blob in container_client.list_blobs():
            total_bytes += blob.size
            # now_date is built from the local clock, while the SDK is expected
            # to report creation times as timezone-aware UTC datetimes. Convert
            # to the local zone first so both dates refer to the same calendar
            # day (a naive datetime is treated as local time and is unchanged).
            created_on = blob.creation_time.astimezone().strftime("%Y%m%d")
            if created_on == now_date:
                # Blob was created today: count it in the daily figure too.
                daily_bytes += blob.size

        # Convert bytes to MiB before adding to the running totals.
        blobSize_Total[container_name] += total_bytes / (1024 * 1024)
        blobSize_Daily[container_name] += daily_bytes / (1024 * 1024)

    return None


if __name__ == "__main__":
    # Connection string of the storage account to measure; replace the
    # <storagename> and <key> placeholders before running.
    Connection_String_List = (
        "DefaultEndpointsProtocol=https;"
        "AccountName=<storagename>;"
        "AccountKey=<key>;"
        "EndpointSuffix=core.chinacloudapi.cn"
    )

    # Print the start time so the total run time can be read from the output.
    started_at = datetime.datetime.now()
    print(started_at)

    # Global accumulators read and updated by calculateBlob:
    # container name -> size in MiB.
    blobSize_Daily, blobSize_Total = {}, {}

    # Today's date as YYYYMMDD; calculateBlob compares blob creation dates with it.
    now_date = datetime.datetime.now().strftime("%Y%m%d")

    print("開始計算")
    calculateBlob(Connection_String_List, 1)
    print("計算完成")

    # Size added today, followed by the total size, per container.
    print("統計當前新增大小", blobSize_Daily, sep="\n")
    print("統計Blob總大小", blobSize_Total, sep="\n")

    finished_at = datetime.datetime.now()
    print(finished_at)

如執行時沒有 Azure Blob 模組,可以使用 pip install azure-storage-blob 安裝。以上程式碼執行結果如下:

【Azure Developer】使用 Python SDK連線Azure Storage Account, 計算Blob大小程式碼示例

 

 

如果有多個Storage Account,可以考慮加入多執行緒的方式來執行:在程式碼中增加一個 myThread 類和 many_thread 函式,把 Connection_String_List 改為由多個連線字串組成的列表(list),然後在 __main__ 中把 calculateBlob(Connection_String_List, 1) 替換為 many_thread(Connection_String_List) 即可。

class myThread(threading.Thread):
    """Thread that runs calculateBlob for a single connection string."""

    def __init__(self, threadID, name, connection_string):
        """Store the thread id, the thread name and the connection string to process."""
        super().__init__()
        self.threadID = threadID
        self.name = name
        self.connection_string = connection_string

    def run(self):
        """Announce the start, process the storage account, then announce the exit."""
        print(f"開始執行緒:{self.name}")
        # threadID is passed on so an error message can be traced back to this thread.
        calculateBlob(self.connection_string, self.threadID)
        print(f"退出執行緒:{self.name}")


def many_thread(Connection_String_List):
    """Process several storage accounts in parallel, one myThread per connection string.

    Args:
        Connection_String_List: Iterable of connection strings. A single
            ``str`` is also accepted and handled as a one-element list;
            iterating it directly would start one thread per character.

    Returns:
        None, once every started thread has finished.
    """
    if isinstance(Connection_String_List, str):
        Connection_String_List = [Connection_String_List]

    # Create one worker per connection string; the index doubles as thread id.
    threads = [
        myThread(index, "Thread-" + str(index), connection_string)
        for index, connection_string in enumerate(Connection_String_List)
    ]
    # Start all workers before joining so they actually run concurrently.
    for t in threads:
        t.start()
    # Wait for every worker; the caller reads the results afterwards.
    for t in threads:
        t.join()

 

遇見問題

在多執行緒執行時,可能會遇見問題:("Connection broken: ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None)", ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))。出現此問題大都是由於客戶端使用了已經斷開的連線所導致的。所以一定要仔細除錯多執行緒的關閉程式碼,確認是否把還需要執行的執行緒給關閉了,從而導致了以上的錯誤訊息。

 

附錄一:多執行緒計算Blob的完整程式碼

import os, uuid, datetime, threading
import logging
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient, __version__


def calculateBlob(connect_string, count):
    """Add the blob sizes of every container in one storage account to the global totals.

    For each container listed by the account, the sizes of all blobs are
    summed and added, in MiB (bytes / (1024 * 1024)), to two module-level
    dictionaries keyed by container name:

    * ``blobSize_Total`` - all blobs in the container.
    * ``blobSize_Daily`` - only blobs whose creation date equals the
      module-level ``now_date`` (a local-time ``YYYYMMDD`` string).

    Args:
        connect_string: Connection string of the storage account.
        count: Caller identifier (the thread id when run from a worker
            thread); it is only used as a prefix of the error message.

    Returns:
        None. Results are written into the global dictionaries. If the
        service client cannot be created, the error is printed and logged
        and nothing is accumulated.
    """
    try:
        blob_service_client = BlobServiceClient.from_connection_string(connect_string)
    except Exception as e:
        messages = str(count) + "Connect_String Error, Messages:" + e.args.__str__()
        print(messages)
        # Log as an error: INFO records are discarded by the default logging
        # configuration, so the failure would otherwise never reach the log.
        logging.error(messages)
        return None

    # NOTE(review): the updates of the shared dictionaries below are not
    # synchronized; worker threads handling accounts that share container
    # names may interleave here - consider guarding them with a lock.
    for container in blob_service_client.list_containers():
        container_name = container.name
        print(container_name)
        blobSize_Total.setdefault(container_name, 0)
        blobSize_Daily.setdefault(container_name, 0)

        total_bytes = 0
        daily_bytes = 0
        container_client = blob_service_client.get_container_client(container_name)
        for blob in container_client.list_blobs():
            total_bytes += blob.size
            # now_date is built from the local clock, while the SDK is expected
            # to report creation times as timezone-aware UTC datetimes. Convert
            # to the local zone first so both dates refer to the same calendar
            # day (a naive datetime is treated as local time and is unchanged).
            created_on = blob.creation_time.astimezone().strftime("%Y%m%d")
            if created_on == now_date:
                # Blob was created today: count it in the daily figure too.
                daily_bytes += blob.size

        # Convert bytes to MiB before adding to the running totals.
        blobSize_Total[container_name] += total_bytes / (1024 * 1024)
        blobSize_Daily[container_name] += daily_bytes / (1024 * 1024)

    return None

class myThread(threading.Thread):
    """Worker thread that calls calculateBlob with one connection string."""

    def __init__(self, threadID, name, connection_string):
        """Remember the thread id, thread name and connection string for run()."""
        super().__init__()
        self.threadID = threadID
        self.name = name
        self.connection_string = connection_string

    def run(self):
        """Print a start message, process the storage account, print an exit message."""
        print(f"開始執行緒:{self.name}")
        # The thread id is forwarded so error messages identify this worker.
        calculateBlob(self.connection_string, self.threadID)
        print(f"退出執行緒:{self.name}")


def many_thread(Connection_String_List):
    """Process several storage accounts in parallel, one myThread per connection string.

    Args:
        Connection_String_List: Iterable of connection strings. A single
            ``str`` is also accepted and handled as a one-element list;
            iterating it directly would start one thread per character.

    Returns:
        None, once every started thread has finished.
    """
    if isinstance(Connection_String_List, str):
        Connection_String_List = [Connection_String_List]

    # Create one worker per connection string; the index doubles as thread id.
    threads = [
        myThread(index, "Thread-" + str(index), connection_string)
        for index, connection_string in enumerate(Connection_String_List)
    ]
    # Start all workers before joining so they actually run concurrently.
    for t in threads:
        t.start()
    # Wait for every worker; the caller reads the results afterwards.
    for t in threads:
        t.join()


if __name__ == "__main__":
    # One connection string per storage account; replace the placeholders
    # before running. Each entry is processed by its own thread.
    Connection_String_List = [
        "DefaultEndpointsProtocol=https;"
        "AccountName=<your storage account 1>;"
        "AccountKey=<Key 1>;"
        "EndpointSuffix=core.chinacloudapi.cn",
        "DefaultEndpointsProtocol=https;"
        "AccountName=<your storage account 2>;"
        "AccountKey=<Key 2>;"
        "EndpointSuffix=core.chinacloudapi.cn",
    ]

    # Print the start time so the total run time can be read from the output.
    started_at = datetime.datetime.now()
    print(started_at)

    # Global accumulators read and updated by calculateBlob:
    # container name -> size in MiB.
    blobSize_Daily, blobSize_Total = {}, {}

    # Today's date as YYYYMMDD; calculateBlob compares blob creation dates with it.
    now_date = datetime.datetime.now().strftime("%Y%m%d")

    # Returns only after every worker thread has been joined.
    many_thread(Connection_String_List)
    print("Main Thread End")

    print(blobSize_Daily, blobSize_Total, sep="\n")

    finished_at = datetime.datetime.now()
    print(finished_at)

執行效果:

【Azure Developer】使用 Python SDK連線Azure Storage Account, 計算Blob大小程式碼示例

 

 

 

參考資料

快速入門:使用 Python v12 SDK 管理 blob: https://docs.azure.cn/zh-cn/storage/blobs/storage-quickstart-blobs-python

Python 列表(List): https://www.runoob.com/python/python-lists.html

BlobServiceClient Class: https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobserviceclient?view=azure-python

 

 

 

相關文章