Python 一鍵獲取郵件附件

Bgods 發表於 2020-06-22

這裡以我的企業 QQ 郵箱為例,程式碼如下:

# -*- coding: utf-8 -*-

import os
import poplib
import email
import time
from email.parser import Parser
from email.header import decode_header
from email.utils import parseaddr


class QQEmail(object):
    """Minimal POP3 client that logs in, lists, parses and saves attachments.

    Typical use: ``login_email()`` -> ``get_email_lists()`` ->
    ``parser_mail(index)`` -> ``get_att(parsed['Msg'])`` -> ``server_quit()``.

    Attributes:
        User: mailbox user name.
        Pass: mailbox password.
        host, port, use_ssl, timeout: connection settings used by
            ``login_email``.
        server: the live ``poplib`` connection, or ``None`` before login /
            after ``server_quit``.
        mails: the raw listing returned by the server's ``LIST`` command, or
            ``None`` until ``get_email_lists`` succeeds.
    """

    # Codecs tried, in order, when a declared charset is missing or unusable.
    # UTF-8 goes first because it is strict; GBK accepts almost any bytes.
    _FALLBACK_CODECS = ('utf-8', 'gbk')

    def __init__(self, user, password, host='imap.exmail.qq.com', port=110,
                 use_ssl=False, timeout=50):
        """Store credentials and connection settings; no network I/O happens.

        :param user: mailbox user name
        :param password: mailbox password
        :param host: POP3 server host name
        :param port: POP3 server port
        :param use_ssl: connect with ``poplib.POP3_SSL`` instead of plain POP3
        :param timeout: socket timeout in seconds
        """
        # NOTE(review): the default host is the provider's IMAP host name and
        # the default transport is unencrypted; both are kept so existing
        # callers behave as before. The provider's POP3-over-SSL endpoint is
        # presumably pop.exmail.qq.com:995 -- confirm, then pass
        # host=..., port=995, use_ssl=True.
        self.User = user
        self.Pass = password
        self.host = host
        self.port = port
        self.use_ssl = use_ssl
        self.timeout = timeout
        self.server = None  # set by login_email()
        self.mails = None  # set by get_email_lists()

    @staticmethod
    def _close_quietly(server):
        """Close a POP3 connection, ignoring errors from a dead socket."""
        if server is None:
            return
        try:
            server.close()
        except (OSError, AttributeError):
            # Best effort: the connection is being discarded anyway.
            pass

    @classmethod
    def _bytes_to_text(cls, raw, charset=None):
        """Decode *raw* bytes with *charset*, then with the fallback codecs.

        Never raises: as a last resort undecodable bytes are replaced.
        """
        candidates = ([charset] if charset else []) + list(cls._FALLBACK_CODECS)
        for codec in candidates:
            try:
                return raw.decode(codec)
            except (LookupError, UnicodeDecodeError):
                continue
        return raw.decode('utf-8', errors='replace')

    def login_email(self):
        """Connect and authenticate, storing the connection in ``self.server``.

        :return: ``{"status": bool, "info": str}``; never raises for
            connection or authentication failures.
        """
        connection = None
        try:
            factory = poplib.POP3_SSL if self.use_ssl else poplib.POP3
            connection = factory(self.host, self.port, timeout=self.timeout)
            connection.user(self.User)
            connection.pass_(self.Pass)
        except Exception as e:
            # Do not leak the socket when authentication fails.
            self._close_quietly(connection)
            return {"status": False, 'info': f'Login failed: {e}'}

        # A re-login replaces the previous session; release the old socket.
        self._close_quietly(self.server)
        self.server = connection
        return {"status": True, 'info': f'{self.User} login successful!'}

    def decode_str(self, str_in):
        """Decode an RFC 2047 encoded header value into readable text.

        All fragments returned by ``decode_header`` are decoded and joined, so
        a value such as ``=?utf-8?b?...?= <a@b.com>`` keeps its address part.

        :param str_in: header value (``str`` or ``email.header.Header``)
        :return: decoded text, or *str_in* unchanged if it cannot be parsed
            (for example when it is ``None``)
        """
        try:
            fragments = decode_header(str_in)
        except Exception:
            return str_in

        pieces = []
        for value, charset in fragments:
            if isinstance(value, bytes):
                value = self._bytes_to_text(value, charset)
            pieces.append(value)
        return ''.join(pieces)

    def guess_charset(self, msg):
        """Return the charset name declared for *msg*, or ``None``.

        :param msg: an ``email.message.Message``
        :return: lower-case charset name usable with ``bytes.decode``, or
            ``None`` when nothing is declared
        """
        explicit = msg.get_charset()
        if explicit is not None:
            return str(explicit)
        try:
            # Handles quoting and trailing parameters ('charset="x"; format=..').
            return msg.get_content_charset()
        except TypeError:
            # Parameter parsing can fail on headers holding raw 8-bit bytes.
            return None

    def print_info(self, msg, indent=0):
        """Print the headers and the part structure of *msg* recursively.

        :param msg: an ``email.message.Message``
        :param indent: nesting depth, used to indent the output
        """
        pad = '  ' * indent
        if indent == 0:
            for header in ['From', 'To', 'Subject']:
                value = msg.get(header, '')
                if value:
                    if header == 'Subject':
                        value = self.decode_str(value)
                    else:
                        # parseaddr needs text; raw 8-bit headers arrive as
                        # Header objects and are decoded first.
                        if not isinstance(value, str):
                            value = str(self.decode_str(value))
                        hdr, addr = parseaddr(value)
                        name = self.decode_str(hdr)
                        value = u'%s <%s>' % (name, addr)
                print('%s%s: %s' % (pad, header, value))

        if msg.is_multipart():
            for n, part in enumerate(msg.get_payload()):
                print('%spart %s' % (pad, n))
                print('%s--------------------' % pad)
                self.print_info(part, indent + 1)
            return

        content_type = msg.get_content_type()
        if content_type in ('text/plain', 'text/html'):
            # Always end up with str so that appending '...' cannot fail.
            raw = msg.get_payload(decode=True) or b''
            content = self._bytes_to_text(raw, self.guess_charset(msg))
            print('%sText: %s' % (pad, content + '...'))
        else:
            print('%sAttachment: %s' % (pad, content_type))

    def get_email_lists(self):
        """Fetch the message listing into ``self.mails``.

        :return: ``{"status": bool, "info": str}``
        """
        try:
            resp, mails, octets = self.server.list()
        except Exception as e:
            return {"status": False, 'info': f'郵件列表獲取失敗: {e}'}
        self.mails = mails
        return {"status": True, 'info': '成功獲取郵件列表!'}

    def _part_filename(self, part):
        """Return the raw attachment file name of *part*, or ``None``."""
        try:
            return part.get_filename()
        except TypeError:
            # NOTE(review): seen as the failure mode when the header carries
            # raw 8-bit bytes and is surfaced as a Header object -- confirm.
            pass

        # Retry on a scratch message whose headers were decoded to text.
        from email.message import Message
        scratch = Message()
        for header in ('Content-Disposition', 'Content-Type'):
            decoded = self.decode_str(part.get(header))
            if isinstance(decoded, str):
                scratch[header] = decoded
        try:
            return scratch.get_filename()
        except TypeError:
            return None

    def get_att(self, msg_in, save_dir="附件"):
        """Save every attachment of *msg_in* into *save_dir*.

        The directory is created on demand. File names come from the message
        and are therefore untrusted: only their final path component is used.

        :param msg_in: an ``email.message.Message``
        :param save_dir: output directory
        :return: list of the file names that were written
        """
        attachment_files = []
        i = 1
        for part in msg_in.walk():
            raw_name = self._part_filename(part)
            if not raw_name:
                continue

            data = part.get_payload(decode=True)
            if data is None:
                # Container parts have no decodable payload of their own.
                continue

            # Strip any directory component so '../x' or 'C:\\x' cannot
            # escape save_dir.
            decoded_name = str(self.decode_str(raw_name))
            filename = os.path.basename(decoded_name.replace('\\', '/'))
            if filename in ('', '.', '..'):
                filename = f'attachment_{i}'

            os.makedirs(save_dir, exist_ok=True)
            with open(os.path.join(save_dir, filename), 'wb') as att_file:
                att_file.write(data)
            attachment_files.append(filename)

            print(f'附件({i}): {filename}')
            i += 1
        return attachment_files

    def _retrieve_lines(self, index):
        """Return the raw lines of message *index*, or ``None`` on failure.

        If the first attempt fails, log in again and retry once.
        """
        try:
            return self.server.retr(index)[1]
        except Exception:
            pass

        self.login_email()
        self.get_email_lists()
        try:
            return self.server.retr(index)[1]
        except Exception:
            return None

    def parser_mail(self, index):
        """Download and parse one message.

        :param index: message number on the server
        :return: ``False`` if the message cannot be fetched or parsed or has
            no usable ``Date`` header; otherwise a dict with keys ``'From'``
            (last whitespace-separated token of the decoded header, or
            ``'<None>'``), ``'Date'`` (POSIX timestamp as ``float``),
            ``'Subject'`` (decoded text, or ``None`` if absent) and ``'Msg'``
            (the parsed ``email.message.Message``).
        """
        lines = self._retrieve_lines(index)
        if lines is None:
            return False

        # Parse the raw bytes: each header and part is then decoded with its
        # own declared charset instead of guessing one for the whole message.
        try:
            msg = email.message_from_bytes(b'\r\n'.join(lines))
        except Exception:
            return False

        Subject = self.decode_str(msg.get("Subject"))

        # RFC 2822 aware parsing: honours the UTC offset and does not depend
        # on fixed column positions or the process locale.
        try:
            date_text = self.decode_str(msg.get("Date"))
            Date = email.utils.parsedate_to_datetime(date_text).timestamp()
        except (TypeError, ValueError, AttributeError, OverflowError, OSError):
            return False

        try:
            From = self.decode_str(msg.get("From")).split(' ')[-1]
        except (AttributeError, TypeError):
            From = '<None>'

        return {
            'From': From,
            'Date': Date,
            'Subject': Subject,
            'Msg': msg,
        }

    def server_quit(self):
        """Send ``QUIT`` and forget the connection; no-op if not logged in."""
        if self.server is None:
            return
        connection, self.server = self.server, None
        try:
            connection.quit()
        finally:
            # quit() skips its own close() when the QUIT command fails.
            self._close_quietly(connection)


if __name__ == "__main__":
    # Demo: log in, then download the attachments of the newest messages.

    Q = QQEmail(user='郵箱', password='密碼')  # replace with real credentials
    login_info = Q.login_email()
    print(login_info['info'])

    if login_info['status']:
        try:
            email_lists = Q.get_email_lists()
            if email_lists['status']:
                # The range counts down from the highest message number, so
                # its first ten items are the ten highest-numbered messages.
                # NOTE(review): assumes the server numbers newer mail higher.
                indexs = range(len(Q.mails), 0, -1)[:10]

                for index in indexs:
                    mail_msg = Q.parser_mail(index)  # False when parsing fails
                    if mail_msg:
                        # Q.print_info(mail_msg['Msg'])  # dump the structure
                        Q.get_att(mail_msg['Msg'])  # save the attachments
        finally:
            # Always end the POP3 session, even if a download fails.
            Q.server_quit()

原文:bgods.cn/blog/post/54/

本作品採用《CC 協議》,轉載必須註明作者和本文連結

相關文章