劫持微信聊天記錄並分析還原 —— 合併解密後的資料庫(三)

Rainbow_Technology發表於2024-11-08


  • 本工具設計的初衷是用來獲取微信賬號的相關資訊並解析PC版微信的資料庫。

  • 程式以 Python 語言開發,可讀取、解密、還原微信資料庫並幫助使用者檢視聊天記錄,還可以將其聊天記錄匯出為csv、html等格式用於AI訓練,自動回覆或備份等等作用。下面我們將深入探討這個工具的各個方面及其工作原理。

  • 本專案僅供學習交流使用,嚴禁用於商業用途或非法途徑,任何違反法律法規、侵犯他人合法權益的行為,均與本專案及其開發者無關,後果由行為人自行承擔。


【完整演示工具下載】

https://www.chwm.vip/index.html?aid=23


我們接著上一篇文章《劫持微信聊天記錄並分析還原 —— 解密資料庫(二)》將解密後的微信資料庫合併為一整個DB檔案。


微信資料庫存放目錄:
微信資料庫存放目錄

解密後的微信資料庫:
解密後的微信資料庫

詳細命令:
merge -i "C:\Users\admin\AppData\Local\Temp\wx_tmp" -o "C:\Users\admin\AppData\Local\Temp\wxdb_all.db"

  • -i 為解密後微信資料庫的存放路徑
  • -o 為合併微信資料庫的存放路徑與檔名

執行命令後,我們可以看到在此位置 "C:\Users\admin\AppData\Local\Temp" 下生成了一個以 “wxdb_all.db” 命名的檔案,這便是解密後的微信資料庫合併成功的檔案,下一步我們將詳細解讀微信資料庫的結構以及如何開啟並訪問裡面的內容。


部分現實程式碼:

# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name:         merge_db.py
# Description:  
# Author:       Rainbow(www.chwm.vip)
# Date:         2024/11/08
# -------------------------------------------------------------------------------
import logging
import os
import shutil
import sqlite3
import subprocess
import time
from typing import List
 
from .decryption import batch_decrypt
from .wx_info import get_core_db
from .utils import wx_core_loger, wx_core_error, CORE_DB_TYPE
 
 
@wx_core_error
def execute_sql(connection, sql, params=None):
    """
    執行給定的SQL語句,返回結果。
    引數:
        - connection: SQLite連線
        - sql:要執行的SQL語句
        - params:SQL語句中的引數
    """
    try:
        # connection.text_factory = bytes
        cursor = connection.cursor()
        if params:
            cursor.execute(sql, params)
        else:
            cursor.execute(sql)
        return cursor.fetchall()
    except Exception as e:
        try:
            connection.text_factory = bytes
            cursor = connection.cursor()
            if params:
                cursor.execute(sql, params)
            else:
                cursor.execute(sql)
            rdata = cursor.fetchall()
            connection.text_factory = str
            return rdata
        except Exception as e:
            wx_core_loger.error(f"**********\nSQL: {sql}\nparams: {params}\n{e}\n**********", exc_info=True)
            return None
 
 
@wx_core_error
def check_create_sync_log(connection):
    """
    檢查是否存在表 sync_log,用於記錄同步記錄,包括微信資料庫路徑,表名,記錄數,同步時間
    :param connection: SQLite連線
    :return: True or False
    """
 
    out_cursor = connection.cursor()
    # 檢查是否存在表 sync_log,用於記錄同步記錄,包括微信資料庫路徑,表名,記錄數,同步時間
    sync_log_status = execute_sql(connection, "SELECT name FROM sqlite_master WHERE type='table' AND name='sync_log'")
    if len(sync_log_status) < 1:
        #  db_path 微信資料庫路徑,tbl_name 表名,src_count 源資料庫記錄數,current_count 當前合併後的資料庫對應表記錄數
        sync_record_create_sql = ("CREATE TABLE sync_log ("
                                  "id INTEGER PRIMARY KEY AUTOINCREMENT,"
                                  "db_path TEXT NOT NULL,"
                                  "tbl_name TEXT NOT NULL,"
                                  "src_count INT,"
                                  "current_count INT,"
                                  "createTime INT DEFAULT (strftime('%s', 'now')), "
                                  "updateTime INT DEFAULT (strftime('%s', 'now'))"
                                  ");")
        out_cursor.execute(sync_record_create_sql)
        # 建立索引
        out_cursor.execute("CREATE INDEX idx_sync_log_db_path ON sync_log (db_path);")
        out_cursor.execute("CREATE INDEX idx_sync_log_tbl_name ON sync_log (tbl_name);")
        # 建立聯合索引,防止重複
        out_cursor.execute("CREATE UNIQUE INDEX idx_sync_log_db_tbl ON sync_log (db_path, tbl_name);")
        connection.commit()
    out_cursor.close()
    return True
 
 
@wx_core_error
def check_create_file_md5(connection):
    """
    檢查是否存在表 file_md5,用於記錄檔案資訊,後續用於去重等操作,暫時閒置
    """
    pass
 
 
@wx_core_error
def merge_db(db_paths: List[dict], save_path: str = "merge.db", is_merge_data: bool = True,
             startCreateTime: int = 0, endCreateTime: int = 0):
    """
    合併資料庫 會忽略主鍵以及重複的行。
    :param db_paths: [{"db_path": "xxx", "de_path": "xxx"},...]
                        db_path表示初始路徑,de_path表示解密後的路徑;初始路徑用於儲存合併的日誌情況,解密後的路徑用於讀取資料
    :param save_path: str 輸出檔案路徑
    :param is_merge_data: bool 是否合併資料(如果為False,則只解密,並建立表,不插入資料)
    :param startCreateTime: 開始時間戳 主要用於MSG資料庫的合併
    :param endCreateTime:  結束時間戳 主要用於MSG資料庫的合併
    :return:
    """
    if os.path.isdir(save_path):
        save_path = os.path.join(save_path, f"merge_{int(time.time())}.db")
 
    if isinstance(db_paths, list):
        # alias, file_path
        databases = {f"dbi_{i}": (db['db_path'],
                                  db.get('de_path', db['db_path'])
                                  ) for i, db in enumerate(db_paths)
                     }
    else:
        raise TypeError("db_paths 型別錯誤")
    outdb = sqlite3.connect(save_path)
 
    is_sync_log = check_create_sync_log(outdb)
    if not is_sync_log:
        wx_core_loger.warning("建立同步記錄表失敗")
 
    out_cursor = outdb.cursor()
 
    # 將MSG_db_paths中的資料合併到out_db_path中
    for alias, db in databases.items():
        db_path = db[0]
        de_path = db[1]
 
        # 附加資料庫
        sql_attach = f"ATTACH DATABASE '{de_path}' AS {alias}"
        out_cursor.execute(sql_attach)
        outdb.commit()
        sql_query_tbl_name = f"SELECT tbl_name, sql FROM {alias}.sqlite_master WHERE type='table' ORDER BY tbl_name;"
        tables = execute_sql(outdb, sql_query_tbl_name)
        for table in tables:
            table, init_create_sql = table[0], table[1]
            table = table if isinstance(table, str) else table.decode()
            init_create_sql = init_create_sql if isinstance(init_create_sql, str) else init_create_sql.decode()
            if table == "sqlite_sequence":
                continue
            if "CREATE TABLE".lower() not in str(init_create_sql).lower():
                continue
            # 獲取表中的欄位名
            sql_query_columns = f"PRAGMA table_info({table})"
            columns = execute_sql(outdb, sql_query_columns)
            if table == "ChatInfo" and len(columns) > 12:  # bizChat中的ChatInfo表與MicroMsg中的ChatInfo表欄位不同
                continue
            col_type = {
                (i[1] if isinstance(i[1], str) else i[1].decode(),
                 i[2] if isinstance(i[2], str) else i[2].decode())
                for i in columns}
            columns = [i[0] for i in col_type]
            if not columns or len(columns) < 1:
                continue
            # 建立表table
            sql_create_tbl = f"CREATE TABLE IF NOT EXISTS {table} AS SELECT *  FROM {alias}.{table} WHERE 0 = 1;"
            out_cursor.execute(sql_create_tbl)
            # 建立包含 NULL 值比較的 UNIQUE 索引
            index_name = f"{table}_unique_index"
            coalesce_columns = ','.join(f"COALESCE({column}, '')" for column in columns)
            sql = f"CREATE UNIQUE INDEX IF NOT EXISTS {index_name} ON {table} ({coalesce_columns})"
            out_cursor.execute(sql)
 
            # 插入sync_log
            sql_query_sync_log = f"SELECT src_count FROM sync_log WHERE db_path=? AND tbl_name=?"
            sync_log = execute_sql(outdb, sql_query_sync_log, (db_path, table))
            if not sync_log or len(sync_log) < 1:
                sql_insert_sync_log = "INSERT INTO sync_log (db_path, tbl_name, src_count, current_count) VALUES (?, ?, ?, ?)"
                out_cursor.execute(sql_insert_sync_log, (db_path, table, 0, 0))
            outdb.commit()
 
            if is_merge_data:
                # 比較源資料庫和合並後的資料庫記錄數
                log_src_count = execute_sql(outdb, sql_query_sync_log, (db_path, table))[0][0]
                src_count = execute_sql(outdb, f"SELECT COUNT(*) FROM {alias}.{table}")[0][0]
                if src_count <= log_src_count:
                    wx_core_loger.info(f"忽略 {db_path} {de_path} {table} {src_count} {log_src_count}")
                    continue
 
                # 構建資料查詢sql
                sql_base = f"SELECT {','.join([i for i in columns])} FROM {alias}.{table} "
                where_clauses, params = [], []
                if "CreateTime" in columns:
                    if startCreateTime > 0:
                        where_clauses.append("CreateTime > ?")
                        params.append(startCreateTime)
                    if endCreateTime > 0:
                        where_clauses.append("CreateTime < ?")
                        params.append(endCreateTime)
                # 如果有WHERE子句,將其新增到SQL語句中,並新增ORDER BY子句
                sql = f"{sql_base} WHERE {' AND '.join(where_clauses)} ORDER BY CreateTime" if where_clauses else sql_base
                src_data = execute_sql(outdb, sql, tuple(params))
                if not src_data or len(src_data) < 1:
                    continue
                # 插入資料
                sql = f"INSERT OR IGNORE INTO {table} ({','.join([i for i in columns])}) VALUES ({','.join(['?'] * len(columns))})"
                try:
                    out_cursor.executemany(sql, src_data)
 
                    # update sync_log
                    sql_update_sync_log = ("UPDATE sync_log "
                                           "SET src_count = ? ,"
                                           f"current_count=(SELECT COUNT(*) FROM {table}) "
                                           "WHERE db_path=? AND tbl_name=?")
                    out_cursor.execute(sql_update_sync_log, (src_count, db_path, table))
 
                except Exception as e:
                    wx_core_loger.error(
                        f"error: {db_path}\n{de_path}\n{table}\n{sql}\n{src_data}\n{len(src_data)}\n{e}\n",
                        exc_info=True)
        # 分離資料庫
        sql_detach = f"DETACH DATABASE {alias}"
        out_cursor.execute(sql_detach)
        outdb.commit()
    out_cursor.close()
    outdb.close()
    return save_path
 
 
# @wx_core_error
# def merge_db1(db_paths: list[dict], save_path: str = "merge.db", is_merge_data: bool = True,
#               startCreateTime: int = 0, endCreateTime: int = 0):
#     """
#     合併資料庫 會忽略主鍵以及重複的行。
#     :param db_paths: [{"db_path": "xxx", "de_path": "xxx"},...]
#                         db_path表示初始路徑,de_path表示解密後的路徑;初始路徑用於儲存合併的日誌情況,解密後的路徑用於讀取資料
#     :param save_path: str 輸出檔案路徑
#     :param is_merge_data: bool 是否合併資料(如果為False,則只解密,並建立表,不插入資料)
#     :param startCreateTime: 開始時間戳 主要用於MSG資料庫的合併
#     :param endCreateTime:  結束時間戳 主要用於MSG資料庫的合併
#     :return:
#     """
#     if os.path.isdir(save_path):
#         save_path = os.path.join(save_path, f"merge_{int(time.time())}.db")
#
#     if isinstance(db_paths, list):
#         # alias, file_path
#         databases = {f"MSG{i}": (db['db_path'],
#                                  db.get('de_path', db['db_path'])
#                                  ) for i, db in enumerate(db_paths)
#                      }
#     else:
#         raise TypeError("db_paths 型別錯誤")
#
#     from sqlalchemy import create_engine, MetaData, Table, select, insert, Column, UniqueConstraint
#     from sqlalchemy.orm import sessionmaker
#     from sqlalchemy import inspect, PrimaryKeyConstraint
#
#     outdb = create_engine(f"sqlite:///{save_path}", echo=False)
#
#     # 建立Session例項
#     Session = sessionmaker()
#     Session.configure(bind=outdb)
#     session = Session()
#
#     # 將MSG_db_paths中的資料合併到out_db_path中
#     for alias, db in databases.items():
#         db_path = db[0]
#         de_path = db[1]
#
#         db_engine = create_engine(f"sqlite:///{de_path}", echo=False)
#
#         # 反射源資料庫的表結構
#         metadata = MetaData()
#         metadata.reflect(bind=db_engine)
#
#         # 建立表
#         outdb_metadata = MetaData()
#         inspector = inspect(db_engine)
#         table_names = [i for i in inspector.get_table_names() if i not in ["sqlite_sequence"]]
#         for table_name in table_names:
#             # 建立表table
#             columns_list_dict = inspector.get_columns(table_name)
#             col_names = [i['name'] for i in columns_list_dict]
#             columns = [Column(i['name'], i['type'], primary_key=False) for i in columns_list_dict]
#             table = Table(table_name, outdb_metadata, *columns)
#             if len(columns) > 1:  # 聯合索引
#                 unique_constraint = UniqueConstraint(*col_names, name=f"{table_name}_unique_index")
#                 table.append_constraint(unique_constraint)
#             else:
#                 table.append_constraint(PrimaryKeyConstraint(*col_names))
#             table.create(outdb, checkfirst=True)
#
#         # 將源資料庫中的資料插入目標資料庫
#         outdb_metadata = MetaData()
#         for table_name in metadata.tables:
#             source_table = Table(table_name, metadata, autoload_with=db_engine)
#             outdb_table = Table(table_name, outdb_metadata, autoload_with=outdb)
#
#             # 查詢源表中的所有資料
#             query = select(source_table)
#             with db_engine.connect() as connection:
#                 result = connection.execute(query).fetchall()
#
#             # 插入到目標表中
#             for row in result:
#                 row_data = row._asdict()
#
#                 # 嘗試將所有文字資料轉換為 UTF-8
#                 for key, value in row_data.items():
#                     if isinstance(value, str):
#                         row_data[key] = value.encode("utf-8")
#
#                 insert_stmt = insert(outdb_table).values(row_data)
#                 try:
#                     session.execute(insert_stmt)
#                 except Exception as e:
#                     pass
#         db_engine.dispose()
#
#     # 提交事務
#     session.commit()
#     # 關閉Session
#     session.close()
#     outdb.dispose()
#     return save_path
 
@wx_core_error
def decrypt_merge(wx_path: str, key: str, outpath: str = "",
                  merge_save_path: str = None,
                  is_merge_data=True, is_del_decrypted: bool = True,
                  startCreateTime: int = 0, endCreateTime: int = 0,
                  db_type=None) -> (bool, str):
    """
    解密合並資料庫 msg.db, microMsg.db, media.db,注意:會刪除原資料庫
    :param wx_path: 微信路徑 eg: C:\\*******\\WeChat Files\\wxid_*********
    :param key: 解密金鑰
    :param outpath: 輸出路徑
    :param merge_save_path: 合併後的資料庫路徑
    :param is_merge_data: 是否合併資料(如果為False,則只解密,並建立表,不插入資料)
    :param is_del_decrypted: 是否刪除解密後的資料庫(除了合併後的資料庫)
    :param startCreateTime: 開始時間戳 主要用於MSG資料庫的合併
    :param endCreateTime:  結束時間戳 主要用於MSG資料庫的合併
    :param db_type: 資料庫型別,從核心資料庫中選擇
    :return: (true,解密後的資料庫路徑) or (false,錯誤資訊)
    """
    if db_type is None:
        db_type = []
 
    outpath = outpath if outpath else "decrypt_merge_tmp"
    merge_save_path = os.path.join(outpath,
                                   f"merge_{int(time.time())}.db") if merge_save_path is None else merge_save_path
    decrypted_path = os.path.join(outpath, "decrypted")
 
    if not wx_path or not key or not os.path.exists(wx_path):
        wx_core_loger.error("引數錯誤", exc_info=True)
        return False, "引數錯誤"
 
    # 解密
    code, wxdbpaths = get_core_db(wx_path, db_type)
    if not code:
        wx_core_loger.error(f"獲取資料庫路徑失敗{wxdbpaths}", exc_info=True)
        return False, wxdbpaths
 
    # 判斷out_path是否為空目錄
    if os.path.exists(decrypted_path) and os.listdir(decrypted_path):
        for root, dirs, files in os.walk(decrypted_path, topdown=False):
            for name in files:
                os.remove(os.path.join(root, name))
            for name in dirs:
                os.rmdir(os.path.join(root, name))
 
    if not os.path.exists(decrypted_path):
        os.makedirs(decrypted_path)
 
    wxdbpaths = {i["db_path"]: i for i in wxdbpaths}
 
    # 呼叫 decrypt 函式,並傳入引數   # 解密
    code, ret = batch_decrypt(key=key, db_path=list(wxdbpaths.keys()), out_path=decrypted_path, is_print=False)
    if not code:
        wx_core_loger.error(f"解密失敗{ret}", exc_info=True)
        return False, ret
 
    out_dbs = []
    for code1, ret1 in ret:
        if code1:
            out_dbs.append(ret1)
 
    parpare_merge_db_path = []
    for db_path, out_path, _ in out_dbs:
        parpare_merge_db_path.append({"db_path": db_path, "de_path": out_path})
    merge_save_path = merge_db(parpare_merge_db_path, merge_save_path, is_merge_data=is_merge_data,
                               startCreateTime=startCreateTime, endCreateTime=endCreateTime)
    if is_del_decrypted:
        shutil.rmtree(decrypted_path, True)
    if isinstance(merge_save_path, str):
        return True, merge_save_path
    else:
        return False, "未知錯誤"
 
 
@wx_core_error
def merge_real_time_db(key, merge_path: str, db_paths: [dict] or dict, real_time_exe_path: str = None):
    """
    合併實時資料庫訊息,暫時只支援64位系統
    :param key:  解密金鑰
    :param merge_path:  合併後的資料庫路徑
    :param db_paths:  [dict] or dict eg: {'wxid': 'wxid_***', 'db_type': 'MicroMsg',
                        'db_path': 'C:\**\wxid_***\Msg\MicroMsg.db', 'wxid_dir': 'C:\***\wxid_***'}
    :param real_time_exe_path:  實時資料庫合併工具路徑
    :return:
    """
    try:
        import platform
    except:
        raise ImportError("未找到模組 platform")
    # 判斷系統位數是否為64位,如果不是則丟擲異常
    if platform.architecture()[0] != '64bit':
        raise Exception("System is not 64-bit.")
 
    if isinstance(db_paths, dict):
        db_paths = [db_paths]
 
    merge_path = os.path.abspath(merge_path)  # 合併後的資料庫路徑,必須為絕對路徑
    merge_path_base = os.path.dirname(merge_path)  # 合併後的資料庫路徑
    if not os.path.exists(merge_path_base):
        os.makedirs(merge_path_base)
 
    endbs = []
    for db_info in db_paths:
        db_path = os.path.abspath(db_info['db_path'])
        if not os.path.exists(db_path):
            # raise FileNotFoundError("資料庫不存在")
            continue
        endbs.append(os.path.abspath(db_path))
    endbs = '" "'.join(list(set(endbs)))
 
    if not os.path.exists(real_time_exe_path if real_time_exe_path else ""):
        current_path = os.path.dirname(__file__)  # 獲取當前資料夾路徑
        real_time_exe_path = os.path.join(current_path, "tools", "realTime.exe")
    if not os.path.exists(real_time_exe_path):
        raise FileNotFoundError("未找到實時資料庫合併工具")
    real_time_exe_path = os.path.abspath(real_time_exe_path)
 
    # 呼叫cmd命令
    cmd = f'{real_time_exe_path} "{key}" "{merge_path}" "{endbs}"'
    # os.system(cmd)
    # wx_core_loger.info(f"合併實時資料庫命令:{cmd}")
    p = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=merge_path_base,
                         creationflags=subprocess.CREATE_NO_WINDOW)
    out, err = p.communicate()  # 檢視返回值
    if out and out.decode("utf-8").find("SUCCESS") >= 0:
        wx_core_loger.info(f"合併實時資料庫成功{out}")
        return True, merge_path
    else:
        wx_core_loger.error(f"合併實時資料庫失敗\n{out}\n{err}")
        return False, (out, err)
 
 
@wx_core_error
def all_merge_real_time_db(key, wx_path, merge_path: str, real_time_exe_path: str = None):
    """
    合併所有實時資料庫
    注:這是全量合併,會有可能產生重複資料,需要自行去重
    :param key:  解密金鑰
    :param wx_path:  微信資料夾路徑 eg:C:\*****\WeChat Files\wxid*******
    :param merge_path:  合併後的資料庫路徑 eg: C:\\*******\\WeChat Files\\wxid_*********\\merge.db
    :param real_time_exe_path:  實時資料庫合併工具路徑
    :return:
    """
    if not merge_path or not key or not wx_path or not wx_path:
        return False, "msg_path or media_path or wx_path or key is required"
    try:
        from wxdump import get_core_db
    except ImportError:
        return False, "未找到模組 wxdump"
    db_paths = get_core_db(wx_path, CORE_DB_TYPE)
    if not db_paths[0]:
        return False, db_paths[1]
    db_paths = db_paths[1]
    code, ret = merge_real_time_db(key=key, merge_path=merge_path, db_paths=db_paths,
                                   real_time_exe_path=real_time_exe_path)
    if code:
        return True, merge_path
    else:
        return False, ret

相關文章