Python 收集 JVM 資料

曲珂發表於2018-09-12

之前前輩用 Java 寫的 JVM 資料收集指令碼，不太方便組內小夥伴維護，遂用 Python 重寫了。

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Filename:    jvm_monitor
# Description: collect jvm info
# Author:      quke
# Date:        2018/8/22


import base64
import datetime
import json
import logging.handlers
import os
import random
import re
import socket
import time
from subprocess import Popen, PIPE

import MySQLdb
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

# One layout shared by the console (root logger) and the rotating log file.
LOG_FORMAT = '%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)s - %(funcName)20s() ] %(message)s'
LOG_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'

logging.basicConfig(level=logging.INFO,
                    format=LOG_FORMAT,
                    datefmt=LOG_DATE_FORMAT,
                    )
# Kept for backward compatibility but deliberately not attached: basicConfig()
# above already installs a stream handler on the root logger, so attaching
# this one as well would print every record twice.
console_handler = logging.StreamHandler()
file_handler = logging.handlers.RotatingFileHandler('jvm_monitor.log', maxBytes=10485760, backupCount=5)
# Without an explicit formatter the file would only contain the bare message,
# with no timestamp, level or call site.
file_handler.setFormatter(logging.Formatter(LOG_FORMAT, LOG_DATE_FORMAT))

logger = logging.getLogger(__name__)
logger.addHandler(file_handler)

# Name of the machine this script runs on; used to look up module aliases.
hostname = socket.gethostname()


def run_command(cmd):
    """Run ``cmd`` through the shell and return its stdout as a list of lines.

    Success is decided by the exit status rather than by the mere presence of
    stderr output, so a tool that prints a warning but exits with 0 no longer
    aborts the whole collection run.

    :param cmd: shell command line to execute.
    :return: stdout with leading/trailing newlines removed, split on ``\\n``
        (``['']`` when the command printed nothing).
    :raises SystemExit: with status 1 when the command exits non-zero.
    """
    process = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE)
    stdout, stderr = process.communicate()

    # Pipes yield bytes on Python 3; on Python 2 they are already ``str``.
    if not isinstance(stdout, str):
        stdout = stdout.decode('utf-8', 'replace')
    if not isinstance(stderr, str):
        stderr = stderr.decode('utf-8', 'replace')

    if process.returncode != 0:
        logger.error('Exception with run %s (exit code %s):%s' % (cmd, process.returncode, stderr))
        # Non-zero status so that cron / supervisors can see the failure.
        raise SystemExit(1)

    if stderr:
        logger.warning('run %s wrote to stderr:%s' % (cmd, stderr))

    return stdout.strip('\n').split('\n')


def requests_retry(
        retries=3,
        backoff_factor=0.3,
        status_forcelist=(500, 502, 504),
        session=None,
):
    """Return a requests session whose HTTP and HTTPS adapters retry.

    :param retries: value used for the total, read and connect retry limits.
    :param backoff_factor: passed straight to ``Retry``.
    :param status_forcelist: HTTP status codes passed to ``Retry``.
    :param session: existing session to configure; a new ``requests.Session``
        is created when this is falsy.
    :return: the configured session.
    """
    if not session:
        session = requests.Session()

    retry_policy = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)

    # The same adapter instance serves both schemes.
    for scheme in ('http://', 'https://'):
        session.mount(scheme, retrying_adapter)
    return session


def execute_sql(sql, host='192.168.1.1', user='user', password='password', db='db', params=None):
    """Run one SQL statement on a fresh MySQL connection and return all rows.

    :param sql: statement text; may contain ``%s`` placeholders when
        ``params`` is given.
    :param host: MySQL server host.
    :param user: MySQL user name.
    :param password: MySQL password.
    :param db: database (schema) name.
    :param params: optional sequence/mapping handed to ``cursor.execute`` so
        the driver escapes the values instead of the caller formatting them
        into ``sql``.
    :return: whatever ``cursor.fetchall()`` yields for the statement.
    """
    connection = MySQLdb.connect(host, user, password, db)
    try:
        cursor = connection.cursor()
        try:
            cursor.execute(sql, params)
            # Case-insensitive so that "INSERT"/"UPDATE" are committed as well.
            lowered = sql.lower()
            if any(keyword in lowered for keyword in ('insert', 'update', 'delete', 'replace')):
                connection.commit()
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when execute()/commit() raised.
        connection.close()


def get_all_mixed_info():
    """Load the ``cmdb_mixed_relation`` table as a nested dictionary.

    :return: ``{mixhost: {module: alias}}`` built from every row returned by
        the query.
    """
    query = 'select mixhost,module,alias from cmdb_mixed_relation'
    rows = execute_sql(query, host='192.168.1.1', user='user', password='password', db='db')

    relations = {}
    for mixhost, module_name, module_alias in rows:
        # A later row for the same host/module pair overwrites an earlier one.
        relations.setdefault(mixhost, {})[module_name] = module_alias
    return relations


def get_java_module(args, cur_dir='/apps'):
    """Find the application directory whose name occurs in a jps line.

    Only sub-directories of ``cur_dir`` whose name contains ``java``,
    ``boot``, ``tomcat`` or ``mycat`` are considered. When several of them
    occur in ``args`` (e.g. ``java-foo`` and ``java-foo-bar``) the longest
    name wins, so the result no longer depends on ``os.listdir`` ordering.

    :param args: text to search, typically one line of ``jps -mlv`` output.
    :param cur_dir: directory holding the application directories.
    :return: the matching directory name, or ``None`` when nothing matches.
    :raises OSError: when ``cur_dir`` cannot be listed.
    """
    markers = ('java', 'boot', 'tomcat', 'mycat')
    candidates = []
    for d in os.listdir(cur_dir):
        if not os.path.isdir(os.path.join(cur_dir, d)):
            continue
        if not any(marker in d for marker in markers):
            continue
        if d in args:
            candidates.append(d)

    if not candidates:
        return None
    # Longest name first; the name itself breaks ties deterministically.
    return max(candidates, key=lambda name: (len(name), name))


def get_alias(module_name):
    """Return the alias registered for ``module_name`` on this host.

    :param module_name: module to look up in the mixed-deployment table.
    :return: the alias, or the string ``'null'`` when this host or module has
        no (non-empty) alias.
    """
    modules_on_host = get_all_mixed_info().get(hostname, {})
    return modules_on_host.get(module_name) or 'null'


def get_gc_collector_name(line):
    """Derive the young and old generation collector names from a JVM line.

    Each generation has an ordered list of flag names; the first one that
    occurs as a substring of ``line`` is returned. When none occurs the
    fallbacks are ``'ParNew'`` (young) and ``'CMS'`` (old).

    :param line: text containing the JVM flags.
    :return: tuple ``(young_collector, old_collector)``.
    """
    young_flags = ('UseParNewGC', 'UseG1GC', 'UseSerialGC', 'UseParallelGC')
    old_flags = ('UseConcMarkSweepGC', 'UseG1GC', 'UseParallelOldGC', 'UseSerialGC')

    young = next((flag for flag in young_flags if flag in line), 'ParNew')
    old = next((flag for flag in old_flags if flag in line), 'CMS')
    return young, old


def get_start_time(pid):
    """Return the start time of process ``pid`` as ``YYYY-mm-dd HH:MM:SS``.

    :param pid: process id handed to ``ps -o lstart -p``.
    :return: the second output line of ``ps`` re-formatted as a timestamp.
    """
    ps_lines = run_command('ps -o lstart -p %s' % pid)
    # Line 0 is the column header; line 1 carries the value.
    started = time.strptime(ps_lines[1], '%a %b %d %H:%M:%S %Y')
    return time.strftime('%Y-%m-%d %H:%M:%S', started)


def get_jstat_info(pid):
    """Collect heap sizes and GC counters for ``pid`` from ``jstat -gc``.

    Values are located by column header instead of by position. JDK 8+
    reports MC/MU (metaspace) where older JVMs report PC/PU (permanent
    generation) and inserts extra columns, which shifts every later value
    when parsing positionally. Header lookup also tolerates leading
    whitespace on the value line.

    :param pid: JVM process id.
    :return: dict of strings with the keys ``s0c, s1c, s0u, s1u, ec, eu, oc,
        ou, pc, pu, jvmYgc, jvmYgct, jvmFgc, jvmFgct, jvmGct``; ``pc``/``pu``
        hold the MC/MU values when PC/PU are absent.
    :raises ValueError: when the output has no value line or lacks a column.
    """
    ret = run_command('jstat -gc %s' % pid)
    if len(ret) < 2:
        raise ValueError('unexpected jstat -gc output for pid %s: %r' % (pid, ret))

    columns = dict(zip(ret[0].split(), ret[1].split()))

    # result key -> acceptable jstat headers, in order of preference
    wanted = (
        ('s0c', ('S0C',)),
        ('s1c', ('S1C',)),
        ('s0u', ('S0U',)),
        ('s1u', ('S1U',)),
        ('ec', ('EC',)),
        ('eu', ('EU',)),
        ('oc', ('OC',)),
        ('ou', ('OU',)),
        ('pc', ('PC', 'MC')),
        ('pu', ('PU', 'MU')),
        ('jvmYgc', ('YGC',)),
        ('jvmYgct', ('YGCT',)),
        ('jvmFgc', ('FGC',)),
        ('jvmFgct', ('FGCT',)),
        ('jvmGct', ('GCT',)),
    )

    gc_statistics = {}
    for key, headers in wanted:
        for header in headers:
            if header in columns:
                gc_statistics[key] = columns[header]
                break
        else:
            raise ValueError('jstat -gc output for pid %s lacks column %s' % (pid, '/'.join(headers)))
    return gc_statistics


def get_thread_count(pid):
    """Read two thread counters for ``pid`` from ``jstat -snap``.

    The counters are taken by position: the third line from the end is
    returned as the active count and the last line as the total count.
    NOTE(review): this relies on the line ordering of ``jstat -snap``;
    confirm it holds for the JVM versions in use.

    :param pid: JVM process id.
    :return: tuple ``(active_thread_count, total_thread_count)`` of strings,
        each being the text after the first ``=`` of its line.
    """
    snapshot = run_command('jstat -snap %s' % pid)
    active_line = snapshot[-3]
    active = active_line.split('=')[1]
    total_line = snapshot[-1]
    total = total_line.split('=')[1]
    return active, total


def get_jvm_info():
    """Gather one record per Java process reported by ``jps -mlv``.

    The jps process itself and ``com.lagou.jmonitor.AgentWatcher`` are
    ignored. A process is skipped (with an error log) when its jps line has
    no main class, when no application directory matches it, or when no
    alias can be resolved for its module.

    :return: list of dicts with the keys ``pid, module, alias, start_time,
        gc_statistics, active_thread_count, total_thread_count, ygc, ogc,
        main_function, main_args``.
    """
    instances = []
    for line in run_command('jps -mlv'):
        if not line or 'sun.tools.jps.Jps' in line or 'com.lagou.jmonitor.AgentWatcher' in line:
            continue

        instances_list = line.split(' ')
        if len(instances_list) < 2:
            # A line with only a pid has no main class to report.
            logger.error('[%s] unexpected jps output , continue' % line)
            continue

        module = get_java_module(line)
        if module is None:
            # ``None in hostname`` would raise TypeError and abort the run.
            logger.error('[%s] can not find java module directory , continue' % line)
            continue

        alias = hostname if module in hostname else get_alias(module)
        if 'null' == alias:
            logger.error('[%s] can not get mixed module alias name , continue' % module)
            continue

        ygc, ogc = get_gc_collector_name(line)
        pid = instances_list[0]
        start_time = get_start_time(pid)
        gc_statistics = get_jstat_info(pid)
        active_thread_count, total_thread_count = get_thread_count(pid)
        main_function = instances_list[1]
        main_args = ' '.join(instances_list[2:])
        instances.append(
            dict(
                pid=pid,
                module=module,
                alias=alias,
                start_time=start_time,
                gc_statistics=gc_statistics,
                active_thread_count=active_thread_count,
                total_thread_count=total_thread_count,
                ygc=ygc,
                ogc=ogc,
                main_function=main_function,
                main_args=main_args
            )
        )
    return instances


def push_to_oss(jvm):
    """Insert or update the ``jvmmonitordata`` row for one JVM record.

    A row counts as existing when the alias shows up in any row selected by
    module name or in any row selected by hostname; then only ``pid``,
    ``gclist`` and ``fgcygc`` are updated. Otherwise a full row is inserted.

    NOTE(review): every value is formatted straight into the SQL text, so a
    quote character inside e.g. the JVM arguments breaks the statement.

    :param jvm: record as produced by ``get_jvm_info``.
    """
    module_name = jvm.get('module')
    alias_name = jvm.get('alias')
    process_id = jvm.get('pid')
    main_class = jvm.get('main_function')
    vm_params = jvm.get('main_args')
    started_at = jvm.get('start_time')

    young_entry = dict(useTime=jvm['gc_statistics']['jvmYgct'], name=jvm['ygc'],
                       times=jvm['gc_statistics']['jvmYgc'])
    old_entry = dict(useTime=jvm['gc_statistics']['jvmFgct'], name=jvm['ogc'],
                     times=jvm['gc_statistics']['jvmFgc'])
    gc_list_json = json.dumps([young_entry, old_entry])

    gc_totals_json = json.dumps(dict(jvmFgc=jvm['gc_statistics']['jvmFgc'],
                                     jvmYgc=jvm['gc_statistics']['jvmYgc'],
                                     jvmFgct=jvm['gc_statistics']['jvmFgct'],
                                     jvmYgct=jvm['gc_statistics']['jvmYgct'], ))

    by_module_sql = 'select hostname,modulename from jvmmonitordata where modulename="%s"' % module_name
    by_host_sql = 'select hostname from jvmmonitordata where hostname="%s"' % alias_name
    logger.info('execute sql :%s' % by_module_sql)

    # Both lookups always run, in this order.
    seen_by_module = any(alias_name in row for row in execute_sql(by_module_sql))
    seen_by_host = any(alias_name in row for row in execute_sql(by_host_sql))

    if seen_by_module or seen_by_host:
        statement = "update jvmmonitordata set pid=%d,gclist='%s',fgcygc='%s' where hostname='%s'" % (
            int(process_id), gc_list_json, gc_totals_json, alias_name)
    else:
        statement = "insert into jvmmonitordata(hostname,modulename,mainclassname,pid,vmparam,gclist,updated,fgcygc) values ('%s','%s','%s',%d,'%s','%s','%s','%s')" % (
            alias_name, module_name, main_class, int(process_id), vm_params, gc_list_json, started_at,
            gc_totals_json)

    logger.info('execute sql :%s' % statement)
    execute_sql(statement)


def get_hbase_svr():
    """Pick an HBase REST endpoint that answers a HEAD request with 200.

    Up to 10 randomly chosen endpoints are probed with a 2 second timeout.

    :return: the first endpoint that responded with status 200; when all
        attempts fail, the last endpoint tried (an error is logged).
    """
    hbase_list = ["http://192.168.100.1:8080", "http://192.168.100.2:8080", "http://192.168.100.3:8080"]
    hbase_url = None
    for _ in range(10):
        hbase_url = random.choice(hbase_list)
        try:
            r = requests.head(hbase_url, timeout=2)
        except Exception as e:
            # Best effort: any probe failure just moves on to another pick,
            # but Ctrl-C / SystemExit are no longer swallowed.
            logger.info("connect %s error (%s), try another", hbase_url, e)
            continue
        if r.status_code == 200:
            return hbase_url
        logger.info("connect %s returned status %s, try another", hbase_url, r.status_code)

    logger.error("connect hbase failed with 10 times")
    return hbase_url


def build_hbase_data(jvm):
    """Flatten one JVM record into the integer metrics stored in HBase.

    Every jstat size is converted to float, multiplied by 1000 and truncated
    to int; the two survivor spaces are summed before scaling. Thread counts
    are converted to int unchanged.

    :param jvm: record with ``alias``, ``gc_statistics``,
        ``total_thread_count`` and ``active_thread_count``.
    :return: dict with ``hostName``, the eight ``jvm*`` sizes,
        ``totalThreadCount`` and ``activeThreadCount``.
    """
    host_alias = jvm['alias']
    stats = jvm['gc_statistics']

    def as_float(name):
        return float(stats[name])

    eden_capacity = as_float('ec') * 1000
    eden_used = as_float('eu') * 1000
    old_capacity = as_float('oc') * 1000
    old_used = as_float('ou') * 1000
    perm_capacity = as_float('pc') * 1000
    perm_used = as_float('pu') * 1000
    survivor_capacity = (as_float('s0c') + as_float('s1c')) * 1000
    survivor_used = (as_float('s0u') + as_float('s1u')) * 1000
    total_threads = int(jvm['total_thread_count'])
    active_threads = int(jvm['active_thread_count'])

    result = dict(hostName=host_alias)
    result['jvmEc'] = int(eden_capacity)
    result['jvmEu'] = int(eden_used)
    result['jvmOc'] = int(old_capacity)
    result['jvmOu'] = int(old_used)
    result['jvmPc'] = int(perm_capacity)
    result['jvmPu'] = int(perm_used)
    result['jvmSc'] = int(survivor_capacity)
    result['jvmSu'] = int(survivor_used)
    result['totalThreadCount'] = total_threads
    result['activeThreadCount'] = active_threads
    return result


def _b64_text(text):
    """Base64-encode ``text`` and return the result as a native ``str``."""
    # b64encode needs bytes on Python 3; Python 2 ``str`` already is bytes.
    if not isinstance(text, bytes):
        text = text.encode('utf-8')
    encoded = base64.b64encode(text)
    # Python 3 returns bytes, which json.dumps cannot serialise.
    return encoded if isinstance(encoded, str) else encoded.decode('ascii')


def jvm_hbase_constructor(jvm):
    """Build the HBase REST payload for one JVM record.

    The row key is ``<hostName>:<YYYYmmddHHMM>`` (current local time) and
    every metric becomes one cell in the ``jvm`` column family. Row key,
    column names and values are all base64 encoded.

    :param jvm: record as produced by ``get_jvm_info``.
    :return: tuple ``(row_key, json_rows)`` where ``json_rows`` is
        ``{"Row": [{"key": row_key, "Cell": [...]}]}``.
    """
    data = build_hbase_data(jvm)
    rows = []
    json_rows = {"Row": rows}
    row_key = _b64_text(data['hostName'] + ":" + datetime.datetime.now().strftime('%Y%m%d%H%M'))
    cell = []
    for column in ['jvmEc', 'jvmEu', 'jvmOc', 'jvmOu', 'jvmPc', 'jvmPu', 'jvmSc', 'jvmSu',
                   'totalThreadCount', 'activeThreadCount']:
        cell.append({"column": _b64_text('jvm' + ":" + column), "$": _b64_text(str(data[column]))})
    rows.append({'key': row_key, 'Cell': cell})
    return row_key, json_rows


def push_to_hbase(jvm):
    """Store one JVM record in the HBase ``jvm`` table through the REST API.

    The POST is attempted up to 10 times, each time against an endpoint
    chosen by ``get_hbase_svr``. Failures are logged and never propagate to
    the caller.

    :param jvm: record as produced by ``get_jvm_info``.
    """
    table_name = 'jvm'
    try:
        row_key, json_rows = jvm_hbase_constructor(jvm)
        payload = json.dumps(json_rows)
    except Exception as e:
        logger.error("construct hbase data error %s" % str(e))
        return

    headers = {"Content-Type": "application/json", "Accept": "application/json"}
    for attempt in range(10):
        hbase_url = get_hbase_svr()
        try:
            response = requests.post(hbase_url + '/' + table_name + '/' + row_key, data=payload,
                                     headers=headers, timeout=60)
        except Exception as e:
            # Still best effort, but the reason is now visible in the log.
            logger.warning("save to hbase via %s failed (attempt %s): %s", hbase_url, attempt + 1, e)
            continue
        if response.status_code == 200:
            break
        logger.warning("save to hbase via %s returned status %s (attempt %s)",
                       hbase_url, response.status_code, attempt + 1)
    else:
        # Reached only when no attempt ended with a 200 response.
        logger.error("try to save hbase failed with 10 times,exit")


def push_data(jvm_infos):
    """Send every collected JVM record to MySQL first and then to HBase.

    :param jvm_infos: iterable of records as produced by ``get_jvm_info``.
    """
    for jvm_record in jvm_infos:
        push_to_oss(jvm_record)
        push_to_hbase(jvm_record)


if __name__ == '__main__':
    # Collect once, then hand the records to both storage back ends.
    push_data(get_jvm_info())

 

相關文章