转载

使用shell自动化诊断性能问题(一)(r11笔记第41天)

一直以来要做性能分析的自动化工作,但是久久没有动笔,今天索性来更新一版。

首先我希望得到的一个基本效果就是后台去扫描数据库的DB time,如果超出了阈值,比如这里我设置的为400(即DB time为400%),则会开启自动诊断的任务。时间范围是提前一个小时和当前时间。我对已有的脚本做了一些改动,加了一些逻辑,后续还会不断完善。

DBTIME_THRESHOLD=400
DATE=`date '+%Y%m%d'`
BEGIN_HOUR=`date -d"1 hour ago" +"%H"`
END_HOUR=`date  +"%H"`

下面的函数会得到快照级别的DB time情况

function showsnap
{
sqlplus -s $DB_CONN_STR@$SH_DB_SID <<EOF
break on db_name
set pages 0
set feedback off
set linesize 100
col snapdate format a20
select
begin_snap
,end_snap
,snapdate
,round(((END_INTERVAL_TIME+0)-(BEGIN_INTERVAL_TIME+0 ))*24*60) duration_mins
,round((select round((sum(e.value) -
                        sum(b.value)) / 1000000 /60,2) dbtime
                        FROM DBA_HIST_SYS_TIME_MODEL e, DBA_HIST_SYS_TIME_MODEL b
                        WHERE
                         e.STAT_NAME = 'DB time'
                         and b.snap_id=begin_snap
                        and e.snap_id =end_snap
                        AND b.STAT_NAME = 'DB time'
                        group by e.snap_id,b.snap_id)) dbtime
from
(       
select
      di.db_name                                        db_name
     , s.snap_id                                         begin_snap
     ,lead(s.snap_id ,1,s.snap_id ) over(order by s.end_interval_time ) end_snap
     , to_char(s.end_interval_time,'dd Mon YYYY HH24:mi') snapdate
     , s.snap_level                                      lvl     
     ,s.end_interval_time
     ,s.begin_interval_time
  from dba_hist_snapshot s
     , dba_hist_database_instance di
 where
  ( di.dbid,di.instance_number) in
 (select d.dbid            dbid
     , i.instance_number inst_num
  from v/$database d,
       v/$instance i)
   and di.dbid             = s.dbid
   and di.instance_number  = s.instance_number
   and di.startup_time     = s.startup_time
   and to_char(END_INTERVAL_TIME,'yyyymmdd')='$1'
   and EXTRACT(HOUR FROM END_INTERVAL_TIME) between $2-1 and $3+1
 order by  instance_name, snap_id
 );  
EOF
}下面的函数会得到快照级别SQL的DB time占比图。
function showsnapsql
{
sqlplus -s $DB_CONN_STR@$SH_DB_SID <<EOF
break on db_name
set pages 50
set linesize 100
col elapsed_time format a10
col per_total format a10
select snap_id,sql_id,EXECUTIONS_DELTA,max_elapsed elapsed_time,per_total||'%' per_total from
(select distinct snap_id,sql_id,EXECUTIONS_DELTA,trunc(max(ELAPSED_TIME_DELTA) OVER (PARTITION BY snap_id,sql_id )/1000000,0)||'s' max_elapsed,
 trunc((max(ELAPSED_TIME_DELTA) OVER (PARTITION BY snap_id,sql_id))/(SUM(ELAPSED_TIME_DELTA) OVER (PARTITION BY snap_id )),2)*100 per_total
 from dba_hist_sqlstat where snap_id=$1
 order by 5 desc
) where rownum<=5;

EOF
}下面的函数会基于快照生成AWR报告。
function genawrhtml
{
awr_inputs=`sqlplus -s ${DB_CONN_STR}@${SH_DB_SID} <<EOF
SET FEEDBACK OFF
SET HEAD OFF
SET PAGES 0
select d.dbid||','||i.instance_number||','||$1||','||$2||',0' text
from v///$database d,
v///$instance i ;
EOF`
sqlplus -s ${DB_CONN_STR}@${SH_DB_SID} <<EOF
set pages 0
set linesize 1500
set termout on;
spool awrrpt_$1_$2.lst
select output from table(dbms_workload_repository.awr_report_html( ${awr_inputs}));
#select output from table(dbms_workload_repository.awr_report_html( `cat awr_inputs.lst`));
spool off;
set termout off;
clear columns sql;
EOF
}下面的是执行的主方法,当然还有待完善。
#MAIN 主方法

tmp_dbtime_snap=`showsnap $DATE $BEGIN_HOUR $END_HOUR|awk -v dbtime=$DBTIME_THRESHOLD '{if($8>=dbtime) print $0}' |tail -1`
echo $tmp_dbtime_snap

dbtime_snap=`echo $tmp_dbtime_snap|awk '{print $1" " $2}'`
echo $dbtime_snap
#得到快照级别的SQL占用DB time情况showsnapsql $dbtime_snap#生成基于DB time的AWR报告genawrhtml  $dbtime_snap上面的脚本执行很简单,无需输入任何参数。就会得到一个完整的数据报告。后续会通过邮件的形式来发送。后面会继续补充完善。



正文到此结束
Loading...