线上Oracle准备实现类似MySQL slow query的监控脚本,把查询时间超出定值的SQL定时的发送邮件告警,实现过程记录如下:
主要思路是通过DBA_HIST的几个视图来获取每小时快照中慢SQL的情况,为了不影响线上环境,这里把脚本部署在了自己的监控端,通过DBLINK定期的抓取线上生产库的数据到监控数据库,并简单的处理后获得csv格式的报表,发送报表至邮箱。
定时脚本 每小时查询一次
00 * * * * /opt/scripts/oracle/get_slow_query.sh
脚本内容如下
[oracle@59-Mysql-Test ~]$ cat /opt/scripts/oracle/get_slow_query.sh
#!/bin/bash
errlog="/opt/scripts/oracle/sqlerror.log"
sq_data="/opt/scripts/oracle/slow_query_data.xls"
check_file="/opt/scripts/oracle/slowsql_check.log"
send_mail_check="/opt/scripts/oracle/send_mail.chk"
export ORACLE_BASE=/u01/app/oracle
export ORACLE_HOME=/u01/app/oracle/product/11.2.0/db_1
export ORACLE_SID=oramon
export PATH=/usr/sbin:$PATH
export PATH=/u01/app/oracle/product/11.2.0/db_1/bin:$PATH
export LD_LIBRARY_PATH=$ORACLE_HOME/lib:/lib:/usr/lib
export CLASSPATH=/u01/app/oracle/product/11.2.0/db_1/JRE:/u01/app/oracle/product/11.2.0/db_1/jlib:/u01/app/oracle/product/11.2.0/db_1/rdbms/jlib
cd /opt/scripts/oracle/
$ORACLE_HOME/bin/sqlplus -S sqmon/oracle @main > ${errlog}
cat ${errlog} | grep -v 'Call completed.' | grep -v '' > ${check_file}
[ -s ${check_file} ] && /bin/mail -s "Oracle slow query check error" xxx@xxx.com < ${check_file}
cat ${sq_data} | grep -v '<' >${send_mail_check}
[ -s ${send_mail_check} ] && /bin/mail -a ${sq_data} -s "OracleDB find slow query,please check" xxx@xxx.com,xxx@xxx.com
[oracle@59-Mysql-Test oracle]$ cat main.sql
call pro_get_slow_query();
set linesize 5000
set term off verify off feedback off pagesize 999
set markup html on entmap ON spool on preformat off
spool slow_query_data.xls
@get_tables.sql
spool off
exit
[oracle@59-Mysql-Test oracle]$ cat get_tables.sql
select sql_id,elapsed_time,cpu_time,iowait_time,gets,reads,rws,clwait_time,execs,elpe,machine,username,dbms_lob.substr(sqt,4000) from DBA_ORA_SLOW_QUERY where elpe > 10 and machine not in ('rac01','rac02');
存储过程pro_get_slow_query内容如下
CREATE OR REPLACE PROCEDURE SQMON.pro_get_slow_query
AS
BEGIN
/**********delete old data on sqltext*************/
delete from local_dba_hist_sqltextas;
commit;
insert into local_dba_hist_sqltextas select * from dba_hist_sqltext@dg2;
commit;
insert into DBA_ORA_SLOW_QUERY_HISTORY select a.*,sysdate from DBA_ORA_SLOW_QUERY;
commit;
delete from DBA_ORA_SLOW_QUERY;
commit;
/*
select * from DBA_ORA_SLOW_QUERY;
select * from DBA_ORA_SLOW_QUERY_HISTORY;
*/
/************insert new date ********************/
insert into DBA_ORA_SLOW_QUERY
select v_1.sql_id,
v_1.elapsed_time,
v_1.cpu_time,
v_1.iowait_time,
v_1.gets,
v_1.reads,
v_1.rws,
v_1.clwait_time,
v_1.execs,
v_1.elpe,
v_2.machine,
v_2.username,
v_1.sqt
from (select s.sql_id,
elapsed_time / 1000000 elapsed_time,
cpu_time / 1000000 cpu_time,
iowait_time / 1000000 iowait_time,
gets,
reads,
rws,
clwait_time / 1000000 clwait_time,
execs,
st.sql_text sqt,
elapsed_time / 1000000 / decode(execs, 0, null, execs) elpe
from (select *
from (select sql_id,
sum(executions_delta) execs,
sum(buffer_gets_delta) gets,
sum(disk_reads_delta) reads,
sum(rows_processed_delta) rws,
sum(cpu_time_delta) cpu_time,
sum(elapsed_time_delta) elapsed_time,
sum(clwait_delta) clwait_time,
sum(iowait_delta) iowait_time
from dba_hist_sqlstat@HUBSDG2
where snap_id >=
(select max(snap_id) - 1
from dba_hist_snapshot@DG2)
and snap_id <=
(select max(snap_id)
from dba_hist_snapshot@DG2)
group by sql_id
order by sum(elapsed_time_delta) desc)
where rownum <= 20) s,
local_dba_hist_sqltextas st
where st.sql_id = s.sql_id) v_1
left join (select distinct a.sql_id, a.machine, b.username
from dba_hist_active_sess_history@DG2 a
left join dba_users@DG2 b
on a.user_id = b.user_id
where a.snap_id >=
(select max(snap_id) - 1 from dba_hist_snapshot@DG2)
and a.snap_id <=
(select max(snap_id) from dba_hist_snapshot@DG2)) v_2
on v_1.sql_id = v_2.sql_id
order by elpe desc;
commit;
END;
/
上面标红部分的表不再列出,可以直接通过CTAS格式去创建即可。