巡检项:
cpu使用率,内存使用率,文件系统使用率,日志文件大小,oracle后台日志(有无异常),oracle连接数使用情况,oracle表空间使用率,asm存储使用率,失效索引,rman备份日志(有无异常)
主巡检脚本:Oracheck.sh
[oracle@orcl scripts]$ cat oracheck.sh #!/bin/sh # Get Hostname DB_CONN_STR=system/oracle source /home/oracle/.bash_profile logfile=oracheck_`date +%y%m%d`.log hostinfodir=/home/oracle/oracheck DB_UNIQUE_NAME=orcl if [[ ! -d $hostinfodir ]] then mkdir -p $hostinfodir fi echo -e "==== Oracheck started at $(date +"%y-%m-%d %H:%M:%S") ===\n" >> $hostinfodir/$logfile echo -e "*** step1.CPU使用率检查 \n" >> $hostinfodir/$logfile sar 1 10 >> $hostinfodir/$logfile echo -e "\n" >> $hostinfodir/$logfile echo -e "*** step2.内存使用率检查 \n" >> $hostinfodir/$logfile free -g >> $hostinfodir/$logfile echo -e "\n" >> $hostinfodir/$logfile echo -e "*** step3.文件系统使用率检查 \n" >> $hostinfodir/$logfile df -h >>$hostinfodir/$logfile echo -e "\n" >> $hostinfodir/$logfile echo -e "*** step4.日志文件大小检查 \n" >> $hostinfodir/$logfile echo -e "trace目录(trc,trm,alert_sid):" >> $hostinfodir/$logfile du -sh $ORACLE_BASE/diag/rdbms/$DB_UNIQUE_NAME/$ORACLE_SID/trace >>$hostinfodir/$logfile echo -e "alert目录(log_*.xml):" >> $hostinfodir/$logfile du -sh $ORACLE_BASE/diag/rdbms/$DB_UNIQUE_NAME/$ORACLE_SID/alert >>$hostinfodir/$logfile echo -e "监听日志(listener.log):" >> $hostinfodir/$logfile du -sh $ORACLE_HOME/network/admin/listener.ora >>$hostinfodir/$logfile echo -e "审计目录(adump):" >> $hostinfodir/$logfile du -sh $ORACLE_BASE/admin/$DB_UNIQUE_NAME/adump >>$hostinfodir/$logfile echo -e "\n" >> $hostinfodir/$logfile echo -e "*** step5.oracle后台日志检查 \n" >> $hostinfodir/$logfile tail -3000 $ORACLE_BASE/diag/rdbms/$DB_UNIQUE_NAME/$ORACLE_SID/trace/alert_$ORACLE_SID.log |grep ORA- >> $hostinfodir/$logfile echo -e "\n" >> $hostinfodir/$logfile echo -e "*** step6.oracle连接数检查 \n" >> $hostinfodir/$logfile sh /home/oracle/oracheck/scripts/process_check.sh >> $hostinfodir/$logfile echo -e "\n" >> $hostinfodir/$logfile echo -e "*** step7.oracle表空间使用率检查 \n" >> $hostinfodir/$logfile sh /home/oracle/oracheck/scripts/tbs_check.sh >> $hostinfodir/$logfile echo -e "\n" >> $hostinfodir/$logfile echo -e "*** step8.asm存储使用率检查 \n" >> $hostinfodir/$logfile sh /home/oracle/oracheck/scripts/asm_check.sh >> $hostinfodir/$logfile echo -e "\n" >> $hostinfodir/$logfile echo -e "*** step9.失效索引检查 \n" >> $hostinfodir/$logfile sh /home/oracle/oracheck/scripts/invalid_index_check.sh >> $hostinfodir/$logfile echo -e "\n" >> $hostinfodir/$logfile echo -e "*** step10.rman备份检查 \n" >> $hostinfodir/$logfile echo -e "rman备份日志:" >> $hostinfodir/$logfile # cat /backup/logs/rman`date +%Y%m%d`.log |grep ORA- >> $hostinfodir/$logfile echo -e "rman备份集校验日志:" >> $hostinfodir/$logfile # cat /backup/logs/rman_validate`$date+%Y%m%d`.log |grep ORA- >> $hostinfodir/$logfile echo -e "\n" >> $hostinfodir/$logfile echo -e "==== Oracheck ended at $(date +"%y-%m-%d %H:%M:%S") ===\n" >> $hostinfodir/$logfile exit 0
process_check.sh
[oracle@orcl scripts]$ cat process_check.sh
sqlplus -s $DB_CONN_STR <<EOF
set linesize 300
col resource_name format a15
select resource_name,current_utilization,limit_value, trunc(current_utilization * 100 / limit_value) Result_Number
from v\$resource_limit
where resource_name in ('processes');
EOF
exit
tbs_check.sh
[oracle@orcl scripts]$ cat tbs_check.sh
sqlplus -s $DB_CONN_STR <<EOF
set linesize 300
col tablespace_name format a20
SELECT DF.TABLESPACE_NAME,
COUNT(*) DATAFILE_COUNT,
ROUND(SUM(DF.BYTES) / 1048576 / 1024, 2) SIZE_GB,
ROUND(SUM(FREE.BYTES) / 1048576 / 1024, 2) FREE_GB,
ROUND(SUM(DF.BYTES) / 1048576 / 1024 -
SUM(FREE.BYTES) / 1048576 / 1024,
2) USED_GB,
ROUND(MAX(FREE.MAXBYTES) / 1048576 / 1024, 2) MAXFREE,
100 - ROUND(100.0 * SUM(FREE.BYTES) / SUM(DF.BYTES), 2) PCT_USED,
ROUND(100.0 * SUM(FREE.BYTES) / SUM(DF.BYTES), 2) PCT_FREE
FROM DBA_DATA_FILES DF,
(SELECT TABLESPACE_NAME,
FILE_ID,
SUM(BYTES) BYTES,
MAX(BYTES) MAXBYTES
FROM DBA_FREE_SPACE
WHERE BYTES > 1024 * 1024
GROUP BY TABLESPACE_NAME, FILE_ID) FREE
WHERE DF.TABLESPACE_NAME = FREE.TABLESPACE_NAME(+)
AND DF.FILE_ID = FREE.FILE_ID(+)
GROUP BY DF.TABLESPACE_NAME
ORDER BY 8;
EOF
exit
asm_check.sh
[oracle@orcl scripts]$ cat asm_check.sh sqlplus -s $DB_CONN_STR <<EOF select name,state,total_mb,free_mb,usable_file_mb from v\$asm_diskgroup; EOF exit
invalid_index_check.sh
[oracle@orcl scripts]$ cat invalid_index_check.sh
sqlplus -s $DB_CONN_STR <<EOF
select owner, index_name, status
from dba_indexes where status not in ('VALID','N/A')
order by 1, 2;
select i.owner, i.index_name, p.partition_name, p.status
from dba_ind_partitions p, dba_indexes i
where p.index_name = i.index_name
and p.index_owner=i.owner
and p.status != 'USABLE'
order by 1, 2, 3;
select i.owner, i.index_name, s.subpartition_name, s.status
from dba_ind_subpartitions s, dba_indexes i
where s.index_name = i.index_name
and s.index_name = i.index_name
and s.status != 'USABLE'
order by 1, 2, 3;
EOF
exit
rman_validate.sh(校验前一天的备份集)
#!/bin/bash
# +-----------------------------------------------------------------------+
# | EXPORT ENVIRONMENT VARIABLE OF ORACLE USER |
# +-----------------------------------------------------------------------+
source ~/.bash_profile;
# +-----------------------------------------------------------------------+
# | GLOBAL VARIABLES ABOUT THE ABSOLUTE PATH OF THE SHELL COMMAND |
# +-----------------------------------------------------------------------+
export AWK=`which awk`
export DATE=`which date`
export ECHO=`which echo`
# +-----------------------------------------------------------------------+
# | GLOBAL VARIABLES ABOUT STRINGS AND BACKTICK EXECUTION RESULT OF SHELL |
# +-----------------------------------------------------------------------+
export BACK_LOG=~/backup/logs
export RMAN=$ORACLE_HOME/bin/rman
export SQLPLUS=$ORACLE_HOME/bin/sqlplus
export YESTERDAY=`$DATE +%Y-%m-%d -d yesterday`
export DAY_OF_WEEK=`$DATE +%u`
export BSKEY_LIST=
export BSKEY_LIST_WITH_COMMA=
# +-----------------------------------------------------------------------+
# | QUERY ALL OF BS_KEY VALUE OF RMAN BACKUPSET YESTERDAY INTO BSKEY_LIST |
# +-----------------------------------------------------------------------+
BSKEY_LIST=`
$SQLPLUS -S /nolog << EOF
connect / as sysdba
set echo off feedback off heading off underline off
select bs_key from v\\$backup_set_details where device_type='DISK' and completion_time > to_date('$YESTERDAY','yyyy-mm-dd') order by 1;
exit;
EOF`
# +-----------------------------------------------------------------------+
# | WITH AWK COMMAND TO PROCESS BSKEY_LIST SAVE TO BSKEY_LIST_WITH_COMMA |
# +-----------------------------------------------------------------------+
BSKEY_LIST_WITH_COMMA=`$ECHO $BSKEY_LIST | $AWK -F' ' '{ for ( i=1; i<NF; i++ ) print $i","; print $NF }'`
# +-----------------------------------------------------------------------+
# | VALIDATE RMAN BACKUPSET THAT IS GENERATED LAST NIGHT |
# +-----------------------------------------------------------------------+
case $DAY_OF_WEEK in
6)
$RMAN nocatalog log $BACK_LOG/rman_validate`$DATE +%Y%m%d`.log <<EOF
connect target /
run {
allocate channel d1 type disk maxpiecesize 16g;
allocate channel d2 type disk maxpiecesize 16g;
allocate channel d3 type disk maxpiecesize 16g;
allocate channel d4 type disk maxpiecesize 16g;
validate backupset $BSKEY_LIST_WITH_COMMA check logical;
release channel d4;
release channel d3;
release channel d2;
release channel d1;
}
exit;
EOF
;;
1|2|3|4|5|7)
$RMAN nocatalog log $BACK_LOG/rman_validate`$DATE +%Y%m%d`.log <<EOF
connect target /
validate backupset $BSKEY_LIST_WITH_COMMA check logical;
exit;
EOF
;;
esac
巡检结果:
==== Oracheck started at 17-11-18 14:15:20 ===
*** step1.CPU使用率检查
Linux 2.6.32-431.el6.x86_64 (orcl) 11/18/2017 _x86_64_ (1 CPU)
02:15:20 PM CPU %user %nice %system %iowait %steal %idle
02:15:21 PM all 0.00 0.00 1.01 1.01 0.00 97.98
02:15:22 PM all 0.00 0.00 1.02 0.00 0.00 98.98
02:15:23 PM all 0.00 0.00 1.03 0.00 0.00 98.97
02:15:24 PM all 1.03 0.00 1.03 0.00 0.00 97.94
02:15:25 PM all 0.00 0.00 3.03 0.00 0.00 96.97
02:15:26 PM all 0.00 0.00 2.08 0.00 0.00 97.92
02:15:27 PM all 0.00 0.00 3.09 1.03 0.00 95.88
02:15:28 PM all 1.02 0.00 3.06 0.00 0.00 95.92
02:15:29 PM all 0.00 0.00 2.08 0.00 0.00 97.92
02:15:30 PM all 1.02 0.00 3.06 0.00 0.00 95.92
Average: all 0.31 0.00 2.05 0.21 0.00 97.44
*** step2.内存使用率检查
total used free shared buffers cached
Mem: 1 1 0 0 0 1
-/+ buffers/cache: 0 1
Swap: 3 0 3
*** step3.文件系统使用率检查
Filesystem Size Used Avail Use% Mounted on
/dev/sda3 36G 22G 12G 65% /
tmpfs 932M 266M 666M 29% /dev/shm
/dev/sda1 194M 34M 151M 19% /boot
*** step4.日志文件大小检查
trace目录(trc,trm,alert_sid):
1.6M /u01/app/oracle/diag/rdbms/orcl/orcl/trace
alert目录(log_*.xml):
592K /u01/app/oracle/diag/rdbms/orcl/orcl/alert
监听日志(listener.log):
4.0K /u01/app/oracle/product/11.2.0/db_1/network/admin/listener.ora
审计目录(adump):
820K /u01/app/oracle/admin/orcl/adump
*** step5.oracle后台日志检查
ORA-1109 signalled during: ALTER DATABASE CLOSE NORMAL...
ORA-00313: open failed for members of log group 1 of thread 1
ORA-00312: online log 1 thread 1: '/u01/app/oracle/oradata/orcl/redo01.log'
ORA-27037: unable to obtain file status
ORA-00313: open failed for members of log group 1 of thread 1
ORA-00312: online log 1 thread 1: '/u01/app/oracle/oradata/orcl/redo01.log'
ORA-27037: unable to obtain file status
ORA-00313: open failed for members of log group 1 of thread 1
ORA-00312: online log 1 thread 1: '/u01/app/oracle/oradata/orcl/redo01.log'
ORA-27037: unable to obtain file status
ORA-00313: open failed for members of log group 2 of thread 1
ORA-00312: online log 2 thread 1: '/u01/app/oracle/oradata/orcl/redo02.log'
ORA-27037: unable to obtain file status
ORA-00313: open failed for members of log group 2 of thread 1
ORA-00312: online log 2 thread 1: '/u01/app/oracle/oradata/orcl/redo02.log'
ORA-27037: unable to obtain file status
ORA-00313: open failed for members of log group 2 of thread 1
ORA-00312: online log 2 thread 1: '/u01/app/oracle/oradata/orcl/redo02.log'
ORA-27037: unable to obtain file status
ORA-00313: open failed for members of log group 3 of thread 1
ORA-00312: online log 3 thread 1: '/u01/app/oracle/oradata/orcl/redo03.log'
ORA-27037: unable to obtain file status
ORA-00313: open failed for members of log group 3 of thread 1
ORA-00312: online log 3 thread 1: '/u01/app/oracle/oradata/orcl/redo03.log'
ORA-27037: unable to obtain file status
ORA-00313: open failed for members of log group 3 of thread 1
ORA-00312: online log 3 thread 1: '/u01/app/oracle/oradata/orcl/redo03.log'
ORA-27037: unable to obtain file status
*** step6.oracle连接数检查
RESOURCE_NAME CURRENT_UTILIZATION LIMIT_VALUE RESULT_NUMBER
--------------- ------------------- -------------------- -------------
processes 34 150 22
*** step7.oracle表空间使用率检查
TABLESPACE_NAME DATAFILE_COUNT SIZE_GB FREE_GB USED_GB MAXFREE PCT_USED PCT_FREE
-------------------- -------------- ---------- ---------- ---------- ---------- ---------- ----------
SYSTEM 1 .74 .01 .74 .01 99.08 .92
SYSAUX 1 .57 .03 .54 .03 94.83 5.17
USERS 1 0 0 0 0 26.25 73.75
UNDOTBS1 2 .17 .14 .03 .08 17.06 82.94
TBS_DATA 1 .2 .19 .01 .19 3 97
*** step8.asm存储使用率检查
no rows selected
*** step9.失效索引检查
no rows selected
no rows selected
no rows selected
*** step10.rman备份检查
rman备份日志:
rman备份集校验日志:
==== Oracheck ended at 17-11-18 14:15:30 ===


Asynq任务框架
MCP智能体开发实战
WEB架构
安全监控体系





