

    ===================================================== Database Exception Handling ======================================================================
    I. Current state
    1. Monitor current OS load with topas
    topas

    2. Monitor current wait events
    set pagesize 999
    set linesize 300
    col event format a66
    col wait_class format a30
    select event,count(*),wait_class from gv$session group by event,wait_class order by 2;

    3. Check sessions by wait event
    col machine format a32
    col event format a32
    col program format a32
    set linesize 555
    select sid,machine,program,sql_id,event,p1,p2 from v$session where event='&event_list' order by sql_id;

    4. Monitor current database session information
    set pagesize 999
    set lines 666
    col username format a13
    col prog format a10 trunc
    col sql_text format a41 trunc
    col sid format 999999
    col child for 999999
    col avg_etime for 999,999.99
    col EXECS for 99999999999
    select sid,
    serial#,
    status,
    username,
    substr(program,1,19) prog,
    address,
    hash_value,
    b.sql_id,
    child_number child,
    plan_hash_value,
    executions execs,
    (elapsed_time / decode(nvl(executions,0),0,1,executions)) / 1000000 avg_etime,
    sql_text
    from v$session a,v$sql b
    where status='ACTIVE'
    and username is not null
    and a.sql_id=b.sql_id
    and a.sql_child_number=b.child_number
    and sql_text not like
    '%select sid,serial#,username,substr(program,1,19) prog,%' --don't show this query
    order by sql_id,sql_child_number
    /

    5. For the abnormal SQL_IDs found above, check PLAN_HASH_VALUE changes
    set lines 155
    col execs for 999,999,999
    col avg_etime for 999,999.999
    col avg_lio for 999,999,999.9
    col begin_interval_time for a30
    col node for 99999
    break on plan_hash_value on startup_time skip 1
    select ss.snap_id,
    ss.instance_number node,
    begin_interval_time,
    sql_id,
    plan_hash_value,
    nvl(executions_delta,0) execs,
    (elapsed_time_delta /
    decode(nvl(executions_delta,0),0,1,executions_delta))/1000000 avg_etime,
    (buffer_gets_delta /
    decode(nvl(buffer_gets_delta,0),0,1,executions_delta)) avg_lio,
    (disk_reads_delta/
    decode(nvl(buffer_gets_delta,0),0,1,executions_delta)) avg_pio
    from DBA_HIST_SQLSTAT S,DBA_HIST_SNAPSHOT SS
    where sql_id='&sql_id'
    and ss.snap_id=S.snap_id
    and ss.instance_number=S.instance_number
    and s.instance_number like nvl('&instance_number',s.instance_number)
    and executions_delta>0
    order by 1,2,3
    /

    6. From the output above, check whether the execution plan changed; display the plan
    set linesize 300
    select * from table(DBMS_XPLAN.DISPLAY_CURSOR(to_char('&sql_id'),NULL));

    II. Historical (after-the-fact) analysis
    1. Check snapshot information
    select INSTANCE_NUMBER,SNAP_ID,BEGIN_INTERVAL_TIME from DBA_HIST_SNAPSHOT where BEGIN_INTERVAL_TIME>sysdate-1 order by SNAP_ID;

    ##Resource consumption of a statement within a snapshot range (snap_id)
    select /*+parallel(t,4)*/
    plan_hash_value,
    buffer_gets_delta/executions_delta get_exec,
    disk_reads_delta/executions_delta read_exec,
    cpu_time_delta/executions_delta/1000 cpu_exec_ms ,
    elapsed_time_delta/executions_delta/1000 elaps_exec_ms ,
    parsing_schema_id,
    ROWS_PROCESSED_delta/executions_delta rows_exec,
    executions_delta
    from dba_hist_sqlstat t
    where sql_id = '1mgfnuxggrfj0'
    and instance_number = 1
    and SNAP_ID between 8978 and 8988
    and EXECUTIONS_DELTA > 0;

    Replay the database-level wait events during the incident window:
    col event for a35
    set lines 222
    set pages 9999
    col p1 for 999999999999999999999999999999
    select instance_number,sample_id,sample_time,event,count(*) from dba_hist_active_sess_history
    where sample_time >
    to_timestamp('2018-08-02 11:00:00','yyyy-mm-dd hh24:mi:ss')
    and sample_time <
    to_timestamp('2018-08-02 11:55:00','yyyy-mm-dd hh24:mi:ss')
    --and event ='change tracking file synchronous read'
    group by instance_number,sample_id,sample_time,event
    order by 1,3,5;

    col event for a35
    set lines 222
    set pages 9999
    col p1 for 999999999999999999999999999999
    select sample_id,sample_time,event,count(*) from gv$active_session_history
    where INST_ID=2 and event is not null
    and sample_id between 43715229 and 43715911
    group by sample_id,sample_time,event
    order by 1,4;


    2. Check SQL execution statistics by snapshot
    set lines 155
    col execs for 999,999,999
    col min_etime for 999,999.99
    col max_etime for 999,999.99
    col avg_etime for 999,999.999
    col avg_lio for 999,999,999.9
    col norm_stddev for 999,999.9999
    col begin_interval_time for a30
    col node for 99999
    break on plan_hash_value on startup_time skip 1
    select * from
    (select sql_id,
    sum(execs),
    min(avg_etime) min_etime,
    max(avg_etime) max_etime,
    stddev_etime / min(avg_etime) norm_stddev
    from (select sql_id,
    plan_hash_value,
    execs,
    avg_etime,
    stddev(avg_etime) over(partition by sql_id) stddev_etime
    from (select sql_id,
    plan_hash_value,
    sum(nvl(executions_delta,0)) execs,
    (sum(elapsed_time_delta)/
    decode(sum(nvl(executions_delta,0)),
    0,
    1,
    sum(executions_delta)) /1000000) avg_etime
    --,sum((buffer_gets_delta/decode(nvl(buffer_gets_delta,0),0,1,executions_delta)) avg_lio
    from DBA_HIST_SQLSTAT S,DBA_HIST_SNAPSHOT SS
    where ss.snap_id=s.snap_id
    and ss.instance_number=s.instance_number
    and executions_delta>0
    and elapsed_time_delta>0
    and s.snap_id > nvl('&start_snap_id',0)
    and s.snap_id < nvl('&last_snap_id',999999999)
    group by sql_id,plan_hash_value))
    group by sql_id,stddev_etime)
    where max_etime>0.01
    order by 4
    /

    3. Compare SQL performance before/after a point in time (the default below can be changed; currently 1 day)
    alter session set nls_date_format='yyyy-mm-dd hh24:mi:ss';

    accept days_ago -
    prompt 'Enter Days ago: ' -
    default '1'

    set lines 155
    col execs for 999,999,999
    col before_etime for 999,990.99
    col after_etime for 999,990.99
    col before_avg_etime for 999,990.99 head AVG_ETIME_BEFORE
    col after_avg_etime for 999,990.99 head AVG_ETIME_AFTER
    col min_etime for 999,990.99
    col max_etime for 999,990.99
    col avg_etime for 999,990.999
    col avg_lio for 999,999,990.9
    col norm_stddev for 999,990.9999
    col begin_interval_time for a30
    col node for 99999
    break on plan_hash_value on startup_time skip 1
    select * from (
    select sql_id, execs, before_avg_etime, after_avg_etime, norm_stddev,
    case when to_number(before_avg_etime) < to_number(after_avg_etime) then 'Slower' else 'Faster' end result
    -- select *
    from (
    select sql_id, sum(execs) execs, sum(before_execs) before_execs, sum(after_execs) after_execs,
    sum(before_avg_etime) before_avg_etime, sum(after_avg_etime) after_avg_etime,
    min(avg_etime) min_etime, max(avg_etime) max_etime, stddev_etime/min(avg_etime) norm_stddev,
    case when sum(before_avg_etime) > sum(after_avg_etime) then 'Slower' else 'Faster' end better_or_worse
    from (
    select sql_id,
    period_flag,
    execs,
    avg_etime,
    stddev_etime,
    case when period_flag = 'Before' then execs else 0 end before_execs,
    case when period_flag = 'Before' then avg_etime else 0 end before_avg_etime,
    case when period_flag = 'After' then execs else 0 end after_execs,
    case when period_flag = 'After' then avg_etime else 0 end after_avg_etime
    from (
    select sql_id, period_flag, execs, avg_etime,
    stddev(avg_etime) over (partition by sql_id) stddev_etime
    from (
    select sql_id, period_flag, sum(execs) execs, sum(etime)/sum(decode(execs,0,1,execs)) avg_etime from (
    select sql_id, 'Before' period_flag,
    nvl(executions_delta,0) execs,
    (elapsed_time_delta)/1000000 etime
    -- sum((buffer_gets_delta/decode(nvl(buffer_gets_delta,0),0,1,executions_delta))) avg_lio
    from DBA_HIST_SQLSTAT S, DBA_HIST_SNAPSHOT SS
    where ss.snap_id = S.snap_id
    and ss.instance_number = S.instance_number
    and executions_delta > 0
    and elapsed_time_delta > 0
    and ss.begin_interval_time <= sysdate-&&days_ago
    union
    select sql_id, 'After' period_flag,
    nvl(executions_delta,0) execs,
    (elapsed_time_delta)/1000000 etime
    -- (elapsed_time_delta)/decode(nvl(executions_delta,0),0,1,executions_delta)/1000000 avg_etime
    -- sum((buffer_gets_delta/decode(nvl(buffer_gets_delta,0),0,1,executions_delta))) avg_lio
    from DBA_HIST_SQLSTAT S, DBA_HIST_SNAPSHOT SS
    where ss.snap_id = S.snap_id
    and ss.instance_number = S.instance_number
    and executions_delta > 0
    and elapsed_time_delta > 0
    and ss.begin_interval_time > sysdate-&&days_ago
    -- and s.snap_id > 7113
    )
    group by sql_id, period_flag
    )
    )
    )
    group by sql_id, stddev_etime
    )
    )
    where after_avg_etime>0.01
    order by norm_stddev
    /

    4. For the abnormal SQL_IDs found above, check PLAN_HASH_VALUE changes
    set lines 155
    col execs for 999,999,999
    col avg_etime for 999,999.999
    col avg_lio for 999,999,999.9
    col begin_interval_time for a30
    col node for 99999
    break on plan_hash_value on startup_time skip 1
    select ss.snap_id,
    ss.instance_number node,
    begin_interval_time,
    sql_id,
    plan_hash_value,
    nvl(executions_delta,0) execs,
    (elapsed_time_delta/
    decode(nvl(executions_delta,0),0,1,executions_delta))/1000000 avg_etime,
    (buffer_gets_delta/
    decode(nvl(buffer_gets_delta,0),0,1,executions_delta)) avg_lio,
    (disk_reads_delta/
    decode(nvl(buffer_gets_delta,0),0,1,executions_delta)) avg_pio
    from DBA_HIST_SQLSTAT S,DBA_HIST_SNAPSHOT SS
    where sql_id='&sql_id'
    and ss.snap_id=s.snap_id
    and ss.instance_number=s.instance_number
    and s.instance_number like '&instance_number'
    and executions_delta > 0
    order by 1,2,3
    /

    5. From the output above, check whether the execution plan changed; display the plan
    set pagesize 999
    set linesize 380
    select * from table(dbms_xplan.display_cursor(to_char('&sql_id'),NULL));
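    If the cursor has already aged out of the shared pool, display_cursor returns nothing; a minimal sketch using the AWR variant of the same API to show the plans kept in the repository:
    select * from table(dbms_xplan.display_awr('&sql_id'));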

    6. Trace the origin of a SQL_ID within a snapshot range (querying the whole view is too expensive)
    set linesize 555
    set pagesize 999
    col event format a40
    col program format a30
    col wait_class format a20
    select snap_id,instance_number,session_id,sql_id,event,wait_class,p1,p2,p3,blocking_session,blocking_inst_id,program
    from dba_hist_active_sess_history
    where snap_id > &start_snap_id
    and snap_id < &last_snap_id
    and sql_id = '&sql_id'
    order by snap_id,program;

    ===================================================== Wait Events ======================================================================
    ##Use a logoff trigger to record each session's wait events
    Once this script is deployed in the database, you can look back and see what work every foreground process did in the database.

    create table system.session_event_history
    tablespace <name>
    storage (freelist groups <value>)
    initrans <value>
    as
    select b.sid,
    b.serial#,
    b.username,
    b.osuser,
    b.paddr,
    b.process,
    b.logon_time,
    b.type,
    a.event,
    a.total_waits,
    a.total_timeouts,
    a.time_waited,
    a.average_wait,
    a.max_wait,
    sysdate as logoff_timestamp
    from v$session_event a, v$session b
    where 1 = 2;

    create table system.sesstat_history
    tablespace <name>
    storage (freelist groups <value>)
    initrans <value>
    as
    select c.username,
    c.osuser,
    a.sid,
    c.serial#,
    c.paddr,
    c.process,
    c.logon_time,
    a.statistic#,
    b.name,
    a.value,
    sysdate as logoff_timestamp
    from v$sesstat a, v$statname b, v$session c
    where 1 = 2;

    create or replace trigger sys.logoff_trig
    before logoff on database
    declare
    logoff_sid pls_integer;
    logoff_time date := sysdate;
    begin
    select sid into logoff_sid from v$mystat where rownum < 2;

    insert into system.session_event_history
    (sid,
    serial#,
    username,
    osuser,
    paddr,
    process,
    logon_time,
    type,
    event,
    total_waits,
    total_timeouts,
    time_waited,
    average_wait,
    max_wait,
    logoff_timestamp)
    select a.sid,
    b.serial#,
    b.username,
    b.osuser,
    b.paddr,
    b.process,
    b.logon_time,
    b.type,
    a.event,
    a.total_waits,
    a.total_timeouts,
    a.time_waited,
    a.average_wait,
    a.max_wait,
    logoff_time
    from v$session_event a, v$session b
    where a.sid = b.sid
    and b.username = login_user
    and b.sid = logoff_sid;

    insert into system.sesstat_history
    (username,
    osuser,
    sid,
    serial#,
    paddr,
    process,
    logon_time,
    statistic#,
    name,
    value,
    logoff_timestamp)
    select c.username,
    c.osuser,
    a.sid,
    c.serial#,
    c.paddr,
    c.process,
    c.logon_time,
    a.statistic#,
    b.name,
    a.value,
    logoff_time
    from v$sesstat a, v$statname b, v$session c
    where a.statistic# = b.statistic#
    and a.sid = c.sid
    and b.name in ('CPU used when call started', --CPU time consumed up to the moment the current call started
    'CPU used by this session', --total CPU time used by the session from the start of the user call until it completed
    'recursive cpu usage', --total CPU time used by non-user (recursive) calls
    'parse time cpu') --total CPU time spent on (hard and soft) parsing
    and c.sid = logoff_sid
    and c.username = login_user;
    end;
    /
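    Once sessions have logged off, the history tables can be queried directly; a minimal sketch (using only the tables and columns created above) ranking the waits of sessions that logged off in the last day:
    select username, event, sum(time_waited) time_waited, sum(total_waits) total_waits
    from system.session_event_history
    where logoff_timestamp > sysdate - 1
    group by username, event
    order by 3 desc;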

    ##Wait events during a given time window
    select /*+ parallel(d,16) */sql_id,event,count(*) from dba_hist_active_sess_history d
    where d.event in ('db file sequential read','gc buffer busy acquire','read by other session','latch: cache buffers chains') and d.dbid=1714394454
    and d.sample_time between to_date('2017-03-01 07:00:00','yyyy-mm-dd hh24:mi:ss') and to_date('2017-03-01 09:00:00','yyyy-mm-dd hh24:mi:ss')
    and d.instance_number=1 group by sql_id,event order by count(*),sql_id,event;

    ##Meaning of p1/p2/p3 for a wait event
    col name for a25;
    col p1 for a10;
    col p2 for a10;
    col p3 for a10;
    SELECT NAME, PARAMETER1 P1, PARAMETER2 P2, PARAMETER3 P3
    FROM V$EVENT_NAME
    WHERE NAME = '&event_name';
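    For example, for db file sequential read the three parameters are file#, block#, and blocks, so the p1/p2 values from the session queries above map directly to a file and block:
    SELECT NAME, PARAMETER1 P1, PARAMETER2 P2, PARAMETER3 P3
    FROM V$EVENT_NAME
    WHERE NAME = 'db file sequential read';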

    --Query SQL execution history
    --v$active_session_history is dynamic: ASH lives in memory and samples v$session_wait once per second, so it is normally used to analyze current performance problems; dba_hist_active_sess_history is the overview table of ASH samples persisted by AWR, and is normally used to access historical data
    --BLOCKING_SESSION is the session_id causing the wait
    select to_char(SAMPLE_TIME,'yyyy-mm-dd hh24:mi:ss') SAMPLE_TIME,SESSION_ID,SESSION_STATE,USER_ID,SQL_ID,BLOCKING_SESSION,CURRENT_OBJ# from dba_hist_active_sess_history where
    SAMPLE_TIME>to_date('20130825 08:00','yyyymmdd hh24:mi:ss') and event ='enq: TX - row lock contention';

    select to_char(SAMPLE_TIME,'yyyy-mm-dd hh24:mi:ss') SAMPLE_TIME,SESSION_ID,USER_ID,SQL_ID,BLOCKING_SESSION,CURRENT_OBJ# from dba_hist_active_sess_history where
    SESSION_ID=1598;

    select to_char(SAMPLE_TIME,'yyyy-mm-dd hh24:mi:ss') SAMPLE_TIME,SESSION_ID,USER_ID,SQL_ID,BLOCKING_SESSION,CURRENT_OBJ# from v$active_session_history where
    SESSION_ID=1598;

    select to_char(b.end_interval_time,'yyyy-mm-dd hh24:mi:ss') sample_time, a.*
    from dba_hist_sqlstat a,dba_hist_snapshot b
    where a.snap_id=b.snap_id and a.sql_id in ('dfafaf','dkafja');

    select to_char(SAMPLE_TIME,'yyyy-mm-dd hh24:mi:ss') SAMPLE_TIME,SESSION_ID,SESSION_STATE,USER_ID,event,SQL_ID,BLOCKING_SESSION,CURRENT_OBJ# from dba_hist_active_sess_history where
    SAMPLE_TIME>to_date('20130825 08:00','yyyymmdd hh24:mi:ss') ;

    ##Analyzing dba_hist_active_sess_history
    create table tmp_ash_20180920 as select * from dba_hist_active_sess_history
    where sample_time between to_date('20180919 02:00','yyyymmdd hh24:mi') and to_date('20180920 05:00','yyyymmdd hh24:mi');

    create table tmp_sql_20180920 as select * from WRH$_SQLTEXT where sql_id in (select sql_id from tmp_ash_20180920);

    exp "'/ as sysdba'" file=tmp_ash_20180920.dmp tables=tmp_ash_20180920,tmp_sql_20180920

    imp "'/ as sysdba'" file=tmp_ash_20170517.dmp full=y

    select instance_number,sample_id,sample_time,event,count(*) from tmp_ash_20170517 group by instance_number,sample_id,sample_time,event order by 1,3,5;

    col SQL_ID for a20
    col EVENT for a20
    col SQL_OPNAME for a10
    col SAMPLE_TIME for a30
    col PROGRAM for a40
    select sample_time, session_id, user_id, sql_id, sql_opname,event,blocking_session,program from tmp_ash_20170517 where session_id=4338 order by 1;
    select session_id, sample_id, user_id, sql_id, is_sqlid_current,sql_opname,event,blocking_session,top_level_call_name,program from tmp_ash_20170517 where sample_id in (24810796) order by session_id;

    ##Changes in wait events over time in ASH
    SELECT to_char(SAMPLE_TIME,'yyyy-mm-dd hh24:mi:ss') SAMPLE_TIME, event, COUNT(*) FROM dba_hist_active_sess_history
    WHERE sample_time between to_date('20170510 00:00','yyyymmdd hh24:mi') and to_date('20170510 00:30','yyyymmdd hh24:mi')
    GROUP BY to_char(SAMPLE_TIME,'yyyy-mm-dd hh24:mi:ss'),event ORDER BY 1;

    --Top N events per ASH sample point --http://feed.askmaclean.com/archives/dba_hist_active_sess_history.html
    select t.dbid,
    t.sample_id,
    t.sample_time,
    t.instance_number,
    t.event,
    t.session_state,
    t.c session_count
    from (select t.*,
    rank() over(partition by dbid, instance_number, sample_time order by c desc) r
    from (select /*+ parallel 8 */
    t.*,
    count(*) over(partition by dbid, instance_number, sample_time, event) c,
    row_number() over(partition by dbid, instance_number, sample_time, event order by 1) r1
    from dba_hist_active_sess_history t
    where sample_time >
    to_timestamp('&begin_sample_time',
    'yyyy-mm-dd hh24:mi:ss')
    and sample_time <
    to_timestamp('&end_sample_time',
    'yyyy-mm-dd hh24:mi:ss')
    ) t
    where r1 = 1) t
    where r < 3
    order by dbid, instance_number, sample_time, r;

    --Inspect the wait chain at each sample point:
    select
    level lv,
    connect_by_isleaf isleaf,
    connect_by_iscycle iscycle,
    t.dbid,
    t.sample_id,
    t.sample_time,
    t.instance_number,
    t.session_id,
    t.sql_id,
    t.session_type,
    t.event,
    t.session_state,
    t.blocking_inst_id,
    t.blocking_session,
    t.blocking_session_status
    from m_ash20180322 t
    where sample_time >
    to_timestamp('2018-03-22 09:59:00',
    'yyyy-mm-dd hh24:mi:ss')
    and sample_time <
    to_timestamp('2018-03-22 10:00:00',
    'yyyy-mm-dd hh24:mi:ss')
    start with blocking_session is not null
    connect by nocycle
    prior dbid = dbid
    and prior sample_time = sample_time
    /*and ((prior sample_time) - sample_time between interval '-1'
    second and interval '1' second)*/
    and prior blocking_inst_id = instance_number
    and prior blocking_session = session_id
    and prior blocking_session_serial# = session_serial#
    order siblings by dbid, sample_time;

    Filter further and pick out the isleaf=1 leaves (the top holders)
    --Based on the previous step, find the final top holder at each sample point:
    select t.lv,
    t.iscycle,
    t.dbid,
    t.sample_id,
    t.sample_time,
    t.instance_number,
    t.session_id,
    t.sql_id,
    t.session_type,
    t.event,
    t.seq#,
    t.session_state,
    t.blocking_inst_id,
    t.blocking_session,
    t.blocking_session_status,
    t.c blocking_session_count
    from (select t.*,
    row_number() over(partition by dbid, instance_number, sample_time order by c desc) r
    from (select t.*,
    count(*) over(partition by dbid, instance_number, sample_time, session_id) c,
    row_number() over(partition by dbid, instance_number, sample_time, session_id order by 1) r1
    from (select /*+ parallel 8 */
    level lv,
    connect_by_isleaf isleaf,
    connect_by_iscycle iscycle,
    t.*
    from m_ash20180322 t
    where sample_time >
    to_timestamp('2018-03-22 09:59:00',
    'yyyy-mm-dd hh24:mi:ss')
    and sample_time <
    to_timestamp('2018-03-22 10:00:00',
    'yyyy-mm-dd hh24:mi:ss')
    start with blocking_session is not null
    connect by nocycle
    prior dbid = dbid
    and prior sample_time = sample_time
    /*and ((prior sample_time) - sample_time between interval '-1'
    second and interval '1' second)*/
    and prior blocking_inst_id = instance_number
    and prior blocking_session = session_id
    and prior
    blocking_session_serial# = session_serial#) t
    where t.isleaf = 1) t
    where r1 = 1) t
    where r < 3
    order by dbid, sample_time, r;

    ##Find historical bad SQL
    The following script shows which wait events historical sessions went through, ordered by wait count descending
    select session_id,event,count(*),sum(time_waited) from v$active_session_history where session_state='WAITING'
    and time_waited>0 and sample_time>=(sysdate-&howlongago_min/(24*60))
    group by session_id,event order by 3 desc;

    Then find the sql_id that caused the wait event
    select event, session_id,sql_id, p1,p2,p3 from v$active_session_history where sample_time>=(sysdate-&howlongago/(24*60)) and session_id=&sid;

    Finally retrieve the SQL text behind that sql_id
    select sql_text from v$sqlarea where sql_id='&sql_id';

    ##Find SQL execution cost
    In Oracle 11g, a SQL statement is captured by SQL Monitor when it meets any of the following conditions; the monitoring data is recorded in the v$sql_monitor view.

    ■ SQL executed in parallel is monitored immediately
    ■ Serial SQL is monitored once it consumes more than 5 seconds of CPU or I/O time. (The hidden parameter _sqlmon_threshold controls this; the default is 5 seconds, and setting it to 0 disables SQL monitoring)
    ■ SQL with the /*+ monitor */ hint, e.g. select /*+ monitor */ * from dba_tables;
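    Before generating the full report, a quick look at what SQL Monitor has captured; a minimal sketch against v$sql_monitor (standard columns, times in microseconds):
    set linesize 200
    col status for a20
    select sql_id, sql_exec_id, status,
    to_char(sql_exec_start,'yyyy-mm-dd hh24:mi:ss') exec_start,
    round(elapsed_time/1000000,1) etime_s,
    round(cpu_time/1000000,1) cpu_s
    from v$sql_monitor
    order by sql_exec_start desc;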

    set trimspool on
    set arraysize 512
    set trim on
    set pagesize 0
    set linesize 1000
    set long 1000000
    set longchunksize 1000000
    spool sqlmon.html
    select /*+ noparallel */ dbms_sqltune.report_sql_monitor (
    sql_id=>'&sql_id',
    report_level=>'ALL',
    type=>'ACTIVE')
    from dual;
    spool off
    cat sqlmon.html

    ##direct path read
    Common causes of direct path read:
    1. Large disk sorts that cannot complete in the sort area and spill to the temp tablespace.
    2. Large hash joins that keep their hash areas in the temp tablespace.
    3. Full scans of large tables. In Oracle 11g the full-scan algorithm changed: based on the table size, buffer cache size and other factors, Oracle decides whether to bypass the SGA and read straight from disk. In 10g all data went through the buffer cache, reported as table scan (large).
    11g assumes a direct path read of a large full table scan can be faster than 10g's db file scattered reads, and it uses fewer latches.
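    When serial direct path reads are suspected of hurting a workload, the decision can be overridden per session with the hidden parameter _serial_direct_read; it is undocumented, so treat this sketch as a test-only assumption:
    alter session set "_serial_direct_read"=never; --fall back to buffered reads (10g behavior)
    alter session set "_serial_direct_read"=always; --force direct path reads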

    ##An ASH analysis case study
    1. Get the trend of active session counts over time during the incident.
    SELECT to_char(sample_time, 'yyyymmdd hh24:mi:ss'), COUNT(1) FROM dba_hist_active_sess_history
    WHERE instance_number=1
    GROUP BY to_char(sample_time, 'yyyymmdd hh24:mi:ss')
    ORDER BY 1;

    2. Wait events at the active-session peak
    SELECT to_char(sample_time, 'yyyymmdd hh24:mi:ss'), event, COUNT(*) FROM dba_hist_active_sess_history
    WHERE to_char(sample_time, 'yyyymmdd hh24:mi:ss')='20161120 20:44'
    AND instance_number=1
    GROUP BY to_char(sample_time, 'yyyymmdd hh24:mi:ss'), event
    HAVING COUNT(*)>=10;
    --waits related to RAC global cache

    3. At one instant, find the hundreds of sessions waiting on gc buffer busy acquire: which blocks are they requesting (p1/p2), and who is blocking them (blocking_session)?
    SELECT p1,p2,blocking_inst_id||'_'||blocking_session block_sid, COUNT(1)
    FROM dba_hist_active_sess_history
    WHERE to_char(sample_time, 'yyyymmdd hh24:mi:ss')='20161120 20:44:04'
    AND instance_number=1
    AND event='gc buffer busy acquire'
    GROUP BY p1,p2, blocking_inst_id||'_'||blocking_session
    ORDER BY COUNT(1);

    P1 P2 BLOCK_SID COUNT(1)
    --- ------- ----------- --------
    47 1540116 1_3200 82
    ......
    At the moment the problem began, 20:44:04,
    node 1 had 82 sessions waiting on gc buffer busy acquire for the same block, all blocked by session 3200 on node 1, i.e. session 3200 on node 1 had requested the block from node 2 ahead of those 82 processes.

    4. What is session 3200 on node 1 waiting for?
    SELECT to_char(sample_time, 'yyyymmdd hh24:mi:ss') sample_time, instance_number||'_'||session_id sid, event, session_state,state, seq#, p1,p2,blocking_inst_id||'_'||blocking_session block_sid
    FROM dba_hist_active_sess_history
    WHERE instance_number||'_'||session_id='1_3200'
    AND to_char(sample_time, 'yyyymmdd hh24:mi:ss') BETWEEN '20161120 20:44:04' AND '20161120 20:44:24'
    ORDER BY 1;

    SAMPLE_TIME SID EVENT STATE SEQ# P1 P2 BLOCK_SID
    ------------------- ------- ----------------------- --------- ------- ----- --------- -----------
    20161120 20:44:04 1_3200 gc current block busy WAITING 7667 47 1540116 _
    20161120 20:44:14 1_3200 gc current block busy WAITING 7667 47 1540116 _
    20161120 20:44:24 1_3200 gc current block busy WAITING 7667 47 1540116 _

    Session 3200 on node 1, requesting file 47 block 1540116, could not get it quickly;
    its wait event is gc current block busy, and the request lasted from 20:44:04 to 20:44:24, over 20 seconds. Frustratingly, there is no way to tell who its blocker was.

    5. Starting from the sql_id
    The customer already mentioned an online transaction inserting into the message table that ran from 2016-11-20 20:44:04 to 20:44:34, about 30 seconds, so let's start there.
    SELECT sql_exec_id, COUNT(1), to_char(MIN(sample_time), 'yyyymmdd hh24:mi:ss') min_sample_time, to_char(MAX(sample_time), 'yyyymmdd hh24:mi:ss') max_sample_time
    FROM dba_hist_active_sess_history
    WHERE sql_id='15vru8xqgdkjf'
    GROUP BY sql_exec_id
    ORDER BY COUNT(1) DESC;

    SQL_EXEC_ID COUNT(1) MIN_SAMPLE_TIME MAX_SAMPLE_TIME
    ------------- --------- ------------------- -------------------
    22616995 4 20161120 20:44:04 20161120 20:44:34
    ......
    --SQL_ID '15vru8xqgdkjf' was sampled 4 consecutive times in ASH, so its execution took more than 30 seconds, exactly matching the customer's description.


    6. Use sql_exec_id, the unique identifier of one SQL execution, to look at this extra-long transaction
    SELECT to_char(sample_time, 'yyyymmdd hh24:mi:ss') sample_time, instance_number||'_'||session_id sid, event, session_state, p1, p2,
    blocking_inst_id||'_'||blocking_session block_sid FROM dba_hist_active_sess_history
    WHERE sql_exec_id=22616995
    AND sql_id='15vru8xqgdkjf'
    ORDER BY sample_time;

    SAMPLE_TIME SID EVENT STATE P1 P2 BLOCK_SID
    ------------------- ------- ------------------- --------- ----- --------- ------------
    20161120 20:44:04 1_2724 gc current retry WAITING 43 1152178 _
    20161120 20:44:14 1_2724 gc current retry WAITING 43 1152178 _
    20161120 20:44:24 1_2724 gc current retry WAITING 43 1152178 _
    20161120 20:44:34 1_2724 gc current retry WAITING 39 1770308 1_2449

    So the longest transaction kept waiting on "gc current retry" from second 04 through 24. Literally, it requested a current block, could not get it, and kept retrying.
    Since it was requesting a block from node 2 and had to retry, we should check who on node 2 serves that block. Exactly: node 2's LMS processes!
    So next, of course, we check what the LMS processes were doing during the problem window
    SELECT to_char(sample_time, 'yyyymmdd hh24:mi:ss') sample_time, instance_number||'_'||session_id sid, PROGRAM, event, session_state,state, seq#, p1,p2,blocking_inst_id||'_'||blocking_session block_sid
    FROM dba_hist_active_sess_history
    WHERE instance_number=2
    AND upper(PROGRAM) LIKE '%LMS%'
    AND to_char(sample_time, 'yyyymmdd hh24:mi:ss') BETWEEN '20161120 20:44:04' AND '20161120 20:44:24'
    ORDER BY sid, sample_time;

    SAMPLE_TIME SID PROGRAM EVENT STATE P1 P2 BLOCK_SID
    ------------------- ------- ----------------------- ------------------- --------- ----- --------- ------------
    20161120 20:44:06 2_1035 oracle@corerac2 (LMS9) gcs log flush sync WAITING 30 0 2_2633
    20161120 20:44:16 2_1035 oracle@corerac2 (LMS9) gcs log flush sync WAITING 30 0 2_2633
    20161120 20:44:06 2_1082 oracle@corerac2 (LMSA) gcs log flush sync WAITING 30 0 2_2633
    20161120 20:44:16 2_1082 oracle@corerac2 (LMSA) gcs log flush sync WAITING 30 0 2_2633
    20161120 20:44:06 2_1129 oracle@corerac2 (LMSB) gcs log flush sync WAITING 30 0 2_2633
    20161120 20:44:16 2_1129 oracle@corerac2 (LMSB) gcs log flush sync WAITING 30 0 2_2633
    .....

    The LMS processes were waiting on gcs log flush sync: before shipping a block to another node, LMS must flush that block's changes from the log buffer to the online redo log,
    but the flush did not finish for a long time, so the block could not be shipped to node 1 quickly, and node 1 kept retrying. The blocker behind "gcs log flush sync", 2_2633, is obviously our old friend LGWR, the log buffer writer.

    Was node 2's LGWR busy with something else, unable to service the LMS flush requests?
    SELECT to_char(sample_time, 'yyyymmdd hh24:mi:ss') sample_time, instance_number||'_'||session_id sid, PROGRAM, event, session_state,state, seq#, p1,p2,blocking_inst_id||'_'||blocking_session block_sid
    FROM dba_hist_active_sess_history
    WHERE instance_number||'_'||session_id='2_2633'
    AND to_char(sample_time, 'yyyymmdd hh24:mi:ss') BETWEEN '20161120 20:44:04' AND '20161120 20:44:24'
    ORDER BY sample_time;

    SAMPLE_TIME SID PROGRAM EVENT STATE SEQ# P1 P2 BLOCK_SID
    ------------------- ------- ----------------------- ----------------------- --------- ----- ----------- --------- ------------
    20161120 20:44:06 2_2633 oracle@corerac2 (LGWR) enq: CF - contention WAITING 30 1128660997 0 _
    20161120 20:44:16 2_2633 oracle@corerac2 (LGWR) enq: CF - contention WAITING 30 1128660997 0 1_1369
    20161120 20:44:26 2_2633 oracle@corerac2 (LGWR) enq: CF - contention WAITING 30 1128660997 0 1_1369
    20161120 20:44:36 2_2633 oracle@corerac2 (LGWR) log file parallel write WAITING 30 1 118 _

    Node 2's LGWR was waiting on enq: CF - contention: it wanted to write the control file (a log switch, for example, requires a control file update), but someone beat it to it: session 1369 on node 1.
    Meanwhile LGWR just kept waiting for the control file enqueue and could not get it for tens of seconds.

    Pressing on, run the following query to see why session 1369 on node 1 held the control file enqueue for so long.
    SELECT to_char(sample_time, 'yyyymmdd hh24:mi:ss') sample_time, instance_number||'_'||session_id sid, PROGRAM, event, session_state,state, seq#, p1,p2,blocking_inst_id||'_'||blocking_session block_sid
    FROM dba_hist_active_sess_history
    WHERE instance_number||'_'||session_id='1_1369'
    AND to_char(sample_time, 'yyyymmdd hh24:mi:ss') BETWEEN '20161120 20:44:04' AND '20161120 20:44:24'
    ORDER BY sample_time;
    SAMPLE_TIME SID PROGRAM EVENT STATE SEQ# P1 P2 BLOCK_SID
    ------------------- ------- ----------------------- ---------------------------- --------- ----- ----------- --------- ------------
    20161120 20:44:04 1_1369 oracle@corerac2 (CKPT) control file sequential read WAITING 16094 0 39 _

    sid=1369 is node 1's CKPT checkpoint process. CKPT was reading and writing the control file and therefore held the control file enqueue.
    Its wait events during this period were control file sequential read and control file parallel write, with no blocker, and they kept changing. Normally, when I/O is fast, the control file enqueue is held only very briefly.

    OSWatcher data shows that during the problem window many disks were 100% busy, with average response times over 200 ms, and the I/O was mostly writes.
    Check the wait events in ASH
    SELECT to_char(sample_time, 'yyyymmdd hh24:mi:ss'), event, COUNT(1) FROM dba_hist_active_sess_history
    WHERE instance_number=1
    AND event LIKE '%write%'
    GROUP BY to_char(sample_time, 'yyyymmdd hh24:mi:ss'), event
    ORDER BY 1,3;
    At the problem time, 24 processes were waiting on db file parallel write: clearly DBWR. The database has 24 DBWR processes in total and all of them were active, so something had triggered massive dirty-block flushing.
    The alert log shows an alter system archive log all(current) command, which inevitably makes DBWR flush dirty blocks en masse, saturating the disks for a short period and possibly the HBA bandwidth as well.

    The answer is the RMAN backup script we all know so well!
    Note that the truth is not what you might expect: it was not the backup's own I/O that made the disks busy!
    As we saw, the 100% disk busy came from writes, not reads, and RMAN mostly reads! And if RMAN's read I/O were the cause, the impact would not have lasted only about 30 seconds!

    The alter system archive log all(current) command at the start of the RMAN script triggered massive DBWR flushing and made the disks very busy. The control files sit on the same disks as the data files, so once CKPT took the control file enqueue,
    its I/O was very slow, which blocked LGWR; LGWR then could not respond to LMS, and in the end GC stalled for tens of seconds, i.e. online transactions timed out en masse. Once the full checkpoint completed, transactions returned to normal!

    Root cause summary:
    - During the batch window a large number of dirty blocks were generated
    - The CKPT process took the control file enqueue and began its I/O
    - Meanwhile the RMAN backup issued alter system archive log, triggering massive DBWR writes to the data files; the disks went 100% busy and control file I/O could no longer complete quickly
    - Because of the log switch, LGWR needed the control file enqueue; blocked by CKPT, it sat in enq: CF - contention and could not service the LMS log flush requests. LMS handles inter-node GC, so GC broke down and the INSERT statements timed out

    Data center operations recommendations:
    - Separate control files from data files down at the RAID-group level, into different disk groups, so they are not affected by DBWR writes or RMAN reads.
    - The alter system archive log all command in the RMAN backup script forces a full checkpoint and massive DBWR I/O, dragging down overall performance while it runs
    - Re-evaluate alternatives to alter system archive log all
    - Schedule RMAN backups away from batch windows


    ##GC-related wait events
    In short: when instance 1 requests a block that already resides in another instance, GCS (LMS) must ship the block to instance 1 over the private interconnect; this is the cluster's cache fusion. Requested blocks are either current blocks or CR blocks.
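    To see which gc waits dominate across the cluster right now, a minimal sketch against gv$session:
    select inst_id, event, count(*)
    from gv$session
    where event like 'gc%'
    group by inst_id, event
    order by 3 desc;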
    1. gc buffer busy acquire
    When session#1 (node 1) requests a buffer held by a remote instance, but another session#2 on the same instance (node 1) requested the same buffer before session#1 and has not finished, session#1 waits on gc buffer busy acquire.
    Common causes of gc buffer busy acquire:
    ■ Hot blocks
    In AWR, Segments by Global Cache Buffer Busy lists the most contended gc buffers (compare with a normal period). The fix depends on the hot-block type: partitioned tables, partitioned indexes, reverse key indexes, and so on. This is similar to buffer busy waits in a single-instance database. A sketch for locating hot segments follows this list.
    ■ Inefficient SQL
    Inefficient SQL requests buffers unnecessarily, increasing the chance of buffer busy. Find the top SQL in AWR and tune it to reduce buffer access. Again similar to buffer busy waits in a single instance.
    ■ Cross-instance data access
    In RAC the same data may be requested from different instances. Where the application allows, route different functions/modules to different instances so the same data is not accessed across instances; this reduces buffer contention and gc waits.
    ■ Oracle bugs
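    A minimal sketch for locating hot segments, mirroring the AWR section via v$segment_statistics (statistic names starting with 'gc buffer busy'; split into acquire/release in 11g):
    select owner, object_name, object_type, statistic_name, value
    from v$segment_statistics
    where statistic_name like 'gc buffer busy%'
    and value > 0
    order by value desc;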

    2. gc cr block busy
    Both instance 1 and instance 2 have a block in their buffer caches; at some moment instance 1 modifies the block without committing, and at that moment a session on instance 2 reads the block and must build a CR copy from undo records. During the build, the changes must first be recorded in instance 1's redo; the wait that occurs in the meantime is gc cr block busy.

    ===================================================== Wait Events ======================================================================

    ===================================================== Reclaiming the High Water Mark ======================================================================
    How to find high-water-mark candidates:
    ANALYZE TABLE test1 COMPUTE STATISTICS;
    select
    a.owner,
    a.table_name,
    a.blocks*to_number(p.value)/1024/1024 size_mb,
    a.avg_row_len*a.num_rows/(a.blocks*to_number(p.value))*100 pct,
    to_char(a.last_analyzed,'yyyy-mm-dd hh24:mi:ss') last_analyzed
    from dba_tables a,v$parameter p
    where p.name = 'db_block_size'
    and a.blocks > 0
    and a.avg_row_len*a.num_rows/(a.blocks*to_number(p.value))*100 < 0.7
    and a.blocks*to_number(p.value)/1024/1024 >= 10
    and a.owner in ('ULTRANMS');

    ALTER TABLE SHRINK command took 20 hours for a 150GB table.
    The ALTER TABLE MOVE took 10 hours for a 150GB table.

    ##Using move (move needs twice the space)
    1. Reclaim the HWM
    For a table
    alter table scott.emp move;
    For a partition
    alter table scott.emp move partition P201209;
    2. Rebuild indexes
    Check the status of local indexes
    select index_name,index_owner,partition_name,status from dba_ind_partitions where index_name in ('IDX_WORKLIST_SUBSID','PK_OM_WL_WORKLIST');
    Rebuild a local index (a local index is always partitioned); the whole index can be rebuilt at once
    alter index NGCRM_JM.IDX_WORKLIST_SUBSID rebuild parallel 8;
    Or rebuild a local index partition by partition
    alter index NGCRM_JM.IDX_WORKLIST_SUBSID rebuild partition P201209 parallel 8;

    Check the status of global indexes (a primary key index is usually global)
    select index_name,owner,status from dba_indexes where index_name in ('IDX_WORKLIST_SUBSID','PK_OM_WL_WORKLIST');
    Rebuild a global index
    alter index NGCRM_JM.IDX_WORKLIST_SUBSID rebuild parallel 8;
    3. Gather statistics

    Notes for move operations:
    1. Take a logical backup of the table before reorganizing it;
    2. For large tables, enable parallel and nologging
    alter table test move nologging parallel 2;
    3. Rebuild the related indexes afterwards
    alter index ind_test rebuild nologging parallel 2;
    4. Restore the table's and indexes' parallelism and logging
    alter table test logging parallel 1;
    5. While a table is being moved, v$locked_objects shows an exclusive lock on the table, so run the move outside business hours; a sketch for checking this follows
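    A minimal sketch to confirm the exclusive lock during a move (v$locked_objects joined to dba_objects; locked_mode 6 is exclusive):
    select o.owner, o.object_name, l.session_id, l.locked_mode
    from v$locked_objects l, dba_objects o
    where l.object_id = o.object_id
    and l.locked_mode = 6;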


    ##Using shrink to reclaim the HWM of a whole table; if move runs into resource pressure, consider shrink (shrink needs no extra space)
    Oracle maintains the related indexes during a shrink, so the indexes remain valid when the shrink finishes.

    alter table ICDWF.WF_TBL_NODEINFO enable row movement;
    alter table ICDWF.WF_TBL_NODEINFO shrink space cascade; --the cascade option reorganizes the index space as well
    alter table ICDWF.WF_TBL_NODEINFO disable row movement;

    A segment shrink runs in two phases:
    1. Data reorganization (compact): rows are packed toward the front of the segment through a series of inserts and deletes. This phase takes RX locks, i.e. it locks only the rows being moved,
    and it generates a large amount of undo and redo.
    Because rowids change, row movement must be enabled, and rowid-based triggers must be disabled. This phase has relatively little business impact.
    2. HWM adjustment: the second phase lowers the HWM and frees the empty blocks. It takes an X lock on the table, blocking all DML against it; on a very busy system this can be quite disruptive.
    Note: shrink space runs both phases.
    shrink space compact runs only the first phase.
    On a busy system, run shrink space compact first to reorganize the data, then run shrink space in a quiet period to lower the HWM and release the free blocks.
    alter table images shrink space compact; -- do not lock table, run online
    alter table images shrink space ; -- lock table, run when users aren't
    alter table images modify lob (image) (shrink space);

    --Timing case study:
    OWNER TABLE_NAME COLUMN_NAME SEGMENT_NAME SEGMENT_TYPE ROUND(B.BYTES/1024/1024)
    --------------- -------------------- ------------------------------ ------------------------------ ------------------ ------------------------
    HR65_APP SM_BUSILOG_DEFAULT TABLE 8297 --table size
    HR65_APP SM_BUSILOG_DEFAULT LOGMSG SYS_LOB0000085979C00006$$ LOBSEGMENT 299871 --LOB segment size
    HR65_APP SM_BUSILOG_DEFAULT LOGMSG SYS_IL0000085979C00006$$ LOBINDEX 593 --LOB index size

    TABLE_NAME NUM_ROWS ACTUAL_MB HIGH_WATER_MB NEED_RECOVERY_MB
    ------------------------------ ---------- ---------- ------------- ----------------
    SM_BUSILOG_DEFAULT 1838803 2700.57356 8291.44531 5590.87175

    alter table HR65_APP.SM_BUSILOG_DEFAULT shrink space compact;
    Elapsed on the test database: 02:24:12.95
    Afterwards, neither the table size nor the LOB size changed

    ALTER TABLE HR65_APP.SM_BUSILOG_DEFAULT MODIFY LOB (LOGMSG) (SHRINK SPACE compact);
    Elapsed on the test database: 08:02:45.11
    Afterwards, the LOB size still did not change

    ALTER TABLE HR65_APP.SM_BUSILOG_DEFAULT SHRINK SPACE CASCADE;
    Elapsed: 01:52:45.64
    ALTER TABLE HR65_APP.SM_BUSILOG_DEFAULT MODIFY LOB (LOGMSG) (SHRINK SPACE);
    Elapsed: 00:16:43.50

    OWNER TABLE_NAME COLUMN_NAME SEGMENT_NAME SEGMENT_TYPE ROUND(B.BYTES/1024/1024)
    --------------- -------------------- ------------------------------ ------------------------------ ------------------ ------------------------
    HR65_APP SM_BUSILOG_DEFAULT TABLE 3430
    HR65_APP SM_BUSILOG_DEFAULT LOGMSG SYS_LOB0000085979C00006$$ LOBSEGMENT 30695
    HR65_APP SM_BUSILOG_DEFAULT LOGMSG SYS_IL0000085979C00006$$ LOBINDEX 593

    --Both alter table move and alter table shrink compact a table and defragment it, but one big difference remains:
    After alter table move, the table's position in the tablespace always changes: it may move forward or backward; generally, if there is enough free space ahead of the table in the tablespace, it moves forward, otherwise backward.
    After alter table shrink, the table's position never changes: the segment header stays where it was.
    Because of this, after moving every table in a tablespace, the tables all cluster in the front half and the tablespace can be shrunk; after shrinking every table, their positions are unchanged and the tablespace has as little room to shrink as before. A sketch of resizing a datafile after a move follows.
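    Once the segments sit at the front of the file, the tail of the datafile can be returned to the OS; a sketch with a hypothetical file name and target size (the script in the datafile fragmentation section below generates such statements automatically):
    alter database datafile '/u01/app/oracle/oradata/orcl/users01.dbf' resize 500M;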


    #Inspect block usage below the HWM
    o Allocated, but currently unformatted and unused
    o Formatted and contain data
    o Formatted and empty because the data was deleted

    For ASSM:
    For an ASSM segment, examining the space usage of the blocks below the HWM is relatively simple; the following procedure reports a table's block usage:
    create or replace procedure show_space_assm(
    p_segname in varchar2,
    p_owner in varchar2 default user,
    p_type in varchar2 default 'TABLE' )
    as
    l_fs1_bytes number;
    l_fs2_bytes number;
    l_fs3_bytes number;
    l_fs4_bytes number;
    l_fs1_blocks number;
    l_fs2_blocks number;
    l_fs3_blocks number;
    l_fs4_blocks number;
    l_full_bytes number;
    l_full_blocks number;
    l_unformatted_bytes number;
    l_unformatted_blocks number;
    procedure p( p_label in varchar2, p_num in number )
    is
    begin
    dbms_output.put_line( rpad(p_label,40,'.') ||p_num );
    end;
    begin
    dbms_space.space_usage(
    segment_owner => p_owner,
    segment_name => p_segname,
    segment_type => p_type,
    fs1_bytes => l_fs1_bytes,
    fs1_blocks => l_fs1_blocks,
    fs2_bytes => l_fs2_bytes,
    fs2_blocks => l_fs2_blocks,
    fs3_bytes => l_fs3_bytes,
    fs3_blocks => l_fs3_blocks,
    fs4_bytes => l_fs4_bytes,
    fs4_blocks => l_fs4_blocks,
    full_bytes => l_full_bytes,
    full_blocks => l_full_blocks,
    unformatted_blocks => l_unformatted_blocks,
    unformatted_bytes => l_unformatted_bytes);
    p('free space 0-25% Blocks:',l_fs1_blocks);
    p('free space 25-50% Blocks:',l_fs2_blocks);
    p('free space 50-75% Blocks:',l_fs3_blocks);
    p('free space 75-100% Blocks:',l_fs4_blocks);
    p('Full Blocks:',l_full_blocks);
    p('Unformatted blocks:',l_unformatted_blocks);
    end;
    /


    Under ASSM, block space usage falls into 5 buckets: free space 0-25%, 25-50%, 50-75%, 75-100%, and full; show_space_assm totals the number of blocks of each type for the table being examined.
    Look at the space usage of table HWM1:
    SQL> set serveroutput on;
    SQL> exec show_space_assm('HWM1','DLINGER');
    free space 0-25% Blocks:.................0
    free space 25-50% Blocks:...............1
    free space 50-75% Blocks:...............0
    free space 75-100% Blocks:..............8
    Full Blocks:.....................................417
    Unformatted blocks:.........................0

    The result shows table HWM1 has 417 full blocks, 8 blocks at 75-100% free space, and 1 block at 25-50% free space. When most blocks below a table's HWM show high free-space
    values, consider coalescing the blocks below the HWM, releasing the free blocks, and lowering the table's HWM.

    SQL> set serveroutput on;
    SQL> exec show_space_assm('SM_BUSILOG_DEFAULT','HR65_APP');
    SQL> exec show_space_assm('SYS_LOB0000085979C00006$$','HR65_APP','LOB');

    #Procedure to inspect the HWM
    create or replace procedure show_space
    ( p_segname in varchar2,
    p_owner in varchar2 default user,
    p_type in varchar2 default 'TABLE',
    p_partition in varchar2 default NULL )
    as
    l_total_blocks number;
    l_total_bytes number;
    l_unused_blocks number;
    l_unused_bytes number;
    l_LastUsedExtFileId number;
    l_LastUsedExtBlockId number;
    l_last_used_block number;
    procedure p( p_label in varchar2, p_num in number )
    is
    begin
    dbms_output.put_line( rpad(p_label,40,'.') ||
    p_num );
    end;
    begin
    dbms_space.unused_space
    ( segment_owner => p_owner,
    segment_name => p_segname,
    segment_type => p_type,
    partition_name => p_partition,
    total_blocks => l_total_blocks,
    total_bytes => l_total_bytes,
    unused_blocks => l_unused_blocks,
    unused_bytes => l_unused_bytes,
    last_used_extent_file_id => l_LastUsedExtFileId,
    last_used_extent_block_id => l_LastUsedExtBlockId,
    last_used_block => l_last_used_block );
    p( 'Total Blocks', l_total_blocks );
    p( 'Total Bytes', l_total_bytes );
    p( 'Unused Blocks', l_unused_blocks );
    p( 'Unused Bytes', l_unused_bytes );
    p( 'Last Used Ext FileId', l_LastUsedExtFileId );
    p( 'Last Used Ext BlockId', l_LastUsedExtBlockId );
    p( 'Last Used Block', l_last_used_block );
    end;
    /
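    Usage sketch (SCOTT.EMP is a placeholder; enable dbms_output first):
    set serveroutput on
    exec show_space('EMP','SCOTT');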

    ===================================================== Reclaiming the High Water Mark ====================================================================

    ##Table fragmentation (HWM)
    SELECT --tables
    TABLE_NAME,
    OWNER,
    PARTITIONED,
    tablespace_name,
    LAST_ANALYZED,
    NUM_ROWS,
    BLOCKS*8/1024 "SIZE_M(true)", --表的总大小
    (AVG_ROW_LEN*NUM_ROWS+24*INI_TRANS)/1024/1024 "USED_M(need)", --行平均长度*行数 + 事物槽 (实际使用大小)
    (BLOCKS*8/1024 - ((AVG_ROW_LEN*NUM_ROWS+24*INI_TRANS)/1024/1024)) "FREE_MB(release)", --可以释放大小
    (1-(AVG_ROW_LEN*NUM_ROWS+24*INI_TRANS)/8096/BLOCKS)*100 "PCT(shrink)" --释放百分比
    FROM DBA_TABLES
    where BLOCKS*8/1024 >5 and blocks != 0
    and (1-(AVG_ROW_LEN*NUM_ROWS+24*INI_TRANS)/8096/BLOCKS)*100 > 20 --reclaimable percentage above 20%
    order by "FREE_MB(release)", "SIZE_M(true)", "PCT(shrink)" asc;

    SELECT --tables with LOB columns
    t.TABLE_NAME,
    t.OWNER,
    LAST_ANALYZED,
    b.segment_name,
    b.lob_size_m,
    NUM_ROWS,
    BLOCKS*8/1024 "SIZE_M(true)",
    (AVG_ROW_LEN*NUM_ROWS+24*INI_TRANS)/1024/1024 "USED_M(need)",
    (BLOCKS*8/1024 - ((AVG_ROW_LEN*NUM_ROWS+24*INI_TRANS)/1024/1024)) "FREE_MB(release)",
    (1-(AVG_ROW_LEN*NUM_ROWS+24*INI_TRANS)/8096/BLOCKS)*100 "PCT(shrink)"
    FROM DBA_TABLES t,
    (select * from (
    select s.owner, l.table_name, s.segment_name , l.COLUMN_NAME, s.bytes/1024/1024 lob_size_m from dba_segments s, dba_lobs l
    where s.segment_name=l.segment_name and segment_type='LOBSEGMENT' order by 5 desc )
    where rownum<10) b
    where t.OWNER=b.OWNER and t.TABLE_NAME=b.TABLE_NAME and t.BLOCKS*8/1024 >5 and blocks != 0
    order by "FREE_MB(release)", "SIZE_M(true)", "PCT(shrink)" asc;

    ##Monitor tablespace growth
    --daily growth
    select a.days, a.tsname, cur_size_mb,used_size_mb,usage_pert, used_size_mb - lag (used_size_mb,1) over ( partition by a.tsname order by a.tsname,a.days) inc_used_size_mb
    from (
    select to_char(sp.begin_interval_time,'mm-dd-yyyy') days, ts.tsname,
    max(round((tsu.tablespace_size* dt.block_size )/(1024*1024),2) ) cur_size_mb,
    max(round((tsu.tablespace_usedsize* dt.block_size )/(1024*1024),2)) used_size_mb,
    round(max(tsu.tablespace_usedsize)/max(tsu.tablespace_size)*100, 1) usage_pert
    from dba_hist_tbspc_space_usage tsu --depends on AWR snapshot data
    inner join dba_hist_tablespace_stat ts on tsu.tablespace_id=ts.ts#
    inner join dba_hist_snapshot sp on tsu.snap_id =sp.snap_id
    inner join dba_tablespaces dt on ts.tsname=dt.tablespace_name
    where
    --(sp.begin_interval_time > sysdate-2) and
    dt.tablespace_name not like '%UNDO%'
    group by to_char(sp.begin_interval_time,'mm-dd-yyyy'), ts.tsname
    order by ts.tsname, days
    ) a ;

    --average growth per day
    break on report
    compute sum of cur_used_size_mb on report
    compute sum of avg_increas_mb on report
    select b.tsname tablespace_name, max(b.used_size_mb) cur_used_size_mb, round(avg(inc_used_size_mb),2) avg_increas_mb
    from (
    select a.days, a.tsname, used_size_mb, used_size_mb - lag (used_size_mb,1) over ( partition by a.tsname order by a.tsname,a.days) inc_used_size_mb
    from (
    select to_char(sp.begin_interval_time,'mm-dd-yyyy') days, ts.tsname,
    max(round((tsu.tablespace_usedsize* dt.block_size )/(1024*1024),2)) used_size_mb
    from dba_hist_tbspc_space_usage tsu
    inner join dba_hist_tablespace_stat ts on tsu.tablespace_id=ts.ts#
    inner join dba_hist_snapshot sp on tsu.snap_id =sp.snap_id
    inner join dba_tablespaces dt on ts.tsname=dt.tablespace_name
    where
    --(sp.begin_interval_time > sysdate-2) and
    dt.tablespace_name not like '%UNDO%'
    group by to_char(sp.begin_interval_time,'mm-dd-yyyy'), ts.tsname
    order by ts.tsname, days
    ) a
    ) b
    group by b.tsname order by b.tsname;

    select sum(space_used_total)/1024/1024/1024 "last 7 days db increase - G"
    from
    dba_hist_seg_stat s,
    dba_hist_seg_stat_obj o,
    dba_hist_snapshot sn
    where
    s.obj# = o.obj#
    and
    sn.snap_id = s.snap_id
    and begin_interval_time > sysdate-8
    /

    ##Daily archive log volume
    alter session set NLS_DATE_FORMAT='YYYY-MON-DD';
    select trunc(completion_time) rundate
    ,count(*) logswitch
    ,round((sum(blocks*block_size)/1024/1024)) "REDO PER DAY (MB)"
    from v$archived_log
    group by trunc(completion_time)
    order by 1;
    Log switches per hour
    select trunc(first_time, 'HH') , count(*)
    from v$loghist
    group by trunc(first_time, 'HH')
    order by trunc(first_time, 'HH');

    ##Generate datafile resize statements (shrink files down toward the HWM)
    set line 300 pagesize 1000
    col cmd for a100
    select /*+ ordered use_hash(a,c) */ 'alter database datafile '''||a.file_name||''' resize ' ||round(a.filesize - (a.filesize - c.hwmsize-100) *0.8)||'M;' cmd, a.filesize, c.hwmsize, a.filesize-round(a.filesize - (a.filesize - c.hwmsize-100) *0.8) shrink_size
    from (select file_id,file_name,round(bytes/1024/1024) filesize from dba_data_files) a,
    (select file_id,round(max(block_id)*8/1024) HWMsize from dba_extents group by file_id) c
    where a.file_id = c.file_id and a.filesize - c.hwmsize > 100 order by shrink_size;

    #Temp tablespace usage (HWM, RAC)
    SELECT d.tablespace_name "Tablespace_name",
    TO_CHAR(NVL(a.bytes / 1024 / 1024, 0),'99,999,990.900') "Size (M)",
    TO_CHAR(NVL(t.hwm, 0)/1024/1024,'99999999.999') "HWM (M)",
    TO_CHAR(NVL(t.hwm / a.bytes * 100, 0), '990.00') "HWM % " ,
    TO_CHAR(NVL(t.bytes/1024/1024, 0),'99999999.999') "Using (M)",
    TO_CHAR(NVL(t.bytes / a.bytes * 100, 0), '990.00') "Using %"
    FROM sys.dba_tablespaces d,
    (select tablespace_name, sum(bytes) bytes from dba_temp_files group by tablespace_name) a,
    (select tablespace_name, sum(bytes_cached) hwm, sum(bytes_used) bytes from gv$temp_extent_pool group by tablespace_name) t
    WHERE d.tablespace_name = a.tablespace_name(+)
    AND d.tablespace_name = t.tablespace_name(+)
    AND d.extent_management like 'LOCAL'
    AND d.contents like 'TEMPORARY';

    --shrink the temp tablespace
    alter tablespace temp shrink space keep 400M;
    alter tablespace temp shrink space;
    alter tablespace temp shrink tempfile '/u01/app/oracle/oradata/anqing/temp01.dbf' keep 300M;
    alter tablespace temp shrink tempfile '/u01/app/oracle/oradata/anqing/temp01.dbf';


    ##Datafile fragmentation
    select /*+ ordered use_hash(a,b,c) */ a.file_id,a.file_name,a.filesize, b.freesize,
    (a.filesize-b.freesize) usedsize, c.hwmsize, c.hwmsize - (a.filesize-b.freesize) freesize_belowhwm, a.filesize - c.hwmsize canshrinksize
    from (select file_id,file_name,round(bytes/1024/1024) filesize from dba_data_files) a,
    (select file_id,round(sum(dfs.bytes)/1024/1024) freesize from dba_free_space dfs group by file_id) b,
    (select file_id,round(max(block_id)*8/1024) HWMsize from dba_extents group by file_id) c
    where a.file_id = b.file_id and a.file_id = c.file_id
    order by freesize_belowhwm desc;

    #Tablespace fragmentation (FSFI)

    select a.tablespace_name,
    trunc(sqrt(max(blocks)/sum(blocks))* (100/sqrt(sqrt(count(blocks)))),2) fsfi
    from dba_free_space a,dba_tablespaces b
    where a.tablespace_name=b.tablespace_name
    and b.contents not in('TEMPORARY','UNDO')
    group by A.tablespace_name
    order by fsfi;
