Monday, January 12, 2009

How to improve the performance of a Correlated query ?

How to improve the performance of a Correlated query ?

In our regular development life, we come across many queries that has correlated sub queries.

Eg : select ename, sal, deptno
from emp e
where sal > (select avg (sal)
from emp
where deptno = e.deptno
group by deptno);

(also can be written as) :

select *
from (select ename, sal, deptno, avg (sal) over (partition by deptno) avg_sal
from emp)
where sal > avg_sal;

Sometimes, either the query becomes too nested or non-performant. Following blog explains the different approaches that can be used to achieve the same result, with improved performance.

As usual, let's start with a test case.

create table T(
ITEM_NO VARCHAR2(20),
QTY NUMBER,
BIN VARCHAR2(10),
EFFECTIVE_DATE DATE
);

insert into t values ('AC006',10,'DC001',to_date('02/01/2002','mm/dd/yyyy'));
insert into t values ('AC006',20,'DC002',to_date('02/01/2002','mm/dd/yyyy'));
insert into t values ('AC006',100,'DC001',to_date('05/01/2002','mm/dd/yyyy'));
insert into t values ('AC006',50,'DC002' to_date('05/02/2002','mm/dd/yyyy'));
insert into t values ('AC006',30,'DC003',to_date('05/03/2002','mm/dd/yyyy'));
insert into t values ('AC006',20,'DC008',to_date('05/04/2002','mm/dd/yyyy'));
insert into t values ('AC007',10,'DC001',to_date('02/01/2002','mm/dd/yyyy'));
insert into t values ('AC007',20,'DC002',to_date('02/01/2002','mm/dd/yyyy'));
insert into t values ('AC007',77,'DC001',to_date('05/01/2002','mm/dd/yyyy'));
insert into t values ('AC007',32,'DC002',to_date('05/02/2002','mm/dd/yyyy'));
insert into t values ('AC007',52,'DC003',to_date('05/03/2002','mm/dd/yyyy'));
insert into t values ('AC007',33,'DC008', to_date('05/04/2002','mm/dd/yyyy'));

commit;

select * from t;


Our requirement is simple : Get the total qty of an item in an inventory, where the qty should be considered of that item,
which has the highest effective date in a given bin.

-- We can achive this in 3 different ways :

-- #1 : Using co-related sub-query.

-- Table is referred twice .... so less performant ....
select item_no, sum(qty)
from t
where effective_date = (select max(effective_date)
from t t2
where t2.item_no = t.item_no
and t2.bin = t.bin)
group by item_no;


test@VEDALA> set autot trace exp stat;
test@VEDALA> select item_no, sum(qty)
2 from t
3 where effective_date = (select max(effective_date)
4 from t t2
5 where t2.item_no = t.item_no
6 and t2.bin = t.bin)
7 group by item_no;


Execution Plan
----------------------------------------------------------
Plan hash value: 1922650475

-----------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-----------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 41 | 22 (5)| 00:00:01 |
| 1 | HASH GROUP BY | | 1 | 41 | 22 (5)| 00:00:01 |
|* 2 | FILTER | | | | | |
| 3 | TABLE ACCESS FULL | T | 12 | 492 | 3 (0)| 00:00:01 |
| 4 | SORT AGGREGATE | | 1 | 28 | | |
|* 5 | TABLE ACCESS FULL| T | 1 | 28 | 3 (0)| 00:00:01 |
-----------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

2 - filter("EFFECTIVE_DATE"= (SELECT MAX("EFFECTIVE_DATE") FROM "T"
"T2" WHERE "T2"."ITEM_NO"=:B1 AND "T2"."BIN"=:B2))
5 - filter("T2"."ITEM_NO"=:B1 AND "T2"."BIN"=:B2)

Note
-----
- dynamic sampling used for this statement


Statistics
----------------------------------------------------------
25 recursive calls
0 db block gets
136 consistent gets
0 physical reads
620 redo size
520 bytes sent via SQL*Net to client
381 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
3 sorts (memory)
0 sorts (disk)
2 rows processed


-- Approach#2 : Most famous Tom's technique, use max() to avoid co-related sub-query.

select substr(data, 14, 5) item_no, sum(to_number(substr(data, 19))) qty
from (select max(to_char(effective_date, 'YYYYMMDD') || rpad(bin, 5) ||rpad(item_no, 5)||to_char(qty)) data
from t
group by item_no, bin)
group by substr(data, 14, 5);

test@VEDALA> select substr(data, 14, 5) item_no, sum(to_number(substr(data, 19))) qty
2 from (select max(to_char(effective_date, 'YYYYMMDD') || rpad(bin, 5) ||rpad(item_no, 5)||to_char(qty)) data
3 from t
4 group by item_no, bin)
5 group by substr(data, 14, 5);


Execution Plan
----------------------------------------------------------
Plan hash value: 1926056118

-----------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-----------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 12 | 372 | 5 (40)| 00:00:01 |
| 1 | HASH GROUP BY | | 12 | 372 | 5 (40)| 00:00:01 |
| 2 | VIEW | | 12 | 372 | 4 (25)| 00:00:01 |
| 3 | HASH GROUP BY | | 12 | 492 | 4 (25)| 00:00:01 |
| 4 | TABLE ACCESS FULL| T | 12 | 492 | 3 (0)| 00:00:01 |
-----------------------------------------------------------------------------

Note
-----
- dynamic sampling used for this statement


Statistics
----------------------------------------------------------
4 recursive calls
0 db block gets
17 consistent gets
0 physical reads
172 redo size
515 bytes sent via SQL*Net to client
381 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
2 rows processed

-- Approach#3 : Analytics - easy to read and performs well .. compare the plans

select item_no, sum(qty)
from (select distinct item_no,
bin,
first_value(qty) over(partition by item_no, bin order by effective_date desc) qty
from t)
group by item_no;

test@VEDALA> select item_no, sum(qty)
2 from (select distinct item_no,
3 bin,
4 first_value(qty) over(partition by item_no, bin order by effective_date desc) qty
5 from t)
6 group by item_no;

Execution Plan
----------------------------------------------------------
Plan hash value: 1317671949

------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 12 | 300 | 5 (40)| 00:00:01 |
| 1 | HASH GROUP BY | | 12 | 300 | 5 (40)| 00:00:01 |
| 2 | VIEW | | 12 | 300 | 5 (40)| 00:00:01 |
| 3 | HASH UNIQUE | | 12 | 492 | 5 (40)| 00:00:01 |
| 4 | WINDOW SORT | | 12 | 492 | 5 (40)| 00:00:01 |
| 5 | TABLE ACCESS FULL| T | 12 | 492 | 3 (0)| 00:00:01 |
------------------------------------------------------------------------------

Note
-----
- dynamic sampling used for this statement


Statistics
----------------------------------------------------------
4 recursive calls
0 db block gets
17 consistent gets
0 physical reads
172 redo size
520 bytes sent via SQL*Net to client
381 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
1 sorts (memory)
0 sorts (disk)
2 rows processed

Correlated query was costing 22 where as approach #2 or #3 costs only #5.

source : AskTom.

Long live "Tom Kyte".

Good Luck,
r-a-v-i

Thursday, December 11, 2008

Performance Tuning - When to use Bind Variables, when not to ? When histograms work and when they don't ?

The usual nightmare of almost every other Java/Oracle developer ... "ooooh...this query is not coming back...what should I do now? How to tune my query. Where should I start ?".

Though there are many ways to tune a query, n this section we see how usage of bind variables, bind variable peeking and Histograms effect the query performance.

This test case here proves that, "Bind Variable Peeking and Histograms won't go together".

Almost all Java/Oracle developers use bind variables, as a rule of thumb.

But it is important to understand how the query execution plan is determined by bind variables.

It is "VERY" important to know the type of system you are working in - whether it is a transactional(OLTP) system or a reporting system. If you are working on a database where you fire the queries against the same transactional database for regular queries and reports, then this is a must read.

The Java developer(Mr.X) would say : Wait a minute .. I am Java/developer and I don't care what the difference between these two is... to me, database is just a data store ... I just store the data and retrieve it back. Let a Java Persistence framework like Hibernate deal with it.

It is important for a Java/Oracle developer to understand that a database is more than just a data store.

Let's cut the crap and get into action, right away.

-- Create a test table

create table test
as
select 1 X, object_name data from all_objects a
/

insert into test
select 2, object_name data from all_objects a where rownum

Bind Variable Peeking :

The first time a query is HARD PARSED, the bind variable values that are supplied with that query
are taken into consideration. That is:

select * from test t1 where x = :a

when hard parsed with a bind value of 2 will be parsed as if it were

select * from test t1 where x = 2;

Oracle uses all statistics, that are available to optimize that query.

The next time the query

select * from test t1 where x = :a

is soft parsed, it will use the existing plan (else, it would be a hard parse again!)

Mr.X : oooh......what the hell all this crap is ? How it effects me ?

As a Java/Oracle developer, it is important to understand how the data is distributed in a column, against which you are querying.

If you look at our test table, it has lot of rows with X=1 and less rows where X=2

So, logically, if we need to get data where x=2, it makes sense if we go to the index and if we need data where x=1, it makes sense if we go to the table(as we are getting most of the table's data).

Let's see how it works ?

Go to sqlplus and set the following :

>set autot trace exp;

-- Case 1 : Use Bind Variable Peeking

select * from test t1 where x = :a

-- pass 2. Oracle should do an Index Range Scan and it does the same. So - CORRECT PLAN

-- pass 1: Oracle should do a Full table scan and it doesn't. So -- INCORRECT PLAN.

Now, let's generate statistics and give the optimizer more info about the data.

analyze table test compute statistics
for table
for all indexes
for all indexed columns

Now it's important to talk about Histograms here. When there is skewed data in a column, when we generate statistics Oracle generates statistics
called Histograms.

select * from User_histograms h where table_name = 'TEST' ;

select * from DBA_histograms h where h.table_name = 'TEST';

What is Skewed data ? -- If the data distribution is uneven in a column, we call that data as skewed data. In our example, we have about 50K+ values of 1 and 9999 values of 2.

-- Case 2 : Use Bind Peeking, use histograms

-- Pass 2 (should do a Index Range Scan but does a Full Table Scan - WRONG PLAN)

select * from test t1 where x = :a

-- Pass 1 (should do a Full Table Scan and does Full Table Scan - CORRECT PLAN)

select * from test t1 where x = :a

-- So, bind variable peeking and histograms may work some times and may not work some times ...

-- Case 3 : No Binds, use histograms

-- Pass 2 (should do a Index Range Scan and does Index Range Scan - CORRECT PLAN)

select * from test t1 where x = 2

-- Pass 1 (should do a Full Table Scan and does Full Table Scan- CORRECT PLAN)

select * from test t1 where x = 1

Mr.X - So, does it mean that I should STOP using bind variables and use literals ?

The answer is - it depends.

Coming back to my original point, if you are working on a transactional system where thousands of queries execute per second, USE bind variable.

If you are firing the query against a reporting database where each query runs for seconds, DON't use bind variables.

Mr.X - Common, I work on a single database which is trasactional. I use the same for reporting too. What should I do in this case ?

The answer is simple. USE bind variables for all your regular transactional queries and DON'T use bind variables for reports.

Mr.X - Now, I understand when to use bin d variables and when not to use. Also the histograms.

Source : AskTom

Refer which columns need histograms by Tom, to get a picture of which columns need histograms. Also a good explanation on Cardinality.

Long Live "Sir, Tom Kyte".

Good Luck,
r-a-v-i

Thursday, November 13, 2008

How Oracle decides whether to use an index or to do a full table scan ?

Let’s directly jump into the code without much bla, bla bla ...….

drop table t1;

create table t1 as
select
trunc((rownum-1)/15) n1,
trunc((rownum-1)/15) n2,
rpad('x',100) v1
from all_objects where rownum <= 3000; drop table t2; create table t2 as select mod(rownum,200) n1, mod(rownum,200) n2, rpad('x',100) v1 from all_objects where rownum <= 3000; create index t_i1 on t1(n1); create index t_i2 on t2(n1); analyze table t1 compute statistics for table for all indexes for all indexed columns; analyze table t2 compute statistics for table for all indexes for all indexed columns; If you look at both the tables, they have pretty much the same data. set autotrace traceonly explain; select * from t1 where n1 = 45; Execution Plan ---------------------------------------------------------- Plan hash value: 1661992259 ------------------------------------------------------------------------------------ | Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | ------------------------------------------------------------------------------------ | 0 | SELECT STATEMENT | | 15 | 1020 | 2 (0)| 00:00:01 | | 1 | TABLE ACCESS BY INDEX ROWID| T1 | 15 | 1020 | 2 (0)| 00:00:01 | |* 2 | INDEX RANGE SCAN | T_I1 | 15 | | 1 (0)| 00:00:01 |
------------------------------------------------------------------------------------

Try this now :

select * from t2 where n1 = 45;

Execution Plan
----------------------------------------------------------
Plan hash value: 1513984157

--------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 15 | 1020 | 14 (0)| 00:00:01 |
|* 1 | TABLE ACCESS FULL| T2 | 15 | 1020 | 14 (0)| 00:00:01 |
--------------------------------------------------------------------------

Does a full table scan and the cost is 14.

The difference is in the data distribution, a.k.a. the way data is scattered.

Let's look into meta data and see how these tables are scattered.

select * from user_tables where table_name in (‘T1’, 'T2');

select * from USER_TAB_COLUMNS where table_name in (‘T1’, 'T2');

select * from user_indexes u where table_name in (‘T1’, 'T2');

select
table_name tab,
num_rows num_rows,
avg_leaf_blocks_per_key l_blocks,
avg_data_blocks_per_key d_blocks,
clustering_factor cl_fac
from user_indexes
where table_name in ('T1','T2');

Clustering_factor offers information on how the table rows are synchronized with the index. When the clustering factor is close to the number of data blocks and the column value is not row ordered when the clustering_factor approaches the number of rows in the table, the table rows are synchronized with the index.

Basically Four factors synchronize to help the CBO choose whether to use an index or a full-table scan: the selectivity of a column value; the db_block_size ; the avg_row_len ; and the cardinality.

An index scan is usually faster if a data column has high selectivity and a low clustering_factor.

If you look at the user_indexes table these are the facts.

To scan T2 by index : it takes one index block, 15 data blocks = 16

To scan T2 by full table scan : it takes 96 blocks / 8 (multiblock read) = 12
(this is a first approximation)

To scan T1 by index : it takes one index block, one data block = 2

So, since the cost of doing a full table scan (multi block) is lesser than going to an index, CBO choose to go with full table scan.

Let's see what happens if we change it.

alter session set DB_FILE_MULTIBLOCK_READ_COUNT = 4;

SELECT STATEMENT Optimizer=CHOOSE (Cost=16 Card=15)
TABLE ACCESS (BY INDEX ROWID) OF 'T2' (Cost=16 Card=15)
INDEX (RANGE SCAN) OF 'T_I2' (NON-UNIQUE) (Cost=1 Card=15)

alter session set DB_FILE_MULTIBLOCK_READ_COUNT = 16;

SELECT STATEMENT Optimizer=CHOOSE (Cost=10 Card=15)
TABLE ACCESS (FULL) OF 'T2' (Cost=10 Card=15)

Hope this helps !!!

Long Live "Tom Kyte".

Ravi Vedala (r-a-v-i)

Wednesday, November 12, 2008

Does the columns of the select list impact the query execution plan ?

Proceed further, if you fall into any of these categories : "I am a Java/Oracle developer or a junior DBA. I am not sure what columns should be included in an Oracle Index ?"
OR "How to decide, which columns should be there in an index (B*tree) "?

First the anwer for the question in the title is "YES".

After 9i, Oracle has gotten much better and better. In the olden days of RBO, it has a "One plan fits all" mentality. But after CBO (Cost based optimizer) , there are advanced access paths (plans) available to access the data.

wow ..wow..wow ...wait a minute, let me cut the crap and come to the point straight away !!

Before we proceed, one important mantra to recite : "Full Scans are not always evil and indexes are not always good".

In general all of us know that, if a column is in the where clause and if we create an index( or concatenated) for that column, optimizer will use the index (in most of the cases).

Is that the only criteria for a column to be part of an index ?

NOT NECESSARILY.

It is important to note that, if the column(s) in the select list are in an index then the CBO might choose to use the index and perform a high speed index fast full scan, instead of a full table scan. Some times the opposite is much faster. So, why are we discussing this at all ? This is infact a learning. If you run into a case, where you
think full table scan is not the right thing that should happen. Then this technique could come as a rescue.

As always, let's take an example to prove the point.

-- Log into sqlplus (preferably Oracle 10g database)

drop table t;

create table t
( object_id number,
object_name char(255),
object_type varchar2(20),
status varchar2(10),
constraint t_pk primary key(object_id)
);

insert into t
select object_id, object_name, object_type,status
from all_objects;

commit;

create index idx_t_1 on t(object_id,object_name);

-- Make sure we have CBO
analyze table t compute statistics
for table
for all indexes
for all indexed columns;

select count(*) from t; -- 53541

select object_id, object_name
from t
where object_id = nvl(:n ,object_id);

-- After looking at the above query, you would immediately get a question .. .why the hell we need that nvl() in the where clause ?

-- The requirement is : If a value is passed to the bind variable, we will get the data for that particular row, otherwise we get all the rows back.

-- Let's say we are using 10 for the bind variable n.

set autotrace traceonly;

select object_id, object_name
from t
where object_id = nvl(:n ,object_id);

Execution Plan
----------------------------------------------------------
Plan hash value: 2828772549

--------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 259 | 458 (2)| 00:00:06 |
|* 1 | INDEX FAST FULL SCAN| IDX_T_1 | 1 | 259 | 458 (2)| 00:00:06 |
--------------------------------------------------------------------------------

So, CBO did a index fast full scan on the index IDX_T_1 and the number of consistent gets is 2076.

Say now we need object_type also in the select.

select object_id, object_name, object_type
from t
where object_id = nvl(10 ,object_id);

Execution Plan
----------------------------------------------------------
Plan hash value: 1601196873

--------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 271 | 489 (2)| 00:00:06 |
|* 1 | TABLE ACCESS FULL| T | 1 | 271 | 489 (2)| 00:00:06 |
--------------------------------------------------------------------------

As you see CBO did a full table can and the number of consistent gets is 2160.

Say we are not passing null for the above bind variable.

select object_id, object_name, object_type
from t
where object_id = nvl(null,object_id);

Execution Plan
----------------------------------------------------------
Plan hash value: 1601196873

--------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 271 | 489 (2)| 00:00:06 |
|* 1 | TABLE ACCESS FULL| T | 1 | 271 | 489 (2)| 00:00:06 |
--------------------------------------------------------------------------

CBO did a full table access and the number of consistent gets is : 5301 (very high).

If you did notice, the plans change with the columns in the select list.

So, what to do now ?

Just add the new column in the select list to the index. CBO will do a index fast full scan, a high speed method to use an index as a skinnier version of the table.
It is fast since it does a multi-block read unlike a single block read like index full scan.
Following quote from Tom Kyte : "Index fast full scan will be used, in general when your query references only the indexed columns and a full scan of the table can be avoided
by a fast full scan."

Why to wait, let's give a shot.

drop index idx_t_2;

create index idx_t_2 on t(object_id,object_name, object_type);

-- Make sure we have CBO

analyze table t compute statistics
for table
for all indexes
for all indexed columns;

-- For one record
select object_id, object_name, object_type
from t
where object_id = nvl(10,object_id);

Execution Plan
----------------------------------------------------------
Plan hash value: 813020976

--------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 267 | 476 (2)| 00:00:06 |
|* 1 | INDEX FAST FULL SCAN| IDX_T_2 | 1 | 267 | 476 (2)| 00:00:06 |
--------------------------------------------------------------------------------

-- For all records
select object_id, object_name, object_type
from t
where object_id = nvl(null,object_id);

Execution Plan
----------------------------------------------------------
Plan hash value: 813020976

--------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 267 | 476 (2)| 00:00:06 |
|* 1 | INDEX FAST FULL SCAN| IDX_T_2 | 1 | 267 | 476 (2)| 00:00:06 |
--------------------------------------------------------------------------------

Take a look at the difference in CPU cost between the full table scan and index scan.

Since idx_t_2 is a concatenated index, let's see what CBO does in these cases ...

Case 1 : If we just select the last column in the index.

select object_type
from t
where object_id = nvl(null,object_id);

Execution Plan
----------------------------------------------------------
Plan hash value: 813020976

--------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 12 | 476 (2)| 00:00:06 |
|* 1 | INDEX FAST FULL SCAN| IDX_T_2 | 1 | 12 | 476 (2)| 00:00:06 |
--------------------------------------------------------------------------------

Case 2 : Select the columns in reverse order

select object_type, object_name, object_id
from t
where object_id = nvl(null,object_id);

Execution Plan
----------------------------------------------------------
Plan hash value: 813020976

--------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 267 | 476 (2)| 00:00:06 |
|* 1 | INDEX FAST FULL SCAN| IDX_T_2 | 1 | 267 | 476 (2)| 00:00:06 |
--------------------------------------------------------------------------------

Case 3 : Change the where clause to use the last column in the index.

select object_type, object_name
from t
where object_type = 'TABLE'

Execution Plan
----------------------------------------------------------
Plan hash value: 813020976

--------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 2505 | 643K| 474 (1)| 00:00:06 |
|* 1 | INDEX FAST FULL SCAN| IDX_T_2 | 2505 | 643K| 474 (1)| 00:00:06 |
--------------------------------------------------------------------------------

Case 4 : Change the where clause to use the columns of the index in reverse order ....

Execution Plan
----------------------------------------------------------
Plan hash value: 813020976

--------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 263 | 474 (1)| 00:00:06 |
|* 1 | INDEX FAST FULL SCAN| IDX_T_2 | 1 | 263 | 474 (1)| 00:00:06 |
--------------------------------------------------------------------------------

Case 5 : If we change where clause from : object_id = nvl(null,object_id);
to : object_id = :n;

CBO understands that we are looking for "a" particular value and uses Index Unique Scan.

Execution Plan
----------------------------------------------------------
Plan hash value: 1303508680

------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 267 | 2 (0)| 00:00:01 |
| 1 | TABLE ACCESS BY INDEX ROWID| T | 1 | 267 | 2 (0)| 00:00:01 |
|* 2 | INDEX UNIQUE SCAN | T_PK | 1 | | 1 (0)| 00:00:01 |
------------------------------------------------------------------------------------

Take a look at the CPU cost ... it's just super fast.

Case 6 : Since Index fast full scan doesn't sort the results by default, let's see what happens in this case :

select object_type, object_name
from t
where object_id = nvl(10,object_id)
order by object_name;

Execution Plan
----------------------------------------------------------
Plan hash value: 1771868443

---------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
---------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 267 | 477 (2)| 00:00:06 |
| 1 | SORT ORDER BY | | 1 | 267 | 477 (2)| 00:00:06 |
|* 2 | INDEX FAST FULL SCAN| IDX_T_2 | 1 | 267 | 476 (2)| 00:00:06 |
---------------------------------------------------------------------------------

So, does a IFFS and then does a sort.

Wait a minute, why did I say "Index fast full scan doesn't sort the results by default" ? Coz, Index full scan does.

Let's see an example :

Case 7 : If you are ordering the results by primary key, then that extra step of "SORT ORDER BY" would be eliminated and CBO does a Index full scan.

select object_id, object_name, object_type from t order by object_id;

Execution Plan
----------------------------------------------------------
Plan hash value: 1399892806

------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 53541 | 13M| 2444 (1)| 00:00:30 |
| 1 | TABLE ACCESS BY INDEX ROWID| T | 53541 | 13M| 2444 (1)| 00:00:30 |
| 2 | INDEX FULL SCAN | T_PK | 53541 | | 177 (2)| 00:00:03 |
------------------------------------------------------------------------------------

If you notice, there is no extra step "SORT ORDER BY" and see the cost it took to do the scan : 177

Case 8 : What if we order by a non PK column ?

Then CBO will choose IFFS and then does a SORT order by.

Eg :

select object_id, object_name, object_type from t order by object_name;

Execution Plan
----------------------------------------------------------
Plan hash value: 1771868443

-----------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes |TempSpc| Cost (%CPU)| Time |
-----------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 53541 | 13M| | 3565 (1)| 00:00:43 |
| 1 | SORT ORDER BY | | 53541 | 13M| 28M| 3565 (1)| 00:00:43 |
| 2 | INDEX FAST FULL SCAN| IDX_T_2 | 53541 | 13M| | 474 (1)| 00:00:06 |
-----------------------------------------------------------------------------------------

Don't get carried away with the indexes now ..... based on the cost calculations if CBO thinks that going to the table directly, instead of
doing that extra step of going to the index, get the rowid and then go to the table, it will choose to do to a full table scan.

So once again recite : "Full Scans are not always evil and indexes are not always good".

Hope this helps !!

"Long live Tom Kyte".

r-a-v-i

Thursday, July 3, 2008

Sql Tricks : Min(), Max(), ename for min sal, ename for Max sal in the same row

Let's say our requirement is to find out the Min() and Max() salaries of employees in each department with their names. So, the output we want is :

DEPTNO MAX_SAL MAX_ENAME MIN_SAL MIN_ENAME
10 5000 KING 1300 MILLER
20 3000 SCOTT 800 SMITH
30 2850 BLAKE 950 JAMES

Let's if a union does it for us ?

select deptno, ename, sal , 0 min_sal
from emp
where sal in (select max(sal) max_sal from emp group by deptno)
union
select deptno, ename, 0 , sal
from emp
where sal in (select min(sal) min_sal from emp group by deptno);

DEPTNO ENAME SAL MIN_SAL
10 KING 5000 0
10 MILLER 0 1300
20 FORD 3000 0
20 SCOTT 3000 0
20 SMITH 0 800
30 BLAKE 2850 0
30 JAMES 0 950

This gives the results, but not as we want...... both, min() and max() in the same row, with ename.

This can be achieved in 3 ways:

Solution 1 : This is the most simplest approach. Using max, min, substr(), to_number(), to_char()

select deptno, to_number (substr (xmax, 1, 14)) max_sal,
substr (xmax, 15) max_name, to_number (substr (xmin, 1, 14)) min_sal,
substr (xmin, 15) min_name
from (select deptno, max (to_char (sal, '0000000000.00') || ename) xmax,
min (to_char (sal, '0000000000.00') || ename) xmin
from emp
group by deptno);

Solution 2 : Traditional approach : Using Inline views

How about doing this : Let's calculate the max(sal), min(sal) and get the enames of employees having those sal's.

select deptno,
max_sal,
(select ename
from emp
where sal = max_sal) max_ename,
min_sal,
(select ename
from emp
where sal = min_sal) min_ename
from (select deptno, min(sal) min_sal, max(sal) max_sal
from emp
group by deptno
order by deptno);

You will get the error : "ORA-01472: single-row subquery returns more than one row."

If you notice, in deptno, there are two employees with the same max(sal) = 3000 [Ford and Scott].

So, when the query "select ename from emp where sal = max_sal' is getting executed, there are two rows
with max_sal value. If you try the above query only for departments 10,30, you won't get this error.

Let's take a look :

select deptno,
max_sal,
(select ename from emp where sal = max_sal) max_ename,
min_sal,
(select ename from emp where sal = min_sal) min_ename
from (select deptno, min(sal) min_sal, max(sal) max_sal
from emp
where deptno in (10, 30)
group by deptno
order by deptno);

So, how to get over the error above ? Include "rownum =1" in the where clause.
Here we go :

select deptno,
max_sal,
(select ename from emp where sal = max_sal and rownum =1) max_ename,
min_sal,
(select ename from emp where sal = min_sal and rownum =1) min_ename
from (select deptno, min(sal) min_sal, max(sal) max_sal
from emp
group by deptno
order by deptno);

DEPTNO MAX_SAL MAX_ENAME MIN_SAL MIN_ENAME
10 5000 KING 1300 MILLER
20 3000 SCOTT 800 SMITH
30 2850 BLAKE 950 JAMES


Solution 3 : Analytics always rock !!

select deptno, max(case when rn <> 1 then sal else null end) max_sal,
max(case when rn <> 1 then ename else null end) max_ename,
max(decode(rn, 1, sal)) min_sal, max(decode(rn, 1, ename)) min_ename
from (select *
from (select deptno,
row_number() over(partition by deptno order by sal) rn,
max(sal) over(partition by deptno) max_sal,ename, sal
from emp)
where rn = 1 or sal = max_sal)
group by deptno;

Source : AskTom.

Long Live "Tom Kyte".

Good Luck,
r-a-v-i

Wednesday, June 25, 2008

How to generate Trace & TKProf analysis ?

Autotrace :

- Go to sqlplus and login as scott/tiger.

Issue the following command at the sqlplus prompt :

sql> set autotrace on

set autotrace on; -- Results, Explain Plan, Statistics

set autotrace traceonly explain; -- Explain Plan

set autotrace traceonly explain statistics; -- Explain Plan, Statistics

Now if you run the following query you will see the subsequent output from trace.

sql> select max(sal) from emp;

MAX(SAL)
----------
5000


Execution Plan
----------------------------------------------------------
Plan hash value: 2083865914

---------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
---------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 4 | 3 (0)| 00:00:01 |
| 1 | SORT AGGREGATE | | 1 | 4 | | |
| 2 | TABLE ACCESS FULL| EMP | 14 | 56 | 3 (0)| 00:00:01 |
---------------------------------------------------------------------------

Statistics
----------------------------------------------------------
232 recursive calls
0 db block gets
49 consistent gets
0 physical reads
0 redo size
411 bytes sent via SQL*Net to client
381 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
6 sorts (memory)
0 sorts (disk)
1 rows processed

TKPROF

However, TKPROF provides more information than trace. To get the tkprof information, follow these steps :

a)
exec dbms_monitor.session_trace_enable( waits=>true );
OR
sql> alter session set sql_trace=true

sql> alter system set timed_statistics=true;

Log out of sqlplus.

Note : Issues these grants to scott, if you are not able to set the above.

grant alter session to scott;

grant alter system to scott;

b) cd \oracle\product\10.2.0\admin\[Oracle SID]\udump

Note : Replace with your SID from tnsnames.ora.

c)In Linux :

$ ls -t | head -1

The ouput of this command would be a file with an extension .trc.

Eg : test_ora_2996.trc

d) Now, tkprof the trace file into a temporary text file so that you can see.

$ tkprof test_ora_2996.trc tmp.txt explain=scott/tiger

e) $ vi tmp.txt

Look for the tkprof output for the sql you have fired.

ps : To quit from vi, just issue :q

In the text file, look for : select max(sal) from emp

select max(sal)
from
emp


call count cpu elapsed disk query current rows
------- ------ -------- ---------- ---------- ---------- ---------- ----------
Parse 1 0.01 0.01 0 0 0 0
Execute 1 0.00 0.00 0 0 0 0
Fetch 2 0.00 0.00 0 7 0 1
------- ------ -------- ---------- ---------- ---------- ---------- ----------
total 4 0.01 0.01 0 7 0 1

Misses in library cache during parse: 1
Optimizer mode: ALL_ROWS
Parsing user id: 54 (SCOTT)

Rows Row Source Operation
------- ---------------------------------------------------
1 SORT AGGREGATE (cr=7 pr=0 pw=0 time=61 us)
14 TABLE ACCESS FULL EMP (cr=7 pr=0 pw=0 time=54 us)


Rows Execution Plan
------- ---------------------------------------------------
0 SELECT STATEMENT MODE: ALL_ROWS
1 SORT (AGGREGATE)
14 TABLE ACCESS MODE: ANALYZED (FULL) OF 'EMP' (TABLE)

f) If you don't want the tmp.txt anymore,

$ rm tmp.txt

Note : You need to use the 10046 trace event to get waits in the trace file.

sql> alter session set events '10046 trace name context forever, level 12'

instead of sql_trace=true;

That's it.

References :

Auto Trace

Explain Plan

TKProf Explanation

Thursday, June 5, 2008

Oracle Analytics / Analytic Function: How to avoid “referencing the table twice in queries having aggregate functions”.

Database : Oracle 10g
Schema : Scott/Tiger
Table : emp

Analytic Functions syntax :
Function(arg1,..., argn) OVER ( [PARTITION BY <...>] [ORDER BY <....>] [] )

We come across this very often in our development world.

We need to apply an aggregate function on a column and then get another column based on that aggregate.

Let me give an example.

Say "We need to find out the Names of those employees who earn the max salary for each department".

Immediately, we jump in and write a query similar to this:

select *
from emp
where sal in (select max(e.sal) sal from emp e group by e.deptno);

Even though the table is referenced twice, this query runs very fast on the "emp" table, coz the table is very small.

But, what if your table is having millions and millions of records?

So, now the questions is "how to avoid referencing the table twice?”

Once again Analytic functions rock and roll !!

Here we go:

select * from (
select e.* ,max(sal) over (partition by deptno) max_sal
from emp e)e
where sal = max_sal;

Long Live "Tom Kyte".

Good Luck,
r-a-v-i

Oracle 10g : Connect by prior

Database : Oracle 10g
Schema : Scott/Tiger

If you have a hiearchial table, the following generic query presents the data in a hierarchical form.

column graph format a10;

select rpad( '*', 2*level, '*' ) graph, level, t.*
from t
connect by prior p_node = c_node;

p_node = parent node
c_node = child node

If you want to get the hierarchy of a particular node, use "starts with".

Eg :

column graph format a10
select rpad( '*', 2*level, '*' ) graph, level, t.*
from t
start with c_node = '&node_number'
connect by prior p_node = c_node;

Let's take a look at the standard emp table on scott/tiger schema.

To see the full hierarchy in the table (employee -> Manager relationship), fire the following query :

select rpad( '*', 2*level, '*' ) graph, level, e.*
from emp e
connect by prior e.mgr = e.empno
order by deptno;

Let's start with an employee in a department and see what happens.

select rpad( '*', 2*level, '*' ) graph, level, e.*
from emp e
start with e.empno = 7934
connect by prior e.mgr = e.empno
order by deptno;

Long live "Tom kyte".

Good Luck,
r-a-v-i

Wednesday, June 4, 2008

Using "case ....end" in order by

Schema : scott/tiger
Version : Oracle 10g

Say your manager stops by and asks you to produce a result like this, from the standard "emp" table :

DEPTNO ENAME JOB
10 CLARK MANAGER
10 MILLER CLERK
10 KING PRESIDENT
20 JONES MANAGER
20 SCOTT ANALYST
20 FORD ANALYST
20 SMITH CLERK
20 ADAMS CLERK
30 BLAKE MANAGER
30 JAMES CLERK
30 WARD SALESMAN
30 ALLEN SALESMAN
30 TURNER SALESMAN
30 MARTIN SALESMAN

So, if you notice, for each department the manager records shows up first and then the rest of the records in the department are sorted by Job.

So, how do we get this result ?

Once again, Analytics rock and roll !!

Here we go :

select e.deptno, e.ename, e.job
from scott.emp e
order by deptno,
case
when job = 'MANAGER' then
0
else
1
end;

This is how this query works :

Looking at the JOB column, there are jobs before and after 'MANAGER'. Eg : 'ANALYST' comes before 'MANAGER' and 'SALAESMAN' comes after the 'MANAGER'.
So, even if order by "asc" or "desc" we won't achieve the result. The magic is using "case ...end" in order by.
The statement : "case when job='MANAGER' then 0 else 1 end" says : if the job is 'MANAGER' then order by it otherwise, order by the job column.
So, within each partition(or group) the 'MANAGER' records becomes the first record.

Long live "Tom Kyte".

Good Luck !!
r-a-v-i

Thursday, May 29, 2008

Oracle Analytics / Analytic Functions: first_value()

Oracle database version : 10g
Schema : Scott

Analytic Functions Syntax :
Function(arg1,..., argn) OVER ( [PARTITION BY <...>] [ORDER BY <....>] [] )

On the “emp” table, say the requirement is to “get the highest paid job in each department”.

select * from emp order by deptno asc, sal desc;

If you run the above select query, you will see that the result should be :

DEPTNO JOB
10 PRESIDENT
30 MANAGER
20 ANALYST

So, how do we get this result ?

Analytics just Rock & Roll !!!

Note : This is one of the very common requirements … based on the column1, get the column2 for the respective max(column3).

Here we go …

select distinct deptno, first_value(job) over (partition by deptno order by sal DESC ) job
from emp;


first_value() : Returns the first value in an ordered set of values. If the first value in the set is null, then the function returns NULL unless you specify IGNORE NULLS

Note : Google for last_value() function as well.

Following is an example from asktom.

Script to create the test table and data.

CREATE TABLE myTable (
Ship_date DATE NOT NULL
,Ship_type VARCHAR2(10) NOT NULL
,Shipment NUMBER(6,0) NOT NULL
)
/
INSERT INTO myTable VALUES('01-01-2000', 'SHIP1', 27);
INSERT INTO myTable VALUES('01-01-2000', 'SHIP1', 26);
INSERT INTO myTable VALUES('01-01-2000', 'SHIP1', 25);
INSERT INTO myTable VALUES('01-01-2000', 'SHIP2', 24);
INSERT INTO myTable VALUES('01-01-2000', 'SHIP2', 23);
INSERT INTO myTable VALUES('01-01-2000', 'SHIP2', 22);
INSERT INTO myTable VALUES('01-01-2000', 'SHIP3', 21);
INSERT INTO myTable VALUES('01-01-2000', 'SHIP3', 20);
INSERT INTO myTable VALUES('01-01-2000', 'SHIP3', 19);
INSERT INTO myTable VALUES('01-01-2001', 'SHIP1', 18);
INSERT INTO myTable VALUES('01-01-2001', 'SHIP1', 17);
INSERT INTO myTable VALUES('01-01-2001', 'SHIP1', 16);
INSERT INTO myTable VALUES('01-01-2001', 'SHIP2', 15);
INSERT INTO myTable VALUES('01-01-2001', 'SHIP2', 14);
INSERT INTO myTable VALUES('01-01-2001', 'SHIP2', 13);
INSERT INTO myTable VALUES('01-01-2001', 'SHIP3', 12);
INSERT INTO myTable VALUES('01-01-2001', 'SHIP3', 11);
INSERT INTO myTable VALUES('01-01-2001', 'SHIP3', 10);
INSERT INTO myTable VALUES('01-01-2002', 'SHIP1', 9);
INSERT INTO myTable VALUES('01-01-2002', 'SHIP1', 8);
INSERT INTO myTable VALUES('01-01-2002', 'SHIP1', 7);
INSERT INTO myTable VALUES('01-01-2002', 'SHIP2', 6);
INSERT INTO myTable VALUES('01-01-2002', 'SHIP2', 5);
INSERT INTO myTable VALUES('01-01-2002', 'SHIP2', 4);
INSERT INTO myTable VALUES('01-01-2002', 'SHIP3', 3);
INSERT INTO myTable VALUES('01-01-2002', 'SHIP3', 2);
INSERT INTO myTable VALUES('01-01-2002', 'SHIP3', 1);
COMMIT;

Requirement 1 : Select only those rows that have the greatest Shipment value, per Ship_type, per Ship_date.

Solution :

select *
from (select myTable.*,
max(shipment) over(partition by ship_date, ship_type) max_shipment
from myTable)
where shipment = max_shipment

Requirement 2 : select only those rows that have the greatest Shipment value, per Ship_type, per Ship_date, showing ONLY those records with greatest Ship_date per Ship_type.

The result should be :

SHIP_DATE SHIP_TYPE SHIPMENT
---------- ---------- ----------
01-01-2002 SHIP1 9
01-01-2002 SHIP2 6
01-01-2002 SHIP3 3

Solution :

select * from (
select a.*, max(ship_date) over(partition by ship_type) max_ship_date
from (select myTable.*, max(shipment) over(partition by ship_date,
ship_type) max_shipment from myTable
) a
where shipment = max_shipment
) where ship_date = max_ship_date;


Long live “Tom Kyte”.

Good Luck,
r-a-v-i

Insert with check option

One of the ways to validate data while insertion, directly at the database level is : INSERT …. WITH CHECK OPTION

Let’s take an example and see what it is.

Database version : Oracle 10g.

Schema : Scott

Let’s create a test table from the standard “emp” table and work on it.

create table test_emp as select * from emp;

Let us suppose that our requirement is something like this :

Hence forth, in Department 30 manager’s should have commission between 750 – 1000 (included).

For our requirement above, the select query would be :

select *
from test_emp e
where e.deptno = 30
and e.job = 'MANAGER'
and e.comm >= 750
and e.comm <= 1000;


Usually the developers take care of this validation on the front-end itself. If there is only one point of data entry to your database (Eg : Web front-end), it’s okay to validate at the entry point. But think of a situation where there are multiple point of data entries to your database (Eg : Web front-end, Feeds, Web Services, JMS, MDB’s …etc.,).

So,

a) You have to duplicate the validation (it is almost impossible to maintain a common code base between these many discrete systems. Even if you have a common code base, on system might just not invoke the validation at all.)

b) If a common code base for validations is not used, then there could be a bug or missing implementation in any of the systems.

So, how about having a validation just before inserting the data into the database ? PL/SQL strikes immediately right 

We can put the volition logic in a pl/sql procedure/function and let all the above discrete systems invoke it.

But…….. there is a performance overhead. We are adding another layer after data access layer. For simple/moderate systems, it is okay. What we have a high volume system.

Oracle “almost always” has an option.

Oracle prohibits any changes to the table or view that would produce the rows that are not included in the sub query.

Make the above select query as a sub query to insert() as shown below.

insert into
(select *
from test_emp e
where e.deptno = 30
and e.job = 'MANAGER'
and e.comm >= 750
and e.comm <= 1000) values (7935, 'New Guy', 'MANAGER', 7782, sysdate, 1600.00, 500, 30);


The data that we are trying to insert in this query is against our rules, as the commission is = 500.

If you execute the above insert, Oracle still inserts the record.

select * from test_emp e where e.empno = 7935;


1 7935 New Guy MANAGER 7782 5/29/2008 9:02:05 AM 1600.00 500.00 30

Provide “WITH CHECK OPTION” in the sub query as shown below :

insert into
(select *
from test_emp e
where e.deptno = 30
and e.job = 'MANAGER'
and e.comm >= 750
and e.comm <= 1000 with check option)
values
(7935, 'New Guy', 'MANAGER', 7782, sysdate, 1600.00, 500, 30);


When you try to execute the above insert, you will get an error message :

ORA-01402 : view WITH CHECK OPTION where-clause violation

So, no matter where the data is coming from, we could perform a validation directly on the insert itself.

Also, note that we are NOT performing an extra query, as the sub query is within the insert itself.

Useful links :

http://www.oracle.com/technology/oramag/oracle/04-mar/o24asktom.html
http://www.orafaq.com/node/55
http://www.psoug.org/reference/analytic_functions.html
http://asktom.oracle.com/pls/asktom/f?p=100:11:0::::P11_QUESTION_ID:12864646978683
http://asktom.oracle.com/pls/asktom/f?p=100:11:0::::P11_QUESTION_ID:3170642805938

Hope this helps.

Long Live “Tom Kyte”.

Good Luck,
r-a-v-i

Tuesday, May 6, 2008

Oracle Analytics/Analytic Functions : How to get number of rows(count ) returned by a given query, in the same query ?

Often we, the Java/Oracle developers run into the situation, where we need to find the number of rows (count) returned by a query.
You can just find out the count by firing 2 queries.
Let's take an example and see how it works.

Schema : scott/tiger

Analytic Functions Syntax :

Function(arg1,..., argn) OVER ( [PARTITION BY <...>] [ORDER BY <....>] [] )

Let's suppose that,we are dealing with the employees in departments 10, 20.

Query#1 :

SELECT empno, deptno
FROM emp
WHERE deptno IN (10, 20)
ORDER BY deptno, empno;

Let's suppose that the requirement is : if the number of employees in departments 10,20 is more than 10, then set their manager as SCOTT (EmpNo#7788).
(Some dummy requirement ...please don't bother about it).

So, now we need to find out the count(). Let's find out the count.

Query#2 :

SELECT sum(count(*))
FROM emp
WHERE deptno IN (10, 20)
group by deptno;

This query would return the count, but if you did notice, we don't have the "empno" column in the select list.

We can't have "empno" in the column list as it is not in the group by columns. So, to get the empno of each department,we have to
fire the Query#2.

So, the conclusion is, using this approach without firing 2 queries we can't get the count() of rows returned by Query#1.

In Java world, you can use ScrollableResultSet to get the count. But again, you are doing an extra operation there to get the count.

So....without doing any extra operation or firing an extra query, how to get the count ?

Oracle Analytics comes to rescue.

Query#3 :

SELECT empno, deptno, count(*) over() cnt
FROM emp
WHERE deptno IN (10, 20)
ORDER BY deptno, empno;

OVER(), acts on entire record set returned by the where clause. Here is the result of Query#3.

EMPNO DEPTNO CNT
1 7782 10 8
2 7839 10 8
3 7934 10 8
4 7369 20 8
5 7566 20 8
6 7788 20 8
7 7876 20 8
8 7902 20 8

If you notice, using this approach, we have 2 benefits :

1) We can find the count() of rows returned by a query
2) We can also include other columns in the select list which are not part of the where clause.
Since we are not using "group by", we are out of it's limitations.

Long live "Tom Kyte".

Hope this helps !!

Good Luck,
r-a-v-i

Wednesday, March 5, 2008

Java/Oracle : Should I Encrypt / Hash the passwords ? What is the difference ?

The standard question asked by a Java/Oracle developer.

Should I Encrypt OR Hash the passwords ? What is the difference ?

The BIG difference between Encryption and Hashing is that, the data that is encrypted, should be able to be decrypted. Whereas, the data that is hashed, CANNOT be reversed.

Let's take an example.

Usually we authenticate users to log on to our systems or web sites. So, following are the steps :

1. User logs onto a web site and provides user name/password.
2. We need to authenticate the user and if the credentials are valid, log him in or deny access.

So, in step 2, we just need to make sure that he entered a correct password. For that, we DON'T need to store the user's password either in text form or in an encrypted form.
In either of these cases, I mean either you store the password in text form or in encrypted form, there is a possibility that the password can be stolen and reversed.
If we store the password in the HASH form, since it cannot be reversed, we are SAFE.
Unix/Oracle ...etc., follow the same methodology for authentication.

So, how do we apply hashing on plain text passwords.

Let us suppose that the password is : password123

Then the query would look like :

select dbms_crypto.hash(utl_raw.cast_to_raw('password123'),dbms_crypto.HASH_MD5) hashed_password from dual;

If you notice, the hash() takes a second parameter where you can specify, which hashing algorithm you would like to use.

-- Hash Functions
HASH_MD4 CONSTANT PLS_INTEGER := 1;
HASH_MD5 CONSTANT PLS_INTEGER := 2;
HASH_SH1 CONSTANT PLS_INTEGER := 3;

If you are using Oracle 10g, then you can use dbms_crypto.hash().
Otherwise, if you are using Oracle 8i/9i, you have to use - DBMS_OBFUSCATION_TOOLKIT.MD5.

Take a stab at the Oracle Guru's web site : http://asktom.oracle.com/tkyte/Misc/Passwords.html


Long Live "Tom Kyte".


Good Luck !!

r-a-v-i

Tuesday, March 4, 2008

Oracle 10g : PL/SQL : Conditional Compilation

If you are a pl/sql developer and if you looking for best practices on unit testing, this is for you.

Usually we run into situations where we need to debug some code, but we don't want to be running the debug code in production. Do we ???

Oracle 10g provides a beautiful feature - conditional compilation. (Like #ifdef in C).

Let's see an example.

Create a test procedure ...

create or replace procedure test_proc
as
begin
dbms_output.put_line( 'Debug 1' );
dbms_output.put_line( 'Debug 2' );
$IF $$debug_code $THEN
dbms_output.put_line( 'Debug 3' );
dbms_output.put_line( 'Debug 4' );
$END
dbms_output.put_line( 'Debug 5' );
end;

set echo on;
set serveroutput on;

Let us suppose that we are running this procedure on our dev/qa environment.

Then, you have to run the following command :
------------------------------------------------------------------------------------
SQL> alter procedure test_proc compile plsql_ccflags = 'debug_code:true' reuse settings;

Procedure altered

SQL>
------------------------------------------------------------------------------------

Now let's run the procedure and see what the output is ....
------------------------------------------------------------------------------------
SQL> exec test_proc;

Debug 1
Debug 2
Debug 3
Debug 4
Debug 5

PL/SQL procedure successfully completed

SQL>
------------------------------------------------------------------------------------

Let's suppose that we are all set to go to production and we need to deploy this procedure on production and we don't want all the debugging.

Then, execute the command :

------------------------------------------------------------------------------------
SQL> alter procedure test_proc compile plsql_ccflags = 'debug_code:false' reuse settings;

Procedure altered

SQL>
------------------------------------------------------------------------------------
That's it. Let's see the output of the procedure, if we run it on production.

------------------------------------------------------------------------------------
SQL> exec test_proc;

Debug 1
Debug 2
Debug 5

PL/SQL procedure successfully completed

SQL>
------------------------------------------------------------------------------------
So, on production when the debug is disabled, you might have noticed that the statements "Debug 3" and "Debug 4" did not print.

Plan B : If you don't want to pollute your code, you can you Log4PLSQL.

Long Live "Tom Kyte".

Good Luck !!

r-a-v-i

Oracle 10g : How to delete duplicate rows ?

If you are working on a table with millions of rows and you have to delete the duplicates, most probably you should have run into situation where the query takes hours and hours and hours ....... Say your DATA table has about 10 million rows and you are attempting to delete the duplicate rows.

delete from test1
where rowid not in (select min(rowid)
from test1 group by grp_id);

Even if you try to wear a DBA's hat and increase the rollback segment size, you may not see a big difference.

You basically have two ways to go :
a) Create a new table by selecting the unique rows from the data table ,drop the data table, rename the new table to "data". But if your table is heavily being referenced by procedures/functions ... you may not do it some times. Then what to do ?
b) Use analytics.

Approach (A) :

create table new_data
as
select grp_id, ...
from
(select d.*,
row_number() over
(partition by grp_id
order by rowid) rn
from data d
)
where rn = 1;

Approach (B) :

delete from data
where rowid in
(select rid
from
(select rowid rid,
row_number() over
(partition by
grp_id
order by rowid) rn
from data
)
where rn <> 1
);

You would be surprised by the speed of analytics. It will most probably come down to minutes.

Long Live "Tom Kyte".


Good Luck !!
r-a-v-i

Oracle 10g : Case-Insensitive Searching

Say in the famous Scott's "emp" table, I have data like : "Ravi", "rAvi", "RAvi" ...etc.,. So, if I need to fire a query for "ravi", the standard practice is to create a Upper(emp_name) index and fire the following query :

select * from emp where upper(emp_name) like upper('%raVI%');

Prior to Oracle 10g, you had to adopt one of the following strategies:

* Use a function-based index on UPPER (column_name) and modify the queries to use WHERE UPPER (column_name) = value.
* Use a trigger to roll the column value to upper- or lowercase upon modification.
* Use Oracle Text to create a TEXT index on the column; text indexes (which would then mandate the use of the CONTAINS operator in the predicate) can be case-sensitive or -insensitive.

In each of these cases, one of your conditions would have been violated. You would have needed to use triggers, or UPPER() or CONTAINS in the WHERE clause.

In Oracle 10g, you can do this transparently, without effecting your query. Which means that, you don't have to use UPPER() or CONTAINS in your where clause.

Wait ...wait ...I know ... you will believe only if I show you an example. right ?

Here you go ...

drop table cit;

create table cit ( data varchar2(20) );

insert into cit values ( 'Ravi' );
insert into cit values ( 'rAVi' );
insert into cit values ( 'rAvI' );

commit;

/*Create a function-based index on the DATA column and use the binary case insensitive sort*/

create index cit_idx on cit( nlssort( data, 'NLS_SORT=BINARY_CI' ) );

select * from cit where data = 'ravi';
--You will get zero records.

alter session set nls_comp=ansi;
alter session set nls_sort=binary_ci;


select * from cit where data = 'ravi';
--You will get 3 records.

--Now, let's take a look at the explain plan. CBO is using FTS.
--Let's fake CBO that there are 10000000 in the table.
exec dbms_stats.set_table_stats (ownname=>user,tabname=>'CIT',numrows=> 10000000);

--Now run the select again and take a look at the explain plan.
select * from cit where data = 'ravi';

Execution Plan
------------------------------
SELECT STATEMENT (Cost=2)
TABLE ACCESS (BY INDEX ROWID)
INDEX (RANGE SCAN) OF 'CIT_IDX'

Isn't it just awsome !!


Long live "Tom Kyte".


Source : http://www.oracle.com/technology/oramag/oracle/04-jul/o44asktom.html

Good Luck !!

r-a-v-i

Oracle 10g : How to pass ARRAYS of records from Java/Tomcat to Oracle

Environment : JDK 1.5, Tomcat 5.5, Oracle 10gR2.

Let's suppose that you have a web app where you get some records from the user interface and from your DAO, you are trying to pass the records as Oracle ARRAYS to database. Here is a step by step example.

Yes, it's a nightmare as you have to take care of some steps. But, once you understand what to do and what are the issues, it's pretty easy.

Use Case : Let us suppose that we have a java bean Employee and we are trying to send an array of employee records at a time to database.

/*Step 1 : Create a object type in the database*/
/*
CREATE OR REPLACE TYPE "EMP_TYPE" is object(
emp_id Varchar2(500),
emp_name varchar2(500));

*/
/*
Step 2 : Create a type EMP_TYPE_TABLE

CREATE OR REPLACE TYPE "EMP_TYPE_TAB";
*/

/*Step 3 : Create a Java bean which maps the attributes of the above object type in Step 2.*/

import java.io.Serializable;
import java.sql.SQLData;
import java.sql.SQLException;
import java.sql.SQLInput;
import java.sql.SQLOutput;

public class Employee implements SQLData, Serializable{
static final long serialVersionUID = 4070409649129120458L;
public Employee(){}

// constructor that takes parameters
// getters and setters for emp_id, emp_name
// You have to implement readSQL() and writeSQL() methods, as shown below.
// This is where you are mapping the Employee table's columns to the Employee
//java bean.

public void readSQL(SQLInput stream, String typeName) throws SQLException {
this.emp_id = stream.readString();
this.emp_name = stream.readString();
}
public void writeSQL(SQLOutput stream) throws SQLException {
stream.writeString(emp_id);
stream.writeString(emp_name);
}
}

//EmpDAO class gets a connection to the database and passes the data.

public class EmpDAO{
java.sql.Connection conn;
java.sql.Connection dconn;
/*
Step 1 : Get database connection
This is a very important step. To pass your records of data as Arrays, you need to get a oracle.jdbc.driver.T4CConnection and then use ArrayDescriptor's. So, how do you get a T4CConnection ?


To get T4CConnection from java.sql.Connection, you need to cast like this :
t4cConn = ((DelegatingConnection)conn).getInnermostDelegate();

If you are working on tomcat, you have two options to get a DataSource in your context.xml.
a) By using apache commons-dbcp
OR
b) by directly using javax.sql.DataSource.
Let's see how to get the T4CConnection in both these cases.
*/

public void sendRecordsToDB(){

//Use Case (a) : if you configured apache commons-dbcp
BasicDataSource ds = (BasicDataSource)ctx.lookup(jndiName);
ds.setAccessToUnderlyingConnectionAllowed(true);
conn = ds.getConnection();
dconn = ((DelegatingConnection)conn).getInnermostDelegate();

//Use Case (b) : if you are directly using javax.sql.DataSource

BasicDataSource bds = new BasicDataSource();
bds.setDriverClassName("");
bds.setUsername("");
bds.setPassword("");
bds.setUrl("jdbc:oracle:thin:@");
bds.setAccessToUnderlyingConnectionAllowed(true);
conn = bds.getConnection();
dconn = ((DelegatingConnection)conn).getInnermostDelegate();

/*So, using either of the above approaches we got dconn, which is an instance of T4CConnection.*/

/* Now let's build an array list of employees.
*/
final List listOfEmployees = new LinkedList();

Employee e1 = new Employee();
e.setEmpId(1);
e.setEmpName("Ravi");

listOfEmployees.add(e1);

Employee e2 = new Employee();
e.setEmpId(2);
e.setEmpName("Vedala");

listOfEmployees.add(e2);

// Now, create an array descriptor

ArrayDescriptor descriptor = ArrayDescriptor.createDescriptor( "EMP_TYPE_TAB", dconn );

ARRAY array_to_pass = new ARRAY( descriptor, dconn, (Object[]) listOfEmployees.toArray());
ps = (OracleCallableStatement)dconn.prepareCall("begin insert_employees(:1); end;");
ps.setARRAY( 1, array_to_pass );
ps.execute();
conn.commit();
}

/*
- See how simple and beautiful is the procedure.
- Using the TABLE() function, you can treat the whole array as a table as EMP_TYPE_TAB is a nested table.
*/

PROCEDURE insert_employees(p_emparray in EMP_TYPE_TAB) AS
BEGIN
/* INSERT ARRAY OF RECORDS IN TO THE EMP TABLE*/
INSERT INTO scd_company_staging
(emp_id,emp_name)
SELECT * FROM TABLE(p_empparray);
END insert_employees;

The nightmare exception for Java/Oracle developers :-)

java.lang.ClassCastException: oracle.jdbc.driver.T4CConnection cannot be cast to oracle.jdbc.OracleConnection
at oracle.sql.ArrayDescriptor.createDescriptor(ArrayDescriptor.java:149)
at oracle.sql.ArrayDescriptor.createDescriptor(ArrayDescriptor.java:115)
...........

Solution :
a)You will see the above exception, if you have ojdbc14.jar in your war file. You would be having ojdbc14.jar on your classpath for compiling your java classes. Use it only for compilation. Don't include it in the build to Tomcat. ie., the war file of your web app should NOT have ojdbc14.jar in it.

b) Make sure that the Oracle thin driver (eg : ojdbc14.jar) is in tomcat's common\lib.

Long live "Tom Kyte".


Good Luck !!
r-a-v-i

Saturday, March 1, 2008

Oracle 10g : How to pass ARRAYS to Oracle using Java?

Environment : JDK 1.5 (did not test on older versions of java but it should work on them !!)

Keep the JDBC driver in the classpath : '''ojdbc14.jar'''

Note : Oracle ARRAY is supported on Oracle9i Database version 9.0.1 or later.

Take a look at the package : Tests

'''Java Code''' :
(Look at the technique - don't look at the java standard practices like using try-catch-finally ...etc., to keep the code readable, I have just taken them out.)
----------------------------------------------------------------------------------------------------------
import java.sql.CallableStatement;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import oracle.jdbc.OracleTypes;
import oracle.sql.ARRAY;
import oracle.sql.ArrayDescriptor;
public class TestArray {
private static Connection getConnection() {
Connection conn = null;
try {
DriverManager.registerDriver(new oracle.jdbc.driver.OracleDriver());
conn =
DriverManager.getConnection("",
"test", "test");
conn.setAutoCommit(false);
System.out.println("Connection :- " + conn);
} catch (SQLException e) {
System.out.println(e.getMessage());
}
return conn;
}

private static void insertEmp(Connection conn) throws Exception {
/* let us create 20 names in a string array*/
String[] names = new String[20];
for (int i = 0; i < 20; i++) {
names[i] = "Ravi" + i;
}
/*Create an array descriptor of type STRINGS */
ArrayDescriptor desc =
ArrayDescriptor.createDescriptor("STRINGS", conn);
/* Create an array object */
ARRAY nameArray = new ARRAY(desc, conn, names);
String st = "begin Tests.insert_emp(:1); end;";
CallableStatement cs = conn.prepareCall(st);
cs.setArray(1, nameArray);
cs.execute();
cs.close();
conn.commit();
System.out.println("Successfully INSERTED " + names.length +
" EMPLOYEE records :- ");
}

private static void getEmp(Connection conn) throws Exception {
String[] names = { };
/*Create an array descriptor of type STRINGS */
ArrayDescriptor desc =
ArrayDescriptor.createDescriptor("STRINGS", conn);
/* Create an array object */
ARRAY nameArray = new ARRAY(desc, conn, names);
String st = "begin Tests.get_emp(:1); end;";
CallableStatement cs = conn.prepareCall(st);
/* Register the out parameter as STRINGS */
cs.registerOutParameter(1, OracleTypes.ARRAY, "STRINGS");
cs.execute();
/* Get the array into a local ARRAY object */
nameArray = (ARRAY)cs.getArray(1);
System.out.println("Array is of type " + nameArray.getSQLTypeName());
System.out.println("Array element is of type code "+nameArray.getBaseType());
System.out.println("Array is of length " + nameArray.length());
/* Get the array of names into a string array */
names = (String[])nameArray.getArray();
System.out.println("*****************************************************************");
System.out.println("Retrieving ONLY the names of employees as an ARRAY \n");
System.out.println("*****************************************************************");
for (int i = 0; i < names.length; i++)
System.out.println(names[i]);
cs.close();
}

private static void getEmpRecs(Connection conn) throws SQLException {
/* Create an array of objects (since we are getting heterogeneous data)*/
Object[] empRecs = { };
/* Create an array descriptor for EMP_REC_ARRAY */
ArrayDescriptor desc =
ArrayDescriptor.createDescriptor("EMP_REC_ARRAY", conn);
ARRAY nameArray = new ARRAY(desc, conn, empRecs);
String st = "begin Tests.get_emp_recs(:1); end;";
CallableStatement cs = conn.prepareCall(st);
/* Register OUT param as EMP_REC_ARRAY */
cs.registerOutParameter(1, OracleTypes.ARRAY, "EMP_REC_ARRAY");
cs.execute();
/* Get the Array into a local ARRAY object */
nameArray = (ARRAY)cs.getArray(1);
System.out.println("Array is of type " + nameArray.getSQLTypeName());
System.out.println("Array element is of type code "+nameArray.getBaseType());
System.out.println("Array is of length " + nameArray.length());
/* Get the Employee Records */
empRecs = (Object[])nameArray.getArray();
System.out.println("***********************************************************");
System.out.println("Retrieving the EMPLOYEE RECORDS as an ARRAY !!\n");
System.out.println("***********************************************************");
int id = 0;
String name = "";
for (int i = 0; i < empRecs.length; i++) {
/* Since we don't know the type of the record, get it into STRUCT !! */
oracle.sql.STRUCT empRec = (oracle.sql.STRUCT)empRecs[i];
/* Get the attributes - nothing but the columns of the table */
Object[] attributes = empRec.getAttributes();
/* 0- first column, 1 - second column ...*/
id = Integer.parseInt("" + attributes[0]);
name = "" + attributes[1];
System.out.println("id = " + id + " name = " + name);
}
cs.close();
}

public static void main(String[] args) throws Exception {
Connection conn = null;
try {
conn = getConnection();
/*Insert 20 employee names*/
insertEmp(conn);
/*Get the 20 employee names*/
getEmp(conn);
/*Get the 20 employee RECORDS*/
getEmpRecs(conn);
} catch (Exception e) {
System.out.println(e.getMessage());
} finally {
if (conn != null)
conn.close();
}
}
}

----------------------------------------------------------------------------------------------------------
'''SQL Scripts :'''

Create a test table and the types :

create table emp(id number , name varchar2(50))
/

create or replace type Strings IS VARRAY(20) of VARCHAR2(30)
/

create or replace type emp_rec IS OBJECT (id number, name varchar2(30))
/

create or replace type emp_rec_array AS VARRAY(100) of emp_rec
/

create or replace package Tests is

-- Author : r-a-v-i

procedure insert_emp(emp_names Strings);
procedure get_emp(emp_names out Strings);
procedure get_emp_recs(emp_recs out emp_rec_array);
end Tests;
/
create or replace package body Tests is
procedure insert_emp(emp_names Strings)
as
cnt Integer := 1;
v_id Integer := 0;
begin
dbms_output.put_line(emp_names.COUNT);
loop
if cnt > emp_names.COUNT then
exit;
else
dbms_output.put_line(cnt||' = '||emp_names(cnt));
select nvl(max(id),0) + 1 into v_id from emp;
insert into emp(id,name) values (v_id,emp_names(cnt));
end if;
cnt := cnt + 1;
end loop;
commit;
end;
procedure get_emp(emp_names out Strings)
as
v_emp_names Strings := Strings();
cursor c1 is select name from emp order by id;
v_name varchar2(30);
cnt Integer := 1;
begin
open c1;
loop
fetch c1 into v_name;
if c1%notfound then exit;
else
dbms_output.put_line(cnt||' = '||v_name);
v_emp_names.extend;
v_emp_names(cnt) := v_name;
end if;
cnt := cnt + 1;
end loop;
emp_names := v_emp_names;
end;
procedure get_emp_recs(emp_recs out emp_rec_array)
as
v_emp_recs emp_rec_array := emp_rec_array();
cursor c1 is select id,name from emp order by id;
v_id Integer;
v_name varchar2(30);
v_emp_rec emp_rec;
cnt Integer := 1;
begin
open c1;
loop
fetch c1 into v_id,v_name;
if c1%notfound then exit;
else
v_emp_rec := emp_rec(v_id,v_name);
v_emp_recs.extend;
v_emp_recs(cnt) := v_emp_rec;
end if;
cnt := cnt + 1;
end loop;
emp_recs := v_emp_recs;
end;
end Tests;
/

Long live "Tom Kyte".


----------------------------------------------------------------------------------------------------------