-- UPDATE, JULY 2006 --
Here is a reliable way to use the analytical function ROW_NUMBER()
to delete duplcates with one sql statement, very fast!
/* ©2006 by Edward Stoever, */
DELETE FROM goremal
WHERE ROWID IN (
--
SELECT ROWID
FROM (SELECT ROWID,
ROW_NUMBER () OVER
(PARTITION BY goremal_pidm ORDER BY goremal_preferred_ind DESC,
goremal_activity_date DESC) AS rownumber,
goremal_pidm, goremal_emal_code,
goremal_email_address, goremal_status_ind,
goremal_preferred_ind, goremal_activity_date
FROM goremal
WHERE goremal_email_address
|| goremal_pidm
|| goremal_emal_code IN (
SELECT goremal_email_address
|| goremal_pidm
|| goremal_emal_code
FROM goremal
GROUP BY goremal_pidm,
goremal_emal_code,
goremal_email_address
HAVING COUNT (*) > 1)
ORDER BY goremal_pidm,
goremal_preferred_ind DESC,
goremal_activity_date DESC)
WHERE rownumber <> 1
--
);
-- In the above deletion, you can figure out what it is doing by working from the
-- inner-most query out. The first query finds duplicates where the new primary
-- key, based on three columns, are concatenated together. Then, it just looks
-- throught the table and finds those values again from all rows, orders them
-- based upon criteria most desirable for keeping such as an indicator, a date.
-- It deletes the duplicates that are not most desirable, and if there are any
-- exact duplcates, it only deletes one of them. Works great and is easier to use
-- and understand than the PL/SQL below!
-- ORACLE - DELETE DUPLICATES EASILY
-- I recently did some work for a private client whose database was designed in a
-- very sloppy manner. The database had no primary keys or foreign keys at all!
-- I needed a way to delete duplicate rows so that I could apply primary keys
-- in all the necessary places. Most queries that look for duplcate rows are
-- extremely slow. One query that can find such rows is:
/*
SELECT col_1, col_2, COUNT (*)
FROM table_name
GROUP BY col_1, col_2
HAVING COUNT (*) > 1;
*/
-- Note: this table would have a primary key on the combination of col_1 and col_2
-- This query is fast. The poblem with it is that it returns BOTH rows that
-- duplicate each other and we only want to delete ONE of the rows in order to apply our
-- primary key. The following anonymous block transforms the above query into a
-- delete statement for one of the rows, and it is still very fast!
DECLARE -- Code ©2004 by Edward Stoever
CURSOR c_get_duplicates
IS
SELECT user_login, user_password, COUNT (*)
FROM sp.sp_users
GROUP BY user_login, user_password
HAVING COUNT (*) > 1;
var_get_duplicates c_get_duplicates%ROWTYPE;
CURSOR c_del_only_one
IS
SELECT ROWID
FROM sp.sp_users
WHERE user_login = var_get_duplicates.user_login
AND user_password = var_get_duplicates.user_password;
var_del_only_one ROWID;
BEGIN
OPEN c_get_duplicates;
LOOP
FETCH c_get_duplicates
INTO var_get_duplicates;
EXIT WHEN c_get_duplicates%NOTFOUND;
OPEN c_del_only_one;
FETCH c_del_only_one
INTO var_del_only_one;
DELETE FROM sp.sp_users
WHERE ROWID = var_del_only_one;
COMMIT;
CLOSE c_del_only_one;
END LOOP;
CLOSE c_get_duplicates;
END;
/
--NOTE - This script needs a tweak to handle values that might be NULL.
--Here is an example of how I ran it recently on values that were NULL.
DECLARE -- Code ©2004 by Edward Stoever
CURSOR c_get_duplicates
IS
SELECT ssrfees_term_code, ssrfees_crn, ssrfees_detl_code,
ssrfees_ftyp_code, ssrfees_levl_code, COUNT (*)
FROM ssrfees
HAVING COUNT (*) > 1
GROUP BY ssrfees_term_code,
ssrfees_crn,
ssrfees_detl_code,
ssrfees_ftyp_code,
ssrfees_levl_code;
var_get_duplicates c_get_duplicates%ROWTYPE;
CURSOR c_del_only_one
IS
SELECT ROWID
FROM ssrfees
WHERE ssrfees_term_code = var_get_duplicates.ssrfees_term_code
AND ssrfees_crn = var_get_duplicates.ssrfees_crn
AND ssrfees_detl_code = var_get_duplicates.ssrfees_detl_code
AND NVL(ssrfees_ftyp_code,'1') = NVL(var_get_duplicates.ssrfees_ftyp_code,'1')
AND NVL(ssrfees_levl_code,'1') = NVL(var_get_duplicates.ssrfees_levl_code,'1');
var_del_only_one ROWID;
BEGIN
OPEN c_get_duplicates;
LOOP
FETCH c_get_duplicates
INTO var_get_duplicates;
EXIT WHEN c_get_duplicates%NOTFOUND;
OPEN c_del_only_one;
FETCH c_del_only_one
INTO var_del_only_one;
DELETE FROM ssrfees
WHERE ROWID = var_del_only_one;
COMMIT;
CLOSE c_del_only_one;
END LOOP;
CLOSE c_get_duplicates;
END;
/
|