Wednesday, June 26, 2013

How to find duplicate value in a row

Comments:
        You can use this proc in 2 different ways.
         1. If you want to compare all columns in a table then only specify a table name like
         
         exec Find_duplicate_values_in_row 'Table_name'

         2.If you want to compare in specific columns then run this proc like

         exec Find_duplicate_values_in_row 'Table_name','column_1,column_2,column_3'

========================================================================

   I created a table  [GETDUPLICATEENTRYINROW @#$]  in this proc.
   So plz check if this table already exists in your database or not.
   If there is a table of same name in your database ,
   then replace this table name in my procdure by
   another unique table name which does not exists in your database

========================================================================
   I used a function 'fncsvtotable' in this proc.
   I am providing this function seprately.
   Download this function and run it first , then run the main procdure.

Thanks


Function 'FnCsvToTable'

-- select col_value from fncsvtotable('2,3')

CREATE FUNCTION [dbo].[fnCsvToTable]
(
@csv_string VARCHAR(max)
)

RETURNS @output_table_tmp TABLE(col_value VARCHAR(100))

AS

BEGIN

DECLARE @value VARCHAR(100)

WHILE LEN(@csv_string) > 0
BEGIN
SET @value = LTRIM(LEFT(@csv_string, ISNULL(NULLIF(CHARINDEX(',', @csv_string) - 1, -1), LEN(@csv_string))))
SET @csv_string = SUBSTRING(@csv_string,ISNULL(NULLIF(CHARINDEX(',', @csv_string), 0),
  LEN(@csv_string)) + 1, LEN(@csv_string))
IF ISNULL(@value, '') <> ''
INSERT INTO @output_table_tmp VALUES (@value)
END

RETURN
END


Main Procdure 'Find_duplicate_values_in_row'

create proc find_duplicate_values_in_row
(
@tablename varchar(1000)
,@columnname varchar(max)='all'
)
as
begin
set nocount on

/*==================================================================

   i created a table  [getduplicateentryinrow @#$]  in this proc.
   so plz check if this table already exists in your database or not.
   if there is a table of same name in your database , then replace
   this table name in my procdure by another unique table name which
   does not exists in your database

----------------------------------------------------------------------------------

   i used a function 'fncsvtotable' in this proc.
   i am providing this function below in comment.
   copy this function and run it first , then run the main procdure.

---------------------------------------------------------------------------------

create function [dbo].[fncsvtotable]
(
@csv_string varchar(max)
)

returns @output_table_tmp table(col_value varchar(100))

as

begin

declare @value varchar(100)

while len(@csv_string) > 0
begin
set @value = ltrim(left(@csv_string, isnull(nullif(charindex(',', @csv_string) - 1, -1), len(@csv_string))))
set @csv_string = substring(@csv_string,isnull(nullif(charindex(',', @csv_string), 0),
  len(@csv_string)) + 1, len(@csv_string))
if isnull(@value, '') <> ''
insert into @output_table_tmp values (@value)
end

return
end

===============================================================*/

if not exists(select 1
 from information_schema.tables
 where table_name=@tablename)
begin
select 'table does not exists.'
return
end
create table #table_structure
(
row_number int
,column_name varchar(1000)
)

create table #temp_match_xy
(
row_no int
,x varchar(max)
,y  varchar(max)
)

create table #final_table
(
row_number int
,value varchar(max)
,count int
)

if @columnname='all'
begin
insert into #table_structure(row_number,column_name)
select ordinal_position,column_name from information_schema.columns
where table_name=@tablename
order by ordinal_position
end
else
begin
declare @columns_not_exists varchar(max)
set @columns_not_exists=''

select @columns_not_exists=@columns_not_exists + ','+ column_name
 from (select column_name
from information_schema.columns
where table_name=@tablename
and column_name in (select col_value
from fncsvtotable(@columnname)
group by col_value)
union all
select col_value
from fncsvtotable(@columnname)
group by col_value) as asdf
group by column_name
having count(*)=1

if @columns_not_exists<>''
begin
select stuff(@columns_not_exists + ' columns does not exists in specified table',1,1,'')
return
end

insert into #table_structure
select row_number() over (order by col_value) as rownumber,col_value
from fncsvtotable(@columnname)
end

declare @query varchar(max)
declare @column_name_csv varchar(max)
set @column_name_csv=''
(select @column_name_csv=@column_name_csv + ','+ column_name from #table_structure)
set @column_name_csv=(select stuff(@column_name_csv,1,1,''))


set @query='select row_number() over (order by (select top 1 column_name from #table_structure)) as [identity column @#$],'+ @column_name_csv +
' into [getduplicateentryinrow @#$] from  ' +@tablename

exec(@query)


declare @x varchar(max)
set @x=''
declare @y varchar(max)
set @y=''

declare @i varchar(10)
set @i=1

declare @j varchar(10)
set @j=1

declare @k varchar(10)
set @k=1

declare @count varchar(10)
set @count=1

while(@k<= (select max([identity column @#$]) from [getduplicateentryinrow @#$]))
begin
while(@i<= (select max(row_number) from #table_structure))
begin
-- print('i='+@i)
set @x=(select top 1 'select top 1 ''1'',' + column_name + ' from [getduplicateentryinrow @#$]
where [identity column @#$]=' + @k from #table_structure where row_number=@i)

delete from #temp_match_xy where row_no=1

insert into #temp_match_xy(row_no,x)
exec(@x)
--exec('select * from #temp_match_xy')
while(@j <= (select max(row_number) from #table_structure))
begin
-- print('j='+@j)
set @y=(select top 1 'select top 1 ''2'',' + column_name + ' from [getduplicateentryinrow @#$]
where  [identity column @#$]=' + @k from #table_structure where row_number=@j)

delete from #temp_match_xy where row_no=2

insert into #temp_match_xy(row_no,y)
exec(@y)
--exec('select * from #temp_match_xy')

if (@i<>@j)
begin
if((select x from #temp_match_xy where row_no=1)=(select y from #temp_match_xy where row_no=2))
begin
set @count=@count+1
--print('count='+@count)
insert into #final_table(row_number,value,count)
select @k,(select x from #temp_match_xy where row_no=1),@count
end
end
set @j=@j+ 1
end
set @count=1
set @j=1
set @i=@i+1
end
set @i=1
set @k=@k+1
end


-- select * from #final_table

select ROW_NUMBER,value as DUPLICATE_VALUE,max(count) as COUNT from #final_table
group by row_number,value

-- select * from #table_structure
drop table #table_structure
drop table #final_table
drop table #temp_match_xy
drop table [getduplicateentryinrow @#$]
set nocount off

end









2 comments:

  1. hi prashant...in the first table when you have 4 coloumn,at rownum=5 you have to duplicate values 1 and 3 but yor result show that you have duplicate value 1 and at rownum 7 you dont have any duplicate value but your result still shows duplicate_value is 3 and count is 2.
    Have a look...
    otherwise great work..:):)

    ReplyDelete
  2. You reading this result wrong. Result is showing

    row_no dup_val count
    ----------------------------
    5 1 2
    5 3 2

    means in mail table
    row no 5 which is
    1 3 3 1
    has 2 duplicate values first is 1 and other is 3

    ReplyDelete