Sql Authority to Make easy to overflowing codes,scripts,queries,projects: How to find duplicate value in a row

Comments:
You can use this proc in 2 different ways.
1. If you want to compare all columns in a table then only specify a table name like

exec Find_duplicate_values_in_row 'Table_name'

2.If you want to compare in specific columns then run this proc like

exec Find_duplicate_values_in_row 'Table_name','column_1,column_2,column_3'

========================================================================

I created a table [GETDUPLICATEENTRYINROW @#$] in this proc.
So plz check if this table already exists in your database or not.
If there is a table of same name in your database ,
then replace this table name in my procdure by
another unique table name which does not exists in your database

========================================================================
I used a function 'fncsvtotable' in this proc.
I am providing this function seprately.
Download this function and run it first , then run the main procdure.

Thanks

Function 'FnCsvToTable'

-- select col_value from fncsvtotable('2,3')

CREATE FUNCTION [dbo].[fnCsvToTable]
(
@csv_string VARCHAR(max)
)

RETURNS @output_table_tmp TABLE(col_value VARCHAR(100))

AS

BEGIN

DECLARE @value VARCHAR(100)

WHILE LEN(@csv_string) > 0
BEGIN
SET @value = LTRIM(LEFT(@csv_string, ISNULL(NULLIF(CHARINDEX(',', @csv_string) - 1, -1), LEN(@csv_string))))
SET @csv_string = SUBSTRING(@csv_string,ISNULL(NULLIF(CHARINDEX(',', @csv_string), 0),
LEN(@csv_string)) + 1, LEN(@csv_string))
IF ISNULL(@value, '') <> ''
INSERT INTO @output_table_tmp VALUES (@value)
END

RETURN
END

Main Procdure 'Find_duplicate_values_in_row'

create proc find_duplicate_values_in_row
(
@tablename varchar(1000)
,@columnname varchar(max)='all'
)
as
begin
set nocount on

/*==================================================================

i created a table [getduplicateentryinrow @#$] in this proc.
so plz check if this table already exists in your database or not.
if there is a table of same name in your database , then replace
this table name in my procdure by another unique table name which
does not exists in your database

----------------------------------------------------------------------------------

i used a function 'fncsvtotable' in this proc.
i am providing this function below in comment.
copy this function and run it first , then run the main procdure.

---------------------------------------------------------------------------------

create function [dbo].[fncsvtotable]
(
@csv_string varchar(max)
)

returns @output_table_tmp table(col_value varchar(100))

as

begin

declare @value varchar(100)

while len(@csv_string) > 0
begin
set @value = ltrim(left(@csv_string, isnull(nullif(charindex(',', @csv_string) - 1, -1), len(@csv_string))))
set @csv_string = substring(@csv_string,isnull(nullif(charindex(',', @csv_string), 0),
len(@csv_string)) + 1, len(@csv_string))
if isnull(@value, '') <> ''
insert into @output_table_tmp values (@value)
end

return
end

===============================================================*/

if not exists(select 1
from information_schema.tables
where table_name=@tablename)
begin
select 'table does not exists.'
return
end
create table #table_structure
(
row_number int
,column_name varchar(1000)
)

create table #temp_match_xy
(
row_no int
,x varchar(max)
,y varchar(max)
)

create table #final_table
(
row_number int
,value varchar(max)
,count int
)

if @columnname='all'
begin
insert into #table_structure(row_number,column_name)
select ordinal_position,column_name from information_schema.columns
where table_name=@tablename
order by ordinal_position
end
else
begin
declare @columns_not_exists varchar(max)
set @columns_not_exists=''

select @columns_not_exists=@columns_not_exists + ','+ column_name
from (select column_name
from information_schema.columns
where table_name=@tablename
and column_name in (select col_value
from fncsvtotable(@columnname)
group by col_value)
union all
select col_value
from fncsvtotable(@columnname)
group by col_value) as asdf
group by column_name
having count(*)=1

if @columns_not_exists<>''
begin
select stuff(@columns_not_exists + ' columns does not exists in specified table',1,1,'')
return
end

insert into #table_structure
select row_number() over (order by col_value) as rownumber,col_value
from fncsvtotable(@columnname)
end

declare @query varchar(max)
declare @column_name_csv varchar(max)
set @column_name_csv=''
(select @column_name_csv=@column_name_csv + ','+ column_name from #table_structure)
set @column_name_csv=(select stuff(@column_name_csv,1,1,''))

set @query='select row_number() over (order by (select top 1 column_name from #table_structure)) as [identity column @#$],'+ @column_name_csv +
' into [getduplicateentryinrow @#$] from ' +@tablename

exec(@query)

declare @x varchar(max)
set @x=''
declare @y varchar(max)
set @y=''

declare @i varchar(10)
set @i=1

declare @j varchar(10)
set @j=1

declare @k varchar(10)
set @k=1

declare @count varchar(10)
set @count=1

while(@k<= (select max([identity column @#$]) from [getduplicateentryinrow @#$]))
begin
while(@i<= (select max(row_number) from #table_structure))
begin
-- print('i='+@i)
set @x=(select top 1 'select top 1 ''1'',' + column_name + ' from [getduplicateentryinrow @#$]
where [identity column @#$]=' + @k from #table_structure where row_number=@i)

delete from #temp_match_xy where row_no=1

insert into #temp_match_xy(row_no,x)
exec(@x)
--exec('select * from #temp_match_xy')
while(@j <= (select max(row_number) from #table_structure))
begin
-- print('j='+@j)
set @y=(select top 1 'select top 1 ''2'',' + column_name + ' from [getduplicateentryinrow @#$]
where [identity column @#$]=' + @k from #table_structure where row_number=@j)

delete from #temp_match_xy where row_no=2

insert into #temp_match_xy(row_no,y)
exec(@y)
--exec('select * from #temp_match_xy')

if (@i<>@j)
begin
if((select x from #temp_match_xy where row_no=1)=(select y from #temp_match_xy where row_no=2))
begin
set @count=@count+1
--print('count='+@count)
insert into #final_table(row_number,value,count)
select @k,(select x from #temp_match_xy where row_no=1),@count
end
end
set @j=@j+ 1
end
set @count=1
set @j=1
set @i=@i+1
end
set @i=1
set @k=@k+1
end

-- select * from #final_table

select ROW_NUMBER,value as DUPLICATE_VALUE,max(count) as COUNT from #final_table
group by row_number,value

-- select * from #table_structure
drop table #table_structure
drop table #final_table
drop table #temp_match_xy
drop table [getduplicateentryinrow @#$]
set nocount off

end

2 comments:

UnknownJune 26, 2013 at 10:58 PM
hi prashant...in the first table when you have 4 coloumn,at rownum=5 you have to duplicate values 1 and 3 but yor result show that you have duplicate value 1 and at rownum 7 you dont have any duplicate value but your result still shows duplicate_value is 3 and count is 2.
Have a look...
otherwise great work..:):)
UnknownJune 27, 2013 at 8:00 AM
You reading this result wrong. Result is showing

row_no dup_val count
----------------------------
5 1 2
5 3 2

means in mail table
row no 5 which is
1 3 3 1
has 2 duplicate values first is 1 and other is 3

Sql Authority to Make easy to overflowing codes,scripts,queries,projects

Pages

Wednesday, June 26, 2013

How to find duplicate value in a row

2 comments: