Comments:
You can use this proc in 2 different ways.
1. If you want to compare all columns in a table then only specify a table name like
exec Find_duplicate_values_in_row 'Table_name'
2.If you want to compare in specific columns then run this proc like
exec Find_duplicate_values_in_row 'Table_name','column_1,column_2,column_3'
========================================================================
I created a table [GETDUPLICATEENTRYINROW @#$] in this proc.
So plz check if this table already exists in your database or not.
If there is a table of same name in your database ,
then replace this table name in my procdure by
another unique table name which does not exists in your database
========================================================================
I used a function 'fncsvtotable' in this proc.
I am providing this function seprately.
Download this function and run it first , then run the main procdure.
Thanks
You can use this proc in 2 different ways.
1. If you want to compare all columns in a table then only specify a table name like
exec Find_duplicate_values_in_row 'Table_name'
2.If you want to compare in specific columns then run this proc like
exec Find_duplicate_values_in_row 'Table_name','column_1,column_2,column_3'
========================================================================
I created a table [GETDUPLICATEENTRYINROW @#$] in this proc.
So plz check if this table already exists in your database or not.
If there is a table of same name in your database ,
then replace this table name in my procdure by
another unique table name which does not exists in your database
========================================================================
I used a function 'fncsvtotable' in this proc.
I am providing this function seprately.
Download this function and run it first , then run the main procdure.
Thanks
Function 'FnCsvToTable'
-- select col_value from fncsvtotable('2,3')
CREATE FUNCTION [dbo].[fnCsvToTable]
(
@csv_string VARCHAR(max)
)
RETURNS @output_table_tmp TABLE(col_value VARCHAR(100))
AS
BEGIN
DECLARE @value VARCHAR(100)
WHILE LEN(@csv_string) > 0
BEGIN
SET @value = LTRIM(LEFT(@csv_string, ISNULL(NULLIF(CHARINDEX(',', @csv_string) - 1, -1), LEN(@csv_string))))
SET @csv_string = SUBSTRING(@csv_string,ISNULL(NULLIF(CHARINDEX(',', @csv_string), 0),
LEN(@csv_string)) + 1, LEN(@csv_string))
IF ISNULL(@value, '') <> ''
INSERT INTO @output_table_tmp VALUES (@value)
END
RETURN
END
Main Procdure 'Find_duplicate_values_in_row'
create proc find_duplicate_values_in_row
(
@tablename varchar(1000)
,@columnname varchar(max)='all'
)
as
begin
set nocount on
/*==================================================================
i created a table [getduplicateentryinrow @#$] in this proc.
so plz check if this table already exists in your database or not.
if there is a table of same name in your database , then replace
this table name in my procdure by another unique table name which
does not exists in your database
----------------------------------------------------------------------------------
i used a function 'fncsvtotable' in this proc.
i am providing this function below in comment.
copy this function and run it first , then run the main procdure.
---------------------------------------------------------------------------------
create function [dbo].[fncsvtotable]
(
@csv_string varchar(max)
)
returns @output_table_tmp table(col_value varchar(100))
as
begin
declare @value varchar(100)
while len(@csv_string) > 0
begin
set @value = ltrim(left(@csv_string, isnull(nullif(charindex(',', @csv_string) - 1, -1), len(@csv_string))))
set @csv_string = substring(@csv_string,isnull(nullif(charindex(',', @csv_string), 0),
len(@csv_string)) + 1, len(@csv_string))
if isnull(@value, '') <> ''
insert into @output_table_tmp values (@value)
end
return
end
===============================================================*/
if not exists(select 1
from information_schema.tables
where table_name=@tablename)
begin
select 'table does not exists.'
return
end
create table #table_structure
(
row_number int
,column_name varchar(1000)
)
create table #temp_match_xy
(
row_no int
,x varchar(max)
,y varchar(max)
)
create table #final_table
(
row_number int
,value varchar(max)
,count int
)
if @columnname='all'
begin
insert into #table_structure(row_number,column_name)
select ordinal_position,column_name from information_schema.columns
where table_name=@tablename
order by ordinal_position
end
else
begin
declare @columns_not_exists varchar(max)
set @columns_not_exists=''
select @columns_not_exists=@columns_not_exists + ','+ column_name
from (select column_name
from information_schema.columns
where table_name=@tablename
and column_name in (select col_value
from fncsvtotable(@columnname)
group by col_value)
union all
select col_value
from fncsvtotable(@columnname)
group by col_value) as asdf
group by column_name
having count(*)=1
if @columns_not_exists<>''
begin
select stuff(@columns_not_exists + ' columns does not exists in specified table',1,1,'')
return
end
insert into #table_structure
select row_number() over (order by col_value) as rownumber,col_value
from fncsvtotable(@columnname)
end
declare @query varchar(max)
declare @column_name_csv varchar(max)
set @column_name_csv=''
(select @column_name_csv=@column_name_csv + ','+ column_name from #table_structure)
set @column_name_csv=(select stuff(@column_name_csv,1,1,''))
set @query='select row_number() over (order by (select top 1 column_name from #table_structure)) as [identity column @#$],'+ @column_name_csv +
' into [getduplicateentryinrow @#$] from ' +@tablename
exec(@query)
declare @x varchar(max)
set @x=''
declare @y varchar(max)
set @y=''
declare @i varchar(10)
set @i=1
declare @j varchar(10)
set @j=1
declare @k varchar(10)
set @k=1
declare @count varchar(10)
set @count=1
while(@k<= (select max([identity column @#$]) from [getduplicateentryinrow @#$]))
begin
while(@i<= (select max(row_number) from #table_structure))
begin
-- print('i='+@i)
set @x=(select top 1 'select top 1 ''1'',' + column_name + ' from [getduplicateentryinrow @#$]
where [identity column @#$]=' + @k from #table_structure where row_number=@i)
delete from #temp_match_xy where row_no=1
insert into #temp_match_xy(row_no,x)
exec(@x)
--exec('select * from #temp_match_xy')
while(@j <= (select max(row_number) from #table_structure))
begin
-- print('j='+@j)
set @y=(select top 1 'select top 1 ''2'',' + column_name + ' from [getduplicateentryinrow @#$]
where [identity column @#$]=' + @k from #table_structure where row_number=@j)
delete from #temp_match_xy where row_no=2
insert into #temp_match_xy(row_no,y)
exec(@y)
--exec('select * from #temp_match_xy')
if (@i<>@j)
begin
if((select x from #temp_match_xy where row_no=1)=(select y from #temp_match_xy where row_no=2))
begin
set @count=@count+1
--print('count='+@count)
insert into #final_table(row_number,value,count)
select @k,(select x from #temp_match_xy where row_no=1),@count
end
end
set @j=@j+ 1
end
set @count=1
set @j=1
set @i=@i+1
end
set @i=1
set @k=@k+1
end
-- select * from #final_table
select ROW_NUMBER,value as DUPLICATE_VALUE,max(count) as COUNT from #final_table
group by row_number,value
-- select * from #table_structure
drop table #table_structure
drop table #final_table
drop table #temp_match_xy
drop table [getduplicateentryinrow @#$]
set nocount off
end
hi prashant...in the first table when you have 4 coloumn,at rownum=5 you have to duplicate values 1 and 3 but yor result show that you have duplicate value 1 and at rownum 7 you dont have any duplicate value but your result still shows duplicate_value is 3 and count is 2.
ReplyDeleteHave a look...
otherwise great work..:):)
You reading this result wrong. Result is showing
ReplyDeleterow_no dup_val count
----------------------------
5 1 2
5 3 2
means in mail table
row no 5 which is
1 3 3 1
has 2 duplicate values first is 1 and other is 3