SQL SERVER – DATA PROFILING WITH SQL SCRIPT

CREATE DATABASE [Data_Profiling]
GO

USE [Data_Profiling]
GO

CREATE TABLE [dbo].[Data_Profile_ColumnStatistics] (
[Database_Name] [varchar](100) NULL
,[Table_Name] [varchar](100) NULL
,[Column_Name] [varchar](100) NULL
,[Row_Count] [int] NULL
,[Min_Value] [varchar](50) NULL
,[Max_Value] [varchar](50) NULL
,[Mean] [varchar](50) NULL
,[StdDev] [varchar](50) NULL
,[JobId] [int] NULL
) ON [PRIMARY]
GO

CREATE TABLE [dbo].[Data_Profile_LengthDistribution] (
[Database_Name] [varchar](100) NULL
,[Table_Name] [varchar](100) NULL
,[Column_Name] [varchar](100) NULL
,[Row_Count] [int] NULL
,[Min_Length] [int] NULL
,[Max_Length] [int] NULL
,[Length] [int] NULL
,[Count] [int] NULL
,[JobId] [int] NULL
) ON [PRIMARY]
GO

CREATE TABLE [dbo].[Data_Profile_NULL_Counts] (
[Database_Name] [varchar](100) NULL
,[Table_Name] [varchar](100) NULL
,[Column_Name] [varchar](100) NULL
,[SqlDbType] [varchar](100) NULL
,[Max_Length] [int] NULL
,[Precision] [int] NULL
,[Scale] [int] NULL
,[IsNullable] [varchar](100) NULL
,[Row_Count] [int] NULL
,[Null_Count] [int] NULL
,[JobId] [int] NULL
) ON [PRIMARY]
GO

CREATE TABLE [dbo].[Data_Profile_ValueDistribution] (
[Database_Name] [varchar](100) NULL
,[Table_Name] [varchar](100) NULL
,[Column_Name] [varchar](100) NULL
,[Row_Count] [int] NULL
,[NumberOfDistinctValues] [int] NULL
,[Value] [varchar](max) NULL
,[Count] [int] NULL
,[JobId] [int] NULL
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
GO

--exec Data_Profiling 'DBName'
CREATE PROCEDURE [dbo].[Data_Profiling] (@dbname VARCHAR(50))
AS
BEGIN
--declare @dbname varchar(50)='xyz'
--declare @dbname varchar(50)='DBName
DECLARE @dataprofiling TABLE (
[ColumnName] [nvarchar](128) NULL
,[SchemaName] [nvarchar](128) NULL
,[TableName] [nvarchar](128) NULL
,[ObjectType] [nvarchar](60) NULL
,[DataType] [nvarchar](128) NULL
,[max_length] [smallint] NULL
,[is_nullable] [bit] NULL
,[column_id] [int] NULL
,[column_id_1] [int] NULL
,[object_id] [int] NULL
,[definition] [ntext] NULL
,precision INT NULL
,scale INT NULL
,seed_value INT
,increment_value INT
,last_value INT
,scdefination NVARCHAR(max)
,ccdefination NVARCHAR(max)
)
DECLARE @sql NVARCHAR(max);

SET @sql = 'select
  rtrim(ltrim(s.name)) as ColumnName,
    sh.name as SchemaName,
    o.name as TableName,
    o.type_desc AS ObjectType,
    t.name as DataType,
    s.max_length,
    s.is_nullable,
    ic.column_id,
    sc.column_id as column_id_1,
    cc.object_id,
    cc.definition,
    cast(S.precision as int) as precision,
    cast(s.scale as int) as scale,
   cast(ic.seed_value as int) as seed_value,
   cast(increment_value as int) as increment_value,
  cast(last_value as int) as last_value,
  sc.definition as scdefination,
 cc.definition as ccdefination
     from ' + @dbname + '.sys.tables  tx  join
    ' + @dbname + '.sys.columns                           s on tx.object_id=s.object_id
        INNER JOIN ' + @dbname + '.sys.types                   t ON s.system_type_id=t.user_type_id and t.is_user_defined=0
        INNER JOIN ' + @dbname + '.sys.objects                 o ON s.object_id=o.object_id
        INNER JOIN ' + @dbname + 
'.sys.schemas                sh on o.schema_id=sh.schema_id
        LEFT OUTER JOIN ' + @dbname + '.sys.identity_columns  ic ON s.object_id=ic.object_id AND s.column_id=ic.column_id
        LEFT OUTER JOIN ' + @dbname + '.sys.computed_columns  sc ON s.object_id=sc.object_id AND s.column_id=sc.column_id
        LEFT OUTER JOIN ' + @dbname + '.sys.check_constraints cc ON s.object_id=cc.parent_object_id AND s.column_id=cc.parent_column_id where s.name not like ''%-%'' and s.name not like ''%[%'' and s.name not like '' ''';

INSERT INTO @dataprofiling
EXEC (@sql)

DECLARE @BIP_sourcetable TABLE (
[ColumnName] [nvarchar](128) NULL
,[ObjectName] [nvarchar](257) NULL
,[ObjectType] [nvarchar](60) NULL
,[DataType] [nvarchar](151) NULL
,[Nullable] [varchar](8) NULL
,[MiscInfo] [ntext] NULL
,[DatabaseName] [nvarchar](100) NULL
,length INT
,precision INT NULL
,scale INT NULL
,[is_nullable] [bit] NULL
)

INSERT INTO @BIP_sourcetable (
[ColumnName]
,[ObjectName]
,[ObjectType]
,[DataType]
,[Nullable]
,[MiscInfo]
,DatabaseName
,length
,precision
,scale
,[is_nullable]
)
SELECT '[' + RTRIM(LTRIM(ColumnName)) + ']'
,SchemaName + '.' + '[' + TableName + ']' AS ObjectName
,ObjectType
,CASE 
WHEN [DataType] IN ('char', 'varchar')
THEN [DataType] + '(' + CASE 
WHEN max_length < 0
THEN 'MAX'
ELSE CONVERT(VARCHAR(10), max_length)
END + ')'
WHEN [DataType] IN ('nvarchar', 'nchar')
THEN [DataType] + '(' + CASE 
WHEN max_length < 0
THEN 'MAX'
ELSE CONVERT(VARCHAR(10), max_length / 2)
END + ')'
WHEN [DataType] IN ('numeric')
THEN [DataType] + '(' + CONVERT(VARCHAR(10), precision) + ',' + CONVERT(VARCHAR(10), scale) + ')'
ELSE [DataType]
END AS DataType
,CASE 
WHEN is_nullable = 1
THEN 'NULL'
ELSE 'NOT NULL'
END AS Nullable
,CASE 
WHEN column_id IS NULL
THEN ''
ELSE ' identity(' + ISNULL(CONVERT(VARCHAR(10), seed_value), '') + ',' + ISNULL(CONVERT(VARCHAR(10), increment_value), '') + ')=' + ISNULL(CONVERT(VARCHAR(10), last_value), 'null')
END + CASE 
WHEN column_id_1 IS NULL
THEN ''
ELSE ' computed(' + ISNULL(scdefination, '') + ')'
END + CASE 
WHEN object_id IS NULL
THEN ''
ELSE ' check(' + ISNULL(ccdefination, '') + ')'
END AS MiscInfo
,@dbname
,max_length
,precision
,scale
,[is_nullable]
FROM @dataprofiling

DECLARE @temptable TABLE (
id INT IDENTITY(1, 1)
,[ColumnName] NVARCHAR(100)
,[ObjectName] NVARCHAR(100)
,DatabaseName NVARCHAR(100)
,DataType VARCHAR(100)
,length INT
,precision INT NULL
,scale INT NULL
,[is_nullable] BIT
)
DECLARE @temptable1 TABLE (
id INT IDENTITY(1, 1)
,[ColumnName] NVARCHAR(100)
,[ObjectName] NVARCHAR(100)
,DatabaseName NVARCHAR(100)
,DataType VARCHAR(50)
,length INT
,precision INT NULL
,scale INT NULL
,[is_nullable] BIT
)
DECLARE @ColumnObjectName TABLE (
id INT IDENTITY(1, 1)
,ColumnName NVARCHAR(100) NULL
,ObjectName NVARCHAR(100) NULL
,DataType VARCHAR(50)
,DBName VARCHAR(50)
,length INT
,precision INT NULL
,scale INT NULL
,[is_nullable] BIT
)
DECLARE @ColumnObjectName1 TABLE (
id INT IDENTITY(1, 1)
,[ColumnName] NVARCHAR(100) NULL
,[ObjectName] NVARCHAR(100) NULL
,DataType VARCHAR(50)
,DBName VARCHAR(50)
,length INT
,precision INT NULL
,scale INT NULL
,[is_nullable] BIT
)
DECLARE @minMax TABLE (
id INT IDENTITY(1, 1)
,Minvalue FLOAT
,MaxValue FLOAT
,AvgValue FLOAT
,STDEVValue FLOAT
,DistinctCount INT
,Count INT
,ColumnName NVARCHAR(100) NULL
,ObjectName NVARCHAR(100) NULL
,P FLOAT NULL
,NullCount INT
,minLen NVARCHAR(100) NULL
,Maxlen NVARCHAR(100) NULL
)
DECLARE @Count_Distinct TABLE (
id INT IDENTITY(1, 1)
,Minvalue FLOAT
,MaxValue FLOAT
,AvgValue FLOAT
,STDEVValue FLOAT
,DistinctCount INT
,Count INT
,ColumnName NVARCHAR(100) NULL
,ObjectName NVARCHAR(100) NULL
,P FLOAT NULL
,NullCount INT
,minLen NVARCHAR(100) NULL
,Maxlen NVARCHAR(100) NULL
)

INSERT INTO @temptable (
[ColumnName]
,[ObjectName]
,DatabaseName
,DataType
,length
,precision
,scale
,[is_nullable]
)
SELECT [ColumnName]
,[ObjectName]
,DatabaseName
,DataType
,length
,precision
,scale
,[is_nullable]
FROM @BIP_sourcetable
WHERE DataType IN ('int', 'float', 'decimal', 'tinyint', 'smallint')

INSERT INTO @temptable1 (
[ColumnName]
,[ObjectName]
,DatabaseName
,DataType
,length
,precision
,scale
,[is_nullable]
)
SELECT [ColumnName]
,[ObjectName]
,DatabaseName
,DataType
,length
,precision
,scale
,[is_nullable]
FROM @BIP_sourcetable
WHERE DataType NOT IN ('int', 'float', 'decimal', 'tinyint', 'smallint', 'xml', 'Image', 'ntext')

DECLARE @dbname1 VARCHAR(50) = @dbname
DECLARE @max INT = (
SELECT MAX(id)
FROM @temptable
)
DECLARE @i INT = 1

WHILE @i <= @max
BEGIN
DECLARE @ColumnName VARCHAR(50) = (
SELECT [ColumnName]
FROM @temptable
WHERE id = @i
)
DECLARE @ObjectName VARCHAR(50) = (
SELECT [ObjectName]
FROM @temptable
WHERE id = @i
)
DECLARE @DataType VARCHAR(50) = (
SELECT DataType
FROM @temptable
WHERE id = @i
)
DECLARE @length INT = (
SELECT length
FROM @temptable
WHERE id = @i
)
DECLARE @precision INT = (
SELECT precision
FROM @temptable
WHERE id = @i
)
DECLARE @scale INT = (
SELECT scale
FROM @temptable
WHERE id = @i
)
DECLARE @is_nullable BIT = (
SELECT [is_nullable]
FROM @temptable
WHERE id = @i
)
DECLARE @sql1 NVARCHAR(max)

SET @sql1 = (
'select  min(' + @ColumnName + '),max(' + @ColumnName + '),avg(cast(' + @ColumnName + ' as bigint)) ,STDEV(' + @ColumnName + '),Count(distinct ' + @ColumnName + ' ) as DistinctCount ,
  Count(*) as Count,''' + @columnName + ''',''' + @ObjectName + ''',(100.0 * SUM(CASE WHEN ' + @ColumnName + ' IS NULL THEN 1 ELSE 0 END) / COUNT(*)) as p,
  SUM(CASE WHEN ' + @ColumnName + ' IS NULL THEN 1 ELSE 0 END) as NullCount,Min(len(' + @ColumnName + ')),Max(len(' + @ColumnName + '))  from ' + @dbname1 + '.' + @ObjectName + ''
)

INSERT INTO @minMax (
minvalue
,maxvalue
,AvgValue
,STDEVValue
,DistinctCount
,Count
,ColumnName
,ObjectName
,P
,NullCount
,minLen
,Maxlen
)
EXEC (@sql1)

INSERT INTO @ColumnObjectName (
ColumnName
,ObjectName
,DataType
,DBName
,length
,precision
,scale
,[is_nullable]
)
SELECT @ColumnName
,@ObjectName
,@DataType
,@dbname1
,@length
,@precision
,@scale
,@is_nullable

SET @i = @i + 1
END

DECLARE @max1 INT = (
SELECT MAX(id)
FROM @temptable1
)
DECLARE @i1 INT = 1

WHILE @i1 <= @max1
BEGIN
DECLARE @ColumnName1 VARCHAR(50) = (
SELECT [ColumnName]
FROM @temptable1
WHERE id = @i1
)
DECLARE @ObjectName1 VARCHAR(50) = (
SELECT [ObjectName]
FROM @temptable1
WHERE id = @i1
)
DECLARE @DataType1 VARCHAR(50) = (
SELECT DataType
FROM @temptable1
WHERE id = @i1
)
DECLARE @length1 INT = (
SELECT length
FROM @temptable1
WHERE id = @i1
)
DECLARE @precision1 INT = (
SELECT precision
FROM @temptable1
WHERE id = @i1
)
DECLARE @scale1 INT = (
SELECT scale
FROM @temptable1
WHERE id = @i1
)
DECLARE @is_nullable1 BIT = (
SELECT [is_nullable]
FROM @temptable1
WHERE id = @i1
)
DECLARE @sql2 NVARCHAR(max)

SET @sql2 = (
'select  Null,Null,Null,Null,Count(distinct ' + @ColumnName1 + ' ) as DistinctCount ,Count(*) as Count, ''' + @columnName1 + ''',
  ''' + @ObjectName1 + ''' ,(100.0 * SUM(CASE WHEN ' + @ColumnName1 + ' IS NULL THEN 1 ELSE 0 END) / COUNT(*)) as p,SUM(CASE WHEN ' + @ColumnName1 + ' IS NULL THEN 1 ELSE 0 END) as NullCount,
  Min(len(' + @ColumnName1 + ')),Max(len(' + @ColumnName1 + '))  from ' + @dbname1 + '.' + @ObjectName1 + ''
)

INSERT INTO @Count_Distinct (
minvalue
,maxvalue
,AvgValue
,STDEVValue
,DistinctCount
,Count
,ColumnName
,ObjectName
,p
,NullCount
,minLen
,Maxlen
)
EXEC (@sql2)

INSERT INTO @ColumnObjectName1 (
ColumnName
,ObjectName
,DataType
,DBName
,length
,precision
,scale
,[is_nullable]
)
SELECT @ColumnName1
,@ObjectName1
,@DataType1
,@dbname1
,@length1
,@precision1
,@scale1
,@is_nullable1

SET @i1 = @i1 + 1
END

DELETE
FROM [Data_Profiling].[dbo].[Data_Profile_NULL_Counts]
WHERE [Database_Name] = @dbname

INSERT INTO [Data_Profiling].[dbo].[Data_Profile_NULL_Counts] (
[Database_Name]
,[Table_Name]
,[Column_Name]
,[SqlDbType]
,[Max_Length]
,[Precision]
,[Scale]
,[IsNullable]
,[Row_Count]
,[Null_Count]
)
SELECT DISTINCT t.DataBaseName
,t.TableName
,t.ColumnName
,t.DataType
,t.length
,t.precision
,t.scale
,CASE 
WHEN t.is_nullable = 0
THEN 'No'
ELSE 'Yes'
END AS Nullable
,t.Count
,CASE 
WHEN t.NullCount IS NULL
THEN 0
ELSE t.NullCount
END AS NullCount
FROM (
SELECT c.ColumnName
,c.ObjectName AS TableName
,c.DBName AS DataBaseName
,DataType
,m.Minvalue
,m.MaxValue
,AvgValue
,m.STDEVValue AS StdValue
,DistinctCount
,Count
,CASE 
WHEN P IS NULL
THEN 100
ELSE P
END AS NullPercentage
,ISNULL(minLen, 0) AS MinLen
,ISNULL(Maxlen, 0) AS MaxLen
,c.length
,m.NullCount
,c.precision
,c.scale
,CAST(c.is_nullable AS VARCHAR) AS is_nullable
FROM @minMax m
JOIN @ColumnObjectName c ON c.ColumnName = m.ColumnName
AND m.ObjectName = c.ObjectName

UNION

SELECT c.ColumnName
,c.ObjectName
,c.DBName AS DataBaseName
,DataType
,m.Minvalue
,m.MaxValue
,AvgValue
,m.STDEVValue AS StdValue
,DistinctCount
,Count
,CASE 
WHEN P IS NULL
THEN 100
ELSE P
END AS NullPercentage
,ISNULL(minLen, 0) AS MinLen
,ISNULL(Maxlen, 0) AS MaxLen
,c.length
,m.NullCount
,c.precision
,c.scale
,CAST(c.is_nullable AS VARCHAR) AS is_nullable
FROM @Count_Distinct m
JOIN @ColumnObjectName1 c ON c.ColumnName = m.ColumnName
AND m.ObjectName = c.ObjectName
) t

DELETE
FROM [Data_Profiling].[dbo].[Data_Profile_ColumnStatistics]
WHERE [Database_Name] = @dbname

INSERT INTO [Data_Profiling].[dbo].[Data_Profile_ColumnStatistics] (
[Database_Name]
,[Table_Name]
,[Column_Name]
,[Row_Count]
,[Min_Value]
,[Max_Value]
,[Mean]
,[StdDev]
)
SELECT DISTINCT t.DataBaseName
,t.TableName
,t.ColumnName
,t.Count
,t.Minvalue
,t.MaxValue
,t.AvgValue
,t.StdValue
FROM (
SELECT c.ColumnName
,c.ObjectName AS TableName
,c.DBName AS DataBaseName
,DataType
,m.Minvalue
,m.MaxValue
,AvgValue
,m.STDEVValue AS StdValue
,DistinctCount
,Count
,CASE 
WHEN P IS NULL
THEN 100
ELSE P
END AS NullPercentage
,ISNULL(minLen, 0) AS MinLen
,ISNULL(Maxlen, 0) AS MaxLen
,c.length
,m.NullCount
,c.precision
,c.scale
,CAST(c.is_nullable AS VARCHAR) AS is_nullable
FROM @minMax m
JOIN @ColumnObjectName c ON c.ColumnName = m.ColumnName
AND m.ObjectName = c.ObjectName

UNION

SELECT c.ColumnName
,c.ObjectName
,c.DBName AS DataBaseName
,DataType
,m.Minvalue
,m.MaxValue
,AvgValue
,m.STDEVValue AS StdValue
,DistinctCount
,Count
,CASE 
WHEN P IS NULL
THEN 100
ELSE P
END AS NullPercentage
,ISNULL(minLen, 0) AS MinLen
,ISNULL(Maxlen, 0) AS MaxLen
,c.length
,m.NullCount
,c.precision
,c.scale
,CAST(c.is_nullable AS VARCHAR) AS is_nullable
FROM @Count_Distinct m
JOIN @ColumnObjectName1 c ON c.ColumnName = m.ColumnName
AND m.ObjectName = c.ObjectName
) t

DELETE
FROM [Data_Profiling].[dbo].[Data_Profile_LengthDistribution]
WHERE [Database_Name] = @dbname

INSERT INTO [Data_Profiling].[dbo].[Data_Profile_LengthDistribution] (
[Database_Name]
,[Table_Name]
,[Column_Name]
,[Row_Count]
,[Min_Length]
,[Max_Length]
,[Length]
,[Count]
)
SELECT DISTINCT t.DataBaseName
,t.TableName
,t.ColumnName
,t.Count
,t.MinLen
,t.MaxLen
,t.length
,t.DistinctCount
FROM (
SELECT c.ColumnName
,c.ObjectName AS TableName
,c.DBName AS DataBaseName
,DataType
,m.Minvalue
,m.MaxValue
,AvgValue
,m.STDEVValue AS StdValue
,DistinctCount
,Count
,CASE 
WHEN P IS NULL
THEN 100
ELSE P
END AS NullPercentage
,ISNULL(minLen, 0) AS MinLen
,ISNULL(Maxlen, 0) AS MaxLen
,c.length
,m.NullCount
,c.precision
,c.scale
,CAST(c.is_nullable AS VARCHAR) AS is_nullable
FROM @minMax m
JOIN @ColumnObjectName c ON c.ColumnName = m.ColumnName
AND m.ObjectName = c.ObjectName

UNION

SELECT c.ColumnName
,c.ObjectName
,c.DBName AS DataBaseName
,DataType
,m.Minvalue
,m.MaxValue
,AvgValue
,m.STDEVValue AS StdValue
,DistinctCount
,Count
,CASE 
WHEN P IS NULL
THEN 100
ELSE P
END AS NullPercentage
,ISNULL(minLen, 0) AS MinLen
,ISNULL(Maxlen, 0) AS MaxLen
,c.length
,m.NullCount
,c.precision
,c.scale
,CAST(c.is_nullable AS VARCHAR) AS is_nullable
FROM @Count_Distinct m
JOIN @ColumnObjectName1 c ON c.ColumnName = m.ColumnName
AND m.ObjectName = c.ObjectName
) t

/*
  -------------------------------------------------------------
  insert into [Data_Profiling].[dbo].[Data_Profile_CandiateKey2]
  (
        [Database_Name]
        ,[Table_Name]
        ,[Column_Name]
        ,[Keycolumn]
   )
  
  
   select distinct t.DataBaseName,t.TableName,t.ColumnName,(t.DistinctCount/t.Count)*100
   from (
  
  
  select c.ColumnName,c.ObjectName as TableName,c.DBName as DataBaseName,DataType,m.Minvalue,m.MaxValue,AvgValue,
  m.STDEVValue as StdValue,DistinctCount,Count,case when P IS null then 100 else P end as NullPercentage,isnull(minLen,0) as MinLen,isnull(Maxlen,0) as MaxLen,
  c.length,m.NullCount ,c.precision,c.scale,cast(c.is_nullable as varchar) as is_nullable
  
   from @minMax m
  join @ColumnObjectName c on c.ColumnName=m.ColumnName and m.ObjectName=c.ObjectName
  union
  select c.ColumnName,c.ObjectName,c.DBName as DataBaseName,DataType,m.Minvalue,m.MaxValue,AvgValue,
  m.STDEVValue as StdValue,DistinctCount,Count,case when P IS null then 100 else P end as NullPercentage,isnull(minLen,0) as MinLen,isnull(Maxlen,0) as MaxLen,
  c.length,m.NullCount ,c.precision,c.scale,cast(c.is_nullable as varchar) as is_nullable from
   @Count_Distinct m join @ColumnObjectName1 c on  c.ColumnName=m.ColumnName and m.ObjectName=c.ObjectName
  ) t
  
  
  -----------------------------------------------------
  */
DECLARE @temptable3 TABLE (
id INT IDENTITY(1, 1)
,[ColumnName] NVARCHAR(100)
,[ObjectName] NVARCHAR(100)
,DatabaseName NVARCHAR(100)
,DataType VARCHAR(50)
,length INT
)
DECLARE @minMax3 TABLE (
id INT IDENTITY(1, 1)
,value VARCHAR(50)
,count INT
,ColumnName VARCHAR(50)
,ObjectName VARCHAR(50)
,DBName VARCHAR(50)
)
DECLARE @minMax4 TABLE (
id INT IDENTITY(1, 1)
,Minvalue FLOAT
,MaxValue FLOAT
,AvgValue FLOAT
,STDEVValue FLOAT
,DistinctCount INT
,Count INT
,ColumnName NVARCHAR(100) NULL
,ObjectName NVARCHAR(100) NULL
,P FLOAT NULL
,minLen NVARCHAR(100) NULL
,Maxlen NVARCHAR(100) NULL
)

INSERT INTO @temptable3 (
[ColumnName]
,[ObjectName]
,DatabaseName
,DataType
,length
)
SELECT [ColumnName]
,[ObjectName]
,DatabaseName
,DataType
,length
FROM @BIP_sourcetable
WHERE DataType IN ('int', 'float', 'decimal', 'tinyint', 'smallint')

DECLARE @max3 INT = (
SELECT MAX(id)
FROM @temptable3
)
DECLARE @i3 INT = 1

WHILE @i3 <= @max3
BEGIN
DECLARE @ColumnName3 VARCHAR(50) = (
SELECT [ColumnName]
FROM @temptable3
WHERE id = @i3
)
DECLARE @ObjectName3 VARCHAR(50) = (
SELECT [ObjectName]
FROM @temptable3
WHERE id = @i3
)
DECLARE @DataType3 VARCHAR(50) = (
SELECT DataType
FROM @temptable3
WHERE id = @i3
)
DECLARE @sql3 NVARCHAR(max)
DECLARE @sql4 NVARCHAR(max)

SET @sql3 = ('select ' + @ColumnName3 + ',count(' + @ColumnName3 + ') , ''' + @ColumnName3 + ''',''' + @ObjectName3 + ''',''' + @dbname1 + ''' from ' + @dbname1 + '.' + @ObjectName3 + ' group by ' + @ColumnName3 + '')

INSERT INTO @minMax3 (
value
,count
,ColumnName
,ObjectName
,DBName
)
EXEC (@sql3)

SET @sql4 = (
'select  min(' + @ColumnName3 + '),max(' + @ColumnName3 + '),avg(cast(' + @ColumnName3 + ' as bigint)) ,STDEV(' + @ColumnName3 + '),Count(distinct ' + @ColumnName3 + ' ) as DistinctCount ,
  Count(*) as Count,''' + @columnName3 + ''',''' + @ObjectName3 + ''',(100.0 * SUM(CASE WHEN ' + @ColumnName3 + ' IS NULL THEN 1 ELSE 0 END) / COUNT(*)) as p,Min(len(' + @ColumnName3 + ')),Max(len(' + @ColumnName3 + '))  from ' + @dbname1 + '.' + @ObjectName3 + ''
)

INSERT INTO @minMax4 (
minvalue
,maxvalue
,AvgValue
,STDEVValue
,DistinctCount
,Count
,ColumnName
,ObjectName
,P
,minLen
,Maxlen
)
EXEC (@sql4)

SET @i3 = @i3 + 1
END

DELETE
FROM [Data_Profiling].[dbo].[Data_Profile_ValueDistribution]
WHERE [Database_Name] = @dbname

INSERT INTO [Data_Profiling].[dbo].[Data_Profile_ValueDistribution] (
[Database_Name]
,[Table_Name]
,[Column_Name]
,[Row_Count]
,[NumberOfDistinctValues]
,[Value]
,[Count]
)
SELECT m.DBName
,m.ObjectName
,m.ColumnName
,m1.Count AS [RowCount]
,m1.DistinctCount
,m.value
,m.count
FROM @minMax3 m
JOIN @minMax4 m1 ON m1.ColumnName = m.ColumnName
AND m.ObjectName = m1.ObjectName
END
author

Kiran Reddy A

Kiran has been a part of the IT industry for more than six years and Involved in many MSBI projects and product development with extensive usage of ETL and Reporting tools like SQL Server Integration Services (SSIS), SQL Server Analysis Services (SSAS), SQL Server Reporting Service (SSRS) and T-SQL

Get Free Email Updates to your Inbox!

3 comments

Thanks and most welcome for your Comments

Reply

Fantastic article ! You havemade some very astute statements and I appreciate the the effort you have put into your writing. Its clear that you know what you are writing about. I am excited to read more of your sites content.


Hadoop Training Online

Reply

Nice blog! Clearly explained. very useful.

Reply

www.CodeNirvana.in

Powered by Blogger.

Translate

Total Pageviews

Copyright © Journey to MSBI World With Kiran Reddy | Blogger Templates | Designed By Code Nirvana