Skip to content

Instantly share code, notes, and snippets.

@ridercz
Created March 4, 2025 23:01
Show Gist options
  • Save ridercz/c5e7398c3ed588b894921b3129b79db9 to your computer and use it in GitHub Desktop.
Save ridercz/c5e7398c3ed588b894921b3129b79db9 to your computer and use it in GitHub Desktop.
Find candidates for sparse columns in MS SQL
SET NOCOUNT ON
CREATE TABLE #SparseTypes (
[DataType] nvarchar(20),
[Percentage] int
)
-- Values from https://learn.microsoft.com/en-us/sql/relational-databases/tables/use-sparse-columns
INSERT INTO #SparseTypes VALUES('bit', 98)
INSERT INTO #SparseTypes VALUES('tinyint', 86)
INSERT INTO #SparseTypes VALUES('smallint', 76)
INSERT INTO #SparseTypes VALUES('int', 64)
INSERT INTO #SparseTypes VALUES('bigint', 52)
INSERT INTO #SparseTypes VALUES('real', 64)
INSERT INTO #SparseTypes VALUES('float', 52)
INSERT INTO #SparseTypes VALUES('smallmoney', 64)
INSERT INTO #SparseTypes VALUES('money', 52)
INSERT INTO #SparseTypes VALUES('smalldatetime', 64)
INSERT INTO #SparseTypes VALUES('datetime', 52)
INSERT INTO #SparseTypes VALUES('uniqueidentifier', 43)
INSERT INTO #SparseTypes VALUES('date', 69)
CREATE TABLE #Columns (
[ColumnName] nvarchar(500),
[NullCount] int,
[DataType] nvarchar(20),
[TotalCount] int
)
DECLARE @SQL nvarchar(max)
SELECT @SQL = COALESCE(@SQL,'') + CAST('INSERT INTO #Columns Select ''' + TABLE_SCHEMA + '.' + REPLACE(TABLE_NAME,'''','''''') + '.' + COLUMN_NAME + ''' AS ColumnName, count(*) NullCount, ''' + DATA_TYPE + ''', (Select count(*) FROM ' + TABLE_SCHEMA + '.[' + TABLE_NAME + ']) AS TotalCount FROM ' + TABLE_SCHEMA + '.[' + TABLE_NAME + '] WHERE [' + COLUMN_NAME + '] IS NULL ;' + CHAR(13) AS VARCHAR(MAX))
FROM INFORMATION_SCHEMA.COLUMNS
JOIN sys.sysobjects B ON INFORMATION_SCHEMA.COLUMNS.TABLE_NAME = B.NAME
WHERE XTYPE = 'U'
EXEC(@SQL)
SELECT
A.ColumnName,
A.DataType,
A.TotalCount,
A.NullCount,
CAST((A.NullCount * 1.0 / A.TotalCount) * 100 AS int) AS NullPercentage,
ISNULL(B.Percentage, 60) AS RecommendedPercentage
FROM #Columns A
LEFT JOIN #SparseTypes B ON A.DataType = B.DataType
WHERE A.NullCount > 0 AND (A.NullCount * 1.0 / A.TotalCount) >= ISNULL(B.Percentage, 60) * .01
ORDER BY ColumnName
DROP TABLE #Columns
DROP TABLE #SparseTypes
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment