Sunday, January 20, 2008

Estimate table size for growth

/*
This code is mentioned in Inside Microsoft SQL Server 2005 - The Storage Engine
It contains 4 sps: sp_EstTableSize, sp_EstTableSizeNCI, sp_EstTableSizeCI, sp_EstTableSizeData
Modified code to output size in Mb. - 17 Jan 2008
 
From Kalen:
The procedure calculates the storage requirements for the table and all indexes by extracting information from the sys.indexes, sys.columns, and sys.types views.
The result is only an estimate when you have variable-length fields. The procedure has no way of knowing whether a variable-length field will be completely filled, half filled, or mostly empty in every row, so it assumes that variable-length columns will be filled to the maximum size.
If you know that this won't be the case with your data, you can create a second table that more closely matches the expected data. For example, if you have a varchar(1000) column that will only rarely use the full 1000 bytes, and 99 percent of your rows will use only about 100 bytes, you can create a second table with a varchar(100) column and run sp_EstTableSize on that table.
*/
 

if object_id('dbo.sp_EstTableSizeNCI', 'P') IS NOT NULL  drop procedure dbo.sp_EstTableSizeNCI
go
 
create procedure dbo.sp_EstTableSizeNCI
 @ObjId int, @Num_Rows int, @DataPages int, @CIndex_Row_Size int, @Has_UniqueClusteredIndex int,
 @NCIPages int OUTPUT
as
/*
**
**  Do not call the procedure directly. It will be called
**  from the main procedure:  sp_estTableSize
**
**
*/
 
set nocount on
 
declare @Indid as smallint, @IndexName as sysname
declare @Num_Key_Cols as smallint, @Num_Variable_Key_Cols as smallint
declare @Fixed_Key_Size as int, @Max_Var_Key_Size as int
declare @Index_Null_Bitmap as smallint, @Variable_Key_Size as int
declare @Index_Row_Size as int, @Index_Rows_Per_Page as int
declare @Free_Index_Rows_Per_Page as int, @T as int
declare @Est_Num_Pages as int
 
declare @Fixed_Size as int
declare @Variable_Col as tinyint, @Var_Size as int
declare @LevelPages as int, @CLevel as int, @PrevCount as int
 
set @NCIPages = 0
set @PrevCount = 0
 

-- Get the index id's of the non-clustered indexes
declare NCI_cursor insensitive cursor
for
select indid, name
from sysindexes
where id = @ObjId
and     indid < 255  -- exclude pointers to text / ntext / image pages
and   IndexProperty(@ObjId, name, 'IsClustered') = 0
and IndexProperty(@ObjId, name, 'IsStatistics') = 0
and IndexProperty(@ObjId, name, 'IsHypothetical') = 0
for read only
 
declare @indkey as int, @TableName as sysname, @IndexColName as sysname
set @TableName = object_name(@ObjId)
 
open NCI_cursor
 
fetch next from NCI_cursor into @Indid, @IndexName
 
while @@FETCH_STATUS = 0
   begin  -- cursor loop
 
-- Initialize additive variables to zero
   set @Num_Key_Cols = 0
 
   set @Fixed_Key_Size = 0
   set @Num_Variable_Key_Cols = 0
   set @Max_Var_Key_Size = 0
 
-- Start with the first column in the non-clustered index
   set @indkey = 1
 
   while (@indkey <= 16)
      begin
 
--       use Index_Col to get each column of the non-clustered index
         set @IndexColName = INDEX_COL(@TableName, @Indid, @indkey)
         if @IndexColName IS NULL Break
 
         set @Num_Key_Cols = @Num_Key_Cols + 1
 
         select @Fixed_Size = sc.length * (1 - st.variable),
  @Variable_Col = st.variable,
  @Var_Size = sc.length * st.variable
 
         from sysobjects as so
 
         join syscolumns as sc
           on (so.id = sc.id)
 
         join systypes as st
           on (sc.xtype = st.xtype)
 
         where sc.id   = @ObjId
         and sc.name = @IndexColName
 
         set @Fixed_Key_Size = @Fixed_Key_Size + @Fixed_Size
         set @Num_Variable_Key_Cols = @Num_Variable_Key_Cols + @Variable_Col
 
--  If the table has a non-unique clustered index, then the 'uniqueifer' is internally treated
-- as a variable length column.
 set @Num_Variable_Key_Cols = @Num_Variable_Key_Cols +
         case when @CIndex_Row_Size > 0  and @Has_UniqueClusteredIndex = 1
         then 1
         else 0
       end
 
         set @Max_Var_Key_Size = @Max_Var_Key_Size + @Var_Size
 
--       Get the next column in the non-clustered index
         set @indkey = @indkey + 1
 
      end
 
   set @Index_Null_Bitmap = case when @Fixed_Key_Size = 0
                                then 0
                                else (2 + (( @Num_Key_Cols + 7) / 8 ) )
                            end
 
   set @Variable_Key_Size = case when @Num_Variable_Key_Cols = 0
           then 0
           else 2 + @Num_Variable_Key_Cols + @Num_Variable_Key_Cols +
      @Max_Var_Key_Size
       end
 
   set @Index_Row_Size = @CIndex_Row_Size +
    @Fixed_Key_Size + @Variable_Key_Size + @Index_Null_Bitmap + 9
   set @Index_Rows_Per_Page = 8096 / (@Index_Row_Size + 2)
 
-- Get the Fill Factor used in the index (i.e., the Pad_Index factor)
 
declare @Fill_Factor as int
select @Fill_Factor = IndexProperty (@ObjId, indid, 'IndexFillFactor')
from sysindexes
where id = @ObjId
 

--    According to Books Online (CREATE INDEX (T-SQL)):
-- for non-clustered indexes, space is always left on non-leaf pages
-- to accomomdate one additional row regardless of the setting of
-- FillFactor. I believe this is incorrect, and that, as in
-- SQL Server 6.x, a FillFactor from 0% to 99% results in one free row,
-- while a setting of 100% results in no free rows.
   set @Free_Index_Rows_Per_Page = case when @Fill_Factor = 100 then 0  else 1  end
 
--    Calculate the number of pages required to store all the index rows at each
--    level of the index
 
   set @LevelPages = 0
   set @CLevel = @Num_Rows
 
   set @T = (@Index_Rows_Per_Page - @Free_Index_Rows_Per_Page)
 
--    According to Books Online (CREATE INDEX (T-SQL)):
-- "The number of rows on an intermediate index page is never less than two,
--  regardless of how low the value of FILLFACTOR."
   set @T = case when @T < 2  then 2  else @T  end
 
   while (@LevelPages <> 1)
      begin
 
         set @LevelPages = ceiling (1.0 * @CLevel / @T)
         set @NCIPages = @NCIPages + @LevelPages
         set @CLevel = @LevelPages
 
      end
 
-- Populate the #ncindexes table created in sp_EstTableSize
   insert into #ncindexes (IndexName, IndexPages) values (@IndexName, @NCIPages - @PrevCount)
 
   set @PrevCount = @NCIPages
 
fetch next from NCI_cursor into @Indid, @IndexName
end  -- cursor loop
 
close NCI_cursor
deallocate NCI_cursor
 
GO
 
if object_id('dbo.sp_EstTableSizeCI', 'P') IS NOT NULL  drop procedure dbo.sp_EstTableSizeCI
go
 
create procedure dbo.sp_EstTableSizeCI
 @ObjId int, @Num_Rows int, @DataPages int,
 @Indid smallint, @Has_UniqueClusteredIndex int,
 @CIndex_Row_Size int OUTPUT, @CIPages int OUTPUT
as
/*
**
**  Do not call the procedure directly. It will be called
**  from the main procedure:  sp_estTableSize
**
**
*/
set nocount on
 
declare @Num_CKey_Cols as smallint, @Num_Variable_CKey_Cols as smallint
declare @Fixed_CKey_Size as int, @Max_Var_CKey_Size as int
declare @CIndex_Null_Bitmap as smallint, @Variable_CKey_Size as int
declare @CIndex_Rows_Per_Page as int
declare @Free_CIndex_Rows_Per_Page as int, @T as int
declare @Est_Num_Pages as int
 
declare @Fixed_Size as int
declare @Variable_Col as tinyint, @Var_Size as int
 
declare @indkey as int, @TableName as sysname, @IndexColName as sysname
set @TableName = object_name(@ObjId)
 
-- Initialize additive variables
set @Num_CKey_Cols = 0
 
set @Fixed_CKey_Size = case when @Has_UniqueClusteredIndex = 1
                          then 0
                          else 4
                       end
 
set @Num_Variable_CKey_Cols = 0
set @Max_Var_CKey_Size = 0
 
-- Start with the first column in the clustered index
set @indkey = 1
 
while (@indkey <= 16)  /* SQL Server 7.0 limits number of columns in an index to 16 */
   begin
 
--    use Index_Col to get each column of the clustered index
      set @IndexColName = INDEX_COL(@TableName, @Indid, @indkey)
      if @IndexColName IS NULL Break
 
      set @Num_CKey_Cols = @Num_CKey_Cols + 1
 
      select @Fixed_Size = sc.length * (1 - st.variable),
  @Variable_Col = st.variable,
  @Var_Size = sc.length * st.variable
 
      from sysobjects as so
 
      join syscolumns as sc
        on (so.id = sc.id)
 
      join systypes as st
        on (sc.xtype = st.xtype)
 
      where sc.id   = @ObjId
      and sc.name = @IndexColName
 
      set @Fixed_CKey_Size = @Fixed_CKey_Size + @Fixed_Size
      set @Num_Variable_CKey_Cols = @Num_Variable_CKey_Cols + @Variable_Col
      set @Max_Var_CKey_Size = @Max_Var_CKey_Size + @Var_Size
 
--    Get the next column in the clustered index
      set @indkey = @indkey + 1
 
   end  /* while (@indkey <= 16) */
 
set @CIndex_Null_Bitmap = (2 + (( @Num_CKey_Cols + 7) / 8 ) )
 
set @Variable_CKey_Size = case when @Num_Variable_CKey_Cols = 0
        then 0
        else 2 + @Num_Variable_CKey_Cols + @Num_Variable_CKey_Cols +
     @Max_Var_CKey_Size
     end
 
set @CIndex_Row_Size = @Fixed_CKey_Size + @Variable_CKey_Size + @CIndex_Null_Bitmap + 9
set @CIndex_Rows_Per_Page = 8096 / (@CIndex_Row_Size + 2)
 
-- Get the Fill Factor used in the index (i.e., the Pad_Index factor)
 
declare @Fill_Factor as int
select @Fill_Factor = IndexProperty (@ObjId, object_name(@Indid), 'IndexFillFactor')
from sysindexes
where id    = @ObjId
and indid = @Indid
 
   set @Free_CIndex_Rows_Per_Page =
 case
    when @Has_UniqueClusteredIndex = 1
  then case when @Fill_Factor = 100  then 0  else 2  end
 
    else case when @Fill_Factor = 100  then 0  else 1  end
 end
 
-- Calculate the number of pages required to store all the index rows at each
-- level of the index
 
declare @LevelPages as int, @CLevel as int
 
set @LevelPages = 0
set @CLevel = @DataPages  -- number of pages needed to store table data
set @CIPages = 0
 
   set @T = (@CIndex_Rows_Per_Page - @Free_CIndex_Rows_Per_Page)
 
--    According to Books Online (CREATE INDEX (T-SQL)):
-- "The number of rows on an intermediate index page is never less than two,
--  regardless of how low the value of FILLFACTOR."
   set @T = case when @T < 2  then 2  else @T  end
 
while (@LevelPages <> 1)
   begin
 
      set @LevelPages = ceiling (1.0 * @CLevel / @T)
      set @CIPages = @CIPages + @LevelPages
      set @CLevel = @LevelPages
 
   end
 
GO
 
if object_id('dbo.sp_EstTableSizeData', 'P') IS NOT NULL  drop procedure dbo.sp_EstTableSizeData
go
 
create procedure dbo.sp_EstTableSizeData
 @ObjId int, @Num_Rows int,
 @Has_ClusteredIndex int, @Has_UniqueClusteredIndex int,
 @DataPages int OUTPUT
as
/*
**
**  Do not call the procedure directly. It will be called
**  from the main procedure:  sp_estTableSize
**
**
*/
 
--<-- Addition #1: Computed columns do not consume physical space
--<-- Addition #2: A non-unique clustered index on the table adds a "uniqueifer" to
--                 "subsequent" instances of duplicate keys. This "uniqueifer" appears
--                 not only on the index pages, but also on the leaf (data) pages.
--                 See sp_EstTableSizeCI for additional information.
 
set nocount on
 
declare @Num_Cols as smallint, @Num_Variable_Cols as smallint
declare @Fixed_Data_Size as int, @Max_Var_Size as int
declare @Null_Bitmap as smallint, @Variable_Data_Size as int
declare @Row_Size as int, @Rows_Per_Page as int
declare @Free_Rows_Per_Page as int
 
-- Pull together information about the columns, the size of the columns and whether the
--    column is fixed or variable
select @Num_Cols = count(*),
 @Fixed_Data_Size = sum(sc.length * (1 - st.variable)),
 @Num_Variable_Cols = sum(cast(st.variable as smallint)),
 @Max_Var_Size = sum(sc.length * st.variable)
 
from sysobjects as so
 
join syscolumns as sc
  on so.id = sc.id
 
join systypes as st
  on sc.xtype = st.xtype
 
where so.id = @ObjId
and ObjectProperty (so.id, 'IsUserTable') = 1
and ColumnProperty (so.id, sc.name, 'IsComputed')  = 0  --<-- Addition #1
 
set @Null_Bitmap = case when @Fixed_Data_Size = 0 then 0 else (2 + (( @Num_Cols + 7) / 8 ) ) end
set @Variable_Data_Size = case
       when @Num_Variable_Cols = 0
    then 0
    else 2 + (@Num_Variable_Cols + @Num_Variable_Cols) +
     @Max_Var_Size
     end
set @Row_Size = @Fixed_Data_Size + @Variable_Data_Size + @Null_Bitmap + 4 +
                case when @Has_ClusteredIndex = 1 and @Has_UniqueClusteredIndex = 0
                  then 4
                  else 0
                end
                         
set @Rows_Per_Page = 8096 / (@Row_Size + 2)
 
-- If there is a clustered index on the table, get the Fill Factor used
 
declare @Fill_Factor as int
select @Fill_Factor = case
      when IndexProperty (@ObjId, name, 'IndexFillFactor') IS NULL
         then 100
      when IndexProperty (@ObjId, name, 'IndexFillFactor') = 0
         then 100
      else IndexProperty (@ObjId, name, 'IndexFillFactor')
   end
from sysindexes
where id = @ObjId
and IndexProperty(@ObjId, name, 'IsClustered') = 1
set @Fill_Factor = Coalesce (@Fill_Factor, 100)
 
set @Free_Rows_Per_Page = 8096 * ((100 - @Fill_Factor) / 100.0) / @Row_Size
 
set @DataPages = ceiling (1.0 * @Num_Rows / (@Rows_Per_Page - @Free_Rows_Per_Page) )
 
RETURN(0)
 
GO
 

if object_id('dbo.sp_EstTableSize', 'P') IS NOT NULL  drop procedure dbo.sp_EstTableSize
go
 
create procedure dbo.sp_EstTableSize
 @TableName sysname = NULL,
 @Num_Rows int = NULL
as
 
-- Calculations and logic are based upon:
-- The Microsoft SQL Server 7.0 Books Online, plus
-- 'Inside Microsoft SQL Server 7.0' by Ron Soukup and Kalen Delaney,
--   published by Microsoft Press, plus
-- additional information generously supplied by Kalen Delaney.
 
set nocount on
 
-- If no or partial arguments were provided, print usage.
if (@TableName IS NULL) or (@Num_Rows IS NULL) goto usage
 
-- Verify that @TableName is an object name in the current database, and that it is a user
--    table
declare @ObjId int, @ErrMsg varchar(255)
 
select @ObjId = object_id(@TableName)
if (@ObjId IS NULL) OR (ObjectProperty (@ObjId, 'IsUserTable') = 0)
   begin
      set @ErrMsg = @TableName + ' is not a user table in the current database'
      goto ErrExit
   end
 
-- Verify that the estimated number of rows provided by the user is a positive number
if (@Num_Rows <= 0)
   begin
      set @ErrMsg = 'Please enter a positive number for @Num_Rows (estimated number of rows)'
      goto ErrExit
   end
 
-- Compute the estimated number of pages by:
--   calculating the pages needed to store data (sp_EstTableSizeData)
--   calculating the pages needed to store the clustered index (sp_EstTableSizeCI)
--   calculating the pages needed to store each non-clustered index (sp_EstTableSizeNCI)
--   adding the pages together to arrive at a grand total
 
-- From the SQL Server 7.0 Books Online section 'Nonclustered Indexes':
--   'If the clustered index is not a unique index, SQL Server 7.0 adds an internal
--    value to duplicate keys to make them unique. This value is not visible to users.'
-- Thanks to Kalen Delaney for establishing that the length of the internal value is
--    4-bytes (some Microsoft documentation incorrectly states that the length is 8-bytes).
-- Note that this 4-byte 'Uniqueifer' is added only to 'subsequent' instances of duplicate
--    keys. Since the code you are reading is designed to estimate space requirements, we can
--    not assume that the table given to us is populated with data. (If it were populated
--    with data, then it would make better sense to run the system stored procedure
--    sp_spaceused to determine the actual space requirements of the table.)
--    Since we can not assume that the table is populated, we can not make an accurate,
--    or even an informed guess, as to the distribution of data in the clustered index.
--    Thus, keeping in line with the view that we are creating an upper bound of storage
--    requirements, we assume that for a non-unique clustered index, that every row
--    pointer has this extra 4-byte Uniqueifer.
 
-- Thanks to Kalen Delaney for establishing that the Uniqueifer, when present uses space
--    not only in the index pages, but also in the data pages.
 
-- Since the presence of a clustered index, and whether it is unique or not impacts all
--    of the sub-procedures, the determination of its existence is determined up-front.
 
declare @CI_name as sysname, @Indid as smallint
declare @Has_ClusteredIndex as int, @Has_UniqueClusteredIndex as int
 
set @Has_ClusteredIndex = ObjectProperty (@ObjId, 'TableHasClustIndex')
 
if @Has_ClusteredIndex = 1
  begin
    set @CI_name = NULL
    set @Has_UniqueClusteredIndex = 0
  end
else
  begin  -- get the name and index id of the clustered index
    select @CI_name = name, @Indid = indid
    from   sysindexes
    where id = @ObjId
    and   IndexProperty (@Objid, name, 'IsClustered') = 1
 
    set @Has_UniqueClusteredIndex = IndexProperty (@ObjId, @CI_name, 'IsUnique')
  end
 

-- Create a temporary table to hold the details on the non-clustered indexes.
create table #ncindexes
(
 IndexName sysname,
 IndexPages int
)
 
-- Call the supporting sub-procedures
declare @DataPages as int, @CIndex_Row_Size as int, @CIPages as int, @NCIPages as int
 
exec sp_EstTableSizeData @ObjId, @Num_Rows,
    @Has_ClusteredIndex, @Has_UniqueClusteredIndex,
    @DataPages OUTPUT
 
if @Has_ClusteredIndex = 1
   exec sp_EstTableSizeCI @ObjId, @Num_Rows, @DataPages,
     @Indid, @Has_UniqueClusteredIndex,
     @CIndex_Row_Size OUTPUT, @CIPages OUTPUT
else
   begin
      set @CIPages = 0
      set @CIndex_Row_Size = 0
   end
 
exec sp_EstTableSizeNCI @ObjId, @Num_Rows, @DataPages,
   @CIndex_Row_Size, @Has_UniqueClusteredIndex, @NCIPages OUTPUT
 
select 'UserTable' = @TableName
select 'Total Pages' = @DataPages + @CIPages + @NCIPages,
 'Data Pages' = @DataPages,
 'Clustered Index Pages' = @CIPages,
 'Non-Clustered Index Pages' = @NCIPages,
 'Total Size (Mb)' = (@DataPages + @CIPages + @NCIPages) * 8 / 1024
 
if exists (select * from #ncindexes)
   begin
      print   ' '
      print   'Non-Clustered Index Details'
      select  'Index Pages' = IndexPages, 'Index size (Mb)' = IndexPages * 8 / 1024, 'Index Name' = IndexName
      from    #ncindexes
      order by IndexName
   end
 
RETURN(0)
 
usage:
print ' '
print 'Usage:'
print '   exec sp_EstTableSize [@TableName=]userTableName, [@Num_Rows=]estimatedNumberRowsInTable'
print ' '
print 'Note that the result is an ESTIMATE of the storage to be used by the table.'
print ' '
print 'It should be treated as an upper bound of the storage requirements because it is'
print 'assumed that variable length data (varchar, nvarchar, varbinary) will use the'
print 'maximum storage on every row. That is, it is assumed that a varchar(100) field'
print 'will always contain 100 bytes of data.'
print ' '
print 'If your knowledge of the actual data is such that you may assume that on average'
print 'only 20 bytes will be stored in the varchar(100) field, then for a more accurate'
print 'estimate, create a second table definition with a varchar(20) field and use that'
print 'table as input to sp_EstTableSize.'
RETURN (0)
 

ErrExit:
print ' '
print @ErrMsg
RETURN (-1)
 
GO

No comments: