Saturday, December 15, 2007

Auxiliary Tables

Auxiliary tables (also referred as utility or helper tables) are a great tool for many needs in SQL. The good part is they are portable between systems, easy to create and normally do not take much storage. There are many benefits: faster SQL, simplified problem solutions, provide flexibility and options that are not easy to replicate with pure SQL code, and in many cases will help to avoid procedural coding and offer a set based way.

Here are samples for creating the two most common auxiliary tables: table with numbers and calendar table, as well as a couple brief examples of utilizing them.

-- Create table structure

CREATE TABLE Numbers (

 nbr INT NOT NULL

   CONSTRAINT df_numbers_nbr

   DEFAULT 1

   CONSTRAINT ck_numbers_nbr

   CHECK (nbr > 0)

   CONSTRAINT pk_numbers

   PRIMARY KEY);

 

-- Populate via cross joining CTEs

-- Very fast, can be implemented as UDF

WITH

Num1 (n) AS (SELECT 1 UNION ALL SELECT 1),

Num2 (n) AS (SELECT 1 FROM Num1 AS X, Num1 AS Y),

Num3 (n) AS (SELECT 1 FROM Num2 AS X, Num2 AS Y),

Num4 (n) AS (SELECT 1 FROM Num3 AS X, Num3 AS Y),

Num5 (n) AS (SELECT 1 FROM Num4 AS X, Num4 AS Y),

Num6 (n) AS (SELECT 1 FROM Num5 AS X, Num5 AS Y),

Nums (n) AS

(SELECT ROW_NUMBER() OVER(ORDER BY n)

 FROM Num6)

INSERT INTO Numbers (nbr)

SELECT n FROM Nums

WHERE n <= 1000000;

 

-- Populate via generating digits and multiplication

-- for tens, hundreds, thousands, etc.

WITH Digits (n) AS

( SELECT 0 UNION SELECT 1 UNION SELECT 2

  UNION SELECT 3 UNION SELECT 4 UNION SELECT 5

  UNION SELECT 6 UNION SELECT 7 UNION SELECT 8

  UNION SELECT 9),

Nums (n) AS

( SELECT VII.n * 1000000 +

        VI.n * 100000 +

        V.n * 10000 +

        IV.n * 1000 +

        III.n * 100 +

        II.n * 10 +

        I.n

  FROM Digits AS I

  CROSS JOIN Digits AS II

  CROSS JOIN Digits AS III

  CROSS JOIN Digits AS IV

  CROSS JOIN Digits AS V

  CROSS JOIN Digits AS VI

  CROSS JOIN Digits AS VII

)

INSERT INTO Numbers (nbr)

SELECT n FROM Nums

WHERE n BETWEEN 1 AND 1000000;

 

-- One example of using table with numbers

-- to parse a list string to table format

-- with index for each element

DECLARE @list VARCHAR(100);

DECLARE @delimiter CHAR(1);

 

SET @list = 'Mon,Tue,Wed,Thu,Fri,Sat,Sun';

SET @delimiter = ',';

 

-- Split the list based on delimiter

SELECT SUBSTRING(@list, nbr,

                CHARINDEX(@delimiter,

                           @list + @delimiter, nbr) - nbr) AS list_value,

       nbr + 1 - LEN(REPLACE(LEFT(@list, nbr), @delimiter, '')) AS list_index

FROM Numbers

WHERE SUBSTRING(@delimiter + @list, nbr, 1) = @delimiter

  AND nbr < LEN(@list) + 1;

 

/*

 

-- Results

list_value list_index

---------- -----------

Mon        1

Tue        2

Wed        3

Thu        4

Fri        5

Sat        6

Sun        7

 

 

*/

 

-- Another example to clean non-numeric characters from string

-- The table with numbers is used to slice the string to characters

-- then non-numeric characters are eliminated and the string

-- is concatenated back using FOR XML PATH with blank element name

DECLARE @string VARCHAR(30);

 

SET @string = '01234ab23-97z@';

 

SELECT SUBSTRING(@string, nbr, 1)

FROM Numbers

WHERE nbr <= LEN(@string)

  AND SUBSTRING(@string, nbr, 1) LIKE '[0-9]'

FOR XML PATH('');

 

/*

 

-- Result

012342397

 

*/

 

-- Calendar table structure

CREATE TABLE Calendar (

 cdate DATETIME NOT NULL

   CONSTRAINT df_calendar_cdate

   DEFAULT DATEADD(day, DATEDIFF(day, 0, CURRENT_TIMESTAMP), 0)

   CONSTRAINT ck_calendar_cdate

   CHECK (cdate = DATEADD(day, DATEDIFF(day, 0, cdate), 0))

   CONSTRAINT pk_calendar

   PRIMARY KEY,

 cweek INT NOT NULL

   CONSTRAINT df_calendar_cweek

   DEFAULT 1

   CONSTRAINT ck_calendar_cweek

   CHECK (cweek BETWEEN 1 AND 53),

 cweekday INT NOT NULL

   CONSTRAINT df_calendar_cweekday

   DEFAULT 1

   CONSTRAINT ck_calendar_cweekday

   CHECK (cweekday BETWEEN 1 AND 7),

 cyear INT NOT NULL

   CONSTRAINT df_calendar_cyear

   DEFAULT 1900

   CONSTRAINT ck_calendar_cyear

   CHECK (cyear >= 1900),

 cquarter INT NOT NULL

   CONSTRAINT df_calendar_cquarter

   DEFAULT 1

   CONSTRAINT ck_calendar_cquarter

   CHECK (cquarter BETWEEN 1 AND 4),

 cmonth INT NOT NULL

   CONSTRAINT df_calendar_cmonth

   DEFAULT 1

   CONSTRAINT ck_calendar_cmonth

   CHECK (cmonth BETWEEN 1 AND 12),

 cday INT NOT NULL

   CONSTRAINT df_calendar_cday

   DEFAULT 1

   CONSTRAINT ck_calendar_cday

   CHECK (cday BETWEEN 1 AND 31),

 cday_of_year INT NOT NULL

   CONSTRAINT df_calendar_cday_of_year

   DEFAULT 1

   CONSTRAINT ck_calendar_cday_of_year

   CHECK (cday_of_year BETWEEN 1 AND 366),

 cmonth_name VARCHAR(9) NOT NULL

   CONSTRAINT df_calendar_cmonth_name

   DEFAULT 'N/A',

 cday_name VARCHAR(9) NOT NULL

   CONSTRAINT df_calendar_cday_name

   DEFAULT 'N/A',

 holiday VARCHAR(35) NOT NULL

   CONSTRAINT df_calendar_holiday

   DEFAULT 'N/A');

 

 

-- Calculate number of days for 10 years from '20080101'

SELECT DATEDIFF(day, '20080101', '20180101') AS days;

 

/*

 

-- Result

3653

 

*/

 

-- Insert 3653 days, which is 10 years

-- Another use for table with numbers

INSERT INTO Calendar (cdate)

SELECT DATEADD(dd, N.nbr - 1, '20080101')

FROM Numbers AS N

WHERE N.nbr <= 3653;

 

-- Populate calendar columns

-- First day of the week is logically set to Monday

-- Can be enhanced to have column for

-- ISO week as it differs from internal SQL Server

-- week date part

UPDATE Calendar

SET cweek = DATEPART(wk, cdate - DATEPART(dw, cdate + @@DATEFIRST - 1) + 1),

    cweekday = DATEPART(dw, cdate + @@DATEFIRST - 1),

    cyear = DATEPART(yyyy, cdate),

    cquarter = DATEPART(qq, cdate),

    cmonth = DATEPART(mm, cdate),

    cday = DATEPART(day, cdate),

    cday_of_year = DATEPART(dy, cdate),

    cmonth_name = DATENAME(mm, cdate),

    cday_name = DATENAME(dw, cdate);

 

-- Add some fixed date official holidays

-- See http://en.wikipedia.org/wiki/List_of_holidays_by_country

-- for full list by country

 

-- New Year's Day

UPDATE Calendar

SET holiday = 'New Year''s Day'

WHERE cday = 1

  AND cmonth = 1;

 

-- Christmas Eve

UPDATE Calendar

SET holiday = 'Christmas Eve'

WHERE cday = 24

  AND cmonth = 12;

 

-- Christmas Day

UPDATE Calendar

SET holiday = 'Christmas Day'

WHERE cday = 25

  AND cmonth = 12;

 

-- New Year's Eve

UPDATE Calendar

SET holiday = 'New Year''s Eve'

WHERE cday = 31

  AND cmonth = 12;

 

-- Get work days for year 2008

-- Note that not all official holidays are defined

-- and this is only counting non-weekend days

-- and existing holidays

SELECT COUNT(*) AS work_days

FROM Calendar

WHERE cyear = 2008

  AND cweekday NOT IN (6, 7) -- 6 = Saturday; 7 = Sunday

  AND holiday = 'N/A';

 

/*

 

-- Result

work_days

-----------

258

 

*/



To get ISO week calculation use the function example IsoWeek in SQL Server Books Online under CREATE FUNCTION:
http://msdn2.microsoft.com/en-us/library/ms186755.aspx

In SQL Server 2008 the DATEPART function has been enhanced and has date part parameter ISO_WEEK, which supports ISO 8601 week numbering:
http://msdn2.microsoft.com/en-us/library/ms174420(SQL.100).aspx

On SQL Server 2008 the calendar table can be modeled using the new DATE data type which will eliminate the need of constraints to handle the time portion:
http://technet.microsoft.com/en-us/library/bb630352(SQL.100).aspx

4 comments:

Mohammad said...

Hi Plamen Ratchev.
I like your weblog very much!
I have an idea for publishing numbers in the numbers table.
The approch is mix of recursive CTE and ROW_NUMBER and CROSS JOIN.
For one million numbers I use following method. Is the method efficient and best?

;WITH R_CTE_1(i) AS
(SELECT 1 UNION ALL
SELECT i + 1
FROM R_CTE_1
WHERE i < 100),R_CTE_2(i) AS
(SELECT 1 UNION ALL
SELECT i + 1
FROM R_CTE_2
WHERE i < 100),R_CTE_3(i) AS
(SELECT 1 UNION ALL
SELECT i + 1
FROM R_CTE_3
WHERE i < 100)
SELECT ROW_NUMBER() OVER(ORDER BY (SELECT 0)) AS n
FROM R_CTE_1
CROSS JOIN R_CTE_2
CROSS JOIN R_CTE_3
OPTION (MAXRECURSION 0);

Plamen Ratchev said...

Hi Mohammad,

This is interesting approach. Have in mind that recursive CTE are not performing very well as they are pretty much a hidden cursor that iterates. In your case it is very small set and may not matter too much.

If you want to compare methods I suggest reading the following article and testing each method, then compare to yours:

http://www.projectdmx.com/tsql/tblnumbers.aspx

Mohammad said...

Thanks for the like.
Also when we need very small set of data is simpler to execute following query:

SELECT TOP 1000 number
FROM master..spt_values
WHERE type='p' AND number > 0
ORDER BY number;

Mohammad said...

Hi Plamen,
I do not need to create separate instances of the base CTEs, I could use one several times, like this:

;WITH R_CTE(i) AS
(SELECT 1
UNION ALL
SELECT i + 1 FROM R_CTE WHERE i < 100)
SELECT ROW_NUMBER() OVER(ORDER BY (SELECT 0)) AS n
FROM R_CTE AS C1
CROSS JOIN R_CTE AS C2
CROSS JOIN R_CTE AS C3
OPTION (MAXRECURSION 0);