Top N by Group

It is a very common request to select the top N items per group. Examples are the top 2 sales agents per region, the last two orders per customer, etc. There are various techniques to accomplish this. On SQL Server 2005 and 2008 this normally involves CROSS APPLY, TOP, and the ranking functions.

Here are a couple examples of solving this problem utilizing the ranking functions in SQL Server 2005/2008. These methods are very simple and efficient, at the same time providing flexibility to manage ties.

CREATE TABLE Loans (

  loan_nbr INT NOT NULL PRIMARY KEY,

  loan_date DATETIME NOT NULL,

  loan_amount DECIMAL(12, 2) NOT NULL

              DEFAULT 0.0,

  customer_nbr INT NOT NULL,

  loan_type CHAR(1) NOT NULL

            DEFAULT 'P'

            CHECK (loan_type IN ('P'-- personal

                                'B')) -- business

  );

 

INSERT INTO Loans

VALUES (1, '20080801', 12000.00, 2, 'P'),

       (2, '20080805', 15700.00, 1, 'B'),

       (3, '20080610', 12000.00, 3, 'P'),

       (4, '20080401', 5000.00, 1, 'P'),

       (5, '20080715', 25000.00, 4, 'B'),

       (6, '20080610', 25000.00, 5, 'P'),

       (7, '20080501', 1000.00, 6, 'P'),

       (8, '20080810', 6000.00, 7, 'B'),

       (9, '20080815', 2000.00, 8, 'B'),

       (10, '20080815', 1000.00, 9, 'P'),

       (11, '20080715', 5500.00, 10, 'P'),

       (12, '20080615', 1000.00, 11, 'B'),

       (13, '20080820', 6000.00, 12, 'P'),

       (14, '20080510', 28000.00, 6, 'B'),

       (15, '20080815', 2000.00, 10, 'P'),

       (16, '20080810', 1500.00, 8, 'P'),

       (17, '20080817', 10000.00, 10, 'B'),

       (18, '20080816', 2500.00, 9, 'P');  

 

-- Top 3 loans by loan type (no ties)

SELECT loan_nbr, loan_type, loan_amount, rk

FROM (SELECT loan_nbr, loan_type, loan_amount,

            ROW_NUMBER() OVER(PARTITION BY loan_type

                               ORDER BY loan_amount DESC) AS rk

      FROM Loans) AS L

WHERE rk <= 3;

 

-- Top 3 loans by loan type (with ties)

SELECT loan_nbr, loan_type, loan_amount, rk

FROM (SELECT loan_nbr, loan_type, loan_amount,

            DENSE_RANK() OVER(PARTITION BY loan_type

                               ORDER BY loan_amount DESC) AS rk

      FROM Loans) AS L

WHERE rk <= 3;

 

-- Latest loan for each customer

SELECT customer_nbr, loan_nbr, loan_type, loan_amount, loan_date

FROM (SELECT loan_nbr, loan_type, loan_amount,

            customer_nbr, loan_date,

            ROW_NUMBER() OVER(PARTITION BY customer_nbr

                               ORDER BY loan_date DESC) AS rk

      FROM Loans) AS L

WHERE rk = 1;

Labels: