📈 Module 8: Window Functions & Analytics

ROW_NUMBER, RANK, LEAD, LAG, Analytical Functions

OVER
Window Clause
RANK
Ranking
LAG/LEAD
Offset
FRAME
Ranges

🪟 Window Functions Basics

Window Function Syntax

SELECT 
  column_name,
  aggregate_function() OVER (
    [PARTITION BY partition_column]
    [ORDER BY sort_column [ASC|DESC]]
    [ROWS frame_specification]
  ) AS window_result
FROM table_name;

Key Components:

Window Function Categories

Category Functions Purpose
Ranking ROW_NUMBER, RANK, DENSE_RANK Assign position/rank
Aggregate SUM, AVG, COUNT, MIN, MAX Running calculations
Offset LAG, LEAD, FIRST_VALUE, LAST_VALUE Access adjacent rows
Distribution PERCENT_RANK, CUME_DIST, NTILE Relative positioning

🏆 Ranking Functions

ROW_NUMBER, RANK, DENSE_RANK Comparison

-- ROW_NUMBER: Sequential numbers, unique always
SELECT 
  employee_id,
  first_name,
  salary,
  ROW_NUMBER() OVER (ORDER BY salary DESC) AS row_num
FROM employees;

-- RANK: Same rank for ties, gaps in sequence
SELECT 
  employee_id,
  first_name,
  salary,
  RANK() OVER (ORDER BY salary DESC) AS rank_num
FROM employees;

-- DENSE_RANK: Same rank for ties, no gaps
SELECT 
  employee_id,
  first_name,
  salary,
  DENSE_RANK() OVER (ORDER BY salary DESC) AS dense_rank
FROM employees;

-- Ranking by department
SELECT 
  department_id,
  employee_id,
  first_name,
  salary,
  ROW_NUMBER() OVER (
    PARTITION BY department_id 
    ORDER BY salary DESC
  ) AS dept_rank
FROM employees;

Top-N Queries using ROW_NUMBER

-- Top 3 highest paid in each department
WITH ranked_employees AS (
  SELECT 
    department_id,
    employee_id,
    first_name,
    salary,
    ROW_NUMBER() OVER (
      PARTITION BY department_id 
      ORDER BY salary DESC
    ) AS rank_in_dept
  FROM employees
)
SELECT *
FROM ranked_employees
WHERE rank_in_dept <= 3;

⬅️➡️ Offset Functions - LAG & LEAD

LAG and LEAD

Access data from previous or following rows without self-join.

-- LAG: Get previous row value
SELECT 
  employee_id,
  hire_date,
  salary,
  LAG(salary) OVER (ORDER BY hire_date) AS prev_salary
FROM employees;

-- LEAD: Get next row value
SELECT 
  employee_id,
  hire_date,
  salary,
  LEAD(salary) OVER (ORDER BY hire_date) AS next_salary
FROM employees;

-- LAG/LEAD with offset (2 rows back/forward)
SELECT 
  employee_id,
  salary,
  LAG(salary, 2) OVER (ORDER BY employee_id) AS prev_2_salary,
  LEAD(salary, 2) OVER (ORDER BY employee_id) AS next_2_salary
FROM employees;

-- LAG/LEAD with default value
SELECT 
  employee_id,
  salary,
  LAG(salary, 1, 0) OVER (ORDER BY employee_id) AS prev_salary
FROM employees;

-- Calculate salary change
SELECT 
  employee_id,
  first_name,
  salary,
  LAG(salary) OVER (ORDER BY hire_date) AS prev_salary,
  salary - LAG(salary) OVER (ORDER BY hire_date) AS salary_change
FROM employees;

📊 Running Aggregates & Cumulative Functions

Running Totals

-- Running total with cumulative frame
SELECT 
  employee_id,
  salary,
  SUM(salary) OVER (
    ORDER BY employee_id
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  ) AS running_total
FROM employees;

-- Running average by department
SELECT 
  department_id,
  employee_id,
  salary,
  AVG(salary) OVER (
    PARTITION BY department_id
    ORDER BY employee_id
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  ) AS running_avg
FROM employees;

-- Moving average (3-row window)
SELECT 
  employee_id,
  salary,
  AVG(salary) OVER (
    ORDER BY employee_id
    ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
  ) AS moving_avg_3_rows
FROM employees;

🔲 Window Frame Specifications

Frame Type Syntax Includes
Unbounded Preceding UNBOUNDED PRECEDING First row to current
N Preceding N PRECEDING N rows before to current
Current Row CURRENT ROW Current row only
N Following N FOLLOWING Current to N rows after
Unbounded Following UNBOUNDED FOLLOWING Current to last row
-- Default frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
SELECT 
  employee_id,
  salary,
  SUM(salary) OVER (ORDER BY employee_id) AS running_sum
FROM employees;

-- All rows in partition (no ordering default)
SELECT 
  employee_id,
  salary,
  COUNT(*) OVER (PARTITION BY department_id) AS dept_total
FROM employees;

-- Frame with RANGE (groups ties together)
SELECT 
  employee_id,
  salary,
  SUM(salary) OVER (
    ORDER BY salary
    RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  ) AS cumulative_sum
FROM employees;

📊 Distribution Functions

PERCENT_RANK, CUME_DIST, NTILE

-- PERCENT_RANK: Percentage rank (0 to 1)
SELECT 
  employee_id,
  salary,
  PERCENT_RANK() OVER (ORDER BY salary) AS pct_rank
FROM employees;

-- CUME_DIST: Cumulative distribution (0 to 1)
SELECT 
  employee_id,
  salary,
  CUME_DIST() OVER (ORDER BY salary) AS cum_dist
FROM employees;

-- NTILE: Divide into N equal groups
SELECT 
  employee_id,
  salary,
  NTILE(4) OVER (ORDER BY salary) AS quartile
FROM employees;

-- Quartile analysis
SELECT 
  quartile,
  COUNT(*) AS emp_count,
  MIN(salary) AS min_salary,
  MAX(salary) AS max_salary,
  AVG(salary) AS avg_salary
FROM (
  SELECT 
    salary,
    NTILE(4) OVER (ORDER BY salary) AS quartile
  FROM employees
)
GROUP BY quartile;

🚀 Advanced Window Function Examples

FIRST_VALUE and LAST_VALUE

-- Get first and last salary in department
SELECT 
  department_id,
  employee_id,
  salary,
  FIRST_VALUE(salary) OVER (
    PARTITION BY department_id 
    ORDER BY hire_date
  ) AS first_hire_salary,
  LAST_VALUE(salary) OVER (
    PARTITION BY department_id 
    ORDER BY hire_date
    ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
  ) AS last_hire_salary
FROM employees;

Complex Analysis Example

-- Employee performance ranking with variance
WITH emp_stats AS (
  SELECT 
    e.employee_id,
    e.first_name,
    e.salary,
    d.department_name,
    AVG(e.salary) OVER (PARTITION BY d.department_id) AS dept_avg_salary,
    ROW_NUMBER() OVER (
      PARTITION BY d.department_id 
      ORDER BY e.salary DESC
    ) AS dept_rank,
    PERCENT_RANK() OVER (
      ORDER BY e.salary
    ) AS company_percentile
  FROM employees e
  JOIN departments d ON e.department_id = d.department_id
)
SELECT *
FROM emp_stats
WHERE dept_rank <= 3 OR company_percentile > 0.75;
💡 Tip: Window functions are processed after WHERE but before ORDER BY. Always specify PARTITION BY for grouped analysis. Use UNBOUNDED FOLLOWING in LAST_VALUE to get actual last row.
✓ Learning Checklist - Module 8