🪟 Window Functions Basics
Window Function Syntax
SELECT
column_name,
aggregate_function() OVER (
[PARTITION BY partition_column]
[ORDER BY sort_column [ASC|DESC]]
[ROWS frame_specification]
) AS window_result
FROM table_name;
Key Components:
- PARTITION BY: Divides rows into groups (like GROUP BY for windows)
- ORDER BY: Sorts rows within partition
- ROWS/RANGE: Defines the frame (window boundaries)
Window Function Categories
| Category | Functions | Purpose |
|---|---|---|
| Ranking | ROW_NUMBER, RANK, DENSE_RANK | Assign position/rank |
| Aggregate | SUM, AVG, COUNT, MIN, MAX | Running calculations |
| Offset | LAG, LEAD, FIRST_VALUE, LAST_VALUE | Access adjacent rows |
| Distribution | PERCENT_RANK, CUME_DIST, NTILE | Relative positioning |
🏆 Ranking Functions
ROW_NUMBER, RANK, DENSE_RANK Comparison
-- ROW_NUMBER: Sequential numbers, unique always
SELECT
employee_id,
first_name,
salary,
ROW_NUMBER() OVER (ORDER BY salary DESC) AS row_num
FROM employees;
-- RANK: Same rank for ties, gaps in sequence
SELECT
employee_id,
first_name,
salary,
RANK() OVER (ORDER BY salary DESC) AS rank_num
FROM employees;
-- DENSE_RANK: Same rank for ties, no gaps
SELECT
employee_id,
first_name,
salary,
DENSE_RANK() OVER (ORDER BY salary DESC) AS dense_rank
FROM employees;
-- Ranking by department
SELECT
department_id,
employee_id,
first_name,
salary,
ROW_NUMBER() OVER (
PARTITION BY department_id
ORDER BY salary DESC
) AS dept_rank
FROM employees;
Top-N Queries using ROW_NUMBER
-- Top 3 highest paid in each department
WITH ranked_employees AS (
SELECT
department_id,
employee_id,
first_name,
salary,
ROW_NUMBER() OVER (
PARTITION BY department_id
ORDER BY salary DESC
) AS rank_in_dept
FROM employees
)
SELECT *
FROM ranked_employees
WHERE rank_in_dept <= 3;
⬅️➡️ Offset Functions - LAG & LEAD
LAG and LEAD
Access data from previous or following rows without self-join.
-- LAG: Get previous row value
SELECT
employee_id,
hire_date,
salary,
LAG(salary) OVER (ORDER BY hire_date) AS prev_salary
FROM employees;
-- LEAD: Get next row value
SELECT
employee_id,
hire_date,
salary,
LEAD(salary) OVER (ORDER BY hire_date) AS next_salary
FROM employees;
-- LAG/LEAD with offset (2 rows back/forward)
SELECT
employee_id,
salary,
LAG(salary, 2) OVER (ORDER BY employee_id) AS prev_2_salary,
LEAD(salary, 2) OVER (ORDER BY employee_id) AS next_2_salary
FROM employees;
-- LAG/LEAD with default value
SELECT
employee_id,
salary,
LAG(salary, 1, 0) OVER (ORDER BY employee_id) AS prev_salary
FROM employees;
-- Calculate salary change
SELECT
employee_id,
first_name,
salary,
LAG(salary) OVER (ORDER BY hire_date) AS prev_salary,
salary - LAG(salary) OVER (ORDER BY hire_date) AS salary_change
FROM employees;
📊 Running Aggregates & Cumulative Functions
Running Totals
-- Running total with cumulative frame
SELECT
employee_id,
salary,
SUM(salary) OVER (
ORDER BY employee_id
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS running_total
FROM employees;
-- Running average by department
SELECT
department_id,
employee_id,
salary,
AVG(salary) OVER (
PARTITION BY department_id
ORDER BY employee_id
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS running_avg
FROM employees;
-- Moving average (3-row window)
SELECT
employee_id,
salary,
AVG(salary) OVER (
ORDER BY employee_id
ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
) AS moving_avg_3_rows
FROM employees;
🔲 Window Frame Specifications
| Frame Type | Syntax | Includes |
|---|---|---|
| Unbounded Preceding | UNBOUNDED PRECEDING | First row to current |
| N Preceding | N PRECEDING | N rows before to current |
| Current Row | CURRENT ROW | Current row only |
| N Following | N FOLLOWING | Current to N rows after |
| Unbounded Following | UNBOUNDED FOLLOWING | Current to last row |
-- Default frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
SELECT
employee_id,
salary,
SUM(salary) OVER (ORDER BY employee_id) AS running_sum
FROM employees;
-- All rows in partition (no ordering default)
SELECT
employee_id,
salary,
COUNT(*) OVER (PARTITION BY department_id) AS dept_total
FROM employees;
-- Frame with RANGE (groups ties together)
SELECT
employee_id,
salary,
SUM(salary) OVER (
ORDER BY salary
RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS cumulative_sum
FROM employees;
📊 Distribution Functions
PERCENT_RANK, CUME_DIST, NTILE
-- PERCENT_RANK: Percentage rank (0 to 1)
SELECT
employee_id,
salary,
PERCENT_RANK() OVER (ORDER BY salary) AS pct_rank
FROM employees;
-- CUME_DIST: Cumulative distribution (0 to 1)
SELECT
employee_id,
salary,
CUME_DIST() OVER (ORDER BY salary) AS cum_dist
FROM employees;
-- NTILE: Divide into N equal groups
SELECT
employee_id,
salary,
NTILE(4) OVER (ORDER BY salary) AS quartile
FROM employees;
-- Quartile analysis
SELECT
quartile,
COUNT(*) AS emp_count,
MIN(salary) AS min_salary,
MAX(salary) AS max_salary,
AVG(salary) AS avg_salary
FROM (
SELECT
salary,
NTILE(4) OVER (ORDER BY salary) AS quartile
FROM employees
)
GROUP BY quartile;
🚀 Advanced Window Function Examples
FIRST_VALUE and LAST_VALUE
-- Get first and last salary in department
SELECT
department_id,
employee_id,
salary,
FIRST_VALUE(salary) OVER (
PARTITION BY department_id
ORDER BY hire_date
) AS first_hire_salary,
LAST_VALUE(salary) OVER (
PARTITION BY department_id
ORDER BY hire_date
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS last_hire_salary
FROM employees;
Complex Analysis Example
-- Employee performance ranking with variance
WITH emp_stats AS (
SELECT
e.employee_id,
e.first_name,
e.salary,
d.department_name,
AVG(e.salary) OVER (PARTITION BY d.department_id) AS dept_avg_salary,
ROW_NUMBER() OVER (
PARTITION BY d.department_id
ORDER BY e.salary DESC
) AS dept_rank,
PERCENT_RANK() OVER (
ORDER BY e.salary
) AS company_percentile
FROM employees e
JOIN departments d ON e.department_id = d.department_id
)
SELECT *
FROM emp_stats
WHERE dept_rank <= 3 OR company_percentile > 0.75;
💡 Tip: Window functions are processed after WHERE but before ORDER BY. Always specify PARTITION BY for grouped analysis. Use UNBOUNDED FOLLOWING in LAST_VALUE to get actual last row.
✓ Learning Checklist - Module 8