🔄 Module 6: Subqueries & CTEs

Scalar, Correlated Subqueries, WITH Clause (Common Table Expressions)

Subquery
Inner Query
Scalar
Single Value
CTE
WITH Clause
Recursive
Hierarchical

🎯 Subquery Basics

Types of Subqueries

Type Returns Usage Example
Scalar Single value SELECT, WHERE WHERE salary = (SELECT MAX(salary))
Row Single row WHERE, HAVING WHERE (id, dept) = (SELECT id, dept)
Table Multiple rows/cols FROM clause FROM (SELECT * FROM emp) subq
Correlated Depends on outer query WHERE, HAVING WHERE e.dept_id = d.dept_id

Scalar Subqueries

Return a single value (one row, one column). Can be used in SELECT, WHERE, HAVING.

-- Scalar subquery in WHERE clause
SELECT 
  employee_id,
  first_name,
  salary
FROM employees
WHERE salary = (SELECT MAX(salary) FROM employees);

-- Scalar subquery in SELECT clause
SELECT 
  employee_id,
  first_name,
  salary,
  (SELECT AVG(salary) FROM employees) AS avg_salary,
  salary - (SELECT AVG(salary) FROM employees) AS diff_from_avg
FROM employees;

-- Scalar subquery in HAVING clause
SELECT 
  department_id,
  COUNT(*) AS emp_count,
  AVG(salary) AS avg_dept_salary
FROM employees
GROUP BY department_id
HAVING AVG(salary) > (SELECT AVG(salary) FROM employees);

📊 Multi-Row Subqueries

IN, ANY, ALL Operators

-- IN operator - match any value in subquery result
SELECT *
FROM employees
WHERE department_id IN (
  SELECT department_id FROM departments 
  WHERE location_id = 1
);

-- ANY operator - compare with any value in subquery
SELECT *
FROM employees
WHERE salary > ANY (
  SELECT salary FROM employees 
  WHERE department_id = 30
);

-- ALL operator - compare with all values in subquery
SELECT *
FROM employees
WHERE salary > ALL (
  SELECT salary FROM employees 
  WHERE department_id = 30
);

-- EXISTS operator - check if subquery returns any rows
SELECT *
FROM employees e
WHERE EXISTS (
  SELECT 1 FROM projects p 
  WHERE p.lead_id = e.employee_id
);

-- NOT EXISTS operator
SELECT *
FROM departments d
WHERE NOT EXISTS (
  SELECT 1 FROM employees e 
  WHERE e.department_id = d.department_id
);

🔗 Correlated Subqueries

Subqueries that reference columns from outer query. Executed once per outer row.

-- Correlated subquery - find employees earning above their dept average
SELECT 
  e.employee_id,
  e.first_name,
  e.salary,
  e.department_id
FROM employees e
WHERE e.salary > (
  SELECT AVG(e2.salary) 
  FROM employees e2 
  WHERE e2.department_id = e.department_id
);

-- Correlated subquery with ranking
SELECT 
  e.employee_id,
  e.first_name,
  e.salary,
  (
    SELECT COUNT(*) 
    FROM employees e2 
    WHERE e2.department_id = e.department_id 
      AND e2.salary > e.salary
  ) AS num_higher_paid
FROM employees e;

📌 Common Table Expressions (CTEs) - WITH Clause

Basic CTE Syntax

-- Simple CTE
WITH high_earners AS (
  SELECT 
    employee_id,
    first_name,
    salary
  FROM employees
  WHERE salary > 100000
)
SELECT *
FROM high_earners
ORDER BY salary DESC;

-- Multiple CTEs
WITH dept_stats AS (
  SELECT 
    department_id,
    COUNT(*) AS emp_count,
    AVG(salary) AS avg_salary
  FROM employees
  GROUP BY department_id
),
high_paid AS (
  SELECT 
    employee_id,
    first_name,
    salary,
    department_id
  FROM employees
  WHERE salary > 100000
)
SELECT 
  hp.employee_id,
  hp.first_name,
  hp.salary,
  ds.emp_count,
  ds.avg_salary
FROM high_paid hp
JOIN dept_stats ds ON hp.department_id = ds.department_id;

CTE with JOINs

-- CTE with complex JOIN logic
WITH employee_projects AS (
  SELECT 
    e.employee_id,
    e.first_name,
    COUNT(p.project_id) AS project_count
  FROM employees e
  LEFT JOIN projects p ON e.employee_id = p.lead_id
  GROUP BY e.employee_id, e.first_name
)
SELECT *
FROM employee_projects
WHERE project_count > 2
ORDER BY project_count DESC;

🔀 Recursive CTEs

For hierarchical or recursive data (organizational charts, bill of materials, etc.)

-- Recursive CTE - organizational hierarchy
WITH org_hierarchy AS (
  -- Anchor member: start with top-level employees
  SELECT 
    employee_id,
    first_name,
    manager_id,
    1 AS level
  FROM employees
  WHERE manager_id IS NULL
  
  UNION ALL
  
  -- Recursive member: find employees under each manager
  SELECT 
    e.employee_id,
    e.first_name,
    e.manager_id,
    oh.level + 1
  FROM employees e
  INNER JOIN org_hierarchy oh ON e.manager_id = oh.employee_id
)
SELECT *
FROM org_hierarchy
ORDER BY level, employee_id;

Recursive CTE with Path

-- Recursive CTE - build complete hierarchy path
WITH org_tree AS (
  -- Anchor: top-level managers
  SELECT 
    employee_id,
    first_name,
    manager_id,
    first_name AS path,
    1 AS depth
  FROM employees
  WHERE manager_id IS NULL
  
  UNION ALL
  
  -- Recursive: add children with path concatenation
  SELECT 
    e.employee_id,
    e.first_name,
    e.manager_id,
    ot.path || ' → ' || e.first_name,
    ot.depth + 1
  FROM employees e
  INNER JOIN org_tree ot ON e.manager_id = ot.employee_id
  WHERE ot.depth < 10  -- Prevent infinite recursion
)
SELECT *
FROM org_tree
ORDER BY path;

⚡ Performance & Best Practices

Subquery vs CTE vs JOIN

Approach Performance Readability Best For
JOIN ⭐⭐⭐ Best Good Simple relationships
CTE ⭐⭐⭐ Best ⭐⭐⭐ Excellent Complex logic, reuse
Scalar Subquery ⭐ Poor Moderate Small result sets
Correlated ⭐⭐ Moderate Moderate Row-by-row logic
💡 Tip: Prefer CTEs for readability and reusability. Use JOINs for simple relationships. Avoid correlated subqueries in WHERE with large tables - use JOINs or CTEs instead.

CTE Best Practices

✓ Learning Checklist - Module 6