🎯 Subquery Basics
Types of Subqueries
| Type | Returns | Usage | Example |
|---|---|---|---|
| Scalar | Single value | SELECT, WHERE | WHERE salary = (SELECT MAX(salary)) |
| Row | Single row | WHERE, HAVING | WHERE (id, dept) = (SELECT id, dept) |
| Table | Multiple rows/cols | FROM clause | FROM (SELECT * FROM emp) subq |
| Correlated | Depends on outer query | WHERE, HAVING | WHERE e.dept_id = d.dept_id |
Scalar Subqueries
Return a single value (one row, one column). Can be used in SELECT, WHERE, HAVING.
-- Scalar subquery in WHERE clause
SELECT
employee_id,
first_name,
salary
FROM employees
WHERE salary = (SELECT MAX(salary) FROM employees);
-- Scalar subquery in SELECT clause
SELECT
employee_id,
first_name,
salary,
(SELECT AVG(salary) FROM employees) AS avg_salary,
salary - (SELECT AVG(salary) FROM employees) AS diff_from_avg
FROM employees;
-- Scalar subquery in HAVING clause
SELECT
department_id,
COUNT(*) AS emp_count,
AVG(salary) AS avg_dept_salary
FROM employees
GROUP BY department_id
HAVING AVG(salary) > (SELECT AVG(salary) FROM employees);
📊 Multi-Row Subqueries
IN, ANY, ALL Operators
-- IN operator - match any value in subquery result
SELECT *
FROM employees
WHERE department_id IN (
SELECT department_id FROM departments
WHERE location_id = 1
);
-- ANY operator - compare with any value in subquery
SELECT *
FROM employees
WHERE salary > ANY (
SELECT salary FROM employees
WHERE department_id = 30
);
-- ALL operator - compare with all values in subquery
SELECT *
FROM employees
WHERE salary > ALL (
SELECT salary FROM employees
WHERE department_id = 30
);
-- EXISTS operator - check if subquery returns any rows
SELECT *
FROM employees e
WHERE EXISTS (
SELECT 1 FROM projects p
WHERE p.lead_id = e.employee_id
);
-- NOT EXISTS operator
SELECT *
FROM departments d
WHERE NOT EXISTS (
SELECT 1 FROM employees e
WHERE e.department_id = d.department_id
);
🔗 Correlated Subqueries
Subqueries that reference columns from outer query. Executed once per outer row.
-- Correlated subquery - find employees earning above their dept average
SELECT
e.employee_id,
e.first_name,
e.salary,
e.department_id
FROM employees e
WHERE e.salary > (
SELECT AVG(e2.salary)
FROM employees e2
WHERE e2.department_id = e.department_id
);
-- Correlated subquery with ranking
SELECT
e.employee_id,
e.first_name,
e.salary,
(
SELECT COUNT(*)
FROM employees e2
WHERE e2.department_id = e.department_id
AND e2.salary > e.salary
) AS num_higher_paid
FROM employees e;
📌 Common Table Expressions (CTEs) - WITH Clause
Basic CTE Syntax
-- Simple CTE
WITH high_earners AS (
SELECT
employee_id,
first_name,
salary
FROM employees
WHERE salary > 100000
)
SELECT *
FROM high_earners
ORDER BY salary DESC;
-- Multiple CTEs
WITH dept_stats AS (
SELECT
department_id,
COUNT(*) AS emp_count,
AVG(salary) AS avg_salary
FROM employees
GROUP BY department_id
),
high_paid AS (
SELECT
employee_id,
first_name,
salary,
department_id
FROM employees
WHERE salary > 100000
)
SELECT
hp.employee_id,
hp.first_name,
hp.salary,
ds.emp_count,
ds.avg_salary
FROM high_paid hp
JOIN dept_stats ds ON hp.department_id = ds.department_id;
CTE with JOINs
-- CTE with complex JOIN logic
WITH employee_projects AS (
SELECT
e.employee_id,
e.first_name,
COUNT(p.project_id) AS project_count
FROM employees e
LEFT JOIN projects p ON e.employee_id = p.lead_id
GROUP BY e.employee_id, e.first_name
)
SELECT *
FROM employee_projects
WHERE project_count > 2
ORDER BY project_count DESC;
🔀 Recursive CTEs
For hierarchical or recursive data (organizational charts, bill of materials, etc.)
-- Recursive CTE - organizational hierarchy
WITH org_hierarchy AS (
-- Anchor member: start with top-level employees
SELECT
employee_id,
first_name,
manager_id,
1 AS level
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive member: find employees under each manager
SELECT
e.employee_id,
e.first_name,
e.manager_id,
oh.level + 1
FROM employees e
INNER JOIN org_hierarchy oh ON e.manager_id = oh.employee_id
)
SELECT *
FROM org_hierarchy
ORDER BY level, employee_id;
Recursive CTE with Path
-- Recursive CTE - build complete hierarchy path
WITH org_tree AS (
-- Anchor: top-level managers
SELECT
employee_id,
first_name,
manager_id,
first_name AS path,
1 AS depth
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive: add children with path concatenation
SELECT
e.employee_id,
e.first_name,
e.manager_id,
ot.path || ' → ' || e.first_name,
ot.depth + 1
FROM employees e
INNER JOIN org_tree ot ON e.manager_id = ot.employee_id
WHERE ot.depth < 10 -- Prevent infinite recursion
)
SELECT *
FROM org_tree
ORDER BY path;
⚡ Performance & Best Practices
Subquery vs CTE vs JOIN
| Approach | Performance | Readability | Best For |
|---|---|---|---|
| JOIN | ⭐⭐⭐ Best | Good | Simple relationships |
| CTE | ⭐⭐⭐ Best | ⭐⭐⭐ Excellent | Complex logic, reuse |
| Scalar Subquery | ⭐ Poor | Moderate | Small result sets |
| Correlated | ⭐⭐ Moderate | Moderate | Row-by-row logic |
💡 Tip: Prefer CTEs for readability and reusability. Use JOINs for simple relationships. Avoid correlated subqueries in WHERE with large tables - use JOINs or CTEs instead.
CTE Best Practices
- Use meaningful names: CTE names should clearly describe their content
- Avoid deep nesting: Recursion depth can impact performance
- Place filters early: In CTEs and anchor queries, not just outer query
- Materialize intermediate results: CTEs help the optimizer
- Test with EXPLAIN: Verify execution plans
✓ Learning Checklist - Module 6