💾 Oracle Data Pump
Data Pump Export & Import
-- Export tables using expdp (command line)
-- Data Pump uses DUMPFILE=/LOGFILE= and a server-side DIRECTORY object;
-- FILE=/LOG= belong to the legacy exp/imp utilities and are not valid here.
expdp scott/tiger TABLES=employees,departments DIRECTORY=dpump_dir DUMPFILE=exp_data.dmp LOGFILE=exp.log
-- Export entire schema (SCHEMAS=; FULL=Y would export the whole database)
expdp scott/tiger SCHEMAS=scott DIRECTORY=dpump_dir DUMPFILE=full_db.dmp LOGFILE=full.log
-- Export with parallel processing (%U expands to a unique file per worker)
expdp scott/tiger TABLES=employees PARALLEL=4 DIRECTORY=dpump_dir DUMPFILE=exp_%U.dmp LOGFILE=exp.log
-- Import table (impdp)
impdp scott/tiger TABLES=employees DIRECTORY=dpump_dir DUMPFILE=exp_data.dmp LOGFILE=imp.log
-- Import with remap schema (load scott's objects into new_user)
impdp scott/tiger TABLES=employees DIRECTORY=dpump_dir DUMPFILE=exp_data.dmp REMAP_SCHEMA=scott:new_user LOGFILE=imp.log
-- DBMS_DATAPUMP API (PL/SQL): server-side table-mode export job.
DECLARE
  h1 NUMBER;  -- Data Pump job handle
BEGIN
  h1 := DBMS_DATAPUMP.OPEN(
    operation   => 'EXPORT',
    job_mode    => 'TABLE',
    remote_link => NULL,
    job_name    => 'EXP_JOB',
    version     => 'LATEST'
  );
  -- Dump file, written to the DPUMP_DIR directory object
  DBMS_DATAPUMP.ADD_FILE(
    handle    => h1,
    filename  => 'exp_data.dmp',
    directory => 'DPUMP_DIR',
    filetype  => DBMS_DATAPUMP.KU$_FILE_TYPE_DUMP_FILE
  );
  -- Log file for job progress and errors
  DBMS_DATAPUMP.ADD_FILE(
    handle    => h1,
    filename  => 'exp.log',
    directory => 'DPUMP_DIR',
    filetype  => DBMS_DATAPUMP.KU$_FILE_TYPE_LOG_FILE
  );
  -- TABLE mode needs a metadata filter naming which tables to export;
  -- without it the job has nothing to select.
  DBMS_DATAPUMP.METADATA_FILTER(
    handle => h1,
    name   => 'NAME_EXPR',
    value  => 'IN (''EMPLOYEES'')'
  );
  DBMS_DATAPUMP.START_JOB(h1);
  -- Release the handle; the job keeps running in the background.
  DBMS_DATAPUMP.DETACH(h1);
END;
/
📄 External Tables
Creating External Tables
-- Create external table from CSV file
-- The table is metadata only: queries read employees.csv from the
-- data_dir directory object at query time; no data is stored in the DB.
CREATE TABLE emp_external (
emp_id NUMBER,
name VARCHAR2(50),
salary NUMBER,
dept_id NUMBER
)
ORGANIZATION EXTERNAL (
TYPE ORACLE_LOADER
DEFAULT DIRECTORY data_dir
ACCESS PARAMETERS (
-- one record per line, comma-separated fields
RECORDS DELIMITED BY NEWLINE
FIELDS TERMINATED BY ','
-- absent trailing fields become NULL instead of rejecting the row
MISSING FIELD VALUES ARE NULL
(
emp_id,
name,
salary,
dept_id
)
)
LOCATION ('employees.csv')
);
-- Create external table from fixed-width file
-- Column definitions take plain SQL datatypes; the external field
-- datatypes (CHAR(n)) belong in the ACCESS PARAMETERS field list.
-- "NUMBER EXTERNAL CHAR(5)" in the column list is not valid DDL.
CREATE TABLE emp_fixed (
emp_id NUMBER,
name VARCHAR2(50),
salary NUMBER
)
ORGANIZATION EXTERNAL (
TYPE ORACLE_LOADER
DEFAULT DIRECTORY data_dir
ACCESS PARAMETERS (
-- every record is exactly 100 bytes
RECORDS FIXED 100
FIELDS (
emp_id POSITION (1:5) CHAR(5),
name POSITION (6:35) CHAR(30),
salary POSITION (36:45) CHAR(10)
)
)
LOCATION ('employees.txt')
);
-- Query external table (each query re-reads employees.csv)
-- Explicit column list instead of SELECT * so the query survives
-- schema changes and documents what it returns.
SELECT emp_id, name, salary, dept_id
FROM emp_external
WHERE salary > 50000;
🚀 SQL*Loader
SQL*Loader Control Files
-- Control file for CSV loading
-- Loads comma-separated rows from employees.csv into EMPLOYEES.
-- TRAILING NULLCOLS: rows with missing trailing fields get NULLs
-- instead of being rejected to the bad file.
LOAD DATA
INFILE 'employees.csv'
INTO TABLE employees
FIELDS TERMINATED BY ','
TRAILING NULLCOLS
(
emp_id,
name,
salary,
dept_id
)
-- Control file with data transformation
-- Date masks are quoted with plain double quotes in a control file;
-- backslash-escaped quotes (\") are invalid syntax.
LOAD DATA
INFILE 'employees.txt'
INTO TABLE employees
INSERT
FIELDS TERMINATED BY '|'
(
emp_id,
name,
salary DECIMAL EXTERNAL,
hire_date DATE "YYYY-MM-DD",
dept_id
)
-- Run SQL*Loader from command line (BAD= collects rejected rows)
sqlldr scott/tiger CONTROL=employees.ctl LOG=load.log BAD=bad.txt
🔀 Data Integration Techniques
ETL Operations Comparison
-- NOTE(review): this section duplicates the SQL*Loader examples earlier
-- in the file; it looks like a copy-paste error — consider replacing it
-- with content specific to "ETL Operations Comparison".
-- Control file for CSV loading
LOAD DATA
INFILE 'employees.csv'
INTO TABLE employees
FIELDS TERMINATED BY ','
TRAILING NULLCOLS
(
emp_id,
name,
salary,
dept_id
)
-- Control file with data transformation
-- Date mask uses plain double quotes; \" is invalid control-file syntax.
LOAD DATA
INFILE 'employees.txt'
INTO TABLE employees
INSERT
FIELDS TERMINATED BY '|'
(
emp_id,
name,
salary DECIMAL EXTERNAL,
hire_date DATE "YYYY-MM-DD",
dept_id
)
-- Run SQL*Loader from command line
sqlldr scott/tiger CONTROL=employees.ctl LOG=load.log BAD=bad.txt
| Tool/Method | Speed | Flexibility | Best For |
|---|---|---|---|
| Data Pump | Very Fast | High | Database migration, full exports |
| SQL*Loader | Fast | High | Bulk loading flat files |
| External Tables | Medium | Medium | Transparent file querying |
| INSERT AS SELECT | Slow | Medium | Complex transformations |
Merge Operations
-- MERGE for ETL processing: single set-based upsert from the staging
-- table into EMPLOYEES — inserts new employees, updates changed ones.
MERGE INTO employees e
USING emp_source s
ON (e.emp_id = s.emp_id)
WHEN NOT MATCHED THEN
INSERT (emp_id, name, salary, dept_id)
VALUES (s.emp_id, s.name, s.salary, s.dept_id)
WHEN MATCHED THEN
UPDATE SET
e.name = s.name,
e.salary = s.salary
-- skip rows whose salary is unchanged to avoid redundant redo
WHERE e.salary <> s.salary;
💡 ETL Best Practices: Use Data Pump for large migrations, SQL*Loader for bulk flat-file
loads, and external tables for transparent file querying. Test data quality before loading, log all ETL
processes, and implement error handling. Validate row counts after each load, use parallel loading where
possible, compress export files, and monitor loader performance.
✓ Learning Checklist - Module 19