🔄 Module 19: Data Integration & ETL

Data Pump, External Tables, SQL*Loader, Data Movement

PUMP
Data Export/Import
EXTERNAL
File Access
LOADER
Bulk Load
ETL
Data Transform

💾 Oracle Data Pump

Data Pump Export & Import

-- Export table using expdp (command line)
expdp scott/tiger TABLES=employees,departments FILE=exp_data.dmp LOG=exp.log

-- Export entire schema
expdp scott/tiger FULL=Y FILE=full_db.dmp LOG=full.log

-- Export with parallel processing
expdp scott/tiger TABLES=employees PARALLEL=4 FILE=exp_%U.dmp LOG=exp.log

-- Import table (impdp)
impdp scott/tiger TABLES=employees FILE=exp_data.dmp LOG=imp.log

-- Import with remap schema
impdp scott/tiger TABLES=employees FILE=exp_data.dmp REMAP_SCHEMA=scott:new_user LOG=imp.log

-- DBMS_DATAPUMP API (PL/SQL)
DECLARE
  h1 NUMBER;
BEGIN
  h1 := DBMS_DATAPUMP.OPEN(
    operation => 'EXPORT',
    job_mode => 'TABLE',
    remote_link => NULL,
    job_name => 'EXP_JOB',
    version => 'LATEST'
  );
  
  DBMS_DATAPUMP.ADD_FILE(
    handle => h1,
    filename => 'exp_data.dmp',
    directory => 'DPUMP_DIR',
    filetype => DBMS_DATAPUMP.KU$_FILE_TYPE_DUMP_FILE
  );
  
  DBMS_DATAPUMP.ADD_FILE(
    handle => h1,
    filename => 'exp.log',
    directory => 'DPUMP_DIR',
    filetype => DBMS_DATAPUMP.KU$_FILE_TYPE_LOG_FILE
  );
  
  DBMS_DATAPUMP.START_JOB(h1);
END;
/

📄 External Tables

Creating External Tables

-- Create external table from CSV file
CREATE TABLE emp_external (
  emp_id NUMBER,
  name VARCHAR2(50),
  salary NUMBER,
  dept_id NUMBER
)
ORGANIZATION EXTERNAL (
  TYPE ORACLE_LOADER
  DEFAULT DIRECTORY data_dir
  ACCESS PARAMETERS (
    RECORDS DELIMITED BY NEWLINE
    FIELDS TERMINATED BY ','
    MISSING FIELD VALUES ARE NULL
    (
      emp_id,
      name,
      salary,
      dept_id
    )
  )
  LOCATION ('employees.csv')
);

-- Create external table from fixed-width file
CREATE TABLE emp_fixed (
  emp_id NUMBER EXTERNAL CHAR(5),
  name VARCHAR2(50) EXTERNAL CHAR(30),
  salary NUMBER EXTERNAL CHAR(10)
)
ORGANIZATION EXTERNAL (
  TYPE ORACLE_LOADER
  DEFAULT DIRECTORY data_dir
  ACCESS PARAMETERS (
    RECORDS FIXED 100
    FIELDS (
      emp_id POSITION (1:5),
      name POSITION (6:35),
      salary POSITION (36:45)
    )
  )
  LOCATION ('employees.txt')
);

-- Query external table
SELECT * FROM emp_external WHERE salary > 50000;

🚀 SQL*Loader

SQL*Loader Control Files

-- Control file for CSV loading
LOAD DATA
INFILE 'employees.csv'
INTO TABLE employees
FIELDS TERMINATED BY ','
TRAILING NULLCOLS
(
  emp_id,
  name,
  salary,
  dept_id
)

-- Control file with data transformation
LOAD DATA
INFILE 'employees.txt'
INTO TABLE employees
INSERT
FIELDS TERMINATED BY '|'
(
  emp_id,
  name,
  salary DECIMAL EXTERNAL,
  hire_date DATE \"YYYY-MM-DD\",
  dept_id
)

-- Run SQL*Loader from command line
sqlldr scott/tiger CONTROL=employees.ctl LOG=load.log BAD=bad.txt

🔀 Data Integration Techniques

ETL Operations Comparison

Tool/Method Speed Flexibility Best For
Data Pump Very Fast High Database migration, full exports
SQL*Loader Fast High Bulk loading flat files
External Tables Medium Medium Transparent file querying
INSERT AS SELECT Slow Medium Complex transformations

Merge Operations

-- MERGE for ETL processing
MERGE INTO employees tgt
USING emp_source src
ON (tgt.emp_id = src.emp_id)
WHEN MATCHED THEN
  UPDATE SET
    tgt.name = src.name,
    tgt.salary = src.salary
  WHERE tgt.salary != src.salary
WHEN NOT MATCHED THEN
  INSERT (emp_id, name, salary, dept_id)
  VALUES (src.emp_id, src.name, src.salary, src.dept_id);
💡 ETL Best Practices: Use Data Pump for large migrations. SQL*Loader for bulk file loads. External tables for transparent querying. Test data quality before loading. Log all ETL processes. Implement error handling. Validate row counts. Use parallel loading. Compress export files. Monitor loader performance.
✓ Learning Checklist - Module 19