Module 19: Data Integration & ETL

💾 Oracle Data Pump

Data Pump Export & Import

-- Export table using expdp (command line)
expdp scott/tiger TABLES=employees,departments FILE=exp_data.dmp LOG=exp.log

-- Export entire schema
expdp scott/tiger FULL=Y FILE=full_db.dmp LOG=full.log

-- Export with parallel processing
expdp scott/tiger TABLES=employees PARALLEL=4 FILE=exp_%U.dmp LOG=exp.log

-- Import table (impdp)
impdp scott/tiger TABLES=employees FILE=exp_data.dmp LOG=imp.log

-- Import with remap schema
impdp scott/tiger TABLES=employees FILE=exp_data.dmp REMAP_SCHEMA=scott:new_user LOG=imp.log

-- DBMS_DATAPUMP API (PL/SQL)
DECLARE
  h1 NUMBER;
BEGIN
  h1 := DBMS_DATAPUMP.OPEN(
    operation => 'EXPORT',
    job_mode => 'TABLE',
    remote_link => NULL,
    job_name => 'EXP_JOB',
    version => 'LATEST'
  );
  
  DBMS_DATAPUMP.ADD_FILE(
    handle => h1,
    filename => 'exp_data.dmp',
    directory => 'DPUMP_DIR',
    filetype => DBMS_DATAPUMP.KU$_FILE_TYPE_DUMP_FILE
  );
  
  DBMS_DATAPUMP.ADD_FILE(
    handle => h1,
    filename => 'exp.log',
    directory => 'DPUMP_DIR',
    filetype => DBMS_DATAPUMP.KU$_FILE_TYPE_LOG_FILE
  );
  
  DBMS_DATAPUMP.START_JOB(h1);
END;
/

📄 External Tables

Creating External Tables

-- Create external table from CSV file
CREATE TABLE emp_external (
  emp_id NUMBER,
  name VARCHAR2(50),
  salary NUMBER,
  dept_id NUMBER
)
ORGANIZATION EXTERNAL (
  TYPE ORACLE_LOADER
  DEFAULT DIRECTORY data_dir
  ACCESS PARAMETERS (
    RECORDS DELIMITED BY NEWLINE
    FIELDS TERMINATED BY ','
    MISSING FIELD VALUES ARE NULL
    (
      emp_id,
      name,
      salary,
      dept_id
    )
  )
  LOCATION ('employees.csv')
);

-- Create external table from fixed-width file
CREATE TABLE emp_fixed (
  emp_id NUMBER EXTERNAL CHAR(5),
  name VARCHAR2(50) EXTERNAL CHAR(30),
  salary NUMBER EXTERNAL CHAR(10)
)
ORGANIZATION EXTERNAL (
  TYPE ORACLE_LOADER
  DEFAULT DIRECTORY data_dir
  ACCESS PARAMETERS (
    RECORDS FIXED 100
    FIELDS (
      emp_id POSITION (1:5),
      name POSITION (6:35),
      salary POSITION (36:45)
    )
  )
  LOCATION ('employees.txt')
);

-- Query external table
SELECT * FROM emp_external WHERE salary > 50000;

🚀 SQL*Loader

SQL*Loader Control Files

-- Control file for CSV loading
LOAD DATA
INFILE 'employees.csv'
INTO TABLE employees
FIELDS TERMINATED BY ','
TRAILING NULLCOLS
(
  emp_id,
  name,
  salary,
  dept_id
)

-- Control file with data transformation
LOAD DATA
INFILE 'employees.txt'
INTO TABLE employees
INSERT
FIELDS TERMINATED BY '|'
(
  emp_id,
  name,
  salary DECIMAL EXTERNAL,
  hire_date DATE \"YYYY-MM-DD\",
  dept_id
)

-- Run SQL*Loader from command line
sqlldr scott/tiger CONTROL=employees.ctl LOG=load.log BAD=bad.txt

🔀 Data Integration Techniques

ETL Operations Comparison

Tool/Method	Speed	Flexibility	Best For
Data Pump	Very Fast	High	Database migration, full exports
*SQLLoader**	Fast	High	Bulk loading flat files
External Tables	Medium	Medium	Transparent file querying
INSERT AS SELECT	Slow	Medium	Complex transformations

Merge Operations

-- MERGE for ETL processing
MERGE INTO employees tgt
USING emp_source src
ON (tgt.emp_id = src.emp_id)
WHEN MATCHED THEN
  UPDATE SET
    tgt.name = src.name,
    tgt.salary = src.salary
  WHERE tgt.salary != src.salary
WHEN NOT MATCHED THEN
  INSERT (emp_id, name, salary, dept_id)
  VALUES (src.emp_id, src.name, src.salary, src.dept_id);

                    💡 ETL Best Practices: Use Data Pump for large migrations. SQL*Loader for bulk file
                    loads. External tables for transparent querying. Test data quality before loading. Log all ETL
                    processes. Implement error handling. Validate row counts. Use parallel loading. Compress export
                    files. Monitor loader performance.
                

✓ Learning Checklist - Module 19

Use Data Pump expdp/impdp

Create external tables

Work with SQL*Loader

Load CSV and flat files

Implement data transformations

Use MERGE for ETL

Monitor ETL processes