Humble Trader

Thursday, August 31, 2006

Dimension Library

This post contains links to all Dimension implementations:

Data Warehouse Generic Components

This post contains links to Data Warehouse Components that may be used by Process Flow-specific Mappings:

Start Here

This post is the primary reference for the entire blog. Here is a site overview.

Disclaimer

Wednesday, August 30, 2006

Country Dimension - Functional Specification

When dealing with organisations that have cross-border business, the Country Dimension is used to analyse and report geographically.

In this implementation, I have chosen to grab the data from the internet. The UniCode site (here) has just the sort of page I need. To support this, this component utilises the Get HTML component by calling sta.get_html.fetch_html (Overload 1) with a label for the web page, and then sta.get_html.parse_html (Overload 1). This leaves parsed HTML containing the Country data in the table sta.parsed_html. This component, therefore, is largely concerned with extracting data from this generic table and moving it into the whs table D_COUNTRIES.

Monday, August 14, 2006

Get HTML - HTML_PARSE_PASSES

Scripts that support this object:

Monday, August 07, 2006

Get HTML - ctl_get_html_test.sql

SET SERVEROUTPUT ON SIZE 1000000;

-- Test ctl_gen:
DECLARE
l_mapping_1 VARCHAR2(30) := 'PAGE_ACCESS_TEST_1';
l_mapping_2 VARCHAR2(30) := 'PARSE_PASSES_TEST_2';
l_raw_run_no NUMBER;
l_parse_run_no NUMBER;
l_web_page_1 VARCHAR2(30) := 'PAGE_ACCESS_TEST_PAGE_1';
l_row_count NUMBER;
l_row_data VARCHAR2(4000);
l_passed BOOLEAN := TRUE;
BEGIN
dbms_output.put_line('-');
dbms_output.put_line('- Initialise...');

-- Initialisation code.
DELETE FROM html_parse_passes
WHERE raw_run_no =
(SELECT run_no
FROM html_page_access
WHERE html_page_name = l_web_page_1);

DELETE FROM html_page_access
WHERE html_page_name = l_web_page_1;

DELETE FROM mapping_histories
WHERE mapping_library IN (l_mapping_1
,l_mapping_2);

DELETE FROM mapping_libraries
WHERE mapping_name IN (l_mapping_1
,l_mapping_2);

DELETE FROM html_pages
WHERE name = l_web_page_1;

INSERT INTO mapping_libraries
(
mapping_name
,rep_from
,rep_to
)
VALUES
(
l_mapping_1
,'CONTROL'
,'CONTROL'
);

INSERT INTO mapping_libraries
(
mapping_name
,rep_from
,rep_to
)
VALUES
(
l_mapping_2
,'CONTROL'
,'CONTROL'
);

INSERT INTO mapping_histories
(
run_no
,mapping_library
,start_tsp
,end_tsp
)
VALUES
(
run_no_s.NEXTVAL
,l_mapping_1
,SYSDATE
,NULL
);

SELECT run_no_s.CURRVAL
INTO l_raw_run_no
FROM dual;

INSERT INTO mapping_histories
(
run_no
,mapping_library
,start_tsp
,end_tsp
)
VALUES
(
run_no_s.NEXTVAL
,l_mapping_2
,SYSDATE
,NULL
);

SELECT run_no_s.CURRVAL
INTO l_parse_run_no
FROM dual;

INSERT INTO html_pages
(
name
,url
,username
,password
)
VALUES
(
l_web_page_1
,'http://test/'
,NULL
,NULL
);

COMMIT;

dbms_output.put_line('- Done.');
dbms_output.put_line('-');

dbms_output.put_line('+- PROCEDURE init_html_page_access test start:');
dbms_output.put_line('|');

dbms_output.put_line
('| Test 1: Pass : web_page = ' || l_web_page_1);
dbms_output.put_line
('| : run_no = ' || l_raw_run_no);
dbms_output.put_line('|');
dbms_output.put_line
('| Expected result: Inserts a row into HTML_PAGE_ACCESS.');
dbms_output.put_line('|');

ctl_get_html.init_html_page_access(l_web_page_1, l_raw_run_no);

SELECT COUNT(*)
INTO l_row_count
FROM html_page_access
WHERE run_no = l_raw_run_no;

dbms_output.put_line('| Result: HTML_PAGE_ACCESS row count = ' ||
l_row_count);
dbms_output.put_line('| HTML_PAGE_ACCESS data :');

SELECT '| [' || run_no || ', ' || html_page_name || ', ' || status ||
', ' || TO_CHAR(start_tsp, 'DD-MON-YYYY HH24:MI:SS') || ']'
INTO l_row_data
FROM html_page_access
WHERE run_no = l_raw_run_no;

dbms_output.put_line(l_row_data);
dbms_output.put_line('|');

dbms_output.put_line('| +----------+');
IF l_row_count = 1
THEN
dbms_output.put_line('| | PASSED |');
ELSE
dbms_output.put_line('| | FAILED |');
l_passed := FALSE;
END IF;
dbms_output.put_line('| +----------+');

dbms_output.put_line('|');
dbms_output.put_line('+- PROCEDURE init_html_page_access test end:');
dbms_output.put_line('+');

dbms_output.put_line('+- PROCEDURE html_page_access_update_status test ' ||
'start:');
dbms_output.put_line('|');

dbms_output.put_line
('| Test 1: Pass : raw_run_no = ' || l_raw_run_no);
dbms_output.put_line
('| : new_status = FETCHED');
dbms_output.put_line('|');
dbms_output.put_line
('| Expected result: Updates the row status in previous test to ' ||
'FETCHED.');
dbms_output.put_line('|');

ctl_get_html.html_page_access_update_status(l_raw_run_no, 'FETCHED');

dbms_output.put_line('| Results: HTML_PAGE_ACCESS data :');

SELECT '| [' || run_no || ', ' || html_page_name || ', ' || status ||
', ' || TO_CHAR(start_tsp, 'DD-MON-YYYY HH24:MI:SS') || ']'
INTO l_row_data
FROM html_page_access
WHERE run_no = l_raw_run_no;

dbms_output.put_line(l_row_data);

SELECT status
INTO l_row_data
FROM html_page_access
WHERE run_no = l_raw_run_no;

dbms_output.put_line('| Row status = ' || l_row_data);

dbms_output.put_line('|');
dbms_output.put_line('| +----------+');
IF l_row_data = 'FETCHED'
THEN
dbms_output.put_line('| | PASSED |');
ELSE
dbms_output.put_line('| | FAILED |');
l_passed := FALSE;
END IF;
dbms_output.put_line('| +----------+');

dbms_output.put_line('|');
dbms_output.put_line(
'+- PROCEDURE html_page_access_update_status test end:');
dbms_output.put_line('+');

dbms_output.put_line('+- PROCEDURE init_html_parse_passes test start:');
dbms_output.put_line('|');

dbms_output.put_line
('| Test 1: Pass : parse_run_no = ' || l_parse_run_no);
dbms_output.put_line
('| : raw_run_no = ' || l_raw_run_no);
dbms_output.put_line('|');
dbms_output.put_line
('| Expected result: Inserts a row into HTML_PARSE_PASSES.');
dbms_output.put_line('|');

ctl_get_html.init_html_parse_passes(l_parse_run_no, l_raw_run_no);

SELECT COUNT(*)
INTO l_row_count
FROM html_parse_passes
WHERE parse_run_no = l_parse_run_no
AND raw_run_no = l_raw_run_no;

dbms_output.put_line('| Result: HTML_PARSE_PASSES row count = ' ||
l_row_count);
dbms_output.put_line('| HTML_PARSE_PASSES data :');

SELECT '| [' || parse_run_no || ', ' || raw_run_no || ', ' || status ||
', ' || TO_CHAR(start_tsp, 'DD-MON-YYYY HH24:MI:SS') || ']'
INTO l_row_data
FROM html_parse_passes
WHERE parse_run_no = l_parse_run_no
AND raw_run_no = l_raw_run_no;

dbms_output.put_line(l_row_data);
dbms_output.put_line('|');

dbms_output.put_line('| +----------+');
IF l_row_count = 1
THEN
dbms_output.put_line('| | PASSED |');
ELSE
dbms_output.put_line('| | FAILED |');
l_passed := FALSE;
END IF;
dbms_output.put_line('| +----------+');

dbms_output.put_line('|');
dbms_output.put_line('+- PROCEDURE init_html_parse_passes test end:');
dbms_output.put_line('+');

dbms_output.put_line(
'+- PROCEDURE html_parse_passes_update_stat test start:');
dbms_output.put_line('|');

dbms_output.put_line
('| Test 1: Pass : parse_run_no = ' || l_parse_run_no);
dbms_output.put_line
('| : raw_run_no = ' || l_raw_run_no);
dbms_output.put_line
('| : new_status = PARSED');
dbms_output.put_line('|');
dbms_output.put_line
('| Expected result: Updates the row status in previous test to ' ||
'PARSED.');
dbms_output.put_line('|');

ctl_get_html.html_parse_passes_update_stat(l_parse_run_no, l_raw_run_no,
'PARSED');

dbms_output.put_line('| Results: HTML_PARSE_PASSES data :');

SELECT '| [' || parse_run_no || ', ' || raw_run_no || ', ' || ', ' ||
status || ', ' || TO_CHAR(start_tsp, 'DD-MON-YYYY HH24:MI:SS') || ']'
INTO l_row_data
FROM html_parse_passes
WHERE parse_run_no = l_parse_run_no
AND raw_run_no = l_raw_run_no;

dbms_output.put_line(l_row_data);

SELECT status
INTO l_row_data
FROM html_parse_passes
WHERE parse_run_no = l_parse_run_no
AND raw_run_no = l_raw_run_no;

dbms_output.put_line('| Row status = ' || l_row_data);

dbms_output.put_line('|');
dbms_output.put_line('| +----------+');
IF l_row_data = 'PARSED'
THEN
dbms_output.put_line('| | PASSED |');
ELSE
dbms_output.put_line('| | FAILED |');
l_passed := FALSE;
END IF;
dbms_output.put_line('| +----------+');

dbms_output.put_line('|');
dbms_output.put_line(
'+- PROCEDURE html_page_access_update_status test end:');

dbms_output.put_line('-');

dbms_output.put_line('- Overall result:');
dbms_output.put_line('-');
dbms_output.put_line('- +----------+');
IF l_passed = TRUE
THEN
dbms_output.put_line('- | PASSED |');
ELSE
dbms_output.put_line('- | FAILED |');
END IF;
dbms_output.put_line('- +----------+');

dbms_output.put_line('-');
dbms_output.put_line('- Clean up...');

-- Clean up code.
DELETE FROM html_parse_passes
WHERE raw_run_no =
(SELECT run_no
FROM html_page_access
WHERE html_page_name = l_web_page_1);

DELETE FROM html_parse_passes
WHERE parse_run_no = l_parse_run_no;

DELETE FROM html_page_access
WHERE html_page_name = l_web_page_1;

DELETE FROM mapping_histories
WHERE mapping_library IN (l_mapping_1
,l_mapping_2);

DELETE FROM mapping_libraries
WHERE mapping_name IN (l_mapping_1
,l_mapping_2);

DELETE FROM html_pages
WHERE name = l_web_page_1;

COMMIT;

dbms_output.put_line('- Done.');
dbms_output.put_line('-');
END;
/

Get HTML - CTL_GET_HTML

Scripts that support this object:

Tuesday, August 01, 2006

Get HTML - get_html_test.sql

NOTE: Add the passwords for the user connect statements before running.

CONNECT ctl/

SET SERVEROUTPUT ON SIZE 1000000;

BEGIN
dbms_output.put_line('-');
dbms_output.put_line('Initialise CTL...');
END;
/

DELETE FROM html_page_access
WHERE html_page_name = 'GOOGLE'
/

COMMIT
/

BEGIN
dbms_output.put_line('Done...');
dbms_output.put_line('-');
END;
/

CONNECT sta/

SET SERVEROUTPUT ON SIZE 1000000;

BEGIN
dbms_output.put_line('-');
dbms_output.put_line('Initialise STA...');
END;
/

DELETE FROM html_pages
WHERE name = 'GOOGLE'
/

INSERT INTO html_pages
(
name
,url
,username
,password
)
VALUES
(
'GOOGLE'
,'http://www.google.com/'
,NULL
,NULL
)
/

COMMIT
/

BEGIN
dbms_output.put_line('Done...');
dbms_output.put_line('-');
END;
/

DECLARE
l_web_page_1 VARCHAR2(30) := 'GOOGLE';
l_raw_run_no NUMBER;
l_html_page_access_count NUMBER;
l_raw_html_count NUMBER;
l_row_data VARCHAR2(4000);
l_char VARCHAR2(1);
l_string VARCHAR2(4000);
l_passed BOOLEAN := TRUE;
BEGIN
dbms_output.put_line('-');
dbms_output.put_line('+- PROCEDURE fetch_html test start:');
dbms_output.put_line('|');

dbms_output.put_line
('| Test 1: General test.');
dbms_output.put_line
('| Pass : web_page = ' || l_web_page_1);
dbms_output.put_line
('| Expected result: Inserts a row into HTML_PAGE_ACCESS final status' ||
' = FETCHED');
dbms_output.put_line
('| Inserts two rows into RAW_HTML.');

get_html.fetch_html(l_web_page_1);

SELECT MAX(run_no)
INTO l_raw_run_no
FROM mapping_histories;

SELECT COUNT(*)
INTO l_html_page_access_count
FROM html_page_access
WHERE run_no = l_raw_run_no;

dbms_output.put_line('| Result: HTML_PAGE_ACCESS row count = ' ||
l_html_page_access_count);
dbms_output.put_line('| HTML_PAGE_ACCESS data :');

SELECT '| [' || run_no || ', ' || status || ', ' ||
TO_CHAR(start_tsp, 'DD-MON-YYYY HH24:MI:SS') || ']'
INTO l_row_data
FROM html_page_access
WHERE run_no = l_raw_run_no;

dbms_output.put_line(l_row_data);

SELECT COUNT(*)
INTO l_raw_html_count
FROM raw_html
WHERE run_no = l_raw_run_no;

dbms_output.put_line('| RAW_HTML row count = ' || l_raw_html_count);
dbms_output.put_line('|');

dbms_output.put_line('| +----------+');
IF l_html_page_access_count = 1
AND
l_raw_html_count = 2
THEN
dbms_output.put_line('| | PASSED |');
ELSE
dbms_output.put_line('| | FAILED |');
l_passed := FALSE;
END IF;
dbms_output.put_line('| +----------+');

dbms_output.put_line('|');
dbms_output.put_line('+- PROCEDURE fetch_html test end:');
dbms_output.put_line('-');

dbms_output.put_line('+- PROCEDURE parse_html test start:');
dbms_output.put_line('|');

dbms_output.put_line
('| Test 1: General test.');
dbms_output.put_line
('| Pass : NO PARAMETERS');
dbms_output.put_line
('| Expected result: Runs without error.');
dbms_output.put_line('|');

BEGIN
get_html.parse_html;
EXCEPTION
WHEN OTHERS THEN
dbms_output.put_line('| +----------+');
dbms_output.put_line('| | FAILED |');
l_passed := FALSE;
dbms_output.put_line('| +----------+');
END;

dbms_output.put_line('| +----------+');
dbms_output.put_line('| | PASSED |');
dbms_output.put_line('| +----------+');

dbms_output.put_line('|');
dbms_output.put_line('+- PROCEDURE parse_html test end:');
dbms_output.put_line('-');

dbms_output.put_line('+- PROCEDURE get_scec test start:');
dbms_output.put_line('|');

dbms_output.put_line
('| Test 1: General test.');
dbms_output.put_line
('| Pass : ''amp''');
dbms_output.put_line
('| Expected result: Returns ''&''' || '.');
dbms_output.put_line('|');

l_char := get_html.get_scec('amp');

dbms_output.put_line('| Result: returned = ' || l_char);
dbms_output.put_line('|');

dbms_output.put_line('| +----------+');
IF l_char = '&'
THEN
dbms_output.put_line('| | PASSED |');
ELSE
dbms_output.put_line('| | FAILED |');
l_passed := FALSE;
END IF;
dbms_output.put_line('| +----------+');

dbms_output.put_line('|');
dbms_output.put_line('+- PROCEDURE get_scec test end:');
dbms_output.put_line('-');

dbms_output.put_line('+- PROCEDURE replace_scec test start:');
dbms_output.put_line('|');

dbms_output.put_line
('| Test 1: General test.');
dbms_output.put_line
('| Pass : ''start &' || 'amp; middle &' || 'amp; end''');
dbms_output.put_line
('| Expected result: Returns ''start ''&'' middle ''&'' end''.');
dbms_output.put_line('|');

l_string := get_html.replace_scec('start &' || 'amp; middle &' ||
'amp; end');

dbms_output.put_line('| Result: returned = [' || l_string || ']');
dbms_output.put_line('|');

dbms_output.put_line('| +----------+');
IF l_string = 'start &' || ' middle &' || ' end'
THEN
dbms_output.put_line('| | PASSED |');
ELSE
dbms_output.put_line('| | FAILED |');
l_passed := FALSE;
END IF;
dbms_output.put_line('| +----------+');

dbms_output.put_line('|');
dbms_output.put_line('+- PROCEDURE get_scec test end:');
dbms_output.put_line('-');

dbms_output.put_line('- Overall result:');
dbms_output.put_line('-');
dbms_output.put_line('- +----------+');
IF l_passed = TRUE
THEN
dbms_output.put_line('- | PASSED |');
ELSE
dbms_output.put_line('- | FAILED |');
END IF;
dbms_output.put_line('- +----------+');
dbms_output.put_line('-');

dbms_output.put_line('-');
dbms_output.put_line('- Clean up STA...');

DELETE FROM parsed_html
WHERE raw_run_no IN
(SELECT run_no
FROM html_page_access
WHERE html_page_name = 'GOOGLE');

DELETE FROM raw_html
WHERE run_no IN
(SELECT run_no
FROM html_page_access
WHERE html_page_name = 'GOOGLE');

COMMIT;

dbms_output.put_line('- Done.');
dbms_output.put_line('-');
END;
/

CONNECT ctl/

SET SERVEROUTPUT ON SIZE 1000000;

BEGIN
dbms_output.put_line('-');
dbms_output.put_line('- Clean up CTL...');
END;
/

DELETE FROM html_parse_passes
WHERE raw_run_no IN
(SELECT run_no
FROM html_page_access
WHERE html_page_name = 'GOOGLE');

DELETE FROM html_page_access
WHERE html_page_name = 'GOOGLE'
/

DELETE FROM html_pages
WHERE name = 'GOOGLE'
/

COMMIT
/

BEGIN
dbms_output.put_line('- Done.');
dbms_output.put_line('-');
END;
/

CONNECT sta/

SET SERVEROUTPUT ON SIZE 1000000;