1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
|
SELECT COUNT(*) FROM HR_DATA_MAIN;
DROP TABLE HR_DATA_TRAINING; CREATE TABLE HR_DATA_TRAINING AS SELECT * FROM HR_DATA_MAIN WHERE EMP_ID < 10500;
DROP TABLE HR_DATA_TEST;
CREATE TABLE HR_DATA_TEST AS SELECT * FROM HR_DATA_MAIN WHERE EMP_ID >= 10500;
DROP TABLE DTSETTINGS; CREATE TABLE DTSETTINGS AS SELECT * FROM TABLE (DBMS_DATA_MINING.GET_DEFAULT_SETTINGS) WHERE SETTING_NAME LIKE '%GLM%';
BEGIN INSERT INTO DTSETTINGS VALUES ('ALGO_NAME', 'ALGO_DECISION_TREE');
INSERT INTO DTSETTINGS VALUES (DBMS_DATA_MINING.TREE_IMPURITY_METRIC, 'TREE_IMPURITY_ENTROPY'); COMMIT; END; /
BEGIN DBMS_DATA_MINING.DROP_MODEL('DT_MODEL'); END; /
BEGIN DBMS_DATA_MINING.CREATE_MODEL ( MODEL_NAME => 'DT_MODEL', MINING_FUNCTION => DBMS_DATA_MINING.CLASSIFICATION, DATA_TABLE_NAME => 'HR_DATA_TRAINING', CASE_ID_COLUMN_NAME => 'EMP_ID', TARGET_COLUMN_NAME => 'LEFT', SETTINGS_TABLE_NAME => 'DTSETTINGS'); END; /
SELECT MODEL_NAME, ALGORITHM, MINING_FUNCTION FROM ALL_MINING_MODELS WHERE MODEL_NAME = 'DT_MODEL';
SELECT SETTING_NAME, SETTING_VALUE FROM ALL_MINING_MODEL_SETTINGS WHERE MODEL_NAME = 'DT_MODEL';
SELECT EMP_ID, T.LEFT 실제값, PREDICTION (DT_MODEL USING *) 예측값, PREDICTION_PROBABILITY (DT_MODEL USING *) "모델이 예측한 확률" FROM HR_DATA_TEST T;
DROP TABLE HR_DATA_TEST_MATRIX_2; CREATE OR REPLACE VIEW VIEW_HR_DATA_TEST AS SELECT EMP_ID, PREDICTION(DT_MODEL USING *) PREDICTED_VALUE, PREDICTION_PROBABILITY(DT_MODEL USING * ) PROBABILITY FROM HR_DATA_TEST; SET SERVEROUTPUT ON
DECLARE V_ACCURACY NUMBER; BEGIN DBMS_DATA_MINING.COMPUTE_CONFUSION_MATRIX ( ACCURACY => V_ACCURACY, APPLY_RESULT_TABLE_NAME => 'VIEW_HR_DATA_TEST', TARGET_TABLE_NAME => 'HR_DATA_TEST', CASE_ID_COLUMN_NAME => 'EMP_ID', TARGET_COLUMN_NAME => 'LEFT', CONFUSION_MATRIX_TABLE_NAME => 'HR_DATA_TEST_MATRIX_2', SCORE_COLUMN_NAME => 'PREDICTED_VALUE', SCORE_CRITERION_COLUMN_NAME => 'PROBABILITY', COST_MATRIX_TABLE_NAME => NULL, APPLY_RESULT_SCHEMA_NAME => NULL, TARGET_SCHEMA_NAME => NULL, COST_MATRIX_SCHEMA_NAME => NULL, SCORE_CRITERION_TYPE => 'PROBABILITY'); DBMS_OUTPUT.PUT_LINE('**** MODEL ACCURACY ****: ' || ROUND(V_ACCURACY,4)); END; /
|