- Autonomous DatabaseのOMLノートブックでOML4PyのAutoMLを実行して、機械学習モデルを作成します。作成したモデルはシリアライズした形式でエクスポートします。
- Autonomous DatabaseのAutoML UIで機械学習のモデルを作成します。作成したモデルを一つ選び、シリアライズした形式でエクスポートします。
- オンプレミスのデータベースでOML4PyのAutoMLを実行して、機械学習モデルを作成します。作成したモデルはシリアライズした形式でエクスポートします。
- それぞれエクスポートした機械学習モデルを別のデータベースにインポートし、予測を行います。
%sql
drop table if exists BREAST_CANCER
%python
import oml
import pandas as pd
import numpy as np
from sklearn import datasets
# Load the breast cancer dataset into the database
bc = datasets.load_breast_cancer()
bc_data = bc.data.astype(float)
X = pd.DataFrame(bc_data, columns = bc.feature_names)
y = pd.DataFrame(bc.target, columns = ['TARGET'])
row_id = pd.DataFrame(np.arange(bc_data.shape[0]), columns = ['CASE_ID'])
df = oml.create(pd.concat([row_id, X, y], axis=1), table = 'BREAST_CANCER')
%sql
select * from breast_cancer
%python
import oml
from oml import automl
import pandas as pd
import numpy as np
df = oml.sync(table='BREAST_CANCER')
%python
# Split dataset into train and test
train, test = df.split(ratio=(0.8, 0.2), seed = 1234, hash_cols=['CASE_ID'])
X, y = train.drop('TARGET'), train['TARGET']
X_test, y_test = test.drop('TARGET'), test['TARGET']
%python
# Create an automated machine learning pipeline object with f1_macro score_metric
pipeline = automl.Pipeline(mining_function='classification', score_metric='f1_macro', parallel=2)
# Fit the pipeline to perform automated algorithm selection, feature selection, and model tuning on the dataset
pipeline = pipeline.fit(X, y, case_id='CASE_ID')
%python
# Use the pipeline for prediction
predictions = pipeline.predict(X_test, supplemental_cols=y_test)
print(f"{predictions}")
# For classification tasks, the pipeline can also predict class probabilities
probabilities = pipeline.predict_proba(X_test, supplemental_cols=y_test)
print(f"{probabilities}")
%sql
drop table if exists SERMODEL
%python
# Inspect the best tuned and fitted model produced by the AutoML pipeline
best_model = pipeline.top_k_tuned_models[0]['fitted_model']
print(f"{best_model}")
export = best_model.export_sermodel('SERMODEL')
declare
l_model_data blob;
begin
select serval into l_model_data from sermodel;
dbms_data_mining.import_sermodel(
model_data => l_model_data,
model_name => 'ADB_OML4PY_AUTOML_MODEL'
);
end;
select * from (
select case_id,
prediction(ADB_OML4PY_AUTOML_MODEL using *) prediction,
target,
prediction_probability(ADB_OML4PY_AUTOML_MODEL using *) pred_probability
from breast_cancer
)
where prediction <> target
select * from (
select case_id,
prediction(ADB_AUTOMLUI_MODEL using *) prediction,
target,
prediction_probability(ADB_AUTOMLUI_MODEL using *) pred_probability
from breast_cancer
)
where prediction <> target
create table oml_serialized_models (
id number generated by default on null as identity
constraint oml_serialized_models_id_pk primary key,
model_name varchar2(128 char) not null,
model_data blob
);
declare
l_model_data blob;
begin
select serval into l_model_data from sermodel;
insert into oml_serialized_models(model_name, model_data) values('ADB_OML4PY_AUTOML_MODEL', l_model_data);
commit;
end;
declare
l_blob blob;
l_model_name user_mining_models.model_name%type;
begin
dbms_lob.createTemporary(l_blob, false);
l_model_name := 'ADB_AUTOMLUI_MODEL';
dbms_data_mining.export_sermodel(
model_data => l_blob,
model_name => l_model_name
);
insert into oml_serialized_models(model_name, model_data) values('ADB_AUTOMLUI_MODEL', l_blob);
dbms_lob.freeTemporary(l_blob);
end;
select
sys_context('userenv','proxy_user') as proxy_user,
sys_context('userenv','session_user') as session_user,
sys_context('userenv','current_user') as current_user
from dual
podman exec -it apex-db bash
apex-podman-setup % podman exec -it apex-db bash
bash-4.4$
execute dbms_connection_pool.start_pool();
bash-4.4$ sqlplus / as sysdba
SQL*Plus: Release 23.0.0.0.0 - Production on Tue Jul 29 08:56:40 2025
Version 23.8.0.25.04
Copyright (c) 1982, 2025, Oracle. All rights reserved.
Connected to:
Oracle Database 23ai Free Release 23.0.0.0.0 - Develop, Learn, and Run for Free
Version 23.8.0.25.04
SQL> execute dbms_connection_pool.start_pool();
PL/SQL procedure successfully completed.
SQL> exit
Disconnected from Oracle Database 23ai Free Release 23.0.0.0.0 - Develop, Learn, and Run for Free
Version 23.8.0.25.04
bash-4.4$
export PYTHONHOME=$HOME/python
export PATH=$PYTHONHOME/bin:$PATH
export LD_LIBRARY_PATH=$PYTHONHOME/lib:$LD_LIBRARY_PATH
unset PYTHONPATH
bash-4.4$ export PYTHONHOME=$HOME/python
bash-4.4$ export PATH=$PYTHONHOME/bin:$PATH
bash-4.4$ export LD_LIBRARY_PATH=$PYTHONHOME/lib:$LD_LIBRARY_PATH
bash-4.4$ unset PYTHONPATH
bash-4.4$
scikit-learnの乳がんの診断データより表BREAST_CANCERを作成するスクリプトをcreate.pyとして作成し、実行します。oml.connectの引数のuserやpasswordは、使用するデータベース・ユーザーに合わせて変更します。また引数にautoml=Trueを含めます。
import oml
from oml import automl
import pandas as pd
import numpy as np
from sklearn import datasets
oml.connect(user="wksp_apexdev",password="パスワード",port=1521,host="localhost",service_name="freepdb1",automl=True)
# Load the breast cancer dataset into the database
bc = datasets.load_breast_cancer()
bc_data = bc.data.astype(float)
X = pd.DataFrame(bc_data, columns = bc.feature_names)
y = pd.DataFrame(bc.target, columns = ['TARGET'])
row_id = pd.DataFrame(np.arange(bc_data.shape[0]), columns = ['CASE_ID'])
df = oml.create(pd.concat([row_id, X, y], axis=1), table = 'BREAST_CANCER')
上記のスクリプトを実行します。bash-4.4$ python3 create.py
bash-4.4$
select count(*) from breast_cancer;
bash-4.4$ sqlplus wksp_apexdev/********@localhost/freepdb1
SQL*Plus: Release 23.0.0.0.0 - Production on Tue Jul 29 09:03:04 2025
Version 23.8.0.25.04
Copyright (c) 1982, 2025, Oracle. All rights reserved.
Last Successful login time: Tue Jul 29 2025 09:02:28 +00:00
Connected to:
Oracle Database 23ai Free Release 23.0.0.0.0 - Develop, Learn, and Run for Free
Version 23.8.0.25.04
SQL> select count(*) from breast_cancer;
COUNT(*)
----------
569
SQL> exit
Disconnected from Oracle Database 23ai Free Release 23.0.0.0.0 - Develop, Learn, and Run for Free
Version 23.8.0.25.04
bash-4.4$
drop table if exists SERMODEL;
bash-4.4$ sqlplus wksp_apexdev/*********@localhost/freepdb1
SQL*Plus: Release 23.0.0.0.0 - Production on Wed Jul 30 02:25:43 2025
Version 23.8.0.25.04
Copyright (c) 1982, 2025, Oracle. All rights reserved.
Last Successful login time: Wed Jul 30 2025 02:24:48 +00:00
Connected to:
Oracle Database 23ai Free Release 23.0.0.0.0 - Develop, Learn, and Run for Free
Version 23.8.0.25.04
SQL> drop table if exists SERMODEL;
Table dropped.
SQL> exit
Disconnected from Oracle Database 23ai Free Release 23.0.0.0.0 - Develop, Learn, and Run for Free
Version 23.8.0.25.04
bash-4.4$
AutoMLを実行するスクリプトをautoml.pyとして作成し、実行します。スクリプト自体はAutonomous DatabaseのOMLノートブックで実行した内容と同じですが、最後にファイルLOCAL_OML4PY_AUTOML_MODELにエクスポートしたモデルを出力しています。
import oml
from oml import automl
import pandas as pd
import numpy as np
oml.connect(user="wksp_apexdev",password="パスワード",port=1521,host="localhost",service_name="freepdb1",automl=True)
df = oml.sync(table = 'BREAST_CANCER')
# Split dataset into train and test
train, test = df.split(ratio=(0.8, 0.2), seed = 1234, hash_cols=['CASE_ID'])
X, y = train.drop('TARGET'), train['TARGET']
X_test, y_test = test.drop('TARGET'), test['TARGET']
# Create an automated machine learning pipeline object with f1_macro score_metric
pipeline = automl.Pipeline(mining_function='classification',
score_metric='f1_macro', parallel=4)
# Fit the pipeline to perform automated algorithm selection, feature selection, and model tuning on the dataset
pipeline = pipeline.fit(X, y, case_id='CASE_ID')
# Use the pipeline for prediction
predictions = pipeline.predict(X_test, supplemental_cols=y_test)
print(f"{predictions}")
# For classification tasks, the pipeline can also predict class probabilities
probabilities = pipeline.predict_proba(X_test, supplemental_cols=y_test)
print(f"{probabilities}")
# Inspect the best tuned and fitted model produced by the AutoML pipeline
best_model = pipeline.top_k_tuned_models[0]['fitted_model']
print(f"{best_model}")
export = best_model.export_sermodel(table='SERMODEL')
with open('LOCAL_OML4PY_AUTOML_MODEL', 'wb') as f:
for item in export.pull():
f.write(item)
python3 automl.py
bash-4.4$ python3 automl.py
TARGET PREDICTION
0 0 0
1 0 0
2 1 1
3 0 0
4 1 0
.. ... ...
109 1 1
110 0 0
111 1 1
112 0 0
113 1 1
[114 rows x 2 columns]
TARGET PROBABILITY_OF_0 PROBABILITY_OF_1
0 0 0.999860 0.000140
1 0 0.997039 0.002961
2 1 0.001444 0.998556
3 0 0.984740 0.015260
4 1 0.762688 0.237312
.. ... ... ...
109 1 0.003550 0.996450
110 0 0.992593 0.007407
111 1 0.001383 0.998617
112 0 0.986737 0.013263
113 1 0.003410 0.996590
[114 rows x 3 columns]
Algorithm Name: Support Vector Machine
Mining Function: CLASSIFICATION
Target: TARGET
Settings:
setting name setting value
0 ALGO_NAME ALGO_SUPPORT_VECTOR_MACHINES
1 CLAS_WEIGHTS_BALANCED OFF
2 ODMS_DETAILS ODMS_ENABLE
3 ODMS_MISSING_VALUE_TREATMENT ODMS_MISSING_VALUE_AUTO
4 ODMS_SAMPLING ODMS_SAMPLING_DISABLE
5 PREP_AUTO ON
6 SVMS_COMPLEXITY_FACTOR 10
7 SVMS_CONV_TOLERANCE .0001
8 SVMS_KERNEL_FUNCTION SVMS_GAUSSIAN
9 SVMS_NUM_PIVOTS 200
10 SVMS_STD_DEV 3.6742346141747673
Computed Settings:
setting name setting value
0 SVMS_NUM_ITERATIONS 30
1 SVMS_SOLVER SVMS_SOLVER_IPM
Global Statistics:
attribute name attribute value
0 CONVERGED YES
1 ITERATIONS 13
2 NUM_ROWS 455
Attributes:
area error
compactness error
concave points error
concavity error
fractal dimension error
mean area
mean compactness
mean concave points
mean concavity
mean fractal dimension
mean perimeter
mean radius
mean smoothness
mean symmetry
mean texture
perimeter error
radius error
worst area
worst compactness
worst concave points
worst concavity
worst fractal dimension
worst perimeter
worst radius
worst smoothness
worst symmetry
worst texture
Partition: NO
bash-4.4$
bash-4.4$ ls -l LOCAL_OML4PY_AUTOML_MODEL
-rw-r--r--. 1 oracle oinstall 233783 Jul 30 02:20 LOCAL_OML4PY_AUTOML_MODEL
bash-4.4$
declare
l_model_data blob;
begin
select serval into l_model_data
from sermodel;
dbms_data_mining.import_sermodel(
model_data => l_model_data,
model_name => 'LOCAL_OML4PY_AUTOML_MODEL'
);
end;
/
select * from user_mining_models;
bash-4.4$ sqlplus wksp_apexdev/*******@localhost/freepdb1
SQL*Plus: Release 23.0.0.0.0 - Production on Wed Jul 30 02:30:28 2025
Version 23.8.0.25.04
Copyright (c) 1982, 2025, Oracle. All rights reserved.
Last Successful login time: Wed Jul 30 2025 02:27:49 +00:00
Connected to:
Oracle Database 23ai Free Release 23.0.0.0.0 - Develop, Learn, and Run for Free
Version 23.8.0.25.04
SQL> declare
2 l_model_data blob;
3 begin
4 select serval into l_model_data
5 from sermodel;
6 dbms_data_mining.import_sermodel(
7 model_data => l_model_data,
8 model_name => 'LOCAL_OML4PY_AUTOML_MODEL'
9 );
10 end;
11 /
PL/SQL procedure successfully completed.
SQL> select * from user_mining_models;
MODEL_NAME
--------------------------------------------------------------------------------
MINING_FUNCTION ALGORITHM ALGORITHM_
------------------------------ ------------------------------ ----------
CREATION_ BUILD_DURATION MODEL_SIZE PAR
--------- -------------- ---------- ---
BUILD_SOURCE
--------------------------------------------------------------------------------
COMMENTS
--------------------------------------------------------------------------------
LOCAL_OML4PY_AUTOML_MODEL
CLASSIFICATION SUPPORT_VECTOR_MACHINES NATIVE
30-JUL-25 1 230971 NO
MODEL_NAME
--------------------------------------------------------------------------------
MINING_FUNCTION ALGORITHM ALGORITHM_
------------------------------ ------------------------------ ----------
CREATION_ BUILD_DURATION MODEL_SIZE PAR
--------- -------------- ---------- ---
BUILD_SOURCE
--------------------------------------------------------------------------------
COMMENTS
--------------------------------------------------------------------------------
select * from "WKSP_APEXDEV"."OPE$55_7110_01167"
SQL>
select * from (
select case_id,
prediction(LOCAL_OML4PY_AUTOML_MODEL using *) prediction,
target,
prediction_probability(LOCAL_OML4PY_AUTOML_MODEL using *) pred_probability
from breast_cancer
)
where prediction <> target;
SQL> select * from (
select case_id,
prediction(LOCAL_OML4PY_AUTOML_MODEL using *) prediction,
target,
prediction_probability(LOCAL_OML4PY_AUTOML_MODEL using *) pred_probability
from breast_cancer
)
where prediction <> target; 2 3 4 5 6 7 8
CASE_ID PREDICTION TARGET PRED_PROBABILITY
---------- ---------- ---------- ----------------
68 0 1 7.627E-001
73 1 0 8.335E-001
135 1 0 8.036E-001
215 1 0 8.967E-001
152 0 1 8.058E-001
40 1 0 9.518E-001
363 0 1 6.287E-001
504 0 1 5.761E-001
255 1 0 5.357E-001
263 1 0 8.017E-001
290 0 1 5.091E-001
CASE_ID PREDICTION TARGET PRED_PROBABILITY
---------- ---------- ---------- ----------------
297 1 0 9.423E-001
12 rows selected.
SQL> exit
Disconnected from Oracle Database 23ai Free Release 23.0.0.0.0 - Develop, Learn, and Run for Free
Version 23.8.0.25.04
bash-4.4$
bash-4.4$ mv LOCAL_OML4PY_AUTOML_MODEL work
bash-4.4$
- Autonomous DatabaseのOML4Pyで作成したADB_OML4PY_AUTOML_MODEL
- Autonomous DatabaseのAutoML UIで作成したADB_AUTOMLUI_MODEL
- オンプレミスのOML4Pyで作成したLOCAL_OML4PY_AUTOML_MODEL
構成を開き、列のマッピングを設定します。
declare
l_model_data blob;
l_model_name varchar2(128);
begin
select filename, blob_content into l_model_name, l_model_data
from apex_application_temp_files where name = :P1_MODEL_FILE;
dbms_data_mining.import_sermodel(
model_data => l_model_data,
model_name => l_model_name
);
end;