{ "cells": [ { "cell_type": "markdown", "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "58fab4bb-231e-48cf-8ed4-fc15a1b22845", "showTitle": false, "title": "" } }, "source": [ "
This Notebook adds information related to the following requirements:
Download this notebook in ipynb format here.
\n", "Let's quickly train a model and programmaticaly register it to Model registry:
" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "f1f46d6d-4586-482b-ae50-60e2fe77ea64", "showTitle": false, "title": "" } }, "outputs": [], "source": [ "from sklearn.linear_model import LinearRegression\n", "from sklearn.model_selection import train_test_split\n", "#\n", "import seaborn as sns\n", "#\n", "import mlflow\n", "#\n", "import logging\n", "import re" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "cd267f1b-93e2-4b87-9c8f-67a979660780", "showTitle": false, "title": "" } }, "outputs": [], "source": [ "mlflow.sklearn.autolog(disable=True)\n", "logging.getLogger(\"mlflow\").setLevel(logging.FATAL)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "b8964a28-4864-413f-8a84-dba563093362", "showTitle": false, "title": "" } }, "outputs": [], "source": [ "diamonds_df = sns.load_dataset(\"diamonds\").drop([\"cut\", \"color\", \"clarity\"], axis=1)\n", "#\n", "X_train, X_test, y_train, y_test = train_test_split(diamonds_df.drop([\"price\"], axis=1), diamonds_df[\"price\"], test_size=0.33)\n", "#\n", "model = LinearRegression().fit(X_train, y_train)\n", "model_path = \"sns_diamonds\"\n", "#\n", "with mlflow.start_run(run_name=\"register_diamonds\") as run:\n", " mlflow.sklearn.log_model(sk_model =model,\n", " artifact_path=model_path)" ] }, { "cell_type": "markdown", "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": {}, "inputWidgets": {}, "nuid": "57292136-b15a-4c28-aa3f-fdeb5c7df0f9", "showTitle": false, "title": "" } }, "source": [ "Programmaticaly register the latest logged model:
\n", "Note that, running multiple times the command below automatically register a new model with a version number incremented by one from last registered version.
" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "227f1c81-1c64-4ef2-acc8-8a31472a81ad", "showTitle": false, "title": "" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Registered model 'lr_sns_diamonds' already exists. Creating a new version of this model...\n", "Created version '11' of model 'lr_sns_diamonds'.\n" ] } ], "source": [ "# get the latest model\n", "latest_run_id = mlflow.search_runs().sort_values(by=\"end_time\", ascending=False).head(1)['run_id'][0]\n", "#\n", "mlflow.register_model(f\"runs:/{latest_run_id}/{model_path}\", name=\"lr_sns_diamonds\");" ] }, { "cell_type": "markdown", "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": {}, "inputWidgets": {}, "nuid": "92504d9b-e5a2-445d-8fd5-8450bd258171", "showTitle": false, "title": "" } }, "source": [ "Alternatively, a newly logged model can be logged automatically by using parameter registered_model_name
in mlflow.sklearn.log_model
:
Alternatively, a new registered model can be created from scratch, and then filled with a model from an existing run:
" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "01b63980-94b5-49c3-91cd-cf244ad307bb", "showTitle": false, "title": "" } }, "outputs": [], "source": [ "from mlflow.store.artifact.runs_artifact_repo import RunsArtifactRepository" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "142b8774-5fbf-4de7-92c0-13e4f3ae731c", "showTitle": false, "title": "" } }, "outputs": [], "source": [ "# Register model name in the model registry\n", "client = mlflow.MlflowClient()\n", "client.create_registered_model(\"sns_diamonds_create\")\n", "\n", "# Create a new version of the rfr model under the registered model name\n", "desc = \"A new version of sns diamonds dataset linear regressions model\"\n", "runs_uri = f\"runs:/{latest_run_id}/{model_path}\"\n", "model_src = RunsArtifactRepository.get_underlying_uri(runs_uri)\n", "mv = client.create_model_version(\"sns_diamonds_create\", model_src, latest_run_id, description=desc)" ] }, { "cell_type": "markdown", "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "881d8292-e64d-4ef3-9ed4-7be35a45f83b", "showTitle": false, "title": "" } }, "source": [ "\n", "Registered model metadata can be listed:
" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "85c05c1b-015d-405a-b6be-f8484a985d96", "showTitle": false, "title": "" } }, "outputs": [], "source": [ "for val in client.get_registered_model(\"sns_diamonds_create\"):\n", " print(val)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "db526dc9-c88c-4666-8532-0888c52a3efa", "showTitle": false, "title": "" } }, "outputs": [], "source": [ "# Set registered model tag\n", "client.set_registered_model_tag(\"sns_diamonds_create\", \"task\", \"classification\")\n", "for val in client.get_registered_model(\"sns_diamonds_create\"):\n", " print(val)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "3a553c4c-64f9-49c0-b5fd-4e7fbab7fade", "showTitle": false, "title": "" } }, "outputs": [], "source": [ "# Set model version tag\n", "client.set_model_version_tag(\"sns_diamonds_create\", \"2\", \"validation_status\", \"approved\")\n", "for val in client.get_registered_model(\"sns_diamonds_create\"):\n", " print(val)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "30836727-2f6d-46d5-bcfe-cdc65c5db4c5", "showTitle": false, "title": "" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Description of version 1 of the model: This is the first version of sns_diamonds_create model\n", "Description of version 2 of the model: A new version of sns diamonds dataset linear regressions model\n" ] } ], "source": [ "# Add or update 
description\n", "client.update_model_version(\n", " name=\"sns_diamonds_create\",\n", " version=1,\n", " description=\"This is the first version of sns_diamonds_create model\",\n", ")\n", "for version in client.search_model_versions(filter_string=\"name='sns_diamonds_create'\"):\n", " print(f\"Description of version {version.version} of the model: {version.description}\")" ] }, { "cell_type": "markdown", "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "6bcae850-fdf3-4f95-82e4-30f46c1dca81", "showTitle": false, "title": "" } }, "source": [ "See more information on how to update registered model here.
" ] }, { "cell_type": "markdown", "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "b5f6d0da-1d81-4fa0-9770-a9e4d6863534", "showTitle": false, "title": "" } }, "source": [ "\n", "By default, when registered, models are associated with tag None.
" ] }, { "cell_type": "markdown", "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "39620a82-0176-40b9-971e-967fda9af39d", "showTitle": false, "title": "" } }, "source": [ "\n", "List registered models and their latest version:
" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "9871bfb9-fa66-4065-a171-39a6c96ad2ae", "showTitle": false, "title": "" } }, "outputs": [ { "data": { "text/html": [ "name | run_id | latest_version | current_stage | status | tags | source |
---|---|---|---|---|---|---|
lr_sns_diamonds | 622df96ffd7f48589094b425e71743e1 | 4 | Archived | READY | Map() | dbfs:/databricks/mlflow-tracking/3581746151855359/622df96ffd7f48589094b425e71743e1/artifacts/sns_diamonds |
lr_sns_diamonds_v1 | 38ddf31bed524b7baf66eb93a8962754 | 1 | None | READY | Map() | dbfs:/databricks/mlflow-tracking/3581746151855359/38ddf31bed524b7baf66eb93a8962754/artifacts/sns_diamonds |
ml_flow_AddN_test | 0620a4cfcdcd407d92bc7680cc6312f7 | 1 | None | READY | Map() | dbfs:/databricks/mlflow-tracking/3515657616447348/0620a4cfcdcd407d92bc7680cc6312f7/artifacts/add5_model |
mllib_model | 6ccc766ac64c4250b3302487dfabf2da | 2 | None | READY | Map() | dbfs:/databricks/mlflow-tracking/3601260417884136/6ccc766ac64c4250b3302487dfabf2da/artifacts/model |
register-LR-All-Features-test | 2 | None | READY | Map() | dbfs:/databricks/mlflow-tracking/1878608598223959/512a0de8368b446bb663288c06ed0858/artifacts/model | |
registered-5-model | 17c2fb1fbdcf4bdb97cd9681cf9cca8d | 1 | None | READY | Map() | dbfs:/databricks/mlflow-tracking/3079599880510783/17c2fb1fbdcf4bdb97cd9681cf9cca8d/artifacts/pyfunc-model |
scikit-learn_model | 9403ddccf7b64568a462bbba2cdccaee | 5 | None | READY | Map() | dbfs:/databricks/mlflow-tracking/3601260417884136/9403ddccf7b64568a462bbba2cdccaee/artifacts/model |
sns_diamonds_create | 5f6052cad9f34deba6f934f69707a093 | 2 | None | READY | Map(validation_status -> approved) | dbfs:/databricks/mlflow-tracking/3581746151855359/5f6052cad9f34deba6f934f69707a093/artifacts/sns_diamonds |
trained_with_4_features | ce5848dbbaec406daa95f057f14722bb | 6 | None | READY | Map() | dbfs:/databricks/mlflow-tracking/3601260417884136/ce5848dbbaec406daa95f057f14722bb/artifacts/trained_with_4_features |
trained_with_all_features | ce5848dbbaec406daa95f057f14722bb | 5 | None | READY | Map() | dbfs:/databricks/mlflow-tracking/3601260417884136/ce5848dbbaec406daa95f057f14722bb/artifacts/trained_with_all_features |
webhook_diamonds | ed6f91126eb149e7bf39c024da865a00 | 1 | Staging | READY | Map() | dbfs:/databricks/mlflow-tracking/1352035400533066/ed6f91126eb149e7bf39c024da865a00/artifacts/webhook-model |
Info about one specific registered model and its latest version:
" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "90c4c973-5442-4a84-8314-70c92c8c3f81", "showTitle": false, "title": "" } }, "outputs": [], "source": [ "client.get_registered_model('lr_sns_diamonds')" ] }, { "cell_type": "markdown", "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": {}, "inputWidgets": {}, "nuid": "b4e996a0-b3fa-4135-b54e-eeaef8b54fde", "showTitle": false, "title": "" } }, "source": [ "Info about one specific model and a given version:
" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "be9ea997-228d-4692-934b-76b6513d3e59", "showTitle": false, "title": "" } }, "outputs": [], "source": [ "client.get_model_version('lr_sns_diamonds', 2)" ] }, { "cell_type": "markdown", "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": {}, "inputWidgets": {}, "nuid": "dff14a6e-6e4c-4f2b-8409-2e89eb1da7e1", "showTitle": false, "title": "" } }, "source": [ "Transition a specific version of a registered model to a given stage. Valid values for stage are: Production, Staging, Archived, None
" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "e79ff6f4-0c37-479b-a7d3-e67c3911be20", "showTitle": false, "title": "" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Out[41]:Archive a specific version of a registered model is the same command:
" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "c462771f-b6cf-493c-b77b-13f4311006de", "showTitle": false, "title": "" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Out[42]:Delete a registered model version:
" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "ae7ce43b-efdd-4284-a4cf-84d2661f91f6", "showTitle": false, "title": "" } }, "outputs": [], "source": [ "client.delete_model_version('lr_sns_diamonds', 6)" ] }, { "cell_type": "markdown", "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": {}, "inputWidgets": {}, "nuid": "24e0e44b-1a98-44f4-8a39-ab026c1344d5", "showTitle": false, "title": "" } }, "source": [ "Get a list of available properties and methods:
" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "43f65c9b-d3c0-456f-8d37-5d59c5e580d0", "showTitle": false, "title": "" } }, "outputs": [ { "data": { "text/html": [ "props_and_methods |
---|
create_experiment |
create_model_version |
create_registered_model |
create_run |
delete_experiment |
delete_model_version |
delete_model_version_tag |
delete_registered_model |
delete_registered_model_tag |
delete_run |
delete_tag |
download_artifacts |
get_experiment |
get_experiment_by_name |
get_latest_versions |
get_metric_history |
get_model_version |
get_model_version_download_uri |
get_model_version_stages |
get_registered_model |
get_run |
list_artifacts |
list_experiments |
list_registered_models |
list_run_infos |
log_artifact |
log_artifacts |
log_batch |
log_dict |
log_figure |
log_image |
log_metric |
log_param |
log_text |
rename_experiment |
rename_registered_model |
restore_experiment |
restore_run |
search_experiments |
search_model_versions |
search_registered_models |
search_runs |
set_experiment_tag |
set_model_version_tag |
set_registered_model_tag |
set_tag |
set_terminated |
tracking_uri |
transition_model_version_stage |
update_model_version |
update_registered_model |