{ "cells": [ { "cell_type": "markdown", "id": "8966a060", "metadata": {}, "source": [ "# Policy iteration\n", "In policy iteration, you start with an arbitrary policy.\n", "Then, the the policy is improved at every iteration by first creating a DTMC for the previous policy, and then applying whichever choice would be best in that DTMC for the updated policy." ] }, { "cell_type": "code", "execution_count": 1, "id": "2b549cc1", "metadata": { "execution": { "iopub.execute_input": "2026-03-26T10:47:37.903974Z", "iopub.status.busy": "2026-03-26T10:47:37.903805Z", "iopub.status.idle": "2026-03-26T10:47:38.100973Z", "shell.execute_reply": "2026-03-26T10:47:38.100342Z" } }, "outputs": [], "source": [ "from stormvogel import *\n", "from stormvogel.visualization import JSVisualization\n", "from time import sleep\n", "\n", "\n", "def arg_max(funcs, args):\n", " \"\"\"Takes a list of callables and arguments and return the argument that yields the highest value.\"\"\"\n", " executed = [f(x) for f, x in zip(funcs, args)]\n", " index = executed.index(max(executed))\n", " return args[index]\n", "\n", "\n", "def policy_iteration(\n", " model: Model,\n", " prop: str,\n", " visualize: bool = True,\n", " layout: Layout = stormvogel.layout.DEFAULT(),\n", " delay: int = 2,\n", " clear: bool = False,\n", ") -> Result:\n", " \"\"\"Performs policy iteration on the given mdp.\n", " Args:\n", " model (Model): MDP.\n", " prop (str): PRISM property string to maximize. Rembember that this is a property on the induced DTMC, not the MDP.\n", " visualize (bool): Whether the intermediate and final results should be visualized. Defaults to True.\n", " layout (Layout): Layout to use to show the intermediate results.\n", " delay (int): Seconds to wait between each iteration.\n", " clear (bool): Whether to clear the visualization of each previous iteration.\n", " \"\"\"\n", " old = None\n", " new = random_scheduler(model)\n", "\n", " while not old == new:\n", " old = new\n", "\n", " dtmc = old.generate_induced_dtmc()\n", " dtmc_result = model_checking(dtmc, prop=prop)\n", "\n", " if visualize:\n", " vis = JSVisualization(\n", " model, layout=layout, scheduler=old, result=dtmc_result\n", " )\n", " vis.show()\n", " sleep(delay)\n", " if clear:\n", " vis.clear()\n", "\n", " choices = {\n", " i: arg_max(\n", " [\n", " lambda a: sum(\n", " [\n", " (p * dtmc_result.get_result_of_state(s2.id))\n", " for p, s2 in s1.get_outgoing_transitions(a)\n", " ]\n", " )\n", " for _ in s1.available_actions()\n", " ],\n", " s1.available_actions(),\n", " )\n", " for i, s1 in model.states.items()\n", " }\n", " new = Scheduler(model, choices)\n", " if visualize:\n", " print(\"Value iteration done:\")\n", " show(model, layout=layout, scheduler=new, result=dtmc_result)\n", " return dtmc_result" ] }, { "cell_type": "code", "execution_count": 2, "id": "3145cda1", "metadata": { "execution": { "iopub.execute_input": "2026-03-26T10:47:38.103135Z", "iopub.status.busy": "2026-03-26T10:47:38.102886Z", "iopub.status.idle": "2026-03-26T10:47:42.368891Z", "shell.execute_reply": "2026-03-26T10:47:42.368324Z" } }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ac073ccc56ef48bcada58d68ca36eb05", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Output()" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "87ae253a10c7404e8e97722c351a6214", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Output()" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Value iteration done:\n" ] }, { "data": { "text/html": [ "\n", "\n", "\n", " \n", " Network\n", " \n", " \n", " \n", " \n", " \n", "
\n", " \n", " \n", " \n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "lion = examples.create_lion_mdp()\n", "prop = 'P=?[F \"full\"]'\n", "res = policy_iteration(lion, prop, layout=Layout(\"layouts/lion_policy.json\"))" ] }, { "cell_type": "markdown", "id": "cfc0f640", "metadata": {}, "source": [ "Policy iteration is also available under `stormvogel.extensions.visual_algos`." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.13" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "state": { "020d8a88bacc4d7d82db0068255f7ee7": { "model_module": "@jupyter-widgets/base", "model_module_version": "2.0.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "2.0.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border_bottom": null, "border_left": null, "border_right": null, "border_top": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0d8787ba46f0492594ffe39bdc235adc": { "model_module": "@jupyter-widgets/base", "model_module_version": "2.0.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "2.0.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border_bottom": null, "border_left": null, "border_right": null, "border_top": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "185d7c58afa24072856b83ba61e059fc": { "model_module": "@jupyter-widgets/base", "model_module_version": "2.0.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "2.0.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border_bottom": null, "border_left": null, "border_right": null, "border_top": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "5d511243dfad452ab7a42cf2678ea2d9": { "model_module": "@jupyter-widgets/output", "model_module_version": "1.0.0", "model_name": "OutputModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/output", "_model_module_version": "1.0.0", "_model_name": "OutputModel", "_view_count": null, "_view_module": "@jupyter-widgets/output", "_view_module_version": "1.0.0", "_view_name": "OutputView", "layout": "IPY_MODEL_185d7c58afa24072856b83ba61e059fc", "msg_id": "", "outputs": [], "tabbable": null, "tooltip": null } }, "87ae253a10c7404e8e97722c351a6214": { "model_module": "@jupyter-widgets/output", "model_module_version": "1.0.0", "model_name": "OutputModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/output", "_model_module_version": "1.0.0", "_model_name": "OutputModel", "_view_count": null, "_view_module": "@jupyter-widgets/output", "_view_module_version": "1.0.0", "_view_name": "OutputView", "layout": "IPY_MODEL_020d8a88bacc4d7d82db0068255f7ee7", "msg_id": "", "outputs": [ { "data": { "text/html": "\n\n\n \n Network\n \n \n \n \n \n
\n \n \n \n\n", "text/plain": "" }, "metadata": {}, "output_type": "display_data" } ], "tabbable": null, "tooltip": null } }, "902ea508ea0c4083aadc47ed0a795498": { "model_module": "@jupyter-widgets/output", "model_module_version": "1.0.0", "model_name": "OutputModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/output", "_model_module_version": "1.0.0", "_model_name": "OutputModel", "_view_count": null, "_view_module": "@jupyter-widgets/output", "_view_module_version": "1.0.0", "_view_name": "OutputView", "layout": "IPY_MODEL_0d8787ba46f0492594ffe39bdc235adc", "msg_id": "", "outputs": [], "tabbable": null, "tooltip": null } }, "942050fe33aa4be4ba23fcbec0b4faa5": { "model_module": "@jupyter-widgets/output", "model_module_version": "1.0.0", "model_name": "OutputModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/output", "_model_module_version": "1.0.0", "_model_name": "OutputModel", "_view_count": null, "_view_module": "@jupyter-widgets/output", "_view_module_version": "1.0.0", "_view_name": "OutputView", "layout": "IPY_MODEL_c97838def0874bfa9e735c0eb64f3d79", "msg_id": "", "outputs": [], "tabbable": null, "tooltip": null } }, "ac073ccc56ef48bcada58d68ca36eb05": { "model_module": "@jupyter-widgets/output", "model_module_version": "1.0.0", "model_name": "OutputModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/output", "_model_module_version": "1.0.0", "_model_name": "OutputModel", "_view_count": null, "_view_module": "@jupyter-widgets/output", "_view_module_version": "1.0.0", "_view_name": "OutputView", "layout": "IPY_MODEL_ef08ced98a6e4b86b395eda852f5f2bd", "msg_id": "", "outputs": [ { "data": { "text/html": "\n\n\n \n Network\n \n \n \n \n \n
\n \n \n \n\n", "text/plain": "" }, "metadata": {}, "output_type": "display_data" } ], "tabbable": null, "tooltip": null } }, "c97838def0874bfa9e735c0eb64f3d79": { "model_module": "@jupyter-widgets/base", "model_module_version": "2.0.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "2.0.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border_bottom": null, "border_left": null, "border_right": null, "border_top": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ded592b89c27431b92d16006300ded97": { "model_module": "@jupyter-widgets/output", "model_module_version": "1.0.0", "model_name": "OutputModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/output", "_model_module_version": "1.0.0", "_model_name": "OutputModel", "_view_count": null, "_view_module": "@jupyter-widgets/output", "_view_module_version": "1.0.0", "_view_name": "OutputView", "layout": "IPY_MODEL_f0d25c5abac84cf0ab004fedf29d916e", "msg_id": "", "outputs": [], "tabbable": null, "tooltip": null } }, "ef08ced98a6e4b86b395eda852f5f2bd": { "model_module": "@jupyter-widgets/base", "model_module_version": "2.0.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "2.0.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border_bottom": null, "border_left": null, "border_right": null, "border_top": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f0d25c5abac84cf0ab004fedf29d916e": { "model_module": "@jupyter-widgets/base", "model_module_version": "2.0.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "2.0.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border_bottom": null, "border_left": null, "border_right": null, "border_top": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } } }, "version_major": 2, "version_minor": 0 } } }, "nbformat": 4, "nbformat_minor": 5 }