{ "cells": [ { "cell_type": "markdown", "id": "de94f659", "metadata": {}, "source": [ "# Policy iteration\n", "In policy iteration, you start with an arbitrary policy.\n", "Then, the the policy is improved at every iteration by first creating a DTMC for the previous policy, and then applying whichever choice would be best in that DTMC for the updated policy." ] }, { "cell_type": "code", "execution_count": 1, "id": "232685bd", "metadata": { "execution": { "iopub.execute_input": "2026-03-26T10:42:03.361929Z", "iopub.status.busy": "2026-03-26T10:42:03.361678Z", "iopub.status.idle": "2026-03-26T10:42:03.596106Z", "shell.execute_reply": "2026-03-26T10:42:03.595584Z" } }, "outputs": [], "source": [ "from stormvogel import *\n", "from stormvogel.visualization import JSVisualization\n", "from time import sleep\n", "\n", "\n", "def arg_max(funcs, args):\n", " \"\"\"Takes a list of callables and arguments and return the argument that yields the highest value.\"\"\"\n", " executed = [f(x) for f, x in zip(funcs, args)]\n", " index = executed.index(max(executed))\n", " return args[index]\n", "\n", "\n", "def policy_iteration(\n", " model: Model,\n", " prop: str,\n", " visualize: bool = True,\n", " layout: Layout = stormvogel.layout.DEFAULT(),\n", " delay: int = 2,\n", " clear: bool = False,\n", ") -> Result:\n", " \"\"\"Performs policy iteration on the given mdp.\n", " Args:\n", " model (Model): MDP.\n", " prop (str): PRISM property string to maximize. Rembember that this is a property on the induced DTMC, not the MDP.\n", " visualize (bool): Whether the intermediate and final results should be visualized. Defaults to True.\n", " layout (Layout): Layout to use to show the intermediate results.\n", " delay (int): Seconds to wait between each iteration.\n", " clear (bool): Whether to clear the visualization of each previous iteration.\n", " \"\"\"\n", " old = None\n", " new = random_scheduler(model)\n", "\n", " while not old == new:\n", " old = new\n", "\n", " dtmc = old.generate_induced_dtmc()\n", " dtmc_result = model_checking(dtmc, prop=prop)\n", "\n", " if visualize:\n", " mapped_values = {\n", " model.states[i]: dtmc_result.values.get(dtmc.states[i])\n", " for i in range(len(model.states))\n", " }\n", " mapped_result = Result(model, mapped_values, old)\n", " vis = JSVisualization(model, scheduler=old, result=mapped_result)\n", " vis.show()\n", " sleep(delay)\n", " if clear:\n", " vis.clear()\n", "\n", " choices = {}\n", " for i, s1 in enumerate(model.states):\n", "\n", " def compute_val(a):\n", " val = 0\n", " for p, s2 in s1.get_outgoing_transitions(a):\n", " # We get the state index in the original model, and look up in DTMC\n", " s2_idx = model.get_state_index(s2)\n", " dtmc_s2 = dtmc.states[s2_idx]\n", " val += p * dtmc_result.get_result_of_state(dtmc_s2)\n", " return val\n", "\n", " # arg_max evaluates the functions over the arguments, so we pass a list of lambdas\n", " lambdas = [\n", " (lambda a, compute_val=compute_val: compute_val(a))\n", " for _ in s1.available_actions()\n", " ]\n", " best_action = arg_max(lambdas, s1.available_actions())\n", " choices[s1] = best_action\n", "\n", " new = Scheduler(model, choices)\n", " if visualize:\n", " print(\"Value iteration done:\")\n", " mapped_values = {\n", " model.states[i]: dtmc_result.values.get(dtmc.states[i])\n", " for i in range(len(model.states))\n", " }\n", " mapped_result = Result(model, mapped_values, new)\n", " show(model, scheduler=new, result=mapped_result)\n", " return dtmc_result" ] }, { "cell_type": "code", "execution_count": 2, "id": "5f94eba4", "metadata": { "execution": { "iopub.execute_input": "2026-03-26T10:42:03.598031Z", "iopub.status.busy": "2026-03-26T10:42:03.597795Z", "iopub.status.idle": "2026-03-26T10:42:07.884831Z", "shell.execute_reply": "2026-03-26T10:42:07.884212Z" } }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9b152e0ddea94aecb74349e651615ee0", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Output()" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "02674914776a43dcab267e0c463c76e4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Output()" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Value iteration done:\n" ] }, { "data": { "text/html": [ "\n", "\n", "\n", " \n", " Network\n", " \n", " \n", " \n", " \n", " \n", "
\n", " \n", " \n", " \n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "lion = examples.create_lion_mdp()\n", "prop = 'P=?[F \"full\"]'\n", "res = policy_iteration(lion, prop)" ] }, { "cell_type": "markdown", "id": "aef421f1", "metadata": {}, "source": [ "Policy iteration is also available under `stormvogel.extensions.visual_algos`." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.13" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "state": { "02674914776a43dcab267e0c463c76e4": { "model_module": "@jupyter-widgets/output", "model_module_version": "1.0.0", "model_name": "OutputModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/output", "_model_module_version": "1.0.0", "_model_name": "OutputModel", "_view_count": null, "_view_module": "@jupyter-widgets/output", "_view_module_version": "1.0.0", "_view_name": "OutputView", "layout": "IPY_MODEL_e157b2ff68c7434f9573972eecc3188e", "msg_id": "", "outputs": [ { "data": { "text/html": "\n\n\n \n Network\n \n \n \n \n \n
\n \n \n \n\n", "text/plain": "" }, "metadata": {}, "output_type": "display_data" } ], "tabbable": null, "tooltip": null } }, "08d6faa49d7844818a370df7bf248380": { "model_module": "@jupyter-widgets/output", "model_module_version": "1.0.0", "model_name": "OutputModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/output", "_model_module_version": "1.0.0", "_model_name": "OutputModel", "_view_count": null, "_view_module": "@jupyter-widgets/output", "_view_module_version": "1.0.0", "_view_name": "OutputView", "layout": "IPY_MODEL_eead3601e266466eb099cee47eae06e9", "msg_id": "", "outputs": [], "tabbable": null, "tooltip": null } }, "28b713f88f5f402e89ff8e3bf6113424": { "model_module": "@jupyter-widgets/output", "model_module_version": "1.0.0", "model_name": "OutputModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/output", "_model_module_version": "1.0.0", "_model_name": "OutputModel", "_view_count": null, "_view_module": "@jupyter-widgets/output", "_view_module_version": "1.0.0", "_view_name": "OutputView", "layout": "IPY_MODEL_ef863824bc414a79ad553ea1cbbd2196", "msg_id": "", "outputs": [], "tabbable": null, "tooltip": null } }, "462deea401604f4e8e36076536e238b1": { "model_module": "@jupyter-widgets/output", "model_module_version": "1.0.0", "model_name": "OutputModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/output", "_model_module_version": "1.0.0", "_model_name": "OutputModel", "_view_count": null, "_view_module": "@jupyter-widgets/output", "_view_module_version": "1.0.0", "_view_name": "OutputView", "layout": "IPY_MODEL_923e1e6210e2447b8976040e900aaa28", "msg_id": "", "outputs": [], "tabbable": null, "tooltip": null } }, "6251c0e8f52c4456ac5b1361fa4d778f": { "model_module": "@jupyter-widgets/output", "model_module_version": "1.0.0", "model_name": "OutputModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/output", "_model_module_version": "1.0.0", "_model_name": "OutputModel", "_view_count": null, "_view_module": "@jupyter-widgets/output", "_view_module_version": "1.0.0", "_view_name": "OutputView", "layout": "IPY_MODEL_d9aba7ea261a4c328119c89f4b0aee1a", "msg_id": "", "outputs": [], "tabbable": null, "tooltip": null } }, "7e4ff77754eb4f44b08a89fd89d57989": { "model_module": "@jupyter-widgets/base", "model_module_version": "2.0.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "2.0.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border_bottom": null, "border_left": null, "border_right": null, "border_top": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "923e1e6210e2447b8976040e900aaa28": { "model_module": "@jupyter-widgets/base", "model_module_version": "2.0.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "2.0.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border_bottom": null, "border_left": null, "border_right": null, "border_top": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9b152e0ddea94aecb74349e651615ee0": { "model_module": "@jupyter-widgets/output", "model_module_version": "1.0.0", "model_name": "OutputModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/output", "_model_module_version": "1.0.0", "_model_name": "OutputModel", "_view_count": null, "_view_module": "@jupyter-widgets/output", "_view_module_version": "1.0.0", "_view_name": "OutputView", "layout": "IPY_MODEL_7e4ff77754eb4f44b08a89fd89d57989", "msg_id": "", "outputs": [ { "data": { "text/html": "\n\n\n \n Network\n \n \n \n \n \n
\n \n \n \n\n", "text/plain": "" }, "metadata": {}, "output_type": "display_data" } ], "tabbable": null, "tooltip": null } }, "d9aba7ea261a4c328119c89f4b0aee1a": { "model_module": "@jupyter-widgets/base", "model_module_version": "2.0.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "2.0.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border_bottom": null, "border_left": null, "border_right": null, "border_top": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e157b2ff68c7434f9573972eecc3188e": { "model_module": "@jupyter-widgets/base", "model_module_version": "2.0.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "2.0.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border_bottom": null, "border_left": null, "border_right": null, "border_top": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "eead3601e266466eb099cee47eae06e9": { "model_module": "@jupyter-widgets/base", "model_module_version": "2.0.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "2.0.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border_bottom": null, "border_left": null, "border_right": null, "border_top": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ef863824bc414a79ad553ea1cbbd2196": { "model_module": "@jupyter-widgets/base", "model_module_version": "2.0.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "2.0.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border_bottom": null, "border_left": null, "border_right": null, "border_top": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } } }, "version_major": 2, "version_minor": 0 } } }, "nbformat": 4, "nbformat_minor": 5 }