{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Example usage\n",
    "\n",
    "To use `okridge` in a project:"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Import Necessary Packages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-12-04T01:47:13.346122Z",
     "iopub.status.busy": "2023-12-04T01:47:13.341952Z",
     "iopub.status.idle": "2023-12-04T01:47:14.545116Z",
     "shell.execute_reply": "2023-12-04T01:47:14.544164Z"
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "from pathlib import Path\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "from okridge.tree import BNBTree\n",
    "from okridge.utils import download_file_from_google_drive"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Download Sample Synthetic Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-12-04T01:47:14.551755Z",
     "iopub.status.busy": "2023-12-04T01:47:14.550288Z",
     "iopub.status.idle": "2023-12-04T01:47:14.654148Z",
     "shell.execute_reply": "2023-12-04T01:47:14.653631Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Shape of feature matrix is (6000, 3000)\n",
      "There are 6000 number of samples\n"
     ]
    }
   ],
   "source": [
    "# Google Drive file id passed to the download helper for the sample dataset.\n",
    "DATA_FILE_ID = \"1lizlnufRBmEzMNpr0OlgE-P7otC8opkX\"\n",
    "data_file_path = \"../tests/Synthetic_n=6000_p=3000_k=10_rho=0.5_snr=5.0_seed=0.npy\"\n",
    "\n",
    "# Download only when the file is not already on disk, so re-runs reuse the local copy.\n",
    "if not os.path.isfile(data_file_path):\n",
    "    download_file_from_google_drive(DATA_FILE_ID, data_file_path)\n",
    "\n",
    "# NOTE(review): allow_pickle=True unpickles the file contents, which can execute\n",
    "# arbitrary code. Only load this file when it comes from a source you trust.\n",
    "loaded_data = np.load(data_file_path, allow_pickle=True)\n",
    "\n",
    "# Unwrap the single stored object once; it is used as a mapping with keys \"X\" and \"y\".\n",
    "data_dict = loaded_data.item()\n",
    "X, y = data_dict.get(\"X\"), data_dict.get(\"y\")\n",
    "\n",
    "print(\"Shape of feature matrix is\", X.shape)\n",
    "print(\"There are {} number of samples\".format(len(y)))"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Apply OKRidge Software"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-12-04T01:47:14.668749Z",
     "iopub.status.busy": "2023-12-04T01:47:14.668542Z",
     "iopub.status.idle": "2023-12-04T01:47:48.068129Z",
     "shell.execute_reply": "2023-12-04T01:47:48.067705Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using max memory (300 GB)\n",
      "using breadth-first search\n",
      "'l' -> level(depth) of BnB tree, 'd' -> best dual bound, 'u' -> best upper(primal) bound, 'g' -> optimiality gap, 't' -> time\n",
      "l: 0, d: -659444.6478942606, u: -657798.0645024217, g: 0.0025031746, t: 2.66743 s\n",
      "l: 1, d: -659387.8270570670, u: -657798.0645024217, g: 0.0024167942, t: 5.36963 s\n",
      "l: 2, d: -659317.8396916100, u: -657798.0645024217, g: 0.0023103978, t: 8.06236 s\n",
      "l: 3, d: -659250.2763933846, u: -657798.0645024217, g: 0.0022076865, t: 10.70843 s\n",
      "l: 4, d: -659169.0430832551, u: -657798.0645024217, g: 0.0020841937, t: 13.27604 s\n",
      "l: 5, d: -659066.9599284586, u: -657798.0645024217, g: 0.0019290045, t: 15.77262 s\n",
      "l: 6, d: -658941.7627230188, u: -657798.0645024217, g: 0.0017386768, t: 18.27392 s\n",
      "l: 7, d: -658798.4741379283, u: -657798.0645024217, g: 0.0015208461, t: 20.69671 s\n",
      "l: 8, d: -658604.8032165651, u: -657798.0645024217, g: 0.0012264231, t: 23.12765 s\n",
      "l: 9, d: -658331.2630668270, u: -657798.0645024217, g: 0.0008105809, t: 25.51869 s\n"
     ]
    }
   ],
   "source": [
    "k = 10  # cardinality constraint\n",
    "lambda2 = 0.1  # l2 regularization parameter\n",
    "gap_tol = 1e-4  # optimality gap tolerance\n",
    "verbose = True  # print out the progress\n",
    "time_limit = 180  # time limit in seconds\n",
    "\n",
    "BnB_optimizer = BNBTree(X=X, y=y, lambda2=lambda2)\n",
    "\n",
    "upper_bound, betas, optimality_gap, max_lower_bound, running_time = BnB_optimizer.solve(\n",
    "    k=k, gap_tol=gap_tol, verbose=verbose, time_limit=time_limit\n",
    ")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inspect the Results\n",
    "\n",
    "Print the values returned by `solve`: the loss of the best solution, the best lower bound, the indices of the nonzero coefficients, the optimality gap, and the running time."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-12-04T01:47:48.071516Z",
     "iopub.status.busy": "2023-12-04T01:47:48.070706Z",
     "iopub.status.idle": "2023-12-04T01:47:48.075783Z",
     "shell.execute_reply": "2023-12-04T01:47:48.075432Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loss of best solution is -657798.0645024217\n",
      "Best lower bound is -657798.0645024217\n",
      "indices of nonzero coefficients are [ 0 300 600 900 1200 1500 1800 2100 2400 2700]\n",
      "Optimality gap is 0.0%\n",
      "Running time is 27.08906078338623 seconds\n"
     ]
    }
   ],
   "source": [
    "print(\"Loss of best solution is\", upper_bound)\n",
    "print(\"Best lower bound is\", max_lower_bound)\n",
    "print(\"indices of nonzero coefficients are\", np.where(betas != 0)[0])\n",
    "print(\"Optimality gap is {}%\".format(optimality_gap * 100))\n",
    "print(\"Running time is {} seconds\".format(running_time))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}