From aba5d29c657b9fc074ae099a0e8e1fa9953f9164 Mon Sep 17 00:00:00 2001 From: Sungjin Lee Date: Tue, 4 Jun 2019 10:05:02 -0700 Subject: [PATCH 1/3] add header back in --- convlab/agent/__init__.py | 3 + convlab/agent/algorithm/policy_util.py | 1 + convlab/agent/algorithm/ppo.py | 3 + convlab/agent/algorithm/random.py | 3 + convlab/agent/algorithm/reinforce.py | 3 + convlab/agent/algorithm/sarsa.py | 3 + convlab/agent/algorithm/sil.py | 3 + convlab/agent/memory/base.py | 3 + convlab/agent/memory/onpolicy.py | 3 + convlab/agent/memory/prioritized.py | 3 + convlab/agent/memory/replay.py | 3 + convlab/agent/net/base.py | 3 + convlab/agent/net/conv.py | 3 + convlab/agent/net/mlp.py | 3 + convlab/agent/net/net_util.py | 3 + convlab/agent/net/recurrent.py | 3 + convlab/env/base.py | 3 + convlab/env/movie.py | 2 +- convlab/env/multiwoz.py | 2 +- convlab/experiment/__init__.py | 3 + convlab/experiment/analysis.py | 3 + convlab/experiment/control.py | 3 + convlab/experiment/retro_analysis.py | 3 + convlab/experiment/search.py | 3 + convlab/lib/__init__.py | 2 + convlab/lib/decorator.py | 3 + convlab/lib/distribution.py | 3 + convlab/lib/file_util.py | 3 +- convlab/lib/logger.py | 3 + convlab/lib/math_util.py | 3 + convlab/lib/optimizer.py | 3 + convlab/lib/util.py | 3 + convlab/lib/viz.py | 3 + convlab/spec/demo.json | 97 ++++++++++++++++++++++++++ 34 files changed, 188 insertions(+), 3 deletions(-) diff --git a/convlab/agent/__init__.py b/convlab/agent/__init__.py index 0eb715b..bde4646 100644 --- a/convlab/agent/__init__.py +++ b/convlab/agent/__init__.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. + # The agent module import numpy as np import pandas as pd diff --git a/convlab/agent/algorithm/policy_util.py b/convlab/agent/algorithm/policy_util.py index 4213190..fa58605 100644 --- a/convlab/agent/algorithm/policy_util.py +++ b/convlab/agent/algorithm/policy_util.py @@ -1,5 +1,6 @@ # Modified by Microsoft Corporation. 
# Licensed under the MIT license. + # Action policy module # Constructs action probability distribution used by agent to sample action and calculate log_prob, entropy, etc. from gym import spaces diff --git a/convlab/agent/algorithm/ppo.py b/convlab/agent/algorithm/ppo.py index 6ff7336..8ea27b2 100644 --- a/convlab/agent/algorithm/ppo.py +++ b/convlab/agent/algorithm/ppo.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. + from copy import deepcopy from convlab.agent import net from convlab.agent.algorithm import policy_util diff --git a/convlab/agent/algorithm/random.py b/convlab/agent/algorithm/random.py index db00bb6..4239338 100644 --- a/convlab/agent/algorithm/random.py +++ b/convlab/agent/algorithm/random.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. + # The random agent algorithm # For basic dev purpose from convlab.agent.algorithm.base import Algorithm diff --git a/convlab/agent/algorithm/reinforce.py b/convlab/agent/algorithm/reinforce.py index 0a41aef..3f2b29d 100644 --- a/convlab/agent/algorithm/reinforce.py +++ b/convlab/agent/algorithm/reinforce.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. + from convlab.agent import net from convlab.agent.algorithm import policy_util from convlab.agent.algorithm.base import Algorithm diff --git a/convlab/agent/algorithm/sarsa.py b/convlab/agent/algorithm/sarsa.py index 91c019f..49a26e6 100644 --- a/convlab/agent/algorithm/sarsa.py +++ b/convlab/agent/algorithm/sarsa.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. 
+ from convlab.agent import net from convlab.agent.algorithm import policy_util from convlab.agent.algorithm.base import Algorithm diff --git a/convlab/agent/algorithm/sil.py b/convlab/agent/algorithm/sil.py index 3a5922e..3670a30 100644 --- a/convlab/agent/algorithm/sil.py +++ b/convlab/agent/algorithm/sil.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. + from convlab.agent import net, memory from convlab.agent.algorithm import policy_util from convlab.agent.algorithm.actor_critic import ActorCritic diff --git a/convlab/agent/memory/base.py b/convlab/agent/memory/base.py index b09e3a3..199c6b1 100644 --- a/convlab/agent/memory/base.py +++ b/convlab/agent/memory/base.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. + from abc import ABC, abstractmethod from collections import deque from convlab.lib import logger, util diff --git a/convlab/agent/memory/onpolicy.py b/convlab/agent/memory/onpolicy.py index da5033a..e6e9e0c 100644 --- a/convlab/agent/memory/onpolicy.py +++ b/convlab/agent/memory/onpolicy.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. + from collections import deque from copy import deepcopy from convlab.agent.memory.base import Memory diff --git a/convlab/agent/memory/prioritized.py b/convlab/agent/memory/prioritized.py index c1cbb23..9c3712f 100644 --- a/convlab/agent/memory/prioritized.py +++ b/convlab/agent/memory/prioritized.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. + from convlab.agent.memory.replay import Replay from convlab.lib import util from convlab.lib.decorator import lab_api diff --git a/convlab/agent/memory/replay.py b/convlab/agent/memory/replay.py index 2a543e8..e06583a 100644 --- a/convlab/agent/memory/replay.py +++ b/convlab/agent/memory/replay.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. 
+ from collections import deque from copy import deepcopy from convlab.agent.memory.base import Memory diff --git a/convlab/agent/net/base.py b/convlab/agent/net/base.py index 719cbc5..e0b2349 100644 --- a/convlab/agent/net/base.py +++ b/convlab/agent/net/base.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. + from abc import ABC, abstractmethod from convlab.agent.net import net_util import pydash as ps diff --git a/convlab/agent/net/conv.py b/convlab/agent/net/conv.py index 93e1a09..1307121 100644 --- a/convlab/agent/net/conv.py +++ b/convlab/agent/net/conv.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. + from convlab.agent.net import net_util from convlab.agent.net.base import Net from convlab.lib import math_util, util diff --git a/convlab/agent/net/mlp.py b/convlab/agent/net/mlp.py index fff7026..1b2fa81 100644 --- a/convlab/agent/net/mlp.py +++ b/convlab/agent/net/mlp.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. + from convlab.agent.net import net_util from convlab.agent.net.base import Net from convlab.lib import math_util, util diff --git a/convlab/agent/net/net_util.py b/convlab/agent/net/net_util.py index 753ee88..f1ecb1c 100644 --- a/convlab/agent/net/net_util.py +++ b/convlab/agent/net/net_util.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. + from functools import partial, wraps from convlab.lib import logger, optimizer, util import os diff --git a/convlab/agent/net/recurrent.py b/convlab/agent/net/recurrent.py index 3423b84..fd8a4bb 100644 --- a/convlab/agent/net/recurrent.py +++ b/convlab/agent/net/recurrent.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. 
+ from convlab.agent.net import net_util from convlab.agent.net.base import Net from convlab.lib import util diff --git a/convlab/env/base.py b/convlab/env/base.py index 5c476a7..cc6845e 100644 --- a/convlab/env/base.py +++ b/convlab/env/base.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. + from abc import ABC, abstractmethod from gym import spaces from convlab.lib import logger, util diff --git a/convlab/env/movie.py b/convlab/env/movie.py index 1ddca63..b7908e8 100644 --- a/convlab/env/movie.py +++ b/convlab/env/movie.py @@ -1,4 +1,4 @@ -# Modified by Microsoft Corporation. +# Copyright (c) Microsoft Corporation. # Licensed under the MIT license. import io diff --git a/convlab/env/multiwoz.py b/convlab/env/multiwoz.py index a8c665d..8b7cb96 100644 --- a/convlab/env/multiwoz.py +++ b/convlab/env/multiwoz.py @@ -1,4 +1,4 @@ -# Modified by Microsoft Corporation. +# Copyright (c) Microsoft Corporation. # Licensed under the MIT license. from copy import deepcopy diff --git a/convlab/experiment/__init__.py b/convlab/experiment/__init__.py index a9d8b91..922402f 100644 --- a/convlab/experiment/__init__.py +++ b/convlab/experiment/__init__.py @@ -1,2 +1,5 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. + # The experiment module # Handles experimentation logic: control, analysis diff --git a/convlab/experiment/analysis.py b/convlab/experiment/analysis.py index 89c02e1..edd7c0c 100644 --- a/convlab/experiment/analysis.py +++ b/convlab/experiment/analysis.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. + from convlab.lib import logger, util, viz from convlab.spec import random_baseline import numpy as np diff --git a/convlab/experiment/control.py b/convlab/experiment/control.py index d8921a4..4240b33 100644 --- a/convlab/experiment/control.py +++ b/convlab/experiment/control.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. 
+# Licensed under the MIT license. + # The control module # Creates and runs control loops at levels: Experiment, Trial, Session from copy import deepcopy diff --git a/convlab/experiment/retro_analysis.py b/convlab/experiment/retro_analysis.py index 270f402..e5a5a99 100644 --- a/convlab/experiment/retro_analysis.py +++ b/convlab/experiment/retro_analysis.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. + # The retro analysis module # Runs analysis post-hoc using existing data files # example: yarn retro_analyze data/reinforce_cartpole_2018_01_22_211751/ diff --git a/convlab/experiment/search.py b/convlab/experiment/search.py index 03f6745..19ce350 100644 --- a/convlab/experiment/search.py +++ b/convlab/experiment/search.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. + from copy import deepcopy from convlab.lib import logger, util import numpy as np diff --git a/convlab/lib/__init__.py b/convlab/lib/__init__.py index e69de29..b53c17a 100644 --- a/convlab/lib/__init__.py +++ b/convlab/lib/__init__.py @@ -0,0 +1,2 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. diff --git a/convlab/lib/decorator.py b/convlab/lib/decorator.py index aa563a8..360cf6c 100644 --- a/convlab/lib/decorator.py +++ b/convlab/lib/decorator.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. + from functools import wraps from convlab.lib import logger import time diff --git a/convlab/lib/distribution.py b/convlab/lib/distribution.py index 6fed228..36d6ef2 100644 --- a/convlab/lib/distribution.py +++ b/convlab/lib/distribution.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. 
+ # Custom PyTorch distribution classes to be registered in policy_util.py # Mainly used by policy_util action distribution from torch import distributions diff --git a/convlab/lib/file_util.py b/convlab/lib/file_util.py index 016eb5f..86c746b 100644 --- a/convlab/lib/file_util.py +++ b/convlab/lib/file_util.py @@ -1,5 +1,6 @@ -# Modified by Microsoft Corporation. +# Copyright (c) Microsoft Corporation. # Licensed under the MIT license. + from pathlib import Path from allennlp.common.file_utils import cached_path as allennlp_cached_path diff --git a/convlab/lib/logger.py b/convlab/lib/logger.py index ee98a0d..dbfa2bb 100644 --- a/convlab/lib/logger.py +++ b/convlab/lib/logger.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. + from convlab.lib import util import colorlog import logging diff --git a/convlab/lib/math_util.py b/convlab/lib/math_util.py index ebff8bb..542b00a 100644 --- a/convlab/lib/math_util.py +++ b/convlab/lib/math_util.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. + # Various math calculations used by algorithms import numpy as np import torch diff --git a/convlab/lib/optimizer.py b/convlab/lib/optimizer.py index fecb379..8912c94 100644 --- a/convlab/lib/optimizer.py +++ b/convlab/lib/optimizer.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. + # Custom PyTorch optimizer classes, to be registered in net_util.py import math import torch diff --git a/convlab/lib/util.py b/convlab/lib/util.py index df3879d..461215d 100644 --- a/convlab/lib/util.py +++ b/convlab/lib/util.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. 
+ from collections import deque from contextlib import contextmanager from datetime import datetime diff --git a/convlab/lib/viz.py b/convlab/lib/viz.py index 815c6f8..f738bad 100644 --- a/convlab/lib/viz.py +++ b/convlab/lib/viz.py @@ -1,3 +1,6 @@ +# Modified by Microsoft Corporation. +# Licensed under the MIT license. + # The data visualization module # Defines plotting methods for analysis from plotly import graph_objs as go, io as pio, tools diff --git a/convlab/spec/demo.json b/convlab/spec/demo.json index b84891a..0224bb0 100644 --- a/convlab/spec/demo.json +++ b/convlab/spec/demo.json @@ -486,6 +486,103 @@ } } }, + "milu_rule_wreinforce_template": { + "agent": [{ + "name": "DialogAgent", + "nlu": { + "name": "MILU", + "model_file": "https://convlab.blob.core.windows.net/models/milu.tar.gz" + }, + "dst": { + "name": "RuleDST" + }, + "nlg": { + "name": "MultiwozTemplateNLG", + "is_user": false + }, + "state_encoder": { + "name": "MultiWozStateEncoder" + }, + "action_decoder": { + "name": "MultiWozVocabActionDecoder" + }, + "algorithm": { + "name": "WarmUpReinforce", + "action_pdtype": "default", + "action_policy": "warmup_default", + "warmup_epi": 1000, + "explore_var_spec": null, + "gamma": 0.98, + "entropy_coef_spec": { + "name": "linear_decay", + "start_val": 0.01, + "end_val": 0.001, + "start_step": 1000, + "end_step": 500000 + }, + "training_frequency": 1 + }, + "memory": { + "name": "OnPolicyReplay" + }, + "net": { + "type": "MLPNet", + "hid_layers": [100], + "hid_layers_activation": "relu", + "clip_grad_val": null, + "loss_spec": { + "name": "MSELoss" + }, + "optim_spec": { + "name": "Adam", + "lr": 0.001 + }, + "lr_scheduler_spec": { + "name": "StepLR", + "step_size": 1000, + "gamma": 0.999 + }, + "gpu": false + } + }], + "env": [{ + "name": "multiwoz", + "action_dim": 300, + "observation_dim": 393, + "max_t": 40, + "max_frame": 500000, + "nlu": { + "name": "MILU", + "model_file": "https://convlab.blob.core.windows.net/models/milu.tar.gz" + }, 
+ "user_policy": { + "name": "UserPolicyAgendaMultiWoz" + }, + "sys_policy": { + "name": "RuleBasedMultiwozBot" + }, + "nlg": { + "name": "MultiwozTemplateNLG", + "is_user": true + } + }], + "body": { + "product": "outer", + "num": 1 + }, + "meta": { + "distributed": false, + "num_eval": 100, + "eval_frequency": 1000, + "max_tick_unit": "total_t", + "max_trial": 1, + "max_session": 3, + "resources": { + "num_cpus": 1, + "num_gpus": 0 + } + } + }, "onenet_rule_ppo_template": { "agent": [{ "name": "DialogAgent", From 42738a02fa00a2d79e1efb383fc5fd6aeaf64270 Mon Sep 17 00:00:00 2001 From: Sungjin Lee Date: Tue, 4 Jun 2019 10:10:04 -0700 Subject: [PATCH 2/3] add milu_rule_wreinforce --- convlab/spec/demo.json | 89 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/convlab/spec/demo.json b/convlab/spec/demo.json index 0224bb0..ee5f12c 100644 --- a/convlab/spec/demo.json +++ b/convlab/spec/demo.json @@ -583,6 +583,95 @@ } } }, + "milu_rule_wreinforce": { + "agent": [{ + "name": "DialogAgent", + "nlu": { + "name": "MILU", + "model_file": "https://convlab.blob.core.windows.net/models/milu.tar.gz" + }, + "dst": { + "name": "RuleDST" + }, + "state_encoder": { + "name": "MultiWozStateEncoder" + }, + "action_decoder": { + "name": "MultiWozVocabActionDecoder" + }, + "algorithm": { + "name": "WarmUpReinforce", + "action_pdtype": "default", + "action_policy": "warmup_default", + "warmup_epi": 1000, + "explore_var_spec": null, + "gamma": 0.98, + "entropy_coef_spec": { + "name": "linear_decay", + "start_val": 0.01, + "end_val": 0.001, + "start_step": 1000, + "end_step": 500000 + }, + "training_frequency": 1 + }, + "memory": { + "name": "OnPolicyReplay" + }, + "net": { + "type": "MLPNet", + "hid_layers": [100], + "hid_layers_activation": "relu", + "clip_grad_val": null, + "loss_spec": { + "name": "MSELoss" + }, + "optim_spec": { + "name": "Adam", + "lr": 0.001 + }, + "lr_scheduler_spec": { + "name": "StepLR", + "step_size": 1000, + "gamma": 0.999 
+ }, + "gpu": false + } + }], + "env": [{ + "name": "multiwoz", + "action_dim": 300, + "observation_dim": 393, + "max_t": 40, + "max_frame": 500000, + "user_policy": { + "name": "UserPolicyAgendaMultiWoz" + }, + "sys_policy": { + "name": "RuleBasedMultiwozBot" + }, + "nlg": { + "name": "MultiwozTemplateNLG", + "is_user": true + } + }], + "body": { + "product": "outer", + "num": 1 + }, + "meta": { + "distributed": false, + "num_eval": 100, + "eval_frequency": 1000, + "max_tick_unit": "total_t", + "max_trial": 1, + "max_session": 3, + "resources": { + "num_cpus": 1, + "num_gpus": 0 + } + } + }, "onenet_rule_ppo_template": { "agent": [{ "name": "DialogAgent", From 403c6df3a7e561b427e43c51858dd63ef007beef Mon Sep 17 00:00:00 2001 From: Sungjin Lee Date: Tue, 4 Jun 2019 10:15:14 -0700 Subject: [PATCH 3/3] removed printouts --- convlab/modules/policy/system/multiwoz/util.py | 1 - convlab/modules/state_encoder/multiwoz/multiwoz_state_encoder.py | 1 - convlab/modules/word_policy/multiwoz/mdrg/predict.py | 1 - 3 files changed, 3 deletions(-) diff --git a/convlab/modules/policy/system/multiwoz/util.py b/convlab/modules/policy/system/multiwoz/util.py index 051078f..afc7188 100644 --- a/convlab/modules/policy/system/multiwoz/util.py +++ b/convlab/modules/policy/system/multiwoz/util.py @@ -248,7 +248,6 @@ def get_info_state(belief_state): booking.append(0) else: if belief_state[domain]['book'][slot] != "": - print("domain {} booking set".format(domain)) booking.append(1) else: booking.append(0) diff --git a/convlab/modules/state_encoder/multiwoz/multiwoz_state_encoder.py b/convlab/modules/state_encoder/multiwoz/multiwoz_state_encoder.py index d16a041..8f3cff1 100644 --- a/convlab/modules/state_encoder/multiwoz/multiwoz_state_encoder.py +++ b/convlab/modules/state_encoder/multiwoz/multiwoz_state_encoder.py @@ -92,7 +92,6 @@ def get_info_state(self, belief_state): booking.append(0) else: if belief_state[domain]['book'][slot] != "": - print("domain {} booking set".format(domain)) 
booking.append(1) else: booking.append(0) diff --git a/convlab/modules/word_policy/multiwoz/mdrg/predict.py b/convlab/modules/word_policy/multiwoz/mdrg/predict.py index 3399cbb..a3b86b2 100644 --- a/convlab/modules/word_policy/multiwoz/mdrg/predict.py +++ b/convlab/modules/word_policy/multiwoz/mdrg/predict.py @@ -249,7 +249,6 @@ def get_summary_bstate(bstate): booking.append(0) else: if bstate[domain]['book'][slot] != "": - print("domain {} booking set".format(domain)) booking.append(1) else: booking.append(0)