What is the notebook about?¶
Problem - YouTube¶
This problem concerns a YouTuber who has to employ someone to edit their videos.
- Formulate the problem as an MDP
- Use dynamic programming to find out the optimal policy and optimal values for each month
- Visualize and explain the results
How to use this notebook? 📝¶
This is a shared template, and any edits you make here will not be saved. You should make a copy in your own drive: click the "File" menu (top-left), then "Save a Copy in Drive". You can then work in your copy however you like.
Update the config parameters. You can define the common variables here
| Variable | Description |
|---|---|
| `AICROWD_DATASET_PATH` | Path to the file containing test data. This should be an absolute path. |
| `AICROWD_RESULTS_DIR` | Path to write the output to. |
| `AICROWD_ASSETS_DIR` | In case your notebook needs additional files (like model weights, etc.), you can add them to a directory and specify the path to the directory here (please specify a relative path). The contents of this directory will be sent to AIcrowd for evaluation. |
| `AICROWD_API_KEY` | In order to submit your code to AIcrowd, you need to provide your account's API key. This key is available at https://www.aicrowd.com/participants/me |
- Installing packages. Please use the Install packages 🗃 section to install the packages
Setup AIcrowd Utilities 🛠¶
We use this to bundle the files for submission and create a submission on AIcrowd. Do not edit this block.
# Install (or upgrade, via -U) the aicrowd-cli package; pip's stdout is
# redirected to /dev/null so the cell output stays short.
!pip install -U aicrowd-cli > /dev/null
AIcrowd Runtime Configuration 🧷¶
Define configuration parameters.
import os
# Path to the dataset archive. The DATASET_PATH environment variable takes
# precedence; otherwise the archive is expected in the current working directory.
AICROWD_DATASET_PATH = os.getenv("DATASET_PATH", os.getcwd()+"/61c5aa77-62c0-48c9-afef-96d618708b43_data_youtube.zip")
# Directory the result files are written to (OUTPUTS_DIR overrides the default).
AICROWD_RESULTS_DIR = os.getenv("OUTPUTS_DIR", "results")
# NOTE(review): do not leave a real API key in a notebook that gets shared or committed.
API_KEY = "" # Get your key from https://www.aicrowd.com/participants/me (ctrl + click the link)
# Authenticate the CLI with the key above, then download this challenge's dataset.
!aicrowd login --api-key $API_KEY
!aicrowd dataset download -c rl-assignment-2-youtube
# Folder that later cells look into for 'inputs' and 'targets'; presumably the
# directory the archive below unpacks to (confirm after unzipping).
DATASET_DIR = 'data_youtube'
!unzip $AICROWD_DATASET_PATH
Install packages 🗃¶
Please add all package installations in this section
Import packages 💻¶
import numpy as np
import matplotlib.pyplot as plt
import os
# ADD ANY IMPORTS YOU WANT HERE
Prediction Phase¶
class YouTuberEnv:
    """Parameter holder for the YouTuber editor-hiring problem.

    The constructor takes a single mapping and copies every entry named in
    ``_PARAM_NAMES`` onto the instance as an attribute of the same name.
    """

    # Required parameter names, in the order they are validated and stored.
    _PARAM_NAMES = (
        "low_salary",
        "high_salary",
        "low_quit_prob",
        "high_quit_prob",
        "self_edit_cost",
        "low_add_cost",
        "high_add_cost",
        "low_add_success_prob",
        "high_add_success_prob",
    )

    def __init__(self, kwargs):
        """Validate ``kwargs`` and store each required entry as an attribute.

        Args:
            kwargs: mapping that must contain every name in ``_PARAM_NAMES``.

        Raises:
            AssertionError: if a required name is missing (first missing
                name in declaration order is reported).
        """
        self._verify_params(kwargs)
        for param_name in self._PARAM_NAMES:
            setattr(self, param_name, kwargs[param_name])

    def _verify_params(self, kwargs):
        """Assert that every required parameter name is present in ``kwargs``."""
        for param_name in self._PARAM_NAMES:
            assert param_name in kwargs, "no param for " + param_name
def MDP(env):
    """Build the MDP description for the problem (template to be completed).

    As written, only ``states`` is populated; ``actions``, ``rewards`` and
    ``probabilities`` start out empty and are meant to be filled in between
    the two marker comments below.

    Args:
        env: the environment object; ``get_results`` passes a ``YouTuberEnv``.

    Returns:
        A tuple ``(mdp, extra_info)`` where ``mdp`` is a dict with the keys
        ``"states"``, ``"actions"``, ``"rewards"`` and ``"probabilities"``,
        and ``extra_info`` is a dict (empty unless the inserted code fills it).
    """
    states = [0,1] ### DO NOT MODIFY
    actions = []
    rewards = []
    probabilities = []
    extra_info = {}
    ####### INSERT YOUR CODE BELOW. DO NOT EDIT ABOVE THIS LINE ########
    ####### DO NOT EDIT BELOW THIS LINE ########
    mdp = {
        "states":states,
        "actions":actions,
        "rewards":rewards,
        "probabilities":probabilities
    }
    return mdp, extra_info
def DP(mdp):
    """Compute values and a policy over a 12-step horizon (template to be completed).

    As written, ``values`` is all zeros and ``policy`` is a random
    placeholder; the real computation is meant to be inserted between the
    two marker comments below.

    Args:
        mdp: dict with the keys ``"states"``, ``"actions"``, ``"rewards"``
            and ``"probabilities"``, as returned by ``MDP``.

    Returns:
        A tuple ``(result, extra_info)`` where ``result`` is a dict with
        ``"Values"`` (array of shape ``(N+1, n_states)``) and ``"Policy"``
        (array of shape ``(N, n_states)`` holding ``'L'`` / ``'H'``), and
        ``extra_info`` is a dict (empty unless the inserted code fills it).
    """
    states = mdp["states"]
    actions = mdp["actions"]
    rewards = mdp["rewards"]
    # NOTE(review): the local name is spelled "probabilties"; inserted code must use this spelling.
    probabilties = mdp["probabilities"]
    N = 12 # horizon for 1 year
    n_states = len(states)
    # One extra row compared with the policy: N+1 value rows versus N policy rows.
    values = np.zeros((N+1, n_states))
    # Random placeholder so the output has a valid shape before the DP is implemented.
    policy = np.random.choice(['L','H'], size = (N,n_states))
    ### Note: Each value in policy should either be a 'H' or 'L'
    ### Modify the contents of the above 'policy' array
    extra_info = {}
    ####### INSERT YOUR CODE BELOW. DO NOT EDIT ABOVE THIS LINE ########
    ####### DO NOT EDIT BELOW THIS LINE ########
    result = {
        "Values":values,
        "Policy":policy
    }
    return result, extra_info
# DO NOT EDIT THIS CELL
def verify_results(results):
    """Check that a result dict has the expected keys, shapes and policy labels.

    Args:
        results: dict expected to hold ``"Values"`` with shape ``(13, 2)``
            and ``"Policy"`` with shape ``(12, 2)``.

    Raises:
        AssertionError: if a key is missing, a shape differs, or the policy
            contains anything other than ``'L'`` and ``'H'``.
    """
    assert "Values" in results
    assert "Policy" in results
    values = results["Values"]
    policy = results["Policy"]
    N=12
    n_states = 2
    assert np.shape(values) == (N+1,n_states)
    assert np.shape(policy) == (N,n_states)
    # Subset test: a policy using only one of the two labels is still accepted.
    unique_values = set(np.unique(policy))
    allowed_values = {'L','H'}
    assert unique_values <= allowed_values
def get_results(kwargs):
    """Run the whole pipeline for one parameter set.

    Builds the environment from ``kwargs``, derives the MDP, solves it with
    ``DP`` and validates the outcome with ``verify_results``.

    Args:
        kwargs: parameter mapping accepted by ``YouTuberEnv``.

    Returns:
        A tuple ``(results, mdp_info, dp_info)``: the DP result dict followed
        by the extra-info dicts returned by ``MDP`` and ``DP``.
    """
    model, model_extras = MDP(YouTuberEnv(kwargs))
    solution, solver_extras = DP(model)
    verify_results(solution)
    return (solution, model_extras, solver_extras)
def get_base_params():
    """Return a fresh dict with the baseline parameter values for the problem."""
    return {
        "low_salary": 2300,
        "high_salary": 3000,
        "low_quit_prob": 0.6,
        "high_quit_prob": 0.2,
        "self_edit_cost": 4000,
        "low_add_cost": 300,
        "high_add_cost": 600,
        "low_add_success_prob": 0.7,
        "high_add_success_prob": 0.9,
    }
# Solve the problem once with the baseline parameters and show the outcome.
base_params = get_base_params()
results, mdp_info, dp_info = get_results(base_params)
print(results)

# Make sure the output and input directories exist. makedirs(exist_ok=True)
# replaces the exists()-then-mkdir() pair: it has no check/create race and it
# also creates DATASET_DIR itself when that directory is missing, where a
# plain mkdir of DATASET_DIR/inputs would raise FileNotFoundError.
os.makedirs(AICROWD_RESULTS_DIR, exist_ok=True)
os.makedirs(os.path.join(DATASET_DIR, 'inputs'), exist_ok=True)
# Do not edit this cell, generate results with it as is
# Runs get_results on every parameter file in DATASET_DIR/inputs and stores
# each outcome in AICROWD_RESULTS_DIR under a name carrying the same index.
input_dir = os.path.join(DATASET_DIR, 'inputs')
if not os.path.exists(AICROWD_RESULTS_DIR):
    os.mkdir(AICROWD_RESULTS_DIR)
for params_file in os.listdir(input_dir):
    # Ignore anything whose name does not contain ".npy".
    if ".npy" not in params_file:
        continue
    # .item() unwraps the single stored object, presumably the parameter dict
    # that YouTuberEnv expects (confirm against the dataset).
    # NOTE(review): allow_pickle=True unpickles arbitrary objects; only load trusted files.
    kwargs = np.load(os.path.join(input_dir, params_file), allow_pickle=True).item()
    results, mdp_info, dp_info = get_results(kwargs)
    # Index = text after the last '_' with the trailing four characters (".npy") removed.
    idx = params_file.split('_')[-1][:-4]
    # np.save appends ".npy" to the name, giving results_<idx>.npy.
    np.save(os.path.join(AICROWD_RESULTS_DIR, 'results_' + idx), results)
# Check your score on the given test cases (There are more private test cases not provided)
# Reference outputs live under the dataset folder; the outputs to score are
# read back from the results directory written earlier.
target_folder = os.path.join(DATASET_DIR, "targets")
result_folder = AICROWD_RESULTS_DIR
def check_algo_match(results, targets):
    """Return True when ``results`` agrees with ``targets`` up to state order.

    Both arguments are mappings holding a ``'Values'`` and a ``'Policy'``
    2-D array with one column per state. The result counts as a match when
    BOTH the values (within ``atol=1e-1``) and the policy agree with the
    targets, either with the columns in the same order or with the two state
    columns swapped. Values and policy must agree under the same ordering.

    Args:
        results: mapping produced by the algorithm under test.
        targets: mapping with the reference values and policy.

    Returns:
        bool: True on a match under either column ordering, else False.
        Arrays whose flattened sizes differ never match.
    """
    # Column-major flattening lays out column 0 followed by column 1.
    result_values = np.asarray(results['Values']).flatten('F')
    result_policy = np.asarray(results['Policy']).flatten('F')
    target_values = np.asarray(targets['Values'])
    target_policy = np.asarray(targets['Policy'])

    # Try the given state order first, then the swapped one.
    for first, second in ((0, 1), (1, 0)):
        candidate_values = np.concatenate(
            (target_values[:, first], target_values[:, second])
        )
        candidate_policy = np.concatenate(
            (target_policy[:, first], target_policy[:, second])
        )
        values_ok = (
            candidate_values.shape == result_values.shape
            and np.allclose(candidate_values, result_values, atol=1e-1)
        )
        # A match requires values AND policy to agree under the same ordering;
        # accepting either one alone would score wrong answers as correct.
        # array_equal also returns False (rather than failing) on a size mismatch.
        if values_ok and np.array_equal(candidate_policy, result_policy):
            return True
    return False
def check_score(target_folder, result_folder):
    """Return the fraction of result files that match their target file.

    Every ``*.npy`` file in ``result_folder`` is paired with
    ``targets_<idx>.npy`` in ``target_folder``, where ``<idx>`` is the text
    after the last ``'_'`` in the result file name, and compared with
    ``check_algo_match``.

    Args:
        target_folder: directory holding the ``targets_<idx>.npy`` files.
        result_folder: directory holding the result files to score.

    Returns:
        The mean of the per-file match flags, or ``0.0`` when the folder
        contains no result files (instead of NaN from an empty mean).
    """
    match = []
    for out_file in os.listdir(result_folder):
        # Skip stray files (notes, checkpoints, ...) instead of failing in
        # np.load; the result-generation loop skips non-.npy inputs the same way.
        if not out_file.endswith(".npy"):
            continue
        res_file = os.path.join(result_folder, out_file)
        # NOTE(review): allow_pickle=True unpickles arbitrary objects; only load trusted files.
        results = np.load(res_file, allow_pickle=True).item()
        idx = out_file.split('_')[-1][:-4]  # Extract the file number
        target_file = os.path.join(target_folder, f"targets_{idx}.npy")
        # Target files are indexed with [0] rather than unwrapped with .item();
        # presumably they store a one-element array holding the dict (confirm).
        targets = np.load(target_file, allow_pickle=True)[0]
        match.append(check_algo_match(results, targets))
    if not match:
        return 0.0
    return np.mean(match)
# Score locally only when the target folder is present.
if os.path.exists(target_folder):
    shared_score = check_score(target_folder, result_folder)
    print("Shared data Score (normalized to 1):", shared_score)
Answer the following¶
Consider a policy where you always pay the employee low income and allocate a high advertising budget. Is it optimal? Justify your answer. (Based on the data provided in the assignment question)
Your answer:
Submit to AIcrowd 🚀¶
# Submit this notebook to the rl-assignment-2-youtube challenge. DATASET_PATH is
# set for this one command from AICROWD_DATASET_PATH; "-a assets" presumably
# names the assets directory to bundle (confirm with `aicrowd notebook submit --help`).
!DATASET_PATH=$AICROWD_DATASET_PATH \
aicrowd notebook submit \
-c rl-assignment-2-youtube -a assets
Content
Comments
You must login before you can post a comment.