In [None]:
!pip install space_bandits

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Загрузим наши данные
transactions = pd.read_csv('transactions.csv')
action_reward = pd.read_csv('actions_reward_transaction_df.csv')
predictions_for_transactions = pd.read_csv('predictions_for_transactions.csv')

In [3]:

transactions.dates = pd.to_datetime(transactions.dates)
predictions_for_transactions.dates = pd.to_datetime(predictions_for_transactions.dates) 
action_reward.dates = pd.to_datetime(action_reward.dates)

In [4]:
action_reward

Unnamed: 0,tr_id,dates,action,reward
0,1003923,2019-01-01,2,0.000
1,1018719,2019-01-01,3,96246.080
2,1028713,2019-01-01,3,98328.150
3,1030155,2019-01-01,3,96290.195
4,1047479,2019-01-01,2,0.000
...,...,...,...,...
9995,99977644,2019-12-30,3,97569.675
9996,99979376,2019-12-30,3,98179.700
9997,99979915,2019-12-30,1,98274.760
9998,99981943,2019-12-30,1,96469.850


In [5]:
transactions

Unnamed: 0,tr_id,user_id,dates,sale_price
0,1018719,27185,2019-01-01,96246.080
1,1028713,42513,2019-01-01,98328.150
2,1030155,33953,2019-01-01,96290.195
3,1055428,28434,2019-01-01,97184.400
4,1058208,15628,2019-01-01,96948.420
...,...,...,...,...
6325,99949208,19090,2019-12-30,99624.545
6326,99977644,45377,2019-12-30,97569.675
6327,99979376,75419,2019-12-30,98179.700
6328,99979915,13167,2019-12-30,98274.760


In [6]:
predictions_for_transactions

Unnamed: 0,tr_id,dates,model_1,model_2,model_3
0,1003923,2019-01-01,98073.000,96380.000,96656.000
1,1018719,2019-01-01,95755.080,95924.080,96246.080
2,1028713,2019-01-01,96318.150,96729.150,98328.150
3,1030155,2019-01-01,96636.195,98557.195,96290.195
4,1047479,2019-01-01,96149.280,96850.280,96170.280
...,...,...,...,...,...
9995,99977644,2019-12-30,98539.675,96468.675,97569.675
9996,99979376,2019-12-30,97875.700,97139.700,98179.700
9997,99979915,2019-12-30,98274.760,98599.760,97072.760
9998,99981943,2019-12-30,96469.850,98086.850,99745.850


In [7]:
# Построим 1-ую модель
from space_bandits import LinearBandits

num_actions = 3 # three actions
num_features = 2 # two features

model = LinearBandits(num_actions, num_features)

In [8]:
action_reward['dates_int'] = action_reward.dates.astype(int)

In [9]:
# Обучение модели происходит через функцию update()
for index, row in action_reward.iterrows():
    context = row[['dates_int', 'tr_id']]
    action = row['action'] - 1
    reward = row['reward']
    model.update(context, action,reward)

In [10]:
# Чтобы получить рекомендацию по выбору какой именно action, надо выбрать в зависимости от контекста используйте action()
pred_context = action_reward.iloc[-2][['dates_int', 'tr_id']].values
model.action(pred_context)

  multivariates = [np.random.multivariate_normal(mus[j], covs[j]) for j in range(n_rows)]


0

In [11]:
# Сохраните модель
model.save('model_1.pkl')

In [11]:
# Постройте 2-у модель
# Добавим в контекст данные о времени, year/week и данные о user и данные о возможных вариантах
full_action_reward = action_reward.merge(predictions_for_transactions, how='left', on=['tr_id', 'dates'])
full_action_reward = full_action_reward.merge(transactions, how='left', on=['tr_id', 'dates'])

In [12]:
# Заполним пропуски
full_action_reward.user_id = full_action_reward.user_id.fillna(-1)
full_action_reward.sale_price = full_action_reward.sale_price.fillna(0)

In [13]:
# Добавим year, week
full_action_reward['week'] = full_action_reward.dates.dt.isocalendar().week
full_action_reward['year'] = full_action_reward.dates.dt.year

In [14]:
# Добавим информацию о последний покупки для user'а
full_action_reward['previous purchase'] = full_action_reward.sort_values('dates').groupby('user_id').sale_price.shift(1)
full_action_reward['previous purchase'] = full_action_reward['previous purchase'].fillna(0)

In [15]:
# Проверим что мы действительно добавили информацию о последней покупке
full_action_reward[full_action_reward.user_id == 46652]

Unnamed: 0,tr_id,dates,action,reward,dates_int,model_1,model_2,model_3,user_id,sale_price,week,year,previous purchase
6297,63820317,2019-08-15,3,98994.73,1565827200000000000,98085.73,97723.73,98994.73,46652.0,98994.73,33,2019,0.0
9998,99981943,2019-12-30,1,96469.85,1577664000000000000,96469.85,98086.85,99745.85,46652.0,96469.85,1,2019,98994.73


In [17]:
full_action_reward

Unnamed: 0,tr_id,dates,action,reward,dates_int,model_1,model_2,model_3,user_id,sale_price,week,year,previous purchase
0,1003923,2019-01-01,2,0.000,1546300800000000000,98073.000,96380.000,96656.000,-1.0,0.000,1,2019,0.00
1,1018719,2019-01-01,3,96246.080,1546300800000000000,95755.080,95924.080,96246.080,27185.0,96246.080,1,2019,0.00
2,1028713,2019-01-01,3,98328.150,1546300800000000000,96318.150,96729.150,98328.150,42513.0,98328.150,1,2019,0.00
3,1030155,2019-01-01,3,96290.195,1546300800000000000,96636.195,98557.195,96290.195,33953.0,96290.195,1,2019,0.00
4,1047479,2019-01-01,2,0.000,1546300800000000000,96149.280,96850.280,96170.280,-1.0,0.000,1,2019,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,99977644,2019-12-30,3,97569.675,1577664000000000000,98539.675,96468.675,97569.675,45377.0,97569.675,1,2019,0.00
9996,99979376,2019-12-30,3,98179.700,1577664000000000000,97875.700,97139.700,98179.700,75419.0,98179.700,1,2019,98347.16
9997,99979915,2019-12-30,1,98274.760,1577664000000000000,98274.760,98599.760,97072.760,13167.0,98274.760,1,2019,0.00
9998,99981943,2019-12-30,1,96469.850,1577664000000000000,96469.850,98086.850,99745.850,46652.0,96469.850,1,2019,98994.73


In [18]:
full_action_reward.columns

Index(['tr_id', 'dates', 'action', 'reward', 'dates_int', 'model_1', 'model_2',
       'model_3', 'user_id', 'sale_price', 'week', 'year',
       'previous purchase'],
      dtype='object')

In [19]:
context_columns = ['tr_id','dates_int', 'model_1', 'model_2',
       'model_3', 'user_id', 'sale_price', 'week', 'year',
       'previous purchase']

In [20]:
# Создадим еще одну модель 
num_actions = 3 # three actions
num_features = 10 # ten features


model_2 = LinearBandits(num_actions, num_features)

In [21]:
# Обучим еще одну модель
for index, row in full_action_reward.iterrows():
    context = row[context_columns]
    action = row['action'] - 1
    reward = row['reward']
    model_2.update(context, action,reward)

In [22]:
# pred_context = full_action_reward.iloc[-2][context_columns].values
# model_2.action(pred_context)

In [23]:
# Сохраните модель
model_2.save('model_2.pkl')