Skip to content

Commit 1443407

Browse files
author
shixiaowen03
committed
MKR
1 parent 901ea35 commit 1443407

File tree

9 files changed

+782
-256
lines changed

9 files changed

+782
-256
lines changed

.idea/workspace.xml

Lines changed: 237 additions & 254 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

nlp/RNN_dynamic_cell.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ def gen_epochs(n):
7272
b = tf.get_variable("b",[num_classes],initializer=tf.constant_initializer(0.0))
7373

7474

75-
logits = tf.reshape(tf.matmul(tf.reshape(rnn_outputs,[-1,batch_size]),W)+b,[batch_size,num_steps,num_classes])
75+
logits = tf.reshape(tf.matmul(tf.reshape(rnn_outputs,[-1,state_size]),W)+b,[batch_size,num_steps,num_classes])
7676
predictions = tf.nn.softmax(logits)
7777

7878
losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,logits=predictions)
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import numpy as np
2+
3+
def load_data(args):
    """Load the rating dataset and the knowledge graph.

    Returns:
        (n_user, n_item, n_entity, n_relation,
         train_data, eval_data, test_data, kg)
    """
    n_user, n_item, train_data, eval_data, test_data = load_rating(args)
    n_entity, n_relation, kg = load_kg(args)
    print('data loaded.')
    return (n_user, n_item, n_entity, n_relation,
            train_data, eval_data, test_data, kg)
9+
10+
11+
def load_rating(args):
    """Read the rating file and split it into train/eval/test sets.

    Returns:
        (n_user, n_item, train_data, eval_data, test_data)
    """
    print('reading rating file......')

    rating_np = np.loadtxt('data/ratings_final.txt', dtype=np.int32)

    # column 0 holds user ids, column 1 holds item ids
    n_user = len(set(rating_np[:, 0]))
    n_item = len(set(rating_np[:, 1]))

    train_data, eval_data, test_data = dataset_split(rating_np)
    return n_user, n_item, train_data, eval_data, test_data
22+
23+
24+
def dataset_split(rating_np, eval_ratio=0.2, test_ratio=0.2):
    """Randomly split ratings into disjoint train/eval/test row subsets.

    Generalized: the split ratios were hard-coded; they are now
    backward-compatible keyword parameters (defaults unchanged).

    Args:
        rating_np: 2-D numpy array, one rating per row.
        eval_ratio: fraction of rows used for evaluation.
        test_ratio: fraction of rows used for testing.

    Returns:
        (train_data, eval_data, test_data)
    """
    print('spliting dataset ....')

    n_ratings = rating_np.shape[0]

    eval_indices = np.random.choice(list(range(n_ratings)),
                                    size=int(n_ratings * eval_ratio),
                                    replace=False)
    # was set(set(range(...))) -- the double wrap was redundant
    left = set(range(n_ratings)) - set(eval_indices)
    test_indices = np.random.choice(list(left),
                                    size=int(n_ratings * test_ratio),
                                    replace=False)
    train_indices = list(left - set(test_indices))

    train_data = rating_np[train_indices]
    eval_data = rating_np[eval_indices]
    test_data = rating_np[test_indices]

    return train_data, eval_data, test_data
42+
43+
def load_kg(args):
    """Read the knowledge-graph triple file.

    Returns:
        (n_entity, n_relation, kg) where kg is an int32 array of
        (head, relation, tail) triples.
    """
    print('reading KG file...')

    kg = np.loadtxt('data/kg_final.txt', dtype=np.int32)

    # entities appear as heads (col 0) and tails (col 2); relations in col 1
    heads = set(kg[:, 0])
    tails = set(kg[:, 2])
    n_entity = len(heads | tails)
    n_relation = len(set(kg[:, 1]))

    return n_entity, n_relation, kg
52+
53+
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
from abc import abstractclassmethod, abstractmethod

import tensorflow as tf
3+
4+
# Running count of layers created per name; used to build unique
# default layer names.
LAYER_IDS = {}


def get_layer_id(layer_name=''):
    """Return a unique, monotonically increasing id for layer_name.

    The first call for a given name returns 0; each subsequent call
    returns the previous id plus one.
    """
    if layer_name in LAYER_IDS:
        LAYER_IDS[layer_name] += 1
        return LAYER_IDS[layer_name]
    LAYER_IDS[layer_name] = 0
    return 0
15+
16+
17+
class Layer(object):
    """Base class for model layers.

    Subclasses implement _call(inputs); instances are callable.  When no
    name is given, a unique one is derived from the lowercased class
    name via get_layer_id.
    """

    def __init__(self, name):
        if not name:
            layer = self.__class__.__name__.lower()
            name = layer + '_' + str(get_layer_id(layer))
        self.name = name
        # trainable variables subject to l2 regularization; filled by subclasses
        self.vars = []

    def __call__(self, inputs):
        outputs = self._call(inputs)
        return outputs

    # fixed: was @abstractclassmethod -- _call is an instance method, and
    # abstractclassmethod has been deprecated since Python 3.3 in favor of
    # combining classmethod with abstractmethod (not wanted here anyway)
    @abstractmethod
    def _call(self, inputs):
        pass
33+
34+
35+
class Dense(Layer):
    """A fully connected layer: act(dropout(x) @ weight + bias)."""

    def __init__(self, input_dim, output_dim, dropout=0.0, act=tf.nn.relu, name=None):
        super(Dense, self).__init__(name)
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.dropout = dropout
        self.act = act
        with tf.variable_scope(self.name):
            self.weight = tf.get_variable(name='weight', shape=(input_dim, output_dim), dtype=tf.float32)
            self.bias = tf.get_variable(name='bias', shape=output_dim, initializer=tf.zeros_initializer())
        # note: only the weight appears in vars, so the bias is not l2-regularized
        self.vars = [self.weight]

    def _call(self, inputs):
        # tf.nn.dropout takes a keep probability in this TF version
        dropped = tf.nn.dropout(inputs, 1 - self.dropout)
        return self.act(tf.matmul(dropped, self.weight) + self.bias)
51+
52+
53+
54+
class CrossCompressUnit(Layer):
    """Cross&compress unit: models pairwise feature interactions between
    an item vector v and an entity vector e of the same dimension, and
    compresses the [dim, dim] cross matrix back to two dim-vectors.
    """

    def __init__(self, dim, name=None):
        super(CrossCompressUnit, self).__init__(name)
        self.dim = dim
        with tf.variable_scope(self.name):
            # each weight projects a dim-sized row of the cross matrix to a scalar
            self.weight_vv = tf.get_variable(name='weight_vv', shape=(dim, 1), dtype=tf.float32)
            self.weight_ev = tf.get_variable(name='weight_ev', shape=(dim, 1), dtype=tf.float32)
            self.weight_ve = tf.get_variable(name='weight_ve', shape=(dim, 1), dtype=tf.float32)
            self.weight_ee = tf.get_variable(name='weight_ee', shape=(dim, 1), dtype=tf.float32)

            self.bias_v = tf.get_variable(name='bias_v', shape=dim, initializer=tf.zeros_initializer())
            self.bias_e = tf.get_variable(name='bias_e', shape=dim, initializer=tf.zeros_initializer())

        # note: biases are not included in vars, so they are not l2-regularized
        self.vars = [self.weight_vv, self.weight_ev, self.weight_ve, self.weight_ee]

    def _call(self, inputs):
        # v, e: [batch_size, dim]
        v, e = inputs

        # fixed: tf.expand_dims's `dim` keyword is deprecated; use `axis`
        v = tf.expand_dims(v, axis=2)   # [batch_size, dim, 1]
        e = tf.expand_dims(e, axis=1)   # [batch_size, 1, dim]

        # [batch_size, dim, dim] outer products
        c_matrix = tf.matmul(v, e)
        c_matrix_transpose = tf.transpose(c_matrix, perm=[0, 2, 1])

        # [batch_size * dim, dim]
        c_matrix = tf.reshape(c_matrix, [-1, self.dim])
        c_matrix_transpose = tf.reshape(c_matrix_transpose, [-1, self.dim])

        # [batch_size, dim]
        v_output = tf.reshape(tf.matmul(c_matrix, self.weight_vv)
                              + tf.matmul(c_matrix_transpose, self.weight_ev),
                              [-1, self.dim]) + self.bias_v
        e_output = tf.reshape(tf.matmul(c_matrix, self.weight_ve)
                              + tf.matmul(c_matrix_transpose, self.weight_ee),
                              [-1, self.dim]) + self.bias_e

        return v_output, e_output
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import argparse
2+
import numpy as np
3+
from data_loader import load_data
4+
from train import train
5+
6+
np.random.seed(555)
7+
8+
9+
parser = argparse.ArgumentParser()
10+
11+
# movie
12+
parser.add_argument('--dataset', type=str, default='movie', help='which dataset to use')
13+
parser.add_argument('--n_epochs', type=int, default=20, help='the number of epochs')
14+
parser.add_argument('--dim', type=int, default=8, help='dimension of user and entity embeddings')
15+
parser.add_argument('--L', type=int, default=1, help='number of low layers')
16+
parser.add_argument('--H', type=int, default=1, help='number of high layers')
17+
parser.add_argument('--batch_size', type=int, default=4096, help='batch size')
18+
parser.add_argument('--l2_weight', type=float, default=1e-6, help='weight of l2 regularization')
19+
parser.add_argument('--lr_rs', type=float, default=0.02, help='learning rate of RS task')
20+
parser.add_argument('--lr_kge', type=float, default=0.01, help='learning rate of KGE task')
21+
parser.add_argument('--kge_interval', type=int, default=3, help='training interval of KGE task')
22+
23+
show_loss = False
24+
show_topk = False
25+
26+
args = parser.parse_args()
27+
data = load_data(args)
28+
train(args, data, show_loss, show_topk)
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
import numpy as np
2+
import tensorflow as tf
3+
from sklearn.metrics import roc_auc_score
4+
from layers import Dense,CrossCompressUnit
5+
6+
7+
class MKR(object):
    """Multi-task Knowledge-graph Representation (MKR) model.

    Alternately trains a recommendation (RS) task and a knowledge graph
    embedding (KGE) task; the two tasks share low-level item/entity
    features through cross&compress units.
    """

    def __init__(self, args, n_users, n_items, n_entities, n_relations):
        self._parse_args(n_users, n_items, n_entities, n_relations)
        self._build_inputs()
        self._build_model(args)
        self._build_loss(args)
        self._build_train(args)

    def _parse_args(self, n_users, n_items, n_entities, n_relations):
        """Store dataset sizes and init the per-task variable lists."""
        self.n_user = n_users
        self.n_item = n_items
        self.n_entity = n_entities
        self.n_relation = n_relations

        # for computing l2 loss
        self.vars_rs = []
        self.vars_kge = []

    def _build_inputs(self):
        """Placeholders: RS feeds user/item/label, KGE feeds head/relation/tail."""
        self.user_indices = tf.placeholder(tf.int32, [None], 'user_indices')
        self.item_indices = tf.placeholder(tf.int32, [None], 'item_indices')
        self.labels = tf.placeholder(tf.float32, [None], 'labels')
        self.head_indices = tf.placeholder(tf.int32, [None], 'head_indices')
        self.tail_indices = tf.placeholder(tf.int32, [None], 'tail_indices')
        self.relation_indices = tf.placeholder(tf.int32, [None], 'relation_indices')

    def _build_model(self, args):
        self._build_low_layers(args)
        self._build_high_layers(args)

    def _build_low_layers(self, args):
        """Embedding lookups plus args.L rounds of shared feature extraction."""
        self.user_emb_matrix = tf.get_variable('user_emb_matrix', [self.n_user, args.dim])
        self.item_emb_matrix = tf.get_variable('item_emb_matrix', [self.n_item, args.dim])
        self.entity_emb_matrix = tf.get_variable('entity_emb_matrix', [self.n_entity, args.dim])
        self.relation_emb_matrix = tf.get_variable('relation_emb_matrix', [self.n_relation, args.dim])

        # [batch_size, dim]
        self.user_embeddings = tf.nn.embedding_lookup(self.user_emb_matrix, self.user_indices)
        self.item_embeddings = tf.nn.embedding_lookup(self.item_emb_matrix, self.item_indices)
        self.head_embeddings = tf.nn.embedding_lookup(self.entity_emb_matrix, self.head_indices)
        self.relation_embeddings = tf.nn.embedding_lookup(self.relation_emb_matrix, self.relation_indices)
        self.tail_embeddings = tf.nn.embedding_lookup(self.entity_emb_matrix, self.tail_indices)

        for _ in range(args.L):
            user_mlp = Dense(input_dim=args.dim, output_dim=args.dim)
            tail_mlp = Dense(input_dim=args.dim, output_dim=args.dim)
            cc_unit = CrossCompressUnit(args.dim)

            self.user_embeddings = user_mlp(self.user_embeddings)
            # item and head features interact through the cross&compress unit
            self.item_embeddings, self.head_embeddings = cc_unit([self.item_embeddings, self.head_embeddings])
            self.tail_embeddings = tail_mlp(self.tail_embeddings)

            # the cc_unit variables contribute to both tasks' l2 terms
            self.vars_rs.extend(user_mlp.vars)
            self.vars_rs.extend(cc_unit.vars)
            self.vars_kge.extend(tail_mlp.vars)
            self.vars_kge.extend(cc_unit.vars)

    def _build_high_layers(self, args):
        # RS
        use_inner_product = True
        if use_inner_product:
            # [batch_size]
            self.scores = tf.reduce_sum(self.user_embeddings * self.item_embeddings, axis=1)
        else:
            self.user_item_concat = tf.concat([self.user_embeddings, self.item_embeddings], axis=1)
            for _ in range(args.H - 1):
                rs_mlp = Dense(input_dim=args.dim * 2, output_dim=args.dim * 2)
                self.user_item_concat = rs_mlp(self.user_item_concat)
                self.vars_rs.extend(rs_mlp.vars)

            rs_pred_mlp = Dense(input_dim=args.dim * 2, output_dim=1)
            self.scores = tf.squeeze(rs_pred_mlp(self.user_item_concat))
            # fixed: was extend(rs_pred_mlp) -- that tries to iterate the
            # layer object itself instead of collecting its variables
            self.vars_rs.extend(rs_pred_mlp.vars)

        self.scores_normalized = tf.nn.sigmoid(self.scores)

        # KGE
        self.head_relation_concat = tf.concat([self.head_embeddings, self.relation_embeddings], axis=1)
        for _ in range(args.H - 1):
            kge_mlp = Dense(input_dim=args.dim * 2, output_dim=args.dim * 2)
            self.head_relation_concat = kge_mlp(self.head_relation_concat)
            self.vars_kge.extend(kge_mlp.vars)

        kge_pred_mlp = Dense(input_dim=args.dim * 2, output_dim=args.dim)
        self.tail_pred = kge_pred_mlp(self.head_relation_concat)
        self.vars_kge.extend(kge_pred_mlp.vars)
        self.tail_pred = tf.nn.sigmoid(self.tail_pred)

        self.scores_kge = tf.nn.sigmoid(tf.reduce_sum(self.tail_embeddings * self.tail_pred, axis=1))
        self.rmse = tf.reduce_mean(
            tf.sqrt(tf.reduce_sum(tf.square(self.tail_embeddings - self.tail_pred), axis=1) / args.dim))

    def _build_loss(self, args):
        # RS
        self.base_loss_rs = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.labels, logits=self.scores))
        self.l2_loss_rs = tf.nn.l2_loss(self.user_embeddings) + tf.nn.l2_loss(self.item_embeddings)
        for var in self.vars_rs:
            self.l2_loss_rs += tf.nn.l2_loss(var)
        self.loss_rs = self.base_loss_rs + self.l2_loss_rs * args.l2_weight

        # KGE
        # NOTE(review): -scores_kge is a per-example vector, not a reduced
        # scalar; minimize() then sums the per-example gradients.  Confirm
        # this matches the intended KGE objective before changing it.
        self.base_loss_kge = -self.scores_kge
        self.l2_loss_kge = tf.nn.l2_loss(self.head_embeddings) + tf.nn.l2_loss(self.tail_embeddings)
        for var in self.vars_kge:
            self.l2_loss_kge += tf.nn.l2_loss(var)
        self.loss_kge = self.base_loss_kge + self.l2_loss_kge * args.l2_weight

    def _build_train(self, args):
        self.optimizer_rs = tf.train.AdamOptimizer(args.lr_rs).minimize(self.loss_rs)
        self.optimizer_kge = tf.train.AdamOptimizer(args.lr_kge).minimize(self.loss_kge)

    def train_rs(self, sess, feed_dict):
        """Run one RS optimization step; returns [train_op_result, loss_rs]."""
        return sess.run([self.optimizer_rs, self.loss_rs], feed_dict)

    def train_kge(self, sess, feed_dict):
        """Run one KGE optimization step; returns [train_op_result, rmse]."""
        return sess.run([self.optimizer_kge, self.rmse], feed_dict)

    def eval(self, sess, feed_dict):
        """Compute AUC and accuracy (0.5 threshold) on the fed batch."""
        labels, scores = sess.run([self.labels, self.scores_normalized], feed_dict)
        auc = roc_auc_score(y_true=labels, y_score=scores)
        predictions = [1 if i >= 0.5 else 0 for i in scores]
        acc = np.mean(np.equal(predictions, labels))
        return auc, acc

    def get_scores(self, sess, feed_dict):
        return sess.run([self.item_indices, self.scores_normalized], feed_dict)

0 commit comments

Comments
 (0)