
Commit 901ea35

Author: shixiaowen03
Commit message: rippleNet
1 parent: 82b83b8

6 files changed: +642 additions, −177 deletions


.idea/workspace.xml

Lines changed: 160 additions & 177 deletions
Generated file; diff not rendered by default.
data_loader.py
Lines changed: 115 additions & 0 deletions
@@ -0,0 +1,115 @@
import collections
import os
import numpy as np


def load_data(args):
    train_data, eval_data, test_data, user_history_dict = load_rating(args)
    n_entity, n_relation, kg = load_kg(args)
    ripple_set = get_ripple_set(args, kg, user_history_dict)
    return train_data, eval_data, test_data, n_entity, n_relation, ripple_set


def load_rating(args):
    print('reading rating file ...')

    rating_file = 'data/ratings_final.txt'
    rating_np = np.loadtxt(rating_file, dtype=np.int32)

    return dataset_split(rating_np)


def dataset_split(rating_np):
    print('splitting dataset ...')
    eval_ratio = 0.2
    test_ratio = 0.2

    n_ratings = rating_np.shape[0]

    eval_indices = np.random.choice(n_ratings, size=int(n_ratings * eval_ratio), replace=False)
    left = set(range(n_ratings)) - set(eval_indices)
    test_indices = np.random.choice(list(left), size=int(n_ratings * test_ratio), replace=False)
    train_indices = list(left - set(test_indices))

    # collect each user's positively rated items from the training split
    user_history_dict = dict()
    for i in train_indices:
        user = rating_np[i][0]
        item = rating_np[i][1]
        rating = rating_np[i][2]

        if rating == 1:
            if user not in user_history_dict:
                user_history_dict[user] = []
            user_history_dict[user].append(item)

    # keep only interactions whose user has at least one positive item in training
    train_indices = [i for i in train_indices if rating_np[i][0] in user_history_dict]
    eval_indices = [i for i in eval_indices if rating_np[i][0] in user_history_dict]
    test_indices = [i for i in test_indices if rating_np[i][0] in user_history_dict]

    train_data = rating_np[train_indices]
    eval_data = rating_np[eval_indices]
    test_data = rating_np[test_indices]

    return train_data, eval_data, test_data, user_history_dict


def load_kg(args):
    print('reading KG file ...')
    kg_file = 'data/kg_final.txt'

    kg_np = np.loadtxt(kg_file, dtype=np.int32)

    n_entity = len(set(kg_np[:, 0]) | set(kg_np[:, 2]))
    n_relation = len(set(kg_np[:, 1]))

    kg = construct_kg(kg_np)

    return n_entity, n_relation, kg


def construct_kg(kg_np):
    print('constructing knowledge graph ...')
    kg = collections.defaultdict(list)
    for head, relation, tail in kg_np:
        kg[head].append((tail, relation))
    return kg


def get_ripple_set(args, kg, user_history_dict):
    print('constructing ripple set ...')
    # user -> [(hop_0_heads, hop_0_relations, hop_0_tails), (hop_1_heads, ...), ...]
    ripple_set = collections.defaultdict(list)

    for user in user_history_dict:
        for h in range(args.n_hop):
            memories_h = []
            memories_r = []
            memories_t = []

            if h == 0:
                tails_of_last_hop = user_history_dict[user]
            else:
                tails_of_last_hop = ripple_set[user][-1][2]

            for entity in tails_of_last_hop:
                for tail_and_relation in kg[entity]:
                    memories_h.append(entity)
                    memories_r.append(tail_and_relation[1])
                    memories_t.append(tail_and_relation[0])

            # if the current ripple set of the given user is empty, we simply copy the ripple set of the last hop here
            # this won't happen for h = 0, because only the items that appear in the KG have been selected
            # this only happens on 154 users in the Book-Crossing dataset (since both the BX dataset and the KG are sparse)
            if len(memories_h) == 0:
                ripple_set[user].append(ripple_set[user][-1])
            else:
                # sample a fixed-size memory per hop; sample with replacement if there are fewer triples than n_memory
                replace = len(memories_h) < args.n_memory
                indices = np.random.choice(len(memories_h), size=args.n_memory, replace=replace)
                memories_h = [memories_h[i] for i in indices]
                memories_r = [memories_r[i] for i in indices]
                memories_t = [memories_t[i] for i in indices]
                ripple_set[user].append((memories_h, memories_r, memories_t))

    return ripple_set
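For orientation, here is a minimal sketch of how these pieces compose: it builds a toy knowledge graph in memory and calls get_ripple_set directly. The toy triples, the user id, and the hyperparameter values are illustrative assumptions, not part of this commit.

import argparse
import collections
import numpy as np

from data_loader import get_ripple_set

# toy KG: rows are (head, relation, tail) triples
kg_np = np.array([[0, 0, 1],
                  [1, 1, 2],
                  [2, 0, 3]], dtype=np.int32)
kg = collections.defaultdict(list)
for head, relation, tail in kg_np:
    kg[head].append((tail, relation))

args = argparse.Namespace(n_hop=2, n_memory=4)   # only the fields get_ripple_set reads
user_history_dict = {7: [0]}                     # user 7 has one positively rated item: entity 0

ripple_set = get_ripple_set(args, kg, user_history_dict)
# ripple_set[7][h] is a (heads, relations, tails) triple with n_memory entries for hop h;
# hop 0 fans out from item 0, hop 1 fans out from the tails sampled at hop 0
print(ripple_set[7][0])
print(ripple_set[7][1])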
main.py
Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
import argparse
import numpy as np
from data_loader import load_data
from train import train

np.random.seed(555)

parser = argparse.ArgumentParser()
parser.add_argument('--dim', type=int, default=16, help='dimension of entity and relation embeddings')
parser.add_argument('--n_hop', type=int, default=2, help='maximum hops')
parser.add_argument('--kge_weight', type=float, default=0.01, help='weight of the KGE term')
parser.add_argument('--l2_weight', type=float, default=1e-7, help='weight of the l2 regularization term')
parser.add_argument('--lr', type=float, default=0.02, help='learning rate')
parser.add_argument('--batch_size', type=int, default=1024, help='batch size')
parser.add_argument('--n_epoch', type=int, default=10, help='the number of epochs')
parser.add_argument('--n_memory', type=int, default=32, help='size of ripple set for each hop')
parser.add_argument('--item_update_mode', type=str, default='plus_transform',
                    help='how to update item embeddings at the end of each hop')
parser.add_argument('--using_all_hops', type=bool, default=True,
                    help='whether to use the outputs of all hops or just the last hop when making predictions')

'''
# default settings for Book-Crossing
parser = argparse.ArgumentParser()
parser.add_argument('--dataset', type=str, default='book', help='which dataset to use')
parser.add_argument('--dim', type=int, default=4, help='dimension of entity and relation embeddings')
parser.add_argument('--n_hop', type=int, default=2, help='maximum hops')
parser.add_argument('--kge_weight', type=float, default=1e-2, help='weight of the KGE term')
parser.add_argument('--l2_weight', type=float, default=1e-5, help='weight of the l2 regularization term')
parser.add_argument('--lr', type=float, default=1e-3, help='learning rate')
parser.add_argument('--batch_size', type=int, default=1024, help='batch size')
parser.add_argument('--n_epoch', type=int, default=10, help='the number of epochs')
parser.add_argument('--n_memory', type=int, default=32, help='size of ripple set for each hop')
parser.add_argument('--item_update_mode', type=str, default='plus_transform',
                    help='how to update item embeddings at the end of each hop')
parser.add_argument('--using_all_hops', type=bool, default=True,
                    help='whether to use the outputs of all hops or just the last hop when making predictions')
'''

args = parser.parse_args()

show_loss = False
data_info = load_data(args)
train(args, data_info, show_loss)
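One caveat about these flags: argparse applies `type=bool` to the raw command-line string, and bool() of any non-empty string is True, so passing `--using_all_hops False` still yields True. The flag can effectively only keep its default or be set truthy. A minimal demonstration of the pitfall:

import argparse

p = argparse.ArgumentParser()
p.add_argument('--using_all_hops', type=bool, default=True)
print(p.parse_args(['--using_all_hops', 'False']).using_all_hops)  # True: bool('False') is truthy

A common workaround is action='store_true' (with the opposite default) or a small string-to-bool converter.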
model.py
Lines changed: 154 additions & 0 deletions
@@ -0,0 +1,154 @@
import tensorflow as tf
import numpy as np
from sklearn.metrics import roc_auc_score


class RippleNet(object):
    def __init__(self, args, n_entity, n_relation):
        self._parse_args(args, n_entity, n_relation)
        self._build_inputs()
        self._build_embeddings()
        self._build_model()
        self._build_loss()
        self._build_train()

    def _parse_args(self, args, n_entity, n_relation):
        self.n_entity = n_entity
        self.n_relation = n_relation
        self.dim = args.dim
        self.n_hop = args.n_hop
        self.kge_weight = args.kge_weight
        self.l2_weight = args.l2_weight
        self.lr = args.lr
        self.n_memory = args.n_memory
        self.item_update_mode = args.item_update_mode
        self.using_all_hops = args.using_all_hops

    def _build_inputs(self):
        self.items = tf.placeholder(dtype=tf.int32, shape=[None], name="items")
        self.labels = tf.placeholder(dtype=tf.float64, shape=[None], name="labels")
        self.memories_h = []
        self.memories_r = []
        self.memories_t = []

        for hop in range(self.n_hop):
            self.memories_h.append(
                tf.placeholder(dtype=tf.int32, shape=[None, self.n_memory], name="memories_h_" + str(hop)))
            self.memories_r.append(
                tf.placeholder(dtype=tf.int32, shape=[None, self.n_memory], name="memories_r_" + str(hop)))
            self.memories_t.append(
                tf.placeholder(dtype=tf.int32, shape=[None, self.n_memory], name="memories_t_" + str(hop)))

    def _build_embeddings(self):
        self.entity_emb_matrix = tf.get_variable(name="entity_emb_matrix", dtype=tf.float64,
                                                 shape=[self.n_entity, self.dim],
                                                 initializer=tf.contrib.layers.xavier_initializer())
        # each relation is a dim x dim matrix rather than a vector
        self.relation_emb_matrix = tf.get_variable(name="relation_emb_matrix", dtype=tf.float64,
                                                   shape=[self.n_relation, self.dim, self.dim],
                                                   initializer=tf.contrib.layers.xavier_initializer())

    def _build_model(self):
        # transformation matrix for updating item embeddings at the end of each hop
        self.transform_matrix = tf.get_variable(name="transform_matrix", shape=[self.dim, self.dim], dtype=tf.float64,
                                                initializer=tf.contrib.layers.xavier_initializer())

        # [batch_size, dim]
        self.item_embeddings = tf.nn.embedding_lookup(self.entity_emb_matrix, self.items)

        self.h_emb_list = []
        self.r_emb_list = []
        self.t_emb_list = []
        for i in range(self.n_hop):
            # [batch_size, n_memory, dim]
            self.h_emb_list.append(tf.nn.embedding_lookup(self.entity_emb_matrix, self.memories_h[i]))

            # [batch_size, n_memory, dim, dim]
            self.r_emb_list.append(tf.nn.embedding_lookup(self.relation_emb_matrix, self.memories_r[i]))

            # [batch_size, n_memory, dim]
            self.t_emb_list.append(tf.nn.embedding_lookup(self.entity_emb_matrix, self.memories_t[i]))

        o_list = self._key_addressing()

        self.scores = tf.squeeze(self.predict(self.item_embeddings, o_list))
        self.scores_normalized = tf.sigmoid(self.scores)

    def _key_addressing(self):
        o_list = []
        for hop in range(self.n_hop):
            # [batch_size, n_memory, dim, 1]
            h_expanded = tf.expand_dims(self.h_emb_list[hop], axis=3)
            # [batch_size, n_memory, dim]
            Rh = tf.squeeze(tf.matmul(self.r_emb_list[hop], h_expanded), axis=3)
            # [batch_size, dim, 1]
            v = tf.expand_dims(self.item_embeddings, axis=2)
            # attention logits over the memories: [batch_size, n_memory]
            probs = tf.squeeze(tf.matmul(Rh, v), axis=2)
            # [batch_size, n_memory]
            probs_normalized = tf.nn.softmax(probs)
            # [batch_size, n_memory, 1]
            probs_expanded = tf.expand_dims(probs_normalized, axis=2)
            # attention-weighted sum of tail embeddings: [batch_size, dim]
            o = tf.reduce_sum(self.t_emb_list[hop] * probs_expanded, axis=1)

            self.item_embeddings = self.update_item_embedding(self.item_embeddings, o)
            o_list.append(o)
        return o_list

    def update_item_embedding(self, item_embeddings, o):
        if self.item_update_mode == "replace":
            item_embeddings = o
        elif self.item_update_mode == "plus":
            item_embeddings = item_embeddings + o
        elif self.item_update_mode == "replace_transform":
            item_embeddings = tf.matmul(o, self.transform_matrix)
        elif self.item_update_mode == "plus_transform":
            item_embeddings = tf.matmul(item_embeddings + o, self.transform_matrix)
        else:
            raise Exception("Unknown item updating mode: " + self.item_update_mode)
        return item_embeddings

    def predict(self, item_embeddings, o_list):
        y = o_list[-1]
        if self.using_all_hops:
            for i in range(self.n_hop - 1):
                y += o_list[i]

        # [batch_size]
        scores = tf.reduce_sum(item_embeddings * y, axis=1)
        return scores

    def _build_loss(self):
        self.base_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.labels, logits=self.scores))

        self.kge_loss = 0
        for hop in range(self.n_hop):
            h_expanded = tf.expand_dims(self.h_emb_list[hop], axis=2)
            t_expanded = tf.expand_dims(self.t_emb_list[hop], axis=3)
            hRt = tf.squeeze(tf.matmul(tf.matmul(h_expanded, self.r_emb_list[hop]), t_expanded))
            self.kge_loss += tf.reduce_mean(tf.sigmoid(hRt))
        self.kge_loss = -self.kge_weight * self.kge_loss

        self.l2_loss = 0
        for hop in range(self.n_hop):
            self.l2_loss += tf.reduce_mean(tf.reduce_sum(self.h_emb_list[hop] * self.h_emb_list[hop]))
            self.l2_loss += tf.reduce_mean(tf.reduce_sum(self.t_emb_list[hop] * self.t_emb_list[hop]))
            self.l2_loss += tf.reduce_mean(tf.reduce_sum(self.r_emb_list[hop] * self.r_emb_list[hop]))
        if self.item_update_mode == "replace nonlinear" or self.item_update_mode == "plus nonlinear":
            self.l2_loss += tf.nn.l2_loss(self.transform_matrix)
        self.l2_loss = self.l2_weight * self.l2_loss

        self.loss = self.base_loss + self.kge_loss + self.l2_loss

    def _build_train(self):
        self.optimizer = tf.train.AdamOptimizer(self.lr).minimize(self.loss)

    def train(self, sess, feed_dict):
        return sess.run([self.optimizer, self.loss], feed_dict)

    def eval(self, sess, feed_dict):
        labels, scores = sess.run([self.labels, self.scores_normalized], feed_dict)
        auc = roc_auc_score(y_true=labels, y_score=scores)
        predictions = [1 if i >= 0.5 else 0 for i in scores]
        acc = np.mean(np.equal(predictions, labels))
        return auc, acc
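train.py is among the six changed files but is not rendered in this view. For completeness, here is a minimal sketch of how these placeholders are presumably fed, assuming data rows are (user, item, label) as produced by data_loader and that batching follows the usual RippleNet pattern; get_feed_dict is an illustrative helper, not code from this commit.

import tensorflow as tf

def get_feed_dict(model, data, ripple_set, start, end):
    # items and labels come straight from the rating-matrix columns
    feed_dict = {model.items: data[start:end, 1],
                 model.labels: data[start:end, 2]}
    # one (heads, relations, tails) memory block per hop, looked up by user id
    for i in range(model.n_hop):
        feed_dict[model.memories_h[i]] = [ripple_set[user][i][0] for user in data[start:end, 0]]
        feed_dict[model.memories_r[i]] = [ripple_set[user][i][1] for user in data[start:end, 0]]
        feed_dict[model.memories_t[i]] = [ripple_set[user][i][2] for user in data[start:end, 0]]
    return feed_dict

# usage sketch:
# with tf.Session() as sess:
#     sess.run(tf.global_variables_initializer())
#     _, loss = model.train(sess, get_feed_dict(model, train_data, ripple_set, 0, args.batch_size))
#     auc, acc = model.eval(sess, get_feed_dict(model, eval_data, ripple_set, 0, args.batch_size))

Per hop, _key_addressing scores each memory triple by a softmax over v^T R h and returns o as the attention-weighted sum of tail embeddings, so this feed shape (one [batch_size, n_memory] int32 tensor per hop) is exactly what the placeholders expect.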
