# PersonalRank算法以及实现

5,195次阅读

$PR(j)= \begin{cases} a*\sum_{i}\frac{PR(i)}{|out(i)|},\quad if(j \ne u) \\ (1-a)+a*\sum_{I}\frac{PR(i)}{|out(i)|},\quad if(j =u) \end{cases}$
u是待推荐的用户节点。

personalrank经过多次的迭代游走，使得各节点的重要度趋于稳定，实际上我们根据状态转移矩阵，经过一次矩阵运算就可以直接得到系统的稳态。公式(1)的矩阵表示形式为：

$r=(1−α)r0+αM^T(2)$

$Mij= \begin{cases} frac{1}{|out(i)|},\quad if(j \in out(i)) \\ 0,\quad else \end{cases}$

$(I−αM^T)r=(1−α)r0$(4)
$r=(I−αM^T)−1(1−α)r0$(5)

$r=(I−αM^T)^{−1}r0$(6)

# -*- coding: utf-8 -*-
# @Time    : 2018/4/16 上午10:55
# @Author  : Tomcj
# @File    : main.py
# @Software: PyCharm
# coding=utf-8
import numpy as np
import time
from scipy.sparse.linalg import gmres, lgmres
from scipy.sparse import csr_matrix
import  os
from collections import  defaultdict
class personalRank:

def __init__(self,path):
self.curr_dir = path
self.alpha = 0.8
self.zaful_data = []
self.node_degree = defaultdict(set)
self.data_for_csc = []
self.data_for_csc_row = []
self.data_for_csc_col = []
self.zaful_len=0
self.zaful_distinct_data=[]
self.users=set()
self.goods=set()
# 获取所有的数据
def getData(self):

# 实验测试使用zaful站采样的10000条数据
with open(os.path.join(self.curr_dir),'r') as fp:
for line in fp:
data=line.strip().split("\t")
self.zaful_data.append([data[0],data[1]])
#构造csr或则csc系数矩阵
self.zaful_distinct_data=self.node_degree.keys()

self.zaful_len=len(self.zaful_distinct_data)
zaful_data_map={ self.zaful_distinct_data[x]:x for x in xrange(self.zaful_len)}

for user,goods_id in self.zaful_data:
indexUser=zaful_data_map[user]
indexGoods=zaful_data_map[goods_id]
self.data_for_csc.append(-self.alpha*1.0/len(self.node_degree[user]))
self.data_for_csc.append(-self.alpha*1.0 / len(self.node_degree[goods_id]))
self.data_for_csc_row.append(indexUser)
self.data_for_csc_col.append(indexGoods)
self.data_for_csc_row.append(indexGoods)
self.data_for_csc_col.append(indexUser)
for x in xrange(self.zaful_len):
self.data_for_csc.append(1)
self.data_for_csc_row.append(x)
self.data_for_csc_col.append(x)

# 预测所有数据，基于item
def predictAll(self):

f=open('result.txt','w')
AA = csr_matrix((self.data_for_csc, (self.data_for_csc_row, self.data_for_csc_col)), shape=(self.zaful_len, self.zaful_len))
# 暴力穷举解法
try :
for  xda in xrange(self.zaful_len):
if self.zaful_distinct_data[xda] in self.goods:
curr_goods=self.zaful_distinct_data[xda]
b = np.zeros((self.zaful_len, 1))
b[xda] = 1 * (1 - self.alpha)
r = gmres(AA, b, tol=1e-08, maxiter=1)[0]
# r = lgmres(AA, (1 - alpha) * r0, tol=1e-08,maxiter=1)[0]  #lgmres用来克服gmres有时候不收敛的问题，会在更少的迭代次数内收敛
rank = {}
for j in xrange(self.zaful_len):
rank[self.zaful_distinct_data[j]] = r[j]
li = (sorted(rank.items(), cmp=lambda x, y: cmp(x[1], y[1]), reverse=True))
ele_cnt=0

for ele in li:
if ele[0] in self.goods and ele[0]!=curr_goods:
ele_cnt+=1
f.write(self.zaful_distinct_data[xda]+','+ele[0]+'\n')
finally:
f.close()

if __name__ == '__main__':
begin = time.time()

pr=personalRank('path')
pr.getData()
pr.predictAll()
end = time.time()
print end-begin

2018-10-29 15:50:17 回复

Windows  Chrome  中国山东省青岛市移动