Created
March 19, 2013 03:33
-
-
Save dengshilong/5193515 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
from operator import itemgetter, attrgetter | |
from collections import defaultdict | |
def UserSimilarity(train):
    """Build a user-user similarity matrix from co-rated items.

    For every pair of distinct users (u, v) that share at least one item,
    the similarity is ``|items(u) & items(v)| / sqrt(|items(u)| * |items(v)|)``.
    Rating values themselves are not used; only which items each user has.

    Args:
        train: mapping ``user -> {item: rating}``.

    Returns:
        ``defaultdict(dict)`` mapping ``u -> {v: similarity}``. A user with
        no co-rating neighbour has no entry. The matrix is symmetric.
    """
    # build inverse table for item_users
    # (single-string print(...) behaves identically on Python 2 and 3)
    print("build inverse table for item_users")
    item_users = defaultdict(set)
    for u, items in train.items():
        for i in items:
            item_users[i].add(u)

    # calculate co-rated items between users
    print('calculate co-rated items between users')
    C = defaultdict(dict)  # C[u][v]: number of items shared by u and v
    N = defaultdict(int)   # N[u]: number of items user u has
    for users in item_users.values():
        for u in users:
            N[u] += 1
            for v in users:
                if u == v:
                    continue
                # Touch C[u] only when a real neighbour exists, so users
                # without neighbours never get an empty row.
                row = C[u]
                row[v] = row.get(v, 0) + 1

    # calculate final similarity matrix W
    print('calculate finial similarity matrix W')
    W = defaultdict(dict)
    for u, related_users in C.items():
        for v, cuv in related_users.items():
            # math.sqrt returns a float, so this is true division on
            # Python 2 as well.
            W[u][v] = cuv / math.sqrt(N[u] * N[v])
    return W
def LoadRating(trainFile):
    """Load a tab-separated ratings file into a nested dict.

    Each non-blank line must start with three tab-separated integer
    fields: user id, item id, rating. Any further fields on the line are
    ignored. Blank or whitespace-only lines are skipped.

    Args:
        trainFile: path of the ratings file to read.

    Returns:
        ``defaultdict(dict)`` mapping ``uid -> {iid: rating}``. If a
        (uid, iid) pair occurs more than once, the last rating wins.

    Raises:
        ValueError: if one of the first three fields is not an integer.
        IndexError: if a non-blank line has fewer than three fields.
    """
    # single-string print(...) behaves identically on Python 2 and 3
    print('loading rating')
    train = defaultdict(dict)
    with open(trainFile, 'r') as f:
        for line in f:
            # A trailing empty line would otherwise crash in int('').
            if not line.strip():
                continue
            values = line.split('\t')
            uid = int(values[0])
            iid = int(values[1])
            rating = int(values[2])  # int() tolerates the trailing newline
            train[uid][iid] = rating
    return train
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment