├── .gitignore ├── LICENSE ├── README.md ├── pip-req.txt ├── sim ├── Anime-iterate.png ├── assembly.py ├── construct.py ├── construct_mask.py ├── extract.py ├── fetch.py ├── pretraining.py ├── pretraining_compliment.py ├── reconstruct.py ├── settings.py └── training.py └── www ├── app ├── __init__.py ├── connector.py ├── data.py ├── datacenter.py ├── dbcom.py ├── model.py ├── static │ ├── my.js │ └── style.css ├── templates │ ├── about.html │ ├── couple.html │ ├── index.html │ ├── layout.html │ ├── plz_favorite_request.html │ ├── similarity.html │ └── single.html ├── util.py └── views.py ├── config.py └── run.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # PyInstaller 26 | # Usually these files are written by a python script from a template 27 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .cache 40 | nosetests.xml 41 | coverage.xml 42 | 43 | # Translations 44 | *.mo 45 | *.pot 46 | 47 | # Django stuff: 48 | *.log 49 | 50 | # Sphinx documentation 51 | docs/_build/ 52 | 53 | # PyBuilder 54 | target/ 55 | 56 | # Data 57 | *.dat 58 | *.sql 59 | *.db 60 | *.hdf5 61 | 62 | # Virtual environment 63 | ve/ 64 | 65 | #ipython 66 | *.ipy* 67 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Ronnie Wang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Chi 2 | 3 | Future Bangumi Laboratory 4 | 5 | ## Development log 6 | 7 | 31/01/2015 Version 0.2 online. 8 | 08/02/2015 Switch from global-centered to user-centered. 9 | 10 | ## Wish list 11 | 12 | * Preferred item feedback 13 | * Categorized sync rate 14 | * Linear model refinement (please be specific) 15 | * Statistical diagram 16 | -------------------------------------------------------------------------------- /pip-req.txt: -------------------------------------------------------------------------------- 1 | Flask 2 | Flask-Cache 3 | Flask-Cors 4 | Flask-SQLAlchemy 5 | pylibmc 6 | mysql 7 | requests 8 | scipy 9 | numpy 10 | h5py 11 | -------------------------------------------------------------------------------- /sim/Anime-iterate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wattlebird/Chi/9ee361cc9c7f79e85a9a88fffb2c6e632c92ebfb/sim/Anime-iterate.png -------------------------------------------------------------------------------- /sim/assembly.py: -------------------------------------------------------------------------------- 1 | import cPickle 2 | 3 | tps = ['anime','book','music','game','real'] 4 | 5 | bucket=dict() 6 | 7 | for tp in tps: 8 | fr = open('dat/training-'+tp+'.dat','rb') 9 | S = cPickle.load(fr) 10 | States = cPickle.load(fr) 11 | fr.close() 12 | Stateslil=States.tolil() 13 | 14 | fr = open('dat/temp-'+tp+'.dat','rb') 15 | U = cPickle.load(fr) 16 | Vt = cPickle.load(fr) 17 | Bu = cPickle.load(fr) 18 | Bi = cPickle.load(fr) 19 | fr.close() 20 | 21 | I,J = States.row, States.col 22 | for i in xrange(States.getnnz()): 23 | s = Stateslil[I[i],J[i]] 24 | if S[I[i],J[i]]==0: 25 | S[I[i],J[i]]=U[I[i],:].dot(Vt[:,J[i]])+Bu[I[i],s]+Bi[J[i],s] 26 | 27 | bucket[tp]=S 28 | 29 | fw = open('dat/assembly.dat','wb') 30 | cPickle.dump(bucket,fw) 31 | fw.close() 32 | -------------------------------------------------------------------------------- /sim/construct.py: -------------------------------------------------------------------------------- 1 | # Construct utiliary mtx 2 | import numpy as np 3 | from scipy import sparse 4 | from fetch import session, Base, engine, Users, Record, UserInfo, ItemInfo 5 | from sqlalchemy.sql.expression import func 6 | from settings import TYPE_LIST 7 | import pickle 8 | 9 | #tableState = {'do':1,'collect':2,'wish':3,'on_hold':4,'dropped':5} 10 | tableUI=dict() 11 | tableII=dict() 12 | userAvg=dict() 13 | 14 | 15 | 16 | for usr in session.query(UserInfo.name, UserInfo.index, UserInfo.average).filter(UserInfo.index != None).all(): 17 | tableUI[usr.name]=usr.index 18 | 19 | for rec in session.query(ItemInfo.i_index, ItemInfo.index).all(): 20 | tableII[rec.i_index]=rec.index 21 | 22 | #nUsers=session.query(UserInfo).filter(UserInfo.index!=None).count() 23 | nItms=session.query(ItemInfo).count() 24 | 25 | gp=dict() 26 | gp['bias_states']={} 27 | global_avg=session.query(func.avg(Record.rate).label('average')).filter(Record.rate!=None).scalar(); 28 | for q in session.query(Record.state, func.avg(Record.rate).label('average')).\ 29 | filter(Record.rate != None).group_by(Record.state): 30 | gp['bias_states'][q.state]=float(q.average)-float(global_avg) 31 | 32 | nFaved = session.query(Record).count() 33 | data = np.zeros(nFaved) 34 | i = np.zeros(nFaved) 35 | j = np.zeros(nFaved) 36 | idx=0 37 | for q in session.query(Record.name, Record.iid, Record.state, Record.rate): 38 | p = session.query(UserInfo.name, UserInfo.sd, UserInfo.ratecount, UserInfo.average).filter(UserInfo.name==q.name).first() 39 | 40 | try: 41 | if q.rate!=None and p.ratecount>3 and p.sd>0.1: 42 | data[idx]=q.rate-p.average 43 | else: 44 | data[idx]=gp['bias_states'][q.state] 45 | except AttributeError, e: 46 | print "Exception: for "+q.name+" we cannot find the user.\n" 47 | continue 48 | 49 | i[idx]=tableUI[q.name] 50 | j[idx]=tableII[q.iid] 51 | idx+=1 52 | gp['utiliary_mtx']=sparse.coo_matrix((data,(i,j))).tolil() 53 | 54 | fw = open('dat/mat.dat','wb') 55 | pickle.dump(gp,fw) 56 | pickle.dump(tableUI,fw) 57 | pickle.dump(tableII,fw) 58 | fw.close() -------------------------------------------------------------------------------- /sim/construct_mask.py: -------------------------------------------------------------------------------- 1 | from fetch import session, Base, engine, Users, Record, UserInfo, ItemInfo 2 | from sqlalchemy.sql.expression import func 3 | from scipy.sparse import csr_matrix 4 | import cPickle 5 | 6 | fr = open('dat/mat.dat','rb') 7 | cPickle.load(fr) 8 | tableUI = cPickle.load(fr) 9 | tableII = cPickle.load(fr) 10 | fr.close() 11 | 12 | imask=dict() 13 | umask=dict() 14 | 15 | # all item mask 16 | count = session.query(Record.iid).group_by(Record.iid).count() 17 | data = [True]*count 18 | rowidx = [0]*count 19 | colidx = range(count) 20 | imask['all']=csr_matrix((data,(rowidx,colidx)),dtype='b',shape=(1,count)) 21 | 22 | # other item mask 23 | for itp in ['anime','book','music','game','real']: 24 | scount = session.query(Record.iid).filter(Record.typ==itp).group_by(Record.iid).count() 25 | data = [True]*scount 26 | rowidx = [0]*scount 27 | colidx = [0]*scount 28 | i=0 29 | for q in session.query(Record.iid).filter(Record.typ==itp).group_by(Record.iid).all(): 30 | colidx[i]=tableII[q.iid] 31 | i+=1 32 | imask[itp]=csr_matrix((data,(rowidx,colidx)),dtype='b',shape=(1,count)) 33 | 34 | # all uses mask 35 | count = session.query(Record.name).group_by(Record.name).count() 36 | data = [True]*count 37 | rowidx = [0]*count 38 | colidx = range(count) 39 | umask['all']=csr_matrix((data,(rowidx,colidx)),dtype='b',shape=(1,count)) 40 | umask[0] = umask['all'] 41 | 42 | # users mask, of different types 43 | for itp in ['anime','book','music','game','real']: 44 | scount = session.query(Record.name).filter(Record.typ==itp).group_by(Record.name).count() 45 | data = [True]*scount 46 | rowidx = [0]*scount 47 | colidx = [0]*scount 48 | i=0 49 | for q in session.query(Record.name).filter(Record.typ==itp).group_by(Record.name).all(): 50 | colidx[i]=tableUI[q.name] 51 | i+=1 52 | umask[itp]=csr_matrix((data,(rowidx,colidx)),dtype='b',shape=(1,count)) 53 | 54 | # usermask, by active level 55 | j=1 56 | for m in ['2014-12-15','2014-07-15','2014-01-15']: 57 | scount = session.query(Record.name).filter(Record.adddate>=m).group_by(Record.name).count() 58 | data = [True]*scount 59 | rowidx = [0]*scount 60 | colidx = [0]*scount 61 | i=0 62 | for q in session.query(Record.name).filter(Record.adddate>=m).group_by(Record.name).all(): 63 | colidx[i]=tableUI[q.name] 64 | i+=1 65 | umask[j]=csr_matrix((data,(rowidx,colidx)),dtype='b',shape=(1,count)) 66 | j+=1 67 | 68 | fw = open('dat/db.dat','wb') 69 | cPickle.dump(imask,fw) 70 | cPickle.dump(umask,fw) 71 | fw.close() -------------------------------------------------------------------------------- /sim/extract.py: -------------------------------------------------------------------------------- 1 | # Extract UserInfo and ItemInfo 2 | from fetch import session, Base, engine, Users, Record, UserInfo, ItemInfo 3 | from sqlalchemy.sql.expression import func 4 | import numpy as np 5 | 6 | cnt=0; 7 | for usr in session.query(Users.name).order_by(Users.uid).all(): 8 | count = session.query(Record).filter(Record.name==usr.name).count() 9 | ratecount = session.query(Record).filter(Record.name==usr.name, Record.rate != None).count() 10 | average = session.query(func.avg(Record.rate).label('average')).\ 11 | filter(Record.name==usr.name, Record.rate != None).scalar(); 12 | temp = []; 13 | for q in session.query(Record.rate).filter(Record.name==usr.name, Record.rate != None): 14 | temp.append(q.rate) 15 | sd = np.std(temp) 16 | if count>0: 17 | if ratecount>0: 18 | itm = UserInfo(name=usr.name, index=cnt, count=count, ratecount=ratecount, \ 19 | average = average, sd=sd) 20 | else: 21 | itm = UserInfo(name=usr.name, index=cnt, count=count, ratecount=ratecount) 22 | session.add(itm) 23 | cnt+=1 24 | else: 25 | itm = UserInfo(name=usr.name, count=0, ratecount=0) 26 | session.add(itm) 27 | session.commit() 28 | nUsers=cnt+1 29 | 30 | cnt=0; 31 | for rec in session.query(Record.iid).group_by(Record.iid).order_by(Record.iid).all(): 32 | itm = ItemInfo(i_index=rec.iid, index=cnt) 33 | session.add(itm) 34 | cnt+=1 35 | session.commit() 36 | nItms = cnt+1 37 | 38 | # After that, you can check your database, and try to index some columns -------------------------------------------------------------------------------- /sim/fetch.py: -------------------------------------------------------------------------------- 1 | from settings import * 2 | from sqlalchemy import create_engine, Column, Integer, String, Float, Date, Text, ForeignKey 3 | from sqlalchemy.ext.declarative import declarative_base 4 | from sqlalchemy.orm import sessionmaker, relationship, backref 5 | from sqlalchemy.sql.expression import func 6 | 7 | engine = create_engine('mysql+mysqldb://%s:%s@%s/%s?charset=utf8&use_unicode=0'%(MYSQL_USER,MYSQL_PASSWD,MYSQL_HOST,MYSQL_DBNAME)) 8 | 9 | Base = declarative_base() 10 | 11 | class Users(Base): 12 | __tablename__= 'users' 13 | 14 | uid = Column(Integer,nullable=False) 15 | name = Column(String(30),primary_key=True,index=True) 16 | joindate = Column(Date,nullable=False) 17 | 18 | def __repr__(self): 19 | return "" % ( 20 | self.uid, self.name, self.joindate) 21 | 22 | class Record(Base): 23 | __tablename__ = 'record' 24 | 25 | #name = Column(String(100),primary_key=True,ForeignKey('users.name')) 26 | name = Column(String(30),primary_key=True,index=True) 27 | typ = Column(String(5),primary_key=True) 28 | iid = Column(Integer,primary_key=True) 29 | state = Column(String(7),nullable=False) 30 | adddate = Column(Date,nullable=False) 31 | rate = Column(Integer) 32 | comment = Column(String(401)) 33 | tags = Column(String(401)) 34 | 35 | #user = relationship("Users",backref=backref("record")) 36 | 37 | def __repr__(self): 38 | return "" % ( 39 | self.name, self.iid) 40 | 41 | class UserInfo(Base): 42 | __tablename__='userinfo' 43 | 44 | name = Column(String(30), primary_key=True) 45 | index = Column(Integer) 46 | count = Column(Integer) 47 | ratecount = Column(Integer) 48 | average = Column(Float) 49 | sd = Column(Float) 50 | 51 | def __repr__(self): 52 | return "" % ( 53 | self.name) 54 | 55 | class ItemInfo(Base): 56 | __tablename__='iteminfo' 57 | i_index = Column(Integer, primary_key=True) 58 | index = Column(Integer) 59 | 60 | def __repr__(self): 61 | return "" % ( 62 | self.i_index) 63 | 64 | #UserInfo.__table__.drop(engine, checkfirst=True) 65 | #ItemInfo.__table__.drop(engine, checkfirst=True) 66 | Base.metadata.create_all(engine) 67 | Session = sessionmaker(bind=engine) 68 | session = Session() 69 | 70 | if __name__=='__main__': 71 | for q in session.query(Record.name, Record.iid, Record.rate, Record.state).filter(Record.rate != None).limit(100).all(): 72 | print q.rate -------------------------------------------------------------------------------- /sim/pretraining.py: -------------------------------------------------------------------------------- 1 | import cPickle 2 | from numpy import array 3 | from scipy.sparse import coo_matrix 4 | from fetch import session, Base, engine, Users, Record, UserInfo, ItemInfo 5 | from random import seed, random, shuffle 6 | import numpy as np 7 | import h5py 8 | from scipy.sparse.linalg import svds 9 | 10 | fr = open('dat/db.dat','rb') 11 | imask = cPickle.load(fr) 12 | umask = cPickle.load(fr) 13 | fr.close() 14 | fr = open('dat/mat.dat','rb') 15 | cPickle.load(fr) 16 | tableUI = cPickle.load(fr) 17 | tableII = cPickle.load(fr) 18 | fr.close() 19 | 20 | seed() 21 | 22 | ### Phase of selecting an item type 23 | 24 | tp = 'real' 25 | 26 | states = ["wish","do","collect","on_hold","dropped","all","states"]; 27 | 28 | M = session.query(Record.name).filter(Record.typ==tp).group_by(Record.name).count() 29 | N = session.query(Record.iid).filter(Record.typ==tp).group_by(Record.iid).count() 30 | 31 | c = session.query(Record).filter(Record.typ==tp, Record.rate != None).count() 32 | irow=dict() 33 | icol=dict() 34 | data=dict() 35 | for s in states: 36 | irow[s]=[] 37 | icol[s]=[] 38 | data[s]=[] 39 | 40 | for q in session.query(Record.name, Record.iid, Record.rate, Record.state).filter(Record.typ==tp, Record.rate != None).all(): 41 | i = umask[tp][:,:tableUI[q.name]+1].sum()-1 42 | j = imask[tp][:,:tableII[q.iid]+1].sum()-1 43 | irow['all'].append(i) 44 | icol['all'].append(j) 45 | data['all'].append(float(q.rate)) 46 | irow[q.state].append(i) 47 | icol[q.state].append(j) 48 | data[q.state].append(True) 49 | 50 | S = coo_matrix((data['all'],(irow['all'],icol['all'])),dtype='f',shape=(M,N)) 51 | sm=dict() 52 | for i in xrange(5): 53 | sm[states[i]]=coo_matrix((data[states[i]],(irow[states[i]],icol[states[i]])),dtype='b',shape=(M,N)) 54 | 55 | d = {"wish":0,"do":1,"collect":2,"on_hold":3,"dropped":4} 56 | for q in session.query(Record.name, Record.iid, Record.state).filter(Record.typ==tp).all(): 57 | i = umask[tp][:,:tableUI[q.name]+1].sum()-1 58 | j = imask[tp][:,:tableII[q.iid]+1].sum()-1 59 | irow['states'].append(i) 60 | icol['states'].append(j) 61 | data['states'].append(d[q.state]) 62 | 63 | States = coo_matrix((data['states'],(irow['states'],icol['states'])),dtype='i',shape=(M,N)) 64 | 65 | #t = range(S.data.shape[0]) 66 | #shuffle(t) 67 | #tm=dict() 68 | #irow['train']=np.array(irow['all'])[t[:int(len(t)*0.7)]] 69 | #icol['train']=np.array(icol['all'])[t[:int(len(t)*0.7)]] 70 | #irow['validate']=np.array(irow['all'])[t[int(len(t)*0.7):int(len(t)*0.9)]] 71 | #icol['validate']=np.array(icol['all'])[t[int(len(t)*0.7):int(len(t)*0.9)]] 72 | #irow['test']=np.array(irow['all'])[t[int(len(t)*0.9):]] 73 | #icol['test']=np.array(icol['all'])[t[int(len(t)*0.9):]] 74 | #tm['train']=coo_matrix(([True]*irow['train'].shape[0],(irow['train'],icol['train'])),dtype='b',shape=(M,N)) 75 | #tm['validate']=coo_matrix(([True]*irow['validate'].shape[0],(irow['validate'],icol['validate'])),dtype='b',shape=(M,N)) 76 | #tm['test']=coo_matrix(([True]*irow['test'].shape[0],(irow['test'],icol['test'])),dtype='b',shape=(M,N)) 77 | 78 | #U=S.multiply(tm['train']).tolil() 79 | U=S.tolil() 80 | U_sum = U.sum(axis=1) 81 | U_cnt = U.getnnz(axis=1) 82 | 83 | for i in xrange(M): 84 | for j in U[i].rows[0]: 85 | if U_cnt[i]: 86 | U[i,j]-=U_sum[i,0]/U_cnt[i] 87 | 88 | #Uvalidate = S.multiply(tm['validate']).tolil() 89 | #U2_sum = Uvalidate.sum(axis=1) 90 | #U2_cnt = Uvalidate.getnnz(axis=1) 91 | #for i in xrange(M): 92 | # for j in Uvalidate[i].rows[0]: 93 | # if U_cnt[i]: 94 | # Uvalidate[i,j]-=U_sum[i,0]/U_cnt[i] 95 | # elif U2_cnt[i]: 96 | # Uvalidate[i,j]-=U2_sum[i,0]/U2_cnt[i] 97 | 98 | #Utest = S.multiply(tm['test']).tolil() 99 | #U2_sum = Utest.sum(axis=1) 100 | #U2_cnt = Utest.getnnz(axis=1) 101 | #for i in xrange(M): 102 | # for j in Utest[i].rows[0]: 103 | # if U_cnt[i]: 104 | # Utest[i,j]-=U_sum[i,0]/U_cnt[i] 105 | # elif U2_cnt[i]: 106 | # Utest[i,j]-=U2_sum[i,0]/U2_cnt[i] 107 | 108 | Bu = np.zeros((M,5)) 109 | Bi = np.zeros((N,5)) 110 | 111 | for i in xrange(5): 112 | U_temp = U.multiply(sm[states[i]]) 113 | U_sum = U_temp.sum(axis=1) 114 | U_cnt = U_temp.getnnz(axis=1) 115 | for j in xrange(M): 116 | if U_cnt[j]: 117 | Bu[j,i]=U_sum[j,0]/U_cnt[j] 118 | 119 | for i in xrange(5): 120 | U_temp = U.multiply(sm[states[i]]) 121 | U_sum = U_temp.sum(axis=0) 122 | U_cnt = U_temp.getnnz(axis=0) 123 | for j in xrange(N): 124 | if U_cnt[j]: 125 | Bi[j,i]=U_sum[0,j]/U_cnt[j] 126 | 127 | u,s,vt = svds(U,k=400) 128 | 129 | #f = h5py.File("data.hdf5","w") 130 | #f.create_dataset("Buser",(M,5),'f') 131 | #f['Buser'][:] = Bu 132 | #f.create_dataset("Bitem",(N,5),'f') 133 | #f['Bitem'][:] = Bi 134 | 135 | #u,s,vt = svds(U,k=500) 136 | #f.create_dataset("u",(M,500),'f') 137 | #f['u'][:]=u 138 | #f.create_dataset("vt",(500,N),'f') 139 | #f['vt'][:]=vt 140 | 141 | 142 | fw = open('dat/training-'+tp+'.dat','wb') 143 | #cPickle.dump(S,fw) 144 | #cPickle.dump(sm,fw) 145 | #cPickle.dump(tm,fw) 146 | cPickle.dump(U,fw) 147 | #cPickle.dump(Uvalidate,fw) 148 | #cPickle.dump(Utest,fw) 149 | cPickle.dump(States,fw) 150 | cPickle.dump(Bu,fw) 151 | cPickle.dump(Bi,fw) 152 | cPickle.dump(u,fw) 153 | cPickle.dump(vt,fw) 154 | fw.close() 155 | 156 | 157 | -------------------------------------------------------------------------------- /sim/pretraining_compliment.py: -------------------------------------------------------------------------------- 1 | from fetch import session, Record 2 | from scipy.sparse import coo_matrix 3 | import cPickle 4 | 5 | fr = open('dat/mat.dat','rb') 6 | cPickle.load(fr) 7 | tableUI = cPickle.load(fr) 8 | tableII = cPickle.load(fr) 9 | fr.close() 10 | 11 | M = session.query(Record.name).group_by(Record.name).count() 12 | N = session.query(Record.iid).group_by(Record.iid).count() 13 | 14 | irow=[] 15 | icol=[] 16 | data=[] 17 | 18 | d = {"wish":0,"do":1,"collect":2,"on_hold":3,"dropped":4} 19 | 20 | for q in session.query(Record.name, Record.iid, Record.state).all(): 21 | irow.append(tableUI[q.name]) 22 | icol.append(tableII[q.iid]) 23 | data.append(d[q.state]) 24 | 25 | S = coo_matrix((data,(irow,icol)),dtype='i',shape=(M,N)) 26 | 27 | fw = open('dat/training.dat','ab') 28 | cPickle.dump(S.tolil(),fw) 29 | fw.close() 30 | 31 | -------------------------------------------------------------------------------- /sim/reconstruct.py: -------------------------------------------------------------------------------- 1 | from fetch import session, Record 2 | from scipy.sparse import coo_matrix 3 | import cPickle 4 | 5 | fr = open('dat/mat.dat','rb') 6 | cPickle.load(fr) 7 | tableUI = cPickle.load(fr) 8 | tableII = cPickle.load(fr) 9 | fr.close() 10 | 11 | fr = open('dat/db.dat','rb') 12 | imask = cPickle.load(fr) 13 | umask = cPickle.load(fr) 14 | fr.close() 15 | 16 | fr = open('dat/assembly.dat','rb') 17 | bucket = cPickle.load(fr) 18 | fr.close() 19 | 20 | M = session.query(Record.name).group_by(Record.name).count() 21 | N = session.query(Record.iid).group_by(Record.iid).count() 22 | 23 | irow=[] 24 | icol=[] 25 | data=[] 26 | 27 | for q in session.query(Record.name, Record.iid, Record.typ).all(): 28 | tp = q.typ 29 | i = umask[tp][:,:tableUI[q.name]+1].sum()-1 30 | j = imask[tp][:,:tableII[q.iid]+1].sum()-1 31 | 32 | irow.append(tableUI[q.name]) 33 | icol.append(tableII[q.iid]) 34 | data.append(bucket[tp][i,j]) 35 | 36 | U = coo_matrix((data,(irow,icol)),shape=(M,N)) 37 | U = U.tocsr() 38 | U2 = U.multiply(U) 39 | 40 | fw = open('dat/a.dat','wb') 41 | cPickle.dump(U,fw) 42 | cPickle.dump(U2,fw) 43 | fw.close() -------------------------------------------------------------------------------- /sim/settings.py: -------------------------------------------------------------------------------- 1 | MYSQL_HOST = 'localhost' 2 | MYSQL_DBNAME = 'bgm-info' 3 | MYSQL_USER = 'bgmer' 4 | MYSQL_PASSWD = 'sai' 5 | 6 | TYPE_LIST=['anime','music','game','book','real'] 7 | STATE_LIST=['do','collect','wish','on_hold','dropped'] 8 | L = 0.1 9 | R = 2 -------------------------------------------------------------------------------- /sim/training.py: -------------------------------------------------------------------------------- 1 | import cPickle 2 | import numpy as np 3 | import h5py 4 | from numpy.linalg import norm 5 | from numpy import sqrt 6 | import multiprocessing 7 | import time 8 | 9 | tp='real' 10 | 11 | #fr = open('dat/db.dat','rb') 12 | #imask = cPickle.load(fr) 13 | #umask = cPickle.load(fr) 14 | #fr.close() 15 | #fr = open('dat/training.dat','rb') 16 | #cPickle.load(fr) 17 | #sm = cPickle.load(fr) 18 | #tm = cPickle.load(fr) 19 | #fr.close() 20 | fr = open('dat/training-'+tp+'.dat','rb') 21 | S = cPickle.load(fr) 22 | #St = cPickle.load(fr) 23 | #Sv = cPickle.load(fr) 24 | States = cPickle.load(fr) 25 | Bu = cPickle.load(fr) 26 | Bi = cPickle.load(fr) 27 | U = cPickle.load(fr) 28 | Vt = cPickle.load(fr) 29 | fr.close() 30 | #fr = open('dat/temp-'+tp+'.dat','rb') 31 | #U = cPickle.load(fr) 32 | #Vt = cPickle.load(fr) 33 | #Bu = cPickle.load(fr) 34 | #Bi = cPickle.load(fr) 35 | #fr.close() 36 | States=States.tolil() 37 | #f = h5py.File("data.hdf5","r") 38 | #U = f['u'] 39 | #Vt = f['vt'] 40 | #Bu = f['Buser'] 41 | #Bi = f['Bitem'] 42 | 43 | def evaluate(S): 44 | """ 45 | S: M x N lil sparse matrix 46 | U: M x 500 users vectors 47 | Vt 500 x N items vectors 48 | Bu: M x 5 users states bias 49 | Bi: N x 5 items states bias 50 | """ 51 | 52 | I,J = S.nonzero() 53 | L = S.getnnz() 54 | rtn = 0 55 | for i in xrange(L): 56 | s = States[I[i],J[i]] 57 | rtn+=(S[I[i],J[i]]-U[I[i],:].dot(Vt[:,J[i]])-Bu[I[i],s]-Bi[J[i],s])**2 58 | return sqrt(rtn/L) 59 | 60 | def derivative1(S): 61 | dU = np.zeros((S.shape[0],400)) 62 | dVt = np.zeros((400, S.shape[1])) 63 | I,J = S.nonzero() 64 | L = S.getnnz() 65 | for i in xrange(L): 66 | s = States[I[i],J[i]] 67 | dU[I[i],:]+=-2*(S[I[i],J[i]]-U[I[i],:].dot(Vt[:,J[i]])-Bu[I[i],s]-Bi[J[i],s])*Vt[:,J[i]].T 68 | dVt[:,J[i]]+=-2*(S[I[i],J[i]]-U[I[i],:].dot(Vt[:,J[i]])-Bu[I[i],s]-Bi[J[i],s])*U[I[i],:].T 69 | dU+=U 70 | dVt+=Vt 71 | return (dU ,dVt) 72 | 73 | def derivative2(S): 74 | dBu = np.zeros((S.shape[0],5)) 75 | dBi = np.zeros((S.shape[1],5)) 76 | I,J = S.nonzero() 77 | L = S.getnnz() 78 | for i in xrange(L): 79 | s = States[I[i],J[i]] 80 | dBu[I[i],s]+=-2*(S[I[i],J[i]]-U[I[i],:].dot(Vt[:,J[i]])-Bu[I[i],s]-Bi[J[i],s]) 81 | dBi[J[i],s]+=-2*(S[I[i],J[i]]-U[I[i],:].dot(Vt[:,J[i]])-Bu[I[i],s]-Bi[J[i],s]) 82 | dBu+=Bu 83 | dBi+=Bi 84 | return (dBu, dBi) 85 | 86 | 87 | def worker1(d,S): 88 | dU ,dVt = derivative1(S) 89 | d['dU']=dU 90 | d['dVt']=dVt 91 | 92 | def worker2(d,S): 93 | print evaluate(S) 94 | dBu, dBi = derivative2(S) 95 | d['dBu'] = dBu 96 | d['dBi'] = dBi 97 | 98 | 99 | if __name__=='__main__': 100 | try: 101 | for i in xrange(70): 102 | mgr = multiprocessing.Manager() 103 | d = mgr.dict() 104 | p1 = multiprocessing.Process(target=worker1, args=(d,S)) 105 | p2 = multiprocessing.Process(target=worker2, args=(d,S)) 106 | 107 | p1.start() 108 | p2.start() 109 | p1.join() 110 | p2.join() 111 | U-=0.0002*d['dU'] 112 | Vt-=0.0002*d['dVt'] 113 | Bu-=0.0002*d['dBu'] 114 | Bi-=0.0002*d['dBi'] 115 | finally: 116 | fw = open('dat/temp-'+tp+'.dat','wb') 117 | cPickle.dump(U,fw) 118 | cPickle.dump(Vt,fw) 119 | cPickle.dump(Bu,fw) 120 | cPickle.dump(Bi,fw) 121 | fw.close() -------------------------------------------------------------------------------- /www/app/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | from flask import Flask 3 | from flask.ext.sqlalchemy import SQLAlchemy 4 | from flask.ext.cors import CORS 5 | from flask.ext.cache import Cache 6 | from config import * 7 | 8 | app = Flask(__name__) 9 | app.config.from_object('config') 10 | app.config['SQLALCHEMY_DATABASE_URI']='mysql+mysqldb://%s:%s@%s/%s?charset=utf8&use_unicode=0'%(MYSQL_USER,MYSQL_PASSWD,MYSQL_HOST,MYSQL_DBNAME) 11 | app.secret_key=os.urandom(24) 12 | app.config['CORS_HEADERS'] = 'Content-Type' 13 | app.config['CORS_RESOURCES'] = {r"/similarity/*": {"origins": r"http://api.bgm.tv/*"}} 14 | 15 | cache = Cache(app,config={'CACHE_TYPE': 'memcached', 'CACHE_MEMCACHED_SERVERS': ['127.0.0.1:11211']}) 16 | 17 | cors = CORS(app) 18 | 19 | db = SQLAlchemy(app) 20 | db.create_all() 21 | 22 | from app import views,model -------------------------------------------------------------------------------- /www/app/connector.py: -------------------------------------------------------------------------------- 1 | from dbcom import Connector 2 | from datacenter import DataCenter 3 | 4 | class Controller: 5 | """Interface to datacenter.""" 6 | def __init__(self): 7 | self.com = Connector() 8 | self.data = DataCenter() 9 | 10 | def UserExist(self, username): 11 | """True if user registered before 15/01/2015""" 12 | return self.com.CheckUserexists(username) 13 | 14 | def UserRecords(self, username): 15 | """True if user faved at least one item.""" 16 | return not self.com.CheckUid(username) is None 17 | 18 | def GetTopRank(self, username, typ, acl): 19 | uid = self.com.CheckUid(username); 20 | maskItem = self.com.GenerateItemMask(typ); 21 | maskUser = self.com.GenerateUserMask(typ, acl=acl); 22 | lst = self.data.TopRank(uid, maskItem, maskUser) 23 | 24 | rtn = []; 25 | for x in lst: 26 | un = self.com.CheckUsername(x[0]) 27 | unn = self.com.CheckNickname(un) 28 | pc = DataCenter.Normalize(x[1]) 29 | rtn.append([un, unn, pc]) 30 | return rtn 31 | 32 | def GetCouple(self, ua, ub, typ): 33 | """ 34 | return(NicknameA, NicknameB, Similarity_in_percent, rankA, rankB) 35 | """ 36 | una = self.com.CheckNickname(ua); 37 | unb = self.com.CheckNickname(ub); 38 | uida = self.com.CheckUid(ua); 39 | uidb = self.com.CheckUid(ub); 40 | maskItem = self.com.GenerateItemMask(typ); 41 | maskUser = self.com.GenerateUserMask(typ); 42 | sim = self.data.GetSimilarity(uida, uidb, maskItem) 43 | pr = DataCenter.Normalize(sim) 44 | ra = self.data.GetRankOf(uida, uidb, sim, maskItem, maskUser) # From A's view, B's rank 45 | rb = self.data.GetRankOf(uidb, uida, sim, maskItem, maskUser) # From B's view, A's rank 46 | return (una, unb, pr, ra, rb) 47 | 48 | def GetFeedback(self, ua, ub, typ): 49 | uida = self.com.CheckUid(ua); 50 | uidb = self.com.CheckUid(ub); 51 | maskItem = self.com.GenerateItemMask(typ); 52 | maskUser = self.com.GenerateUserMask(typ); 53 | lst = self.data.GetPosItem(uida, uidb, maskItem, maskUser) 54 | rtn = []; 55 | for x in lst: 56 | iid = self.com.CheckItemid(x) 57 | iname = self.com.CheckItemName(iid) 58 | rtn.append([iid,iname]) 59 | return rtn; 60 | 61 | def GetNegFeedback(self, ua, ub, typ): 62 | uida = self.com.CheckUid(ua); 63 | uidb = self.com.CheckUid(ub); 64 | maskItem = self.com.GenerateItemMask(typ); 65 | maskUser = self.com.GenerateUserMask(typ); 66 | lst = self.data.GetNegItem(uida, uidb, maskItem, maskUser) 67 | rtn = []; 68 | for x in lst: 69 | iid = self.com.CheckItemid(x) 70 | iname = self.com.CheckItemName(iid) 71 | rtn.append([iid,iname]) 72 | return rtn; 73 | 74 | def GetCount(self, typ): 75 | return self.com.GetTypeCount(typ) 76 | 77 | def GetUsernickname(self, username): 78 | return self.com.CheckNickname(username) 79 | -------------------------------------------------------------------------------- /www/app/data.py: -------------------------------------------------------------------------------- 1 | from app import db, cache 2 | from model import UserInfo 3 | import pickle 4 | from random import seed, randint 5 | from heapq import nlargest 6 | 7 | seed() 8 | cache.clear() 9 | 10 | fr = open('dat/a.dat','rb') 11 | U = pickle.load(fr) # a user_num x 100 mat 12 | unorm = pickle.load(fr) 13 | fr.close() 14 | 15 | #for i in xrange(len(unorm)): 16 | # unorm[i]+=1 17 | 18 | class DUser: 19 | def __init__(self, id, sim): 20 | self.id=id 21 | self.sim=sim 22 | 23 | def __lt__(self, other): 24 | return self.simother.sim 34 | 35 | def __gt__(self, other): 36 | return self.sim>other.sim 37 | 38 | def __ge__(self, other): 39 | return self.sim>=other.sim 40 | 41 | @cache.memoize(timeout=600) 42 | def qualified(db, username): 43 | q=UserInfo.query.filter_by(name=username).first() 44 | # q=db.session.query(UserInfo.name, UserInfo.count).filter(UserInfo.name=username).first() 45 | if q and q.count: 46 | return 1 47 | elif q: 48 | return 0 49 | else: 50 | return -1 51 | 52 | @cache.memoize(timeout=600) 53 | def similarlist(db, username): 54 | q=UserInfo.query.filter_by(name=username).first() 55 | simv=U.dot(U[q.index,:].T).toarray() 56 | qlist=[] 57 | for i in xrange(U.shape[0]): 58 | qlist.append(DUser(id=i, 59 | sim=simv[i][0]/(unorm[q.index]*unorm[i]))) 60 | slist=nlargest(11,qlist) 61 | rlist=[] 62 | for i in xrange(1,11): 63 | q=UserInfo.query.filter_by(index=slist[i].id).first() 64 | rlist.append((q.name,round(_normalize(slist[i].sim),4))) 65 | return rlist 66 | 67 | @cache.memoize(timeout=600) 68 | def getsim(db, username, candidate): 69 | q=UserInfo.query.filter_by(name=username).first() 70 | u=U[q.index,:] 71 | p=UserInfo.query.filter_by(name=candidate).first() 72 | v=U[p.index,:] 73 | return round(_normalize(u.dot(v.T).toarray()[0][0]/(unorm[q.index]*unorm[p.index])),4) 74 | 75 | @cache.memoize(timeout=600) 76 | def getrank(db, username, candidate): 77 | q=UserInfo.query.filter_by(name=username).first() 78 | simv=U.dot(U[q.index,:].T).toarray() 79 | p=UserInfo.query.filter_by(name=candidate).first() 80 | cnt=0 81 | candidatesim = simv[p.index][0]/(unorm[q.index]*unorm[p.index]) 82 | for i in xrange(U.shape[0]): 83 | if candidatesim11: 108 | i=randint(b,e-1) 109 | a[b],a[i]=a[i],a[b] 110 | i=j=1 111 | while i!=e: 112 | if a[b]11: 120 | _qpick_ten(a,b,j) 121 | else: 122 | _qpick_ten(a,j,e) 123 | 124 | def _insort(a,b,e): 125 | for i in xrange(b+1,e): 126 | key=a[i] 127 | j=i-1 128 | while j>=0: 129 | if a[j]>key: 130 | break 131 | a[j+1]=a[j] 132 | j-=1 133 | a[j+1]=key 134 | 135 | def _normalize(num): 136 | return (num+1)/2 -------------------------------------------------------------------------------- /www/app/datacenter.py: -------------------------------------------------------------------------------- 1 | import cPickle 2 | from heapq import nlargest,nsmallest 3 | from numpy import sqrt, array, nonzero 4 | from app import cache 5 | 6 | fr = open('dat/a.dat','rb') 7 | U = cPickle.load(fr) 8 | U2 = cPickle.load(fr) 9 | fr.close() 10 | 11 | class Node: 12 | def __init__(self, id, sim): 13 | self.id=id 14 | self.sim=sim 15 | 16 | def __lt__(self, other): 17 | return self.simother.sim 27 | 28 | def __gt__(self, other): 29 | return self.sim>other.sim 30 | 31 | def __ge__(self, other): 32 | return self.sim>=other.sim 33 | 34 | class DataCenter: 35 | def __init__(self): 36 | pass 37 | 38 | @staticmethod 39 | def Normalize(sim): 40 | return round((sim+1.)/2,4)*100 41 | 42 | @cache.memoize(600) 43 | def TopRank(self, uid, itemmask, usermask): 44 | 45 | M=U[:,itemmask.indices] 46 | 47 | v = M[uid] 48 | vnorm = sqrt(U2[:,itemmask.indices][uid].sum(axis=1)[0,0])+1 49 | M=M[usermask.indices] 50 | 51 | unorm = self._getnorm(itemmask, usermask) 52 | # Now standardize information to lst, include normalize and packaging. 53 | simv = array(M.dot(v.T)/unorm/vnorm).squeeze(1) 54 | 55 | lst = self._generatenodelist(simv, usermask.indices) 56 | 57 | rt = nlargest(11,lst) 58 | rlist = [] 59 | # if uid have favorated some items, it returns a list. 60 | # Otherwise, returns an empty list. 61 | if uid in usermask.indices: 62 | for x in rt[1:]: 63 | rlist.append([x.id, x.sim]) 64 | return rlist 65 | 66 | def GetSimilarity(self, uida, uidb, itemmask): 67 | 68 | M=U[:,itemmask.indices] 69 | M2=U2[:,itemmask.indices] 70 | 71 | va = M[uida] 72 | vb = M[uidb] 73 | n = (sqrt(M2[uida].sum(axis=1)[0,0])+1)*(sqrt(M2[uidb].sum(axis=1)[0,0])+1) 74 | v = va.dot(vb.T) 75 | if v.getnnz()==1: 76 | return v.data[0]/n 77 | else: 78 | return 0 79 | 80 | @cache.memoize(600) 81 | def GetRankOf(self, uida, uidb, sim, itemmask, usermask): 82 | 83 | M=U[:,itemmask.indices] 84 | 85 | va = M[uida] 86 | vnorm = sqrt(U2[:,itemmask.indices][uida].sum(axis=1)[0,0])+1 87 | M=M[usermask.indices] 88 | 89 | simv = M.dot(va.T)/self._getnorm(itemmask, usermask)/vnorm 90 | return nonzero(simv>sim)[0].shape[1]; 91 | 92 | 93 | def GetPosItem(self, uida, uidb, itemmask, usermask): 94 | 95 | va = U[uida] 96 | vb = U[uidb] 97 | va = va.multiply(itemmask) 98 | vb = vb.multiply(itemmask) 99 | x = va.multiply(vb) 100 | x.data=x.data*(x.data>2) 101 | x.eliminate_zeros() 102 | if x.getnnz()==0: 103 | return [] 104 | lst = self._generatenodelist(x.data, x.indices) 105 | rt = nlargest(3,lst) 106 | rtlst = [] 107 | for x in rt: 108 | rtlst.append(x.id) 109 | return rtlst 110 | 111 | 112 | def GetNegItem(self, uida, uidb, itemmask, usermask): 113 | 114 | va = U[uida] 115 | vb = U[uidb] 116 | va = va.multiply(itemmask) 117 | vb = vb.multiply(itemmask) 118 | x = va.multiply(vb) 119 | x.data=x.data*(x.data<-2) 120 | x.eliminate_zeros() 121 | if x.getnnz()==0: 122 | return [] 123 | lst = self._generatenodelist(x.data, x.indices) 124 | rt = nsmallest(3,lst) 125 | rtlst = [] 126 | for x in rt: 127 | rtlst.append(x.id) 128 | return rtlst 129 | 130 | def _generatenodelist(self, data, index): 131 | """ data is an one dim array, index is also an one dim array.""" 132 | lst = [] 133 | for i in xrange(index.shape[0]): 134 | lst.append(Node(id = index[i], sim = data[i])) 135 | return lst 136 | 137 | @cache.memoize(60000) 138 | def _getnorm(self, itemmask, usermask): 139 | """ return a matrix sum(usermask) x 1 """ 140 | M=U2[:,itemmask.indices] 141 | 142 | M=M[usermask.indices] 143 | 144 | return sqrt(M.sum(axis=1))+1 145 | 146 | -------------------------------------------------------------------------------- /www/app/dbcom.py: -------------------------------------------------------------------------------- 1 | import cPickle 2 | from model import UserInfo 3 | from model import ItemInfo 4 | from util import getnickname, getitemname 5 | from app import cache 6 | 7 | fr = open('dat/db.dat','rb') 8 | imask = cPickle.load(fr) 9 | umask = cPickle.load(fr) 10 | fr.close() 11 | 12 | class Connector(object): 13 | @cache.memoize(60000) 14 | def CheckUserexists(self, username): 15 | q = UserInfo.query.get(username) 16 | return not q is None 17 | 18 | @cache.memoize(60000) 19 | def CheckUid(self, username): 20 | q = UserInfo.query.get(username) 21 | return q.index 22 | 23 | @cache.memoize(60000) 24 | def CheckUsername(self, uid): 25 | q = UserInfo.query.filter(UserInfo.index == uid).first() 26 | return q.name 27 | 28 | @cache.memoize(60000) 29 | def CheckNickname(self, username): 30 | return getnickname(username) 31 | 32 | def GenerateItemMask(self, typ): 33 | if typ is None: 34 | typ='all' 35 | return imask[typ] 36 | 37 | def GenerateUserMask(self, typ, acl=None): 38 | if typ is None: 39 | typ='all' 40 | if acl is None: 41 | acl=0 42 | return umask[typ].multiply(umask[int(acl)]) 43 | 44 | @cache.memoize(60000) 45 | def CheckItemid(self, iindex): 46 | q = ItemInfo.query.filter(ItemInfo.index == iindex).first() 47 | return q.i_index 48 | 49 | @cache.memoize(60000) 50 | def CheckItemName(self, iid): 51 | return getitemname(iid) 52 | 53 | def GetTypeCount(self, typ): 54 | if typ is None: 55 | return umask["all"].getnnz() 56 | return umask[typ].getnnz() 57 | -------------------------------------------------------------------------------- /www/app/model.py: -------------------------------------------------------------------------------- 1 | from app import db 2 | 3 | class UserInfo(db.Model): 4 | __tablename__ = 'userinfo' 5 | 6 | name = db.Column(db.String(30), primary_key=True) 7 | index = db.Column(db.Integer) 8 | count = db.Column(db.Integer) 9 | ratecount = db.Column(db.Integer) 10 | average = db.Column(db.Float) 11 | sd = db.Column(db.Float) 12 | 13 | def __repr__(self): 14 | return "" % (self.name) 15 | 16 | class ItemInfo(db.Model): 17 | __tablename__='iteminfo' 18 | i_index = db.Column(db.Integer, primary_key=True) 19 | index = db.Column(db.Integer) 20 | 21 | def __repr__(self): 22 | return "" % ( 23 | self.i_index) -------------------------------------------------------------------------------- /www/app/static/my.js: -------------------------------------------------------------------------------- 1 | (function(){ 2 | $(document).ready(function (){ 3 | $("#candidate-switch").click(function (){ 4 | $(this).hide() 5 | }) 6 | }); 7 | })(); -------------------------------------------------------------------------------- /www/app/static/style.css: -------------------------------------------------------------------------------- 1 | html { 2 | min-height: 100%; 3 | position: relative; 4 | } 5 | body { 6 | margin-bottom: 60px; 7 | } 8 | .footer { 9 | background-color: #f5f5f5; 10 | bottom: 0; 11 | height: 60px; 12 | position: absolute; 13 | width: 100%; 14 | } 15 | .container { 16 | max-width: 1080px; 17 | padding: 0 15px; 18 | width: auto; 19 | } 20 | .container .text-muted { 21 | margin: 20px 0; 22 | } 23 | 24 | #content { 25 | padding-top: 60px; 26 | } 27 | 28 | 29 | .container .text-muted { 30 | margin: 20px 0; 31 | } 32 | 33 | #sim-title { 34 | padding: 0 0 20px 0; 35 | } 36 | 37 | #sim-info{ 38 | padding: 0 0 20px 0; 39 | } 40 | 41 | .jumbotron > h1{ 42 | font-family: 'Righteous', cursive; 43 | } 44 | 45 | .jumbotron > h1 small{ 46 | font-family: 'Poiret One', cursive; 47 | } 48 | 49 | .sharegrp { 50 | display: inline-block; 51 | margin-top: 20px; 52 | } 53 | 54 | #title { 55 | padding-top: 20px; 56 | padding-bottom: 20px; 57 | } 58 | 59 | #quotation { 60 | padding-left: 20px; 61 | margin-top: 20px; 62 | } 63 | 64 | #typenav { 65 | margin-bottom: 10px; 66 | } -------------------------------------------------------------------------------- /www/app/templates/about.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% block content %} 4 | 7 |
    8 |
  • 9 |

    Q: 什么是 Chi?

    10 |

    A: Chi 是 Future Bangumi Labatoratory 的一个中二代号,取名于 Chii,读音为/ˈkaɪ/。同时 Chi 也代表著名的 Chi-squared distribution,意在使用统计学习的方法进行社区网络实验。

    11 |
  • 12 |
  • 13 |

    Q: 什么是 Bangumi?Bangumi 和 Chi 是什么关系?

    14 |

    A: Bangumi 是由 Sai 于桂林发起的 ACG 分享与交流项目,致力于让阿宅们在欣赏ACG作品之余拥有一个轻松便捷独特的交流与沟通环境。
    Chi 是 Bangumi 用户 Genius、小乖 的个人项目,和 Bangumi 官方没有任何关系。其个人项目的动机只是喜欢 Bangumi 而已。当然,Chi 的取名受到了 Bangumi 的启发。

    15 |
  • 16 |
  • 17 |

    Q: 使用 Chi 的条件是什么?

    18 |

    A: 您必须是 Bangumi 用户才能获得自己在 Bangumi 社区的信息。

    19 |
  • 20 |
  • 21 |

    Q: 为什么我已经是 Bangumi 注册用户了,为什么我无法查看我的同步率?

    22 |

    A: 首先请确保您是在 2015年1月15日前注册 Bangumi。由于所有的数据都是在这个日期前爬取的,我们无法对此日期之后的用户收藏和改动进行任何更新。
    然后请您确保在 2015年1月15日前您在 Bangumi 收藏至少一部作品。
    如果您确定您符合以上两条,却仍然无法查看同步率,请务必告诉我们,谢谢!

    23 |
  • 24 |
  • 25 |

    Q: 2015年1月15日之后注册的 Bangumi 用户怎么办?

    26 |

    A: 唔,Chi 归根结底还是一个个人项目,所以我们无法实时对用户收藏的更改进行同步率的更改。目前我们的计划是在一月、四月、七月和十月的第二个星期日开始重新爬取 Bangumi 数据,在这之后一两个星期 Chi 会作出例行更新,届时您就可以查看到自己的社区信息了。

    27 |
  • 28 |
  • 29 |

    Q: 我发现了一个 Bug,怎样报错?

    30 |

    A: 您可以前往该项目的 Github 页面报错,也可以直接在 Bangumi上联系 Genius、小乖

    31 |
  • 32 |
  • 33 |

    Q: 未来 Chi 会推出什么新功能?

    34 |

    A: 我也不知道,大概把 Mining Massive Datasets 上的算法都轮一遍吧。

    35 |
  • 36 |
37 |
38 |

The only thing worse than being talked about is not being talked about.

39 |
Oscar Wilde
40 |
41 |
42 | 53 | {% endblock %} -------------------------------------------------------------------------------- /www/app/templates/couple.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% block content %} 4 |

Hi {{ usernickname }}! 你与 {{ couplenickname }} 的同步率如下:

5 | 13 |
14 | 17 |
18 |
19 | {% if similarity>95 %} 20 |
22 | {{ similarity }}% 23 |
24 | {% elif similarity>80 %} 25 |
27 | {{ similarity }}% 28 |
29 | {% elif similarity<50 %} 30 |
32 | {{ similarity }}% 33 |
34 | {% elif similarity<0.5 %} 35 |
37 | {{ similarity*100 }}% 38 |
39 | {% else %} 40 |
42 | {{ similarity }}% 43 |
44 | {% endif %} 45 |
46 |
47 |
48 | {{ usernickname }} 49 |
50 |
51 | {% if rank %} 52 | 53 |
54 |
55 |

{{ couplenickname }} 在 {{ usernickname }} 和全站有收藏{% if typ=="anime" %}动画{% elif typ=='book' %}书籍{% elif typ=='music' %}音乐{% elif typ=='game' %}游戏{% elif typ=='real' %}三次元{% endif %}条目的 BGMer 同步率中排名处于排名第 {{ rank }} 位的位置,超过了 {{ rankpercent }}% 的 BGMer!

56 |
57 |
59 | {{ rankpercent }}% 60 |
61 |
62 |
63 |
64 | 65 | 66 | 67 |
68 |
69 |

与此同时,{{ usernickname }} 在 {{ couplenickname }} 的全站有收藏{% if typ=="anime" %}动画{% elif typ=='book' %}书籍{% elif typ=='music' %}音乐{% elif typ=='game' %}游戏{% elif typ=='real' %}三次元{% endif %}条目的用户同步率排名中名列第 {{ inverserank }} 位,击败了 {{ inverserankpercent }}% 的基/姬友!(喂

70 |
71 |
73 | {{ inverserankpercent }}% 74 |
75 |
76 |
77 |
78 | 79 |
80 |
81 | {% endif %} 82 | {% if similarity>=50 %} 83 | {% if feedbacklst.__len__()==3 %} 84 |

你们在{{ feedbacklst[0][1] }}{{ feedbacklst[1][1] }}{{ feedbacklst[2][1] }}等作品上看法一致。

85 | {% elif feedbacklst.__len__()==2 %} 86 |

你们在{{ feedbacklst[0][1] }}{{ feedbacklst[1][1] }}两部作品上看法一致。

87 | {% elif feedbacklst.__len__()==1 %} 88 |

你们在{{ feedbacklst[0][1] }}一部作品上看法一致。

89 | {% endif %} 90 | {% else %} 91 | {% if feedbacklst.__len__()==3 %} 92 |

你们在{{ feedbacklst[0][1] }}{{ feedbacklst[1][1] }}{{ feedbacklst[2][1] }}等作品上持有相反评价。

93 | {% elif feedbacklst.__len__()==2 %} 94 |

你们在{{ feedbacklst[0][1] }}{{ feedbacklst[1][1] }}两部作品上持有相反评价。

95 | {% elif feedbacklst.__len__()==1 %} 96 |

你们在{{ feedbacklst[0][1] }}一部作品上持有相反评价。

97 | {% endif %} 98 | {% endif %} 99 |
100 |
101 | 102 |
103 |
104 | 105 | 106 | 107 | 108 |
109 | 110 | 113 | 114 | 115 | 116 | 117 |
118 |
119 | {% endblock %} -------------------------------------------------------------------------------- /www/app/templates/index.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% block content %} 4 |
5 |

Chi Future Bangumi laboratory

6 |

Chi 是一个致力于网络社区实验的个人项目。该项目拥有者为 BGMer Genius、小乖。该项目将利用当前成熟的机器学习研究成果为 BGMer 带来更好的社区用户体验。

7 |

了解更多……

8 |
9 |
10 |
11 |

同步率・改

12 |

利用您的收藏和评分信息为您寻找 bgm 同好,查看您与您好友之间的同步率。

13 |

了解更多……

14 |
15 |
16 |

快将推出……

17 |

了解更多……

18 |
19 |
20 |

快将推出……

21 |

了解更多……

22 |
23 |
24 |
25 |

‚Alle gebruiksters van Bangumi-chan zijn vriendinnen.’

26 | 27 |
28 | {% endblock %} -------------------------------------------------------------------------------- /www/app/templates/layout.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Chi: Future Bangumi Laboratory 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 40 |
41 | {% block content %}{% endblock %} 42 |
43 |
44 |
45 |
46 |

© 2015 Ronnie Wang avec l'aide de Bootstrap.

47 |
48 |
49 | 50 | 51 | 52 | 53 | 54 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /www/app/templates/plz_favorite_request.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% block content %} 4 |

同步率・改

5 |

对不起……

6 |

  嗯,看来你还没有收藏过任何条目!这样你与任何人的同步率都是 50%。快去 Bangumi 收藏几个条目,让大家更好地认识你。

7 | {% endblock %} -------------------------------------------------------------------------------- /www/app/templates/similarity.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% block content %} 4 |

同步率・改

5 |
6 |

以您在 Bangumi 的作品收藏和评分信息为依据,用前沿的算法计算出您与其他 BGMer 的更加准确的同步率。

7 |
8 | {% if error %} 9 | 13 | {% endif%} 14 |
15 |
16 | 17 |
18 | 19 |
20 | 23 | 26 | 29 |

注:活跃的时间计算起点是2015年1月15日,在此日基础上往前推移。活跃的标准是在时间段内至少收藏过一部作品。

30 |
31 |
32 |
33 | 34 |
35 | 36 |
37 | 38 |
39 |
40 |
41 |
42 | 43 | 44 |
45 |
46 |
47 | {% endblock %} -------------------------------------------------------------------------------- /www/app/templates/single.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% block content %} 4 |

同步率・改

5 |

Hi {{ usernickname }}! 与你同步率最高的前十位 BGMer 如下:

6 | 14 | {% if simlist.__len__() %} 15 | {% for x in simlist %} 16 |
17 |
18 | {{ x[1] }} 19 |
20 |
21 |
22 | {% if x[2]>95 %} 23 |
25 | {{ x[2] }}% 26 |
27 | {% elif x[2]>80 %} 28 |
30 | {{ x[2] }}% 31 |
32 | {% elif x[2]<50 %} 33 |
35 | {{ x[2] }}% 36 |
37 | {% else %} 38 |
40 | {{ x[2] }}% 41 |
42 | {% endif %} 43 |
44 |
45 |
46 | {% endfor %} 47 | {% endif %} 48 |
49 |
50 | 53 | 54 | 71 |
72 | 73 |
74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 |
83 | 84 | 87 |
88 |
89 | {% endblock %} -------------------------------------------------------------------------------- /www/app/util.py: -------------------------------------------------------------------------------- 1 | import re 2 | from urlparse import urlparse 3 | import requests # Yes, you need to install that 4 | from app import cache 5 | 6 | def validateform(username): 7 | 8 | if username==None: 9 | return None 10 | else: 11 | if username.startswith('http://') or username.startswith('https://'): 12 | try: 13 | username = urlparse(username).path.split('/')[-1] 14 | except IndexError: 15 | return None 16 | if not re.match(r'^(?!_)[a-zA-Z0-9_]+$',username): 17 | return None 18 | else: 19 | return username 20 | 21 | @cache.memoize(timeout=172800) 22 | def getnickname(username): 23 | r = requests.get("http://api.bgm.tv/user/"+username) 24 | return r.json()['nickname'] 25 | 26 | @cache.memoize(timeout=172800) 27 | def getitemname(itemidx): 28 | r = requests.get("http://api.bgm.tv/subject/"+str(itemidx)) 29 | j = r.json() 30 | if len(j['name_cn']): 31 | return j['name_cn'] 32 | else: 33 | return j['name'] -------------------------------------------------------------------------------- /www/app/views.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from flask import render_template, flash, redirect, session, url_for, request, g 3 | from flask.ext.cors import cross_origin 4 | from app import app, cors, cache 5 | from util import validateform, getnickname 6 | from connector import Controller 7 | 8 | c=Controller() 9 | cache.clear() 10 | 11 | @app.route('/') 12 | def index(): 13 | return render_template("index.html"); 14 | 15 | @app.route('/about/') 16 | def about(): 17 | return render_template("about.html") 18 | 19 | @app.route('/similarity/', methods=['POST', 'GET']) 20 | def similarity(): 21 | if request.method=='POST': 22 | username = request.form.get('username').strip() 23 | candidate = request.form.get('candidate').strip() 24 | acl = request.form.get('acl') 25 | if not validateform(username): 26 | return render_template('similarity.html', error=u'请输入正确的用户名或时光机 URL!') 27 | 28 | username = validateform(username) 29 | if not candidate: 30 | if c.UserExist(username): 31 | return redirect(url_for('user', username=username, acl=acl)) 32 | else: 33 | return render_template('similarity.html', error=u'啊,非常抱歉,我们找不到您的记录。有可能是由于我们数据库未及时更新或者您未注册 Bangumi。') 34 | elif not validateform(candidate): 35 | return render_template('similarity.html', username=username, error=u'请输入正确的用户名或时光机 URL!') 36 | else: 37 | if c.UserExist(username) and c.UserExist(validateform(candidate)): 38 | return redirect(url_for('user', username=username, candidate=validateform(candidate), acl=acl)) 39 | else: 40 | return render_template('similarity.html', error=u'啊,非常抱歉,我们找不到您的记录。有可能是由于我们数据库未及时更新或者您未注册 Bangumi。') 41 | else: 42 | error=session.get("error") 43 | if error: 44 | session.pop("error") 45 | return render_template('similarity.html',error=error) 46 | 47 | 48 | 49 | 50 | @app.route('/similarity/') 51 | @cross_origin() 52 | def user(username): 53 | typ = request.args.get('typ') 54 | if typ not in ['anime','book','music','game','real']: 55 | typ=None 56 | acl = request.args.get('acl') 57 | if acl not in ['1','2','3']: 58 | acl=None 59 | #acl=int(acl) 60 | if not request.args.get('candidate'): 61 | 62 | if c.UserRecords(username): 63 | 64 | lst = c.GetTopRank(username, typ, acl) 65 | un = c.GetUsernickname(username) 66 | return render_template('single.html',username=username, usernickname=un, simlist=lst, typ=typ, acl=acl) 67 | else: 68 | un = c.GetUsernickname(username) 69 | return render_template("single.html",username=username, usernickname=un, simlist=[], typ=typ, acl=acl) 70 | 71 | else: 72 | candidate = request.args['candidate'] 73 | if c.UserRecords(username) and c.UserRecords(candidate): 74 | ntotal = c.GetCount(typ) 75 | (nu,nc,sim,ru,rc) = c.GetCouple(username, candidate, typ) 76 | if ru==0 or rc==0: 77 | return render_template('couple.html',username=username, \ 78 | candidate=candidate, \ 79 | typ = typ, \ 80 | usernickname=nu, \ 81 | couplenickname=nc, \ 82 | similarity=sim) 83 | else: 84 | if sim>50.0: 85 | feedbacklst = c.GetFeedback(username, candidate, typ) 86 | else: 87 | feedbacklst = c.GetNegFeedback(username, candidate, typ) 88 | return render_template('couple.html',username=username, \ 89 | candidate=candidate, \ 90 | typ = typ, \ 91 | usernickname=nu, \ 92 | couplenickname=nc, \ 93 | similarity=sim, \ 94 | rank=ru, rankpercent=round((ntotal-ru)*100./ntotal,2), \ 95 | inverserank=rc, inverserankpercent=round((ntotal-rc)*100./ntotal,2), \ 96 | feedbacklst=feedbacklst) 97 | else: 98 | nu = c.GetUsernickname(username) 99 | nc = c.GetUsernickname(candidate) 100 | return render_template('couple.html',username=username, \ 101 | candidate=candidate, \ 102 | typ = typ, \ 103 | usernickname=nu, \ 104 | couplenickname=nc, \ 105 | similarity=50.00) 106 | 107 | 108 | 109 | -------------------------------------------------------------------------------- /www/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | basedir = os.path.abspath(os.path.dirname(__file__)) 4 | 5 | MYSQL_HOST = 'localhost' 6 | MYSQL_DBNAME = 'bgm-info' 7 | MYSQL_USER = 'bgmer' 8 | MYSQL_PASSWD = 'sai' -------------------------------------------------------------------------------- /www/run.py: -------------------------------------------------------------------------------- 1 | #!ve/bin/python 2 | from app import app 3 | 4 | if __name__=="__main__": 5 | app.run(host="0.0.0.0",debug=True) 6 | 7 | --------------------------------------------------------------------------------