├── models ├── __init__.py ├── taster_exceptions.py └── data_models.py ├── utils ├── __init__.py ├── config_utils.py ├── db_utils.py ├── cloudmusic_dao.py ├── encrypt_utils.py ├── logger_utils.py └── cloudmusic_api.py ├── api_server ├── __init__.py ├── flask_app.py ├── static │ └── js │ │ └── music_taster.js └── templates │ └── demo.html ├── pipelines ├── __init__.py ├── fetch_data.py ├── update_infos.py └── fetch_user_data.py ├── .gitignore ├── requirements.txt ├── song2vec ├── __init__.py ├── song2vec_operator.py └── rock_gensim.py ├── LICENSE └── readme.md /models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /api_server/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.json 3 | log/ 4 | *.log 5 | .idea/ 6 | datas/ -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | pyopenssl 3 | pycrypto 4 | gensim==0.12.4 5 | sklearn -------------------------------------------------------------------------------- /song2vec/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | """ 4 | Created by jayvee on 17/2/16. 
5 | """ -------------------------------------------------------------------------------- /pipelines/fetch_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import sys 4 | 5 | abs_path = os.path.dirname(os.path.abspath(__file__)) 6 | abs_father_path = os.path.dirname(abs_path) 7 | PROJECT_PATH = abs_father_path 8 | print 'Used file: %s\n project path=%s' % (__file__, PROJECT_PATH) 9 | sys.path.append(PROJECT_PATH) 10 | -------------------------------------------------------------------------------- /models/taster_exceptions.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | """ 4 | Created by jayvee on 16/12/22. 5 | """ 6 | 7 | 8 | class NonDataException(IOError): 9 | """ 10 | 无法获取到数据时的异常 11 | """ 12 | 13 | def __init__(self, msg): 14 | self.message = msg 15 | 16 | def __str__(self): 17 | return self.message 18 | -------------------------------------------------------------------------------- /utils/config_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import sys 5 | 6 | abs_path = os.path.dirname(os.path.abspath(__file__)) 7 | abs_father_path = os.path.dirname(abs_path) 8 | PROJECT_PATH = abs_father_path 9 | print 'Used file: %s\n project path=%s' % (__file__, PROJECT_PATH) 10 | sys.path.append(PROJECT_PATH) 11 | 12 | 13 | # config_info = {} 14 | 15 | 16 | def get_config(): 17 | with open('%s/config.json' % PROJECT_PATH, 'r') as fin: 18 | config_info = json.loads(fin.read()) 19 | return config_info 20 | 21 | 22 | def get_db_config(): 23 | with open('%s/db_config.json' % PROJECT_PATH, 'r') as fin: 24 | db_config_info = json.loads(fin.read()) 25 | return db_config_info 26 | -------------------------------------------------------------------------------- /utils/db_utils.py: -------------------------------------------------------------------------------- 1 | import 
json 2 | import pymongo 3 | 4 | from utils.config_utils import get_db_config 5 | 6 | __author__ = 'jayvee' 7 | 8 | db_config = get_db_config() 9 | DB_IP = db_config['db_ip'] 10 | DB_PORT = db_config['db_port'] 11 | 12 | 13 | def get_db_inst(db_name, collection_name): 14 | """ 15 | get mongoDB instance by db name and collection name 16 | Args: 17 | db_name: 18 | collection_name: 19 | 20 | Returns: 21 | db instance 22 | 23 | """ 24 | client = pymongo.MongoClient(DB_IP, DB_PORT) 25 | try: 26 | db_inst = client.get_database(db_name).get_collection(collection_name) 27 | return db_inst 28 | except Exception, e: 29 | print 'error, details=%s' % (e) 30 | 31 | 32 | def create_index(db_name, collection_name, index_conf): 33 | db_inst = get_db_inst(db_name, collection_name) 34 | print db_inst.create_indexes(index_conf) 35 | 36 | 37 | def find_all(find_filter, db_inst, sort_filter=None): 38 | MAX_COUNT = db_inst.find(find_filter).count() 39 | 40 | if not sort_filter: 41 | db_inst.find(find_filter) 42 | 43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Jayvee He 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Music Taster 2 | 顾名思义,这个项目用来挖掘音乐风格 3 | 大部分是基于歌单进行的关系挖掘,暂不涉及音频分析 4 | 5 | ### 动机 6 | 网易云音乐的红心歌单曲目太多了,想做点归类。 7 | 8 | ### 包含功能 9 | 1. 实现了歌单、歌曲详情的爬取与存储 10 | 2. 实现了Song2Vec、Artist2Vec 11 | 3. 实现歌曲、歌手的风格近似计算 12 | 4. 实现歌单下歌曲、歌手的聚类 13 | 5. 附带一个基于flask的API webserver 14 | 15 | ## 目录结构 16 | 1. models——用于存储数据类型对象的相关类文件 17 | 2. utils——基本的工具类 18 | 3. pipelines——存放各工作流程的脚本 19 | 4. datas——存放训练后的模型数据文件 20 | 5. api_server——基于flask的api server 21 | 22 | ## 环境需求 23 | 1. 如果进行数据爬取,则需要一个MongoDB实例进行数据管理 24 | 2. 安装`requirements.txt`下的依赖包 25 | 26 | ## Demo 27 | 28 | [http://api.jayveehe.com/musictaster](http://api.jayveehe.com/musictaster) 29 | 30 | ## Data 31 | Google Drive 32 | - [Artists Seq Data](https://drive.google.com/file/d/1fO4BkXBB9Rf5DsF7kggr6lROA4gmAB3Z/view?usp=sharing) 33 | - [Songs Seq Data](https://drive.google.com/file/d/1_kwmQ87kz3kHIRcAUdFaXY0_x2KCMyBw/view?usp=sharing) 34 | - [Artists x Songs Seq Data](https://drive.google.com/file/d/1IHetYu7Lrd_6jVurmq3_0oZ-OalEk5w2/view?usp=sharing) 35 | 36 | 使用方法: 37 | 1. 下载对应的dat文件 38 | 2. 
cPickle.load() 39 | 40 | ## Demo API Doc 41 | [https://github.com/JayveeHe/MusicTaster/wiki/Music-Taster-Demo-API-Doc](https://github.com/JayveeHe/MusicTaster/wiki/Music-Taster-Demo-API-Doc) 42 | 43 | ## 使用MIT License 44 | [MIT License](https://github.com/JayveeHe/MusicTaster/blob/master/LICENSE) 45 | 46 | -------------------------------------------------------------------------------- /utils/cloudmusic_dao.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | """ 4 | 负责进行云音乐的数据库相关操作 5 | 6 | Created by jayvee on 16/12/24. 7 | """ 8 | from utils.db_utils import get_db_inst 9 | from utils.logger_utils import data_process_logger 10 | 11 | 12 | class CloudMusicDAO: 13 | def __init__(self, db_name, collection_name): 14 | self.db_name = db_name 15 | self.collection_name = collection_name 16 | self.db_inst = get_db_inst(self.db_name, self.collection_name) 17 | 18 | def save_unique_item(self, data_obj, primary_key='userId', is_overwrite=False, is_inform=False): 19 | """ 20 | 存储数据对象,并避免重复存储 21 | Args: 22 | data_obj: 23 | primary_key: 24 | is_overwrite: 25 | 26 | Returns: 27 | 28 | """ 29 | find_result = self.db_inst.find_one({primary_key: data_obj[primary_key]}, {primary_key: 1}) 30 | # is_exist = user_dbinst.find({'userId': userinfo['userId']}).count() != 0 31 | # print find_result.count() 32 | 33 | if not find_result: 34 | self.db_inst.insert(data_obj) 35 | elif is_overwrite: 36 | self.db_inst.update({primary_key: data_obj[primary_key]}, data_obj) 37 | if is_inform: 38 | data_process_logger.warn( 39 | 'overwrite item %s in %s' % (data_obj[primary_key], self.collection_name)) 40 | else: 41 | if is_inform: 42 | data_process_logger.warn( 43 | 'Item %s exist! 
in %s' % (data_obj[primary_key], self.collection_name)) 44 | -------------------------------------------------------------------------------- /utils/encrypt_utils.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import json 3 | import os 4 | 5 | from Crypto.Cipher import AES 6 | 7 | modulus = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7' 8 | nonce = '0CoJUm6Qyw8W8jud' 9 | pubKey = '010001' 10 | 11 | 12 | def createSecretKey(size): 13 | return (''.join(map(lambda xx: (hex(ord(xx))[2:]), os.urandom(size))))[0:16] 14 | 15 | 16 | def aesEncrypt(text, secKey): 17 | pad = 16 - len(text) % 16 18 | text = text + pad * chr(pad) 19 | encryptor = AES.new(secKey, 2, '0102030405060708') 20 | ciphertext = encryptor.encrypt(text) 21 | ciphertext = base64.b64encode(ciphertext) 22 | return ciphertext 23 | 24 | 25 | def rsaEncrypt(text, pubKey, modulus): 26 | text = text[::-1] 27 | rs = int(text.encode('hex'), 16) ** int(pubKey, 16) % int(modulus, 16) 28 | return format(rs, 'x').zfill(256) 29 | 30 | 31 | def encrypted_request(text): 32 | text = json.dumps(text) 33 | secKey = createSecretKey(16) 34 | encText = aesEncrypt(aesEncrypt(text, nonce), secKey) 35 | encSecKey = rsaEncrypt(secKey, pubKey, modulus) 36 | data = { 37 | 'params': encText, 38 | 'encSecKey': encSecKey 39 | } 40 | return data 41 | 42 | 43 | def test(): 44 | print encrypted_request( 45 | 'htPNp6MUYqXd/c2YsIovhhmQnn/5Y62aIPK6CTyRDLAA8okWvdwz6UC58AC2pe+tk6A9B9DgEG9H6m9Yt7mzRQyB3nWjvdvrXSiUxxY0BzMzilEP+2RO7LToRLfPpLRY9Y7+/YuO/9iIoPFzArhn8pnAS+r5UbQ7wZTWQ6iUd1cfu+A557dS5w2GmybQuXOYq8BFva9j3vj/4Cy4k3s4fmnj4z2XrZmFZn9Ngdy2ppY=') 46 | 47 | 48 | if __name__ == '__main__': 49 | test() 50 | 
-------------------------------------------------------------------------------- /pipelines/update_infos.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | """ 4 | Created by jayvee on 17/2/14. 5 | """ 6 | import random 7 | 8 | import time 9 | 10 | from utils.cloudmusic_api import song_comments 11 | from utils.cloudmusic_dao import CloudMusicDAO 12 | from utils.logger_utils import data_process_logger 13 | 14 | 15 | def update_userinfo(): 16 | """ 17 | 临时更新数据库的脚本 18 | Returns: 19 | 20 | """ 21 | DAO_inst = CloudMusicDAO('MusicTaster', 'UserInfos') 22 | uids = DAO_inst.db_inst.distinct('userId') 23 | count = 0 24 | for uid in uids: 25 | userinfo = DAO_inst.db_inst.find_one({'userId': uid}) 26 | userinfo['follow_count'] = len(userinfo['follow_ids']) 27 | userinfo['fan_count'] = len(userinfo['fan_ids']) 28 | DAO_inst.save_unique_item(userinfo, primary_key='userId', is_overwrite=True) 29 | data_process_logger.info('No.%s %s-%s' % (count, userinfo['userId'], userinfo['nickname'])) 30 | count += 1 31 | print 'done' 32 | 33 | 34 | def fill_song_comments(): 35 | """ 36 | 填充歌曲的评论详情 37 | Returns: 38 | 39 | """ 40 | dao_inst = CloudMusicDAO('MusicTaster', 'SongInfos') 41 | find_result = dao_inst.db_inst.find({'commentInfo': {'$exists': False}}) 42 | count = 0 43 | for song_item in find_result: 44 | comm_data = song_comments(song_item['commentThreadId'], limit=10) 45 | if comm_data: # 确保评论详情读取正确 46 | del comm_data['code'] 47 | # del comm_data['userId'] 48 | song_item['commentInfo'] = comm_data 49 | song_item['commentCount'] = comm_data['total'] 50 | dao_inst.db_inst.save(song_item) 51 | data_process_logger.info( 52 | 'No.%s %s, comments: %s done' % (count, song_item['name'], song_item['commentCount'])) 53 | count += 1 54 | slp = random.random() * 2 + 1 55 | data_process_logger.info('sleep %s sec' % slp) 56 | time.sleep(slp) 57 | 58 | 59 | if __name__ == '__main__': 60 | while 1: 61 | try: 62 | fill_song_comments() 63 
| except Exception, e: 64 | print 'error %s' % e 65 | continue 66 | -------------------------------------------------------------------------------- /models/data_models.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from utils.cloudmusic_api import user_playlist, user_profile, song_detail 3 | from utils.logger_utils import data_process_logger 4 | 5 | """ 6 | Created by jayvee on 16/12/22. 7 | """ 8 | 9 | 10 | class InfoObj: 11 | """ 12 | 基础的信息类 13 | """ 14 | 15 | def __init__(self): 16 | pass 17 | 18 | def fill_details(self): 19 | """ 20 | 填充信息类的信息 21 | :return: 22 | """ 23 | pass 24 | 25 | 26 | class User(InfoObj): 27 | """ 28 | 用户类 29 | 30 | """ 31 | 32 | def __init__(self, uid): 33 | InfoObj.__init__(self) 34 | self.uid = uid 35 | self.playlist = [] 36 | self.details = {} 37 | self.__has_details = False 38 | # get user info 39 | 40 | def __fill_user_playlist(self): 41 | """ 42 | 填充用户歌单信息 43 | :return: None 44 | """ 45 | # get user playlist 46 | pl = user_playlist(self.uid) 47 | if pl != -1: 48 | self.playlist = pl 49 | else: 50 | data_process_logger.error('cannot get the playlist of user %s' % self.uid) 51 | 52 | def __fill_user_details(self): 53 | """ 54 | 填充用户信息 55 | :return: None 56 | """ 57 | u_details = user_profile(self.uid) 58 | if u_details != -1: 59 | self.details = u_details 60 | self.__has_details = True 61 | else: 62 | data_process_logger.error('cannot get the details of user %s' % self.uid) 63 | 64 | def fill_details(self): 65 | self.__fill_user_details() 66 | self.__fill_user_playlist() 67 | 68 | def __str__(self): 69 | return str(self.details) 70 | 71 | 72 | 73 | class Song(InfoObj): 74 | """ 75 | 歌曲类 76 | """ 77 | 78 | def __init__(self, sid): 79 | InfoObj.__init__(self) 80 | self.sid = sid 81 | self.details = {} 82 | self.__has_details = False 83 | 84 | def __fill_song_details(self): 85 | sd = song_detail(self.sid) 86 | if sd != -1: 87 | self.details = sd 88 | self.__has_details = True 
89 | else: 90 | data_process_logger.error('cannot get the details of song %s' % self.sid) 91 | 92 | def fill_details(self): 93 | self.__fill_song_details() 94 | 95 | def __str__(self): 96 | return str(self.details) 97 | -------------------------------------------------------------------------------- /utils/logger_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import logging 3 | from logging.handlers import RotatingFileHandler 4 | import os 5 | 6 | # 用以控制是否输出到屏幕,线上环境不输出到屏幕 7 | DebugConf = True 8 | # DebugConf = False 9 | 10 | abs_path = os.path.dirname(os.path.abspath(__file__)) 11 | abs_father_path = os.path.dirname(abs_path) 12 | log_dir_path = abs_father_path + '/log' 13 | if not os.path.exists(log_dir_path): 14 | os.makedirs(log_dir_path) 15 | 16 | data_analysis_logger = logging.getLogger('data_analysis') 17 | data_process_logger = logging.getLogger('data_process') 18 | model_logger = logging.getLogger('model') 19 | 20 | formatter = logging.Formatter( 21 | '[%(asctime)s][pid:%(process)s-tid:%(thread)s] %(module)s.%(funcName)s: %(levelname)s: %(message)s') 22 | 23 | # StreamHandler for print log to console 24 | hdr = logging.StreamHandler() 25 | hdr.setFormatter(formatter) 26 | hdr.setLevel(logging.DEBUG) 27 | 28 | # RotatingFileHandler 29 | fhr_ana = RotatingFileHandler('%s/analysis.log' % (log_dir_path), maxBytes=10 * 1024 * 1024, backupCount=3) 30 | fhr_ana.setFormatter(formatter) 31 | fhr_ana.setLevel(logging.DEBUG) 32 | 33 | # RotatingFileHandler 34 | fhr_pro = RotatingFileHandler('%s/process.log' % (log_dir_path), maxBytes=10 * 1024 * 1024, backupCount=3) 35 | fhr_pro.setFormatter(formatter) 36 | fhr_pro.setLevel(logging.DEBUG) 37 | 38 | # RotatingFileHandler 39 | fhr_model = RotatingFileHandler('%s/model.log' % (log_dir_path), maxBytes=10 * 1024 * 1024, backupCount=3) 40 | fhr_model.setFormatter(formatter) 41 | fhr_model.setLevel(logging.DEBUG) 42 | 43 | 
data_analysis_logger.addHandler(fhr_ana) 44 | if DebugConf: 45 | data_analysis_logger.addHandler(hdr) 46 | data_analysis_logger.setLevel(logging.DEBUG) # lowest debug level for logger 47 | else: 48 | data_analysis_logger.setLevel(logging.ERROR) # lowest debug level for logger 49 | 50 | data_process_logger.addHandler(fhr_pro) 51 | if DebugConf: 52 | data_process_logger.addHandler(hdr) 53 | data_process_logger.setLevel(logging.DEBUG) 54 | else: 55 | data_process_logger.setLevel(logging.ERROR) 56 | 57 | model_logger.addHandler(fhr_model) 58 | if DebugConf: 59 | model_logger.addHandler(hdr) 60 | model_logger.setLevel(logging.DEBUG) 61 | else: 62 | model_logger.setLevel(logging.ERROR) 63 | 64 | if __name__ == '__main__': 65 | ''' 66 | Usage: 67 | from tools.log_tools import data_process_logger as logger 68 | logger.debug('debug debug') 69 | ''' 70 | data_analysis_logger.debug('My logger configure success') 71 | data_analysis_logger.info('My logger configure success') 72 | data_analysis_logger.error('analysis error test') 73 | 74 | data_process_logger.info('My logger configure success~~') 75 | data_process_logger.error('process error test test') 76 | 77 | model_logger.info('Ohhh model') 78 | model_logger.error('error model') 79 | -------------------------------------------------------------------------------- /api_server/flask_app.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | """ 4 | Created by jayvee on 17/2/19. 
5 | https://github.com/JayveeHe 6 | """ 7 | import json 8 | 9 | import re 10 | from flask import Flask, render_template, request, make_response 11 | 12 | import os 13 | import sys 14 | 15 | abs_path = os.path.dirname(os.path.abspath(__file__)) 16 | abs_father_path = os.path.dirname(abs_path) 17 | PROJECT_PATH = abs_father_path 18 | print 'Used file: %s\nProject path=%s' % (__file__, PROJECT_PATH) 19 | sys.path.append(PROJECT_PATH) 20 | # add flask path 21 | sys.path.append(os.path.dirname(os.path.abspath(__file__))) 22 | 23 | from song2vec.song2vec_operator import Song2VecOperator 24 | from utils.logger_utils import data_process_logger 25 | 26 | app = Flask(__name__) 27 | 28 | data_process_logger.info('initing song2vec operator') 29 | s2v_operator = Song2VecOperator( 30 | song2vec_model_path='%s/datas/[full]50d_20iter_10win_5min_song2vec.model' % PROJECT_PATH, 31 | artist2vec_model_path='%s/datas/[full]50d_20iter_10win_5min_artist2vec.model' % PROJECT_PATH) 32 | data_process_logger.info('complete init song2vec') 33 | 34 | 35 | @app.route('/musictaster') 36 | def hello_world(): 37 | return render_template("demo.html") 38 | 39 | 40 | @app.route('/musictaster/similar/song', methods=['POST']) 41 | @app.route('/musictaster/similar/song/', methods=['GET']) 42 | def query_similar_songs(song_name=None): 43 | """ 44 | 查询最近似的歌曲,方法可以为GET或POST 45 | Args: 46 | song_name: 47 | 48 | Returns: 49 | 50 | """ 51 | try: 52 | if request.method == 'GET': 53 | top_n = int(request.args.get('top_n')) if request.args.get('top_n') else 10 54 | sim_res = s2v_operator.song2vec_model.most_similar(song_name.lower(), topn=top_n) 55 | elif request.method == 'POST': 56 | req_data_obj = json.loads(request.data) 57 | # 获取各组加减信息,并取小写字母(英文) 58 | positive_songs = lower_array(req_data_obj.get('positive_songs')) if req_data_obj.get( 59 | 'positive_songs') else [] 60 | negative_songs = lower_array(req_data_obj.get('negative_songs')) if req_data_obj.get( 61 | 'negative_songs') else [] 62 | positive_artists 
= lower_array(req_data_obj.get('positive_artists')) if req_data_obj.get( 63 | 'positive_artists') else [] 64 | negative_artists = lower_array(req_data_obj.get('negative_artists')) if req_data_obj.get( 65 | 'negative_artists') else [] 66 | top_n = int(req_data_obj.get('top_n')) if req_data_obj.get('top_n') else 10 67 | sim_res = s2v_operator.calc_song_similar(positive_songs=positive_songs, 68 | negative_songs=negative_songs, 69 | positive_artists=positive_artists, 70 | negative_artists=negative_artists, 71 | topn=top_n) 72 | else: 73 | sim_res = [] 74 | # parse similar result 75 | parsed_sim_res = [{'name': a[0], 'similarity': a[1]} for a in sim_res] 76 | result = {'code': 200, 'result': parsed_sim_res} 77 | resp = make_response(json.dumps(result, ensure_ascii=False), 200) 78 | except Exception, e: 79 | res = {'code': 400, 'error_msg': e.message} 80 | resp = make_response(json.dumps(res, ensure_ascii=False), 200) 81 | resp.mimetype = 'application/json' 82 | return resp 83 | 84 | 85 | @app.route('/musictaster/similar/artist', methods=['POST']) 86 | @app.route('/musictaster/similar/artist/', methods=['GET']) 87 | def query_similar_artist(artist_name=None): 88 | try: 89 | if request.method == 'GET': 90 | top_n = int(request.args.get('top_n')) if request.args.get('top_n') else 10 91 | sim_res = s2v_operator.artist2vec_model.most_similar(artist_name.lower(), topn=top_n) 92 | elif request.method == 'POST': 93 | req_data_obj = json.loads(request.data) 94 | # 获取各组加减信息,并取小写字母(英文) 95 | positive_songs = lower_array(req_data_obj.get('positive_songs')) if req_data_obj.get( 96 | 'positive_songs') else [] 97 | negative_songs = lower_array(req_data_obj.get('negative_songs')) if req_data_obj.get( 98 | 'negative_songs') else [] 99 | positive_artists = lower_array(req_data_obj.get('positive_artists')) if req_data_obj.get( 100 | 'positive_artists') else [] 101 | negative_artists = lower_array(req_data_obj.get('negative_artists')) if req_data_obj.get( 102 | 'negative_artists') else [] 
103 | top_n = req_data_obj.get('top_n') if req_data_obj.get('top_n') else 10 104 | sim_res = s2v_operator.calc_artist_similar(positive_songs=positive_songs, 105 | negative_songs=negative_songs, 106 | positive_artists=positive_artists, 107 | negative_artists=negative_artists, 108 | topn=top_n) 109 | else: 110 | sim_res = [] 111 | # parse similar result 112 | parsed_sim_res = [{'name': a[0], 'similarity': a[1]} for a in sim_res] 113 | result = {'code': 200, 'result': parsed_sim_res} 114 | resp = make_response(json.dumps(result, ensure_ascii=False), 200) 115 | except Exception, e: 116 | res = {'code': 400, 'error_msg': e.message} 117 | resp = make_response(json.dumps(res, ensure_ascii=False), 200) 118 | resp.mimetype = 'application/json' 119 | return resp 120 | 121 | 122 | @app.route('/musictaster/cluster/playlist/id/', methods=['GET']) 123 | def cluster_playlist_by_plid(plid=None): 124 | try: 125 | if request.args.get('cluster_n'): 126 | cluster_n = eval(request.args.get('cluster_n')) 127 | else: 128 | cluster_n = 5 129 | if request.args.get('type'): 130 | cluster_type = request.args.get('type') 131 | else: 132 | cluster_type = 'song' 133 | if cluster_type == 'artist': 134 | cluster_res, playlist_name = s2v_operator.cluster_artist_in_playlist(plid, cluster_n=cluster_n) 135 | else: 136 | cluster_res, playlist_name = s2v_operator.cluster_song_in_playlist(plid, cluster_n=cluster_n) 137 | result = {'code': 200, 'result': cluster_res, 'playlist_name': playlist_name, 'type': cluster_type} 138 | resp = make_response(json.dumps(result, ensure_ascii=False), 200) 139 | resp.mimetype = 'application/json' 140 | except Exception, e: 141 | res = {'code': 400, 'error_msg': e.message} 142 | resp = make_response(json.dumps(res, ensure_ascii=False), 200) 143 | resp.mimetype = 'application/json' 144 | return resp 145 | 146 | 147 | @app.route('/musictaster/cluster/playlist/url', methods=['POST']) 148 | def cluster_playlist_by_url(): 149 | try: 150 | if len(request.data): 151 | req_obj = 
json.loads(request.data) 152 | else: 153 | req_obj = request.form 154 | url = req_obj['url'] 155 | cluster_type = req_obj['type'] 156 | is_detailed = req_obj.get('is_detailed') if req_obj.get('is_detailed') else False 157 | plid = re.findall('\d{4,}', url)[0] 158 | if request.args.get('cluster_n'): 159 | cluster_n = eval(request.args.get('cluster_n')) 160 | else: 161 | cluster_n = 5 162 | 163 | if cluster_type == 'artist': 164 | cluster_res, playlist_name, detail_infos = s2v_operator.cluster_artist_in_playlist(plid, 165 | cluster_n=cluster_n, 166 | is_detailed=is_detailed) 167 | else: 168 | cluster_res, playlist_name, detail_infos = s2v_operator.cluster_song_in_playlist(plid, cluster_n=cluster_n, 169 | is_detailed=is_detailed) 170 | if is_detailed: 171 | result = {'code': 200, 'result': cluster_res, 'playlist_name': playlist_name, 'type': cluster_type, 172 | 'detail_infos': detail_infos} 173 | else: 174 | result = {'code': 200, 'result': cluster_res, 'playlist_name': playlist_name, 'type': cluster_type} 175 | resp = make_response(json.dumps(result, ensure_ascii=False), 200) 176 | resp.mimetype = 'application/json' 177 | except Exception, e: 178 | res = {'code': 400, 'error_msg': e.message} 179 | resp = make_response(json.dumps(res, ensure_ascii=False), 200) 180 | resp.mimetype = 'application/json' 181 | return resp 182 | 183 | 184 | def lower_array(arr): 185 | return [a.lower() for a in arr] 186 | 187 | 188 | if __name__ == '__main__': 189 | app.run(host='0.0.0.0', port=2335, debug=False) 190 | -------------------------------------------------------------------------------- /api_server/static/js/music_taster.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by jayvee on 17/3/15. 
3 | * https://github.com/JayveeHe 4 | */ 5 | 6 | 7 | // 歌单聚类 8 | $("#btn_send").click(function () { 9 | var pl_url = $("#input_url").val(); 10 | var send_datas = { 11 | url: pl_url, 12 | type: "song" 13 | }; 14 | $.ajax('/cluster/playlist/url', { 15 | 'data': JSON.stringify(send_datas), //{action:'x',params:['a','b','c']} 16 | 'type': 'POST', 17 | 'processData': false, 18 | 'contentType': 'application/json' //typically 'application/x-www-form-urlencoded', but the service you are calling may expect 'text/json'... check with the service to see what they expect as content-type in the HTTP header. 19 | }).done(function (data, status) { 20 | $("#show_cluster").modal('toggle'); 21 | if (status == "success") { 22 | // parse cluster result 23 | //解析聚类结果 24 | var resp_obj = eval(data); 25 | if (resp_obj['code'] == 200) { 26 | var title = $("#playlist_name_title"); 27 | console.log(resp_obj['playlist_name']); 28 | title.val(resp_obj['playlist_name'] + "\t" + title.val()); 29 | /// -------- d3.js ------- 30 | //准备复杂网络数据 31 | var g = {"nodes": [], "links": []}; 32 | var cluster_result = resp_obj["result"]; 33 | g.nodes.push({'id': 'root', 'group': -1, 'label': 'root'}); 34 | for (var i = 0; i < cluster_result.length; i++) { 35 | console.log(cluster_result[i]); 36 | var item = cluster_result[i][0]; 37 | /**var c_color = '#' + (Math.floor(Math.random() * 16777215).toString(16) + '000000').substr(0, 6); 38 | var c_x = (Math.random() - 0.5) * 50; 39 | var c_y = (Math.random() - 0.5) * 50;**/ 40 | g.nodes.push({ 41 | "id": item, 42 | "group": i, 43 | 'label': item 44 | }); 45 | g.links.push({ 46 | 'source': 'root', 47 | 'target': item, 48 | 'value': 1 49 | }); 50 | var c_root_id = item; 51 | var last_item = c_root_id; 52 | for (var j = 1; j < cluster_result[i].length; j++) { 53 | //console.log(i + "-" + j); 54 | var n_item = cluster_result[i][j]; 55 | g.nodes.push({ 56 | "id": n_item, 57 | "group": i, 58 | 'label': n_item 59 | }); 60 | g.links.push({ 61 | 'value': 1, 62 | 'source': 
c_root_id, 63 | 'target': n_item 64 | }); 65 | } 66 | } 67 | 68 | // d3 初始化 69 | var svg = d3.select("svg"); 70 | 71 | svg.selectAll('*').remove(); 72 | var c_canvas = $('#cluster_canvas'); 73 | var svg_width = 1000 * 0.9; 74 | var svg_height = 500 * 0.9; 75 | //console.log(c_canvas.parentElement().width + '-' + c_canvas.parentElement().height); 76 | console.log('svg_width:' + svg_width + '\theight:' + svg_height); 77 | 78 | var color = d3.scaleOrdinal(d3.schemeCategory20); 79 | 80 | var simulation = d3.forceSimulation() 81 | .force("link", d3.forceLink().id(function (d) { 82 | return d.id; 83 | })) 84 | .force("charge", d3.forceManyBody()) 85 | .force("center", d3.forceCenter(svg_width / 2, svg_height / 2)); 86 | 87 | 88 | var link = svg.append("g") 89 | .attr("class", "links") 90 | .selectAll("line") 91 | .data(g.links) 92 | .enter().append("line") 93 | .attr("stroke-width", function (d) { 94 | return Math.sqrt(d.value); 95 | }); 96 | 97 | var node = svg.append("g") 98 | .attr("class", "nodes") 99 | .selectAll("circle") 100 | .data(g.nodes) 101 | .enter() 102 | .append("circle") 103 | .attr("r", 5) 104 | .attr("fill", function (d) { 105 | return color(d.group); 106 | }) 107 | .call(d3.drag() 108 | .on("start", dragstarted) 109 | .on("drag", dragged) 110 | .on("end", dragended)); 111 | 112 | var anchorNode = svg.append('g').attr('class', 'labels').selectAll("g.labels").data(g.nodes) 113 | .enter().append("svg:text").text(function (d) { 114 | return d.label; 115 | }).style("fill", "#555").style("font-family", "Arial").style("font-size", 6) 116 | .call(d3.drag() 117 | .on("start", dragstarted) 118 | .on("drag", dragged) 119 | .on("end", dragended)); 120 | //anchorNode.append("svg:circle").attr("r", 0).style("fill", "#FFF"); 121 | 122 | 123 | simulation 124 | .nodes(g.nodes) 125 | .on("tick", ticked); 126 | 127 | simulation.force("link") 128 | .links(g.links); 129 | var zoom = d3.zoom() 130 | .on("zoom", zoomed); 131 | 132 | svg 133 | .on("wheel", wheeled) 134 | 
.call(zoom) 135 | .call(zoom.transform, d3.zoomIdentity 136 | .translate(svg_width / 2, svg_height / 2) 137 | .scale(0.5) 138 | .translate(-svg_width / 2, -svg_height / 2)); 139 | svg.call(zoom); 140 | 141 | function wheeled() { 142 | console.log(d3.event); 143 | } 144 | 145 | function zoomed() { 146 | node.attr("transform", d3.event.transform); 147 | link.attr("transform", d3.event.transform); 148 | anchorNode.attr("transform", d3.event.transform); 149 | } 150 | 151 | function ticked() { 152 | link 153 | .attr("x1", function (d) { 154 | return d.source.x; 155 | }) 156 | .attr("y1", function (d) { 157 | return d.source.y; 158 | }) 159 | .attr("x2", function (d) { 160 | return d.target.x; 161 | }) 162 | .attr("y2", function (d) { 163 | return d.target.y; 164 | }); 165 | 166 | node 167 | .attr("cx", function (d) { 168 | return d.x; 169 | }) 170 | .attr("cy", function (d) { 171 | return d.y; 172 | }); 173 | anchorNode 174 | .attr("x", function (d) { 175 | return d.x; 176 | }) 177 | .attr("y", function (d) { 178 | return d.y; 179 | }); 180 | } 181 | 182 | function dragstarted(d) { 183 | if (!d3.event.active) simulation.alphaTarget(0.3).restart(); 184 | d.fx = d.x; 185 | d.fy = d.y; 186 | } 187 | 188 | function dragged(d) { 189 | d.fx = d3.event.x; 190 | d.fy = d3.event.y; 191 | } 192 | 193 | function dragended(d) { 194 | if (!d3.event.active) simulation.alphaTarget(0); 195 | d.fx = null; 196 | d.fy = null; 197 | } 198 | 199 | 200 | } 201 | else { 202 | alert("请求错误,详情=" + resp_obj.toString()); 203 | } 204 | } 205 | else { 206 | alert("请求失败"); 207 | } 208 | } 209 | ).fail(function () { 210 | alert("请求失败"); 211 | }); 212 | }); 213 | 214 | 215 | 216 | 217 | -------------------------------------------------------------------------------- /pipelines/fetch_user_data.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | Created by jayvee on 16/12/22. 
4 | """ 5 | import time 6 | 7 | from utils.cloudmusic_api import * 8 | from utils.cloudmusic_dao import CloudMusicDAO 9 | from utils.db_utils import get_db_inst 10 | 11 | 12 | def test(): 13 | # u = User('2886507') 14 | # u.fill_details() 15 | # print u 16 | uname = '' # 填入用户名,手机登录 17 | pwd = '' 18 | fetch_login_userdata(username=uname, password=pwd) 19 | 20 | 21 | def fetch_login_userdata(username, password): 22 | """ 23 | 以当前登录用户为起点,获取各类信息 24 | :return: 25 | """ 26 | user_info = user_login(username=username, password=password) 27 | if user_info != -1 and user_info['code'] == 200: 28 | # user_profile = user_info['profile'] 29 | # uid = user_profile['userId'] 30 | # upl = user_playlist(uid) 31 | # print len(upl) 32 | return user_info 33 | else: 34 | data_process_logger.warn('fetch login userdata failed') 35 | 36 | 37 | def fetch_user_networks(start_id=None, max_user_count=5000): 38 | """ 39 | 启动用户信息爬取的函数 40 | Args: 41 | max_user_count: 本次最大爬取的用户数 42 | start_id: 入口id,如果没有则在数据库中任取一个 43 | 44 | Returns: 45 | 46 | """ 47 | db_userinfo = get_db_inst('MusicTaster', 'UserInfos') 48 | DAO_inst = CloudMusicDAO('MusicTaster', 'UserInfos') 49 | # start_info = user_login('13717951224', 'hejiawei') 50 | # u_profile = user_profile(start_id) 51 | if not start_id: 52 | start_id = db_userinfo.find_one()['userId'] 53 | idlist = set() 54 | idlist.add(start_id) 55 | # save start user info 56 | cur_id = start_id 57 | followlist = user_follows(cur_id) 58 | for i in followlist: 59 | idlist.add(i['userId']) 60 | # result_count = find_result.count() 61 | user_count = 0 62 | while len(idlist) > 0 and user_count < max_user_count and cur_id: 63 | if db_userinfo.find({'userId': cur_id}).count() != 0: 64 | # slp = random.random() * 1 + 0.5 65 | data_process_logger.info('[SKIP] No.%s User %s skip!' 
% (user_count, cur_id)) 66 | # data_process_logger.info('sleep %s sec' % slp) 67 | user_count += 1 68 | cur_id = idlist.pop() 69 | continue 70 | u_profile = user_profile(cur_id) 71 | # db_userinfo.insert(u_profile) 72 | followlist = user_follows(cur_id) 73 | fanlist = user_fans(cur_id) 74 | u_profile['follows'] = followlist 75 | u_profile['fans'] = fanlist 76 | followids = [] 77 | fanids = [] 78 | for userinfo in followlist: 79 | int_id = userinfo['userId'] 80 | followids.append(int_id) 81 | idlist.add(int_id) 82 | for userinfo in fanlist: 83 | int_id = userinfo['userId'] 84 | fanids.append(int_id) 85 | idlist.add(int_id) 86 | u_profile['follow_ids'] = followids 87 | u_profile['follow_count'] = len(followids) 88 | u_profile['fan_ids'] = fanids 89 | u_profile['fan_count'] = len(fanids) 90 | DAO_inst.save_unique_item(u_profile) 91 | data_process_logger.info('[OK] No.%s User %s, nickname = %s ok! %s users left' % ( 92 | user_count, cur_id, u_profile['nickname'], len(idlist))) 93 | slp = random.random() * 2 + 1 94 | data_process_logger.info('sleep %s sec' % slp) 95 | time.sleep(slp) 96 | cur_id = idlist.pop() 97 | user_count += 1 98 | # result_count = db_userinfo.find({'userId': cur_id}).count() 99 | print 'done' 100 | 101 | 102 | def fetch_playlist(max_user_count=100): 103 | """ 104 | 进行用户歌单的抓取,同时更新UserInfos、SongInfos和Plyalists三个数据库的信息 105 | Args: 106 | max_user_count: 最大抓取的用户数 107 | 108 | Returns: 109 | 无 110 | """ 111 | user_dao_inst = CloudMusicDAO('MusicTaster', 'UserInfos') 112 | playlist_dao_inst = CloudMusicDAO('MusicTaster', 'Playlists') 113 | song_dao_inst = CloudMusicDAO('MusicTaster', 'SongInfos') 114 | userid_list = user_dao_inst.db_inst.find({"playlists": {'$exists': False}}).distinct('userId') 115 | # random.shuffle(userid_list) 116 | count = 0 117 | for uid in userid_list[:max_user_count]: 118 | # count = 0 119 | userinfo = user_dao_inst.db_inst.find_one({"userId": uid}) 120 | # fetch playlist ids 121 | user_playlists = user_playlist(uid, limit=2000) 
122 | data_process_logger.info( 123 | 'processing the playlist of %s\nTotal playlist = %s' % (userinfo['nickname'], len(user_playlists))) 124 | if len(user_playlists): 125 | for i in range(len(user_playlists)): 126 | pl_info = user_playlists[i] 127 | data_process_logger.info( 128 | 'processing %s No.%s playlist: %s, total song: %s' % ( 129 | userinfo['nickname'], i, pl_info['name'], pl_info['trackCount'])) 130 | # fetch playlist details 131 | # 首先查看是否在数据库中有 132 | pl_obj = playlist_dao_inst.db_inst.find_one({'id': pl_info['id']}) 133 | if not pl_obj: 134 | try: 135 | pl_obj = playlist_detail(pl_info['id']) 136 | pl_song_ids = [] 137 | if pl_obj != -1: 138 | for song in pl_obj['tracks']: 139 | song_dao_inst.save_unique_item(song, primary_key='id') 140 | pl_song_ids.append(song['id']) 141 | # 在playlist中保存track信息,只保存编号 142 | user_playlists[i]['tracks_ids'] = pl_song_ids 143 | pl_obj['tracks_ids'] = pl_song_ids 144 | playlist_dao_inst.save_unique_item(pl_obj, primary_key='id', is_inform=True) 145 | slp = random.random() * 2 + 1 146 | # data_process_logger.info('sleep %s sec' % slp) 147 | time.sleep(slp) 148 | else: 149 | data_process_logger.error('cannot fetch %s %s' % (pl_info['id'], pl_info['name'])) 150 | except Exception, e: 151 | print e 152 | else: 153 | user_playlists[i]['tracks_ids'] = pl_obj['tracks_ids'] 154 | 155 | # 在userinfo中保存playlist信息 156 | userinfo['playlists'] = user_playlists 157 | user_dao_inst.save_unique_item(userinfo, primary_key='userId', is_overwrite=True, is_inform=True) 158 | data_process_logger.info('No.%s %s playlist handled!' 
% (count, userinfo['nickname'])) 159 | slp = random.random() * 2 + 1 160 | data_process_logger.info('sleep %s sec' % slp) 161 | time.sleep(slp) 162 | count += 1 163 | print 'done' 164 | 165 | 166 | def get_user_playlist(uid): 167 | user_dao_inst = CloudMusicDAO('MusicTaster', 'UserInfos') 168 | playlist_dao_inst = CloudMusicDAO('MusicTaster', 'Playlists') 169 | song_dao_inst = CloudMusicDAO('MusicTaster', 'SongInfos') 170 | # count = 0 171 | userinfo = user_dao_inst.db_inst.find_one({"userId": uid}) 172 | # fetch playlist ids 173 | user_playlists = user_playlist(uid, limit=2000) 174 | data_process_logger.info( 175 | 'processing the playlist of %s\nTotal playlist = %s' % (userinfo['nickname'], len(user_playlists))) 176 | if len(user_playlists): 177 | for i in range(len(user_playlists)): 178 | pl_info = user_playlists[i] 179 | data_process_logger.info( 180 | 'processing %s No.%s playlist: %s, total song: %s' % ( 181 | userinfo['nickname'], i, pl_info['name'], pl_info['trackCount'])) 182 | # fetch playlist details 183 | # 首先查看是否在数据库中有 184 | pl_obj = playlist_dao_inst.db_inst.find_one({'id': pl_info['id']}) 185 | if not pl_obj: 186 | try: 187 | pl_obj = playlist_detail(pl_info['id']) 188 | pl_song_ids = [] 189 | if pl_obj != -1: 190 | for song in pl_obj['tracks']: 191 | song_dao_inst.save_unique_item(song, primary_key='id') 192 | pl_song_ids.append(song['id']) 193 | # 在playlist中保存track信息,只保存编号 194 | user_playlists[i]['tracks_ids'] = pl_song_ids 195 | pl_obj['tracks_ids'] = pl_song_ids 196 | playlist_dao_inst.save_unique_item(pl_obj, primary_key='id', is_inform=True) 197 | slp = random.random() * 2 + 1 198 | # data_process_logger.info('sleep %s sec' % slp) 199 | time.sleep(slp) 200 | else: 201 | data_process_logger.error('cannot fetch %s %s' % (pl_info['id'], pl_info['name'])) 202 | except Exception, e: 203 | print e 204 | else: 205 | user_playlists[i]['tracks_ids'] = pl_obj['tracks_ids'] 206 | 207 | # 在userinfo中保存playlist信息 208 | userinfo['playlists'] = user_playlists 
209 | user_dao_inst.save_unique_item(userinfo, primary_key='userId', is_overwrite=True, is_inform=True) 210 | data_process_logger.info('%s playlist handled!' % (userinfo['nickname'])) 211 | 212 | 213 | if __name__ == '__main__': 214 | # login_user_info = fetch_login_userdata('', '') 215 | # start_id = login_user_info['profile']['userId'] 216 | tmp_id = 2886507 217 | # get_user_playlist(tmp_id) 218 | # fill_song_comments() 219 | # fetch_user_networks() 220 | fetch_playlist(max_user_count=1000) 221 | # update_userinfo() 222 | # test() 223 | -------------------------------------------------------------------------------- /utils/cloudmusic_api.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import hashlib 3 | import json 4 | import random 5 | from urllib import urlencode 6 | 7 | import requests 8 | 9 | from utils.encrypt_utils import encrypted_request 10 | from utils.logger_utils import data_process_logger 11 | 12 | """ 13 | Created by jayvee on 16/12/14. 
Mainly used to call the NetEase CloudMusic API without any post-processing;
all returned data is the raw payload from the remote end.
"""

# config_infos = get_config()
# csrf_token = config_infos['csrf_token']

# HTTP headers mimicking a desktop Chrome browser; the Referer/Host pair is
# required by music.163.com.
header = {
    'Accept': '*/*',
    'Accept-Encoding': 'gzip,deflate,sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8,gl;q=0.6,zh-TW;q=0.4',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Host': 'music.163.com',
    'Referer': 'http://music.163.com/',
    'User-Agent':
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36'
}
cookies = {'appver': '1.5.2'}

# proxylist = [{}, {'http': '138.197.118.48:80'}, {'http': '104.196.224.28:80'}]
proxylist = [None]  # None == direct connection; random.choice picks one per request

retry_times = 3  # max attempts for each network call below


def user_login(username, password):
    """
    User login API (cellphone login).

    Args:
        username: account name (cellphone number).
        password: plain-text password; md5-hashed here before encryption.
    Returns:
        result: a json obj of user data, or -1 on any request/parse failure.
    """
    base_url = 'https://music.163.com/weapi/login/cellphone'
    # NOTE(review): login_url is assigned but never used.
    login_url = 'https://music.163.com/weapi/login/'
    password = hashlib.md5(password).hexdigest()
    text = {
        'phone': username,
        'password': password,
        'rememberLogin': 'true'
    }
    data = encrypted_request(text)
    # s = requests.session()
    # s.headers = header
    try:
        res = requests.post(base_url, data=data, headers=header, proxies=random.choice(proxylist)).content
        result = json.loads(res)
        return result
    except Exception, e:
        data_process_logger.error('%s login failed, reason = %s' % (username, e))
        return -1


def playlist_detail(playlist_id, limit=1000):
    """
    Get playlist details by playlist id.

    Args:
        playlist_id: id of the playlist.
        limit: max number of songs (API cap is 1000).
    Returns:
        The 'result' sub-object of the response on success, -1 after
        retry_times failed attempts.
    """
    for i in range(retry_times):
        try:
            base_url = 'http://music.163.com/api/playlist/detail?id=%s&limit=%s' % (playlist_id, limit)
            res = requests.get(base_url, headers=header).content
            # print res
            jsonobj = json.loads(res)
            if jsonobj['code'] == 200:
                return json.loads(res)['result']
            else:
                # Non-200 code: log and fall through to the next retry.
                data_process_logger.error('error! result = %s' % res)
        except Exception, e:
            data_process_logger.error('%s playlist failed, reason = %s' % (playlist_id, e))
            data_process_logger.warn('%s playlist retrying...' % (playlist_id))
            continue
    return -1


def user_playlist(uid, offset=0, limit=1000):
    """
    Get the playlist ids of a user by uid.

    :param uid: user id
    :param offset: paging offset
    :param limit: max playlists returned
    :return: list of playlists on success, -1 after retry_times failures
    """
    base_url = 'http://music.163.com/api/user/playlist/?offset=%s&limit=%s&uid=%s' % (offset, limit, uid)
    # data = {'offset': offset, 'limit': limit, 'uid': uid}
    for i in range(retry_times):
        try:
            # data = urlencode(data)
            res = requests.get(base_url, headers=header).content
            data = json.loads(res)
            return data['playlist']
        except (requests.exceptions.RequestException, KeyError) as e:
            data_process_logger.error(e)
            data_process_logger.warn('retrying...')
    return -1


def song_detail(song_ids, offset=0):
    """
    Get song details in bulk from a list of song ids (at most 100 per call).

    :param song_ids: list of song ids
    :param offset: start index into song_ids
    :return: list of song dicts (ordered like the input ids), [] on failure
    """
    tmpids = song_ids[offset:]
    tmpids = tmpids[0:100]  # API accepts at most 100 ids per request
    tmpids = list(map(str, tmpids))
    base_url = 'http://music.163.com/api/song/detail?ids=[%s]' % (  # NOQA
        ','.join(tmpids))
    for i in range(retry_times):
        try:
            data = json.loads(requests.get(base_url).content)
            # the order of data['songs'] is no longer the same as tmpids,
            # so just make the order back
            data['songs'].sort(key=lambda song: tmpids.index(str(song['id'])))
            return data['songs']
        except requests.exceptions.RequestException as e:
            data_process_logger.error(e)
            data_process_logger.warn('retrying...')
    return []


def user_profile(uid):
    """
    Get user details by uid (indirectly, via the 'creator' field of the
    user's playlist listing).

    :param uid: user id
    :return: creator profile dict on success, -1 after retry_times failures
    """
    # upl = user_playlist(uid, limit=1)
    base_url = 'http://music.163.com/api/user/playlist/?offset=%s&limit=%s&uid=%s' % (0, 0, uid)
    # data = {'offset': offset, 'limit': limit, 'uid': uid}
    for i in range(retry_times):
        try:
            # data = urlencode(data)
            res = requests.get(base_url, headers=header, proxies=random.choice(proxylist)).content
            data = json.loads(res)
            return data['playlist'][0]['creator']
        except (requests.exceptions.RequestException, KeyError) as e:
            data_process_logger.error(e)
            data_process_logger.warn('retrying...')
            continue
    return -1


def user_follows(uid):
    """
    Get the follow list of a user by uid.

    :param uid: user id
    :return: list of followed users on success, -1 after retry_times failures
    """
    # upl = user_playlist(uid, limit=1)
    base_url = 'http://music.163.com/weapi/user/getfollows/%s' % (uid)
    # data = {'offset': offset, 'limit': limit, 'uid': uid}
    for i in range(retry_times):
        try:
            # data = urlencode(data)
            text = {
                'limit': 1000, 'offset': 0
            }
            data = encrypted_request(text)
            res = requests.post(base_url, data=data, headers=header, proxies=random.choice(proxylist)).content
            data = json.loads(res)
            return data['follow']
        except (requests.exceptions.RequestException, KeyError) as e:
            data_process_logger.error(e)
            data_process_logger.warn('retrying...')
            continue
    return -1


def user_fans(uid):
    """
    Get the fan (follower) list of a user by uid.

    :param uid: user id
    :return: list of followers on success, -1 after retry_times failures
    """
    # upl = user_playlist(uid, limit=1)
    base_url = 'http://music.163.com/weapi/user/getfolloweds/'
    # data = {'offset': offset, 'limit': limit, 'uid': uid}
    for i in range(retry_times):
        try:
            # data = urlencode(data)
            text = {
                'userId': uid,
                'limit': 1000, 'offset': 0
            }
            data = encrypted_request(text)
            res = requests.post(base_url, data=data, headers=header, proxies=random.choice(proxylist)).content
            data = json.loads(res)
            return data['followeds']
        except (requests.exceptions.RequestException, KeyError) as e:
            data_process_logger.error(e)
            data_process_logger.warn('retrying...')
            continue
    return -1


def song_comments(commentThreadId, limit=10, offset=0):
    """
    Get song comments by commentThreadId (e.g. 'R_SO_4_<song_id>').

    :return: full response dict when code == 200, -1 after retry_times failures
    """
    # upl = user_playlist(uid, limit=1)
    base_url = 'http://music.163.com/weapi/v1/resource/comments/%s/' % commentThreadId
    # data = {'offset': offset, 'limit': limit, 'uid': uid}
    for i in range(retry_times):
        try:
            # data = urlencode(data)
            text = {
                'limit': limit, 'offset': offset
            }
            data = encrypted_request(text)
            res = requests.post(base_url, data=data, headers=header, proxies=random.choice(proxylist)).content
            data = json.loads(res)
            if data['code'] == 200:
                return data
            else:
                print 'error, details = %s' % data
        except (requests.exceptions.RequestException, KeyError) as e:
            data_process_logger.error(e)
            data_process_logger.warn('retrying...')
            continue
    return -1


def search_web(s_name, type, limit=10):
    """
    Web search API.

    :param s_name: search keyword
    :param type: search type: 1 song; 10 album; 100 artist; 1000 playlist; 1002 user
    :param limit: max results
    :return: response dict on success, -1 after retry_times failures
    """
    data = {"s": s_name, "type": type, "limit": limit}
    search_url = 'http://music.163.com/api/search/get/web'
    for i in range(retry_times):
        try:
            d = urlencode(data)
            res = requests.post(search_url, d, headers=header, proxies=random.choice(proxylist)).content
            data = json.loads(res)
            return data
        except (requests.exceptions.RequestException, KeyError) as e:
            data_process_logger.error(e)
            data_process_logger.warn('retrying...')
    return -1


if __name__ == '__main__':
    pass
song_comments('R_SO_4_26612932') 268 | print 'd' 269 | # a = playlist_detail(326069502, limit=500) 270 | # print a 271 | # b = user_playlist('2886507', limit=1000) 272 | # print b 273 | # c = song_detail(['37239018', '23']) 274 | # d = user_profile('2886507') 275 | # f = user_follows('2886507') 276 | # fans = user_fans('2886507') 277 | # i = user_infos('2886507') 278 | # print search_web('jayvee he', '1002', 10) 279 | -------------------------------------------------------------------------------- /song2vec/song2vec_operator.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | """ 4 | Created by jayvee on 17/2/22. 5 | https://github.com/JayveeHe 6 | """ 7 | import pickle 8 | 9 | import cPickle 10 | from gensim import matutils 11 | from gensim.models.word2vec_inner import REAL 12 | from numpy.core.multiarray import ndarray, array, dot 13 | from sklearn.cluster import AffinityPropagation 14 | 15 | from utils.cloudmusic_api import playlist_detail 16 | from utils.logger_utils import data_process_logger 17 | 18 | 19 | class Song2VecOperator: 20 | def __init__(self, song2vec_model_path=None, artist2vec_model_path=None): 21 | """ 22 | 初始化,需要填入两种模型的地址 23 | Args: 24 | song2vec_model_path: 25 | artist2vec_model_path: 26 | """ 27 | try: 28 | if song2vec_model_path: 29 | with open(song2vec_model_path, 'rb') as s2v_file: 30 | self.song2vec_model = cPickle.load(s2v_file) 31 | print self.song2vec_model.estimate_memory() 32 | if artist2vec_model_path: 33 | with open(artist2vec_model_path, 'rb') as a2v_file: 34 | self.artist2vec_model = cPickle.load(a2v_file) 35 | print self.artist2vec_model.estimate_memory() 36 | self.song2vec_model.init_sims() 37 | self.artist2vec_model.init_sims() 38 | except IOError, ioe: 39 | print '%s' % ioe 40 | 41 | def calc_song_similar(self, positive_songs=[], negative_songs=[], 42 | positive_artists=[], negative_artists=[], 43 | song_weight=1.0, artist_weight=1.5, 44 | topn=10, 
restrict_vocab=None): 45 | """ 46 | 计算歌曲和歌手的加减相似度,求出最近似的歌曲top n 47 | Args: 48 | topn: 49 | restrict_vocab: 50 | artist_weight: 51 | song_weight: 52 | positive_songs: 53 | negative_songs: 54 | positive_artists: 55 | negative_artists: 56 | 57 | Returns: 58 | 59 | """ 60 | try: 61 | positive_songs = [(word, song_weight) for word in positive_songs] 62 | negative_songs = [(word, -song_weight) for word in negative_songs] 63 | positive_artists = [(word, artist_weight) for word in positive_artists] 64 | negative_artists = [(word, -artist_weight) for word in negative_artists] 65 | all_words, mean = set(), [] 66 | if positive_songs + negative_songs: 67 | for song, weight in positive_songs + negative_songs: 68 | song = song.strip() 69 | if isinstance(song, ndarray): 70 | mean.append(weight * song) 71 | elif song in self.song2vec_model.vocab: 72 | mean.append(weight * self.song2vec_model.syn0norm[self.song2vec_model.vocab[song].index]) 73 | all_words.add(self.song2vec_model.vocab[song].index) 74 | else: 75 | raise KeyError("song '%s' not in vocabulary" % song) 76 | # limited = self.song2vec_model.syn0norm if restrict_vocab is None \ 77 | # else self.song2vec_model.syn0norm[:restrict_vocab] 78 | if positive_artists + negative_artists: 79 | for artist, weight in positive_artists + negative_artists: 80 | if isinstance(word, ndarray): 81 | mean.append(weight * artist) 82 | elif word in self.artist2vec_model.vocab: 83 | mean.append(weight * self.artist2vec_model.syn0norm[self.artist2vec_model.vocab[artist].index]) 84 | all_words.add(self.artist2vec_model.vocab[artist].index) 85 | else: 86 | raise KeyError("artist '%s' not in vocabulary" % artist) 87 | if not mean: 88 | raise ValueError("cannot compute similarity with no input") 89 | mean = matutils.unitvec(array(mean).mean(axis=0)).astype(REAL) 90 | limited = self.song2vec_model.syn0norm if restrict_vocab is None \ 91 | else self.song2vec_model.syn0norm[:restrict_vocab] 92 | # limited += self.artist2vec_model.syn0norm if 
restrict_vocab is None \ 93 | # else self.artist2vec_model.syn0norm[:restrict_vocab] 94 | dists = dot(limited, mean) 95 | if not topn: 96 | return dists 97 | best = matutils.argsort(dists, topn=topn + len(all_words), reverse=True) 98 | # ignore (don't return) words from the input 99 | result = [(self.song2vec_model.index2word[sim], float(dists[sim])) for sim in best if sim not in all_words] 100 | return result[:topn] 101 | except Exception, e: 102 | print 'error = %s' % e 103 | raise e 104 | 105 | def calc_artist_similar(self, positive_songs=[], negative_songs=[], 106 | positive_artists=[], negative_artists=[], 107 | song_weight=1.0, artist_weight=1.5, 108 | topn=10, restrict_vocab=None): 109 | """ 110 | 计算歌曲和歌手的加减相似度,求出最近似的歌手top n 111 | Args: 112 | topn: 113 | restrict_vocab: 114 | artist_weight: 115 | song_weight: 116 | positive_songs: 117 | negative_songs: 118 | positive_artists: 119 | negative_artists: 120 | 121 | Returns: 122 | 123 | """ 124 | try: 125 | positive_songs = [(word, song_weight) for word in positive_songs] 126 | negative_songs = [(word, -song_weight) for word in negative_songs] 127 | positive_artists = [(word, artist_weight) for word in positive_artists] 128 | negative_artists = [(word, -artist_weight) for word in negative_artists] 129 | all_words, mean = set(), [] 130 | if positive_songs + negative_songs: 131 | for song, weight in positive_songs + negative_songs: 132 | if isinstance(song, ndarray): 133 | mean.append(weight * song) 134 | elif song in self.song2vec_model.vocab: 135 | mean.append(weight * self.song2vec_model.syn0norm[self.song2vec_model.vocab[song].index]) 136 | all_words.add(self.song2vec_model.vocab[song].index) 137 | else: 138 | raise KeyError("song '%s' not in vocabulary" % song) 139 | # limited = self.song2vec_model.syn0norm if restrict_vocab is None \ 140 | # else self.song2vec_model.syn0norm[:restrict_vocab] 141 | if positive_artists + negative_artists: 142 | for artist, weight in positive_artists + negative_artists: 143 | if 
isinstance(word, ndarray): 144 | mean.append(weight * artist) 145 | elif word in self.artist2vec_model.vocab: 146 | mean.append(weight * self.artist2vec_model.syn0norm[self.artist2vec_model.vocab[artist].index]) 147 | all_words.add(self.artist2vec_model.vocab[artist].index) 148 | else: 149 | raise KeyError("artist '%s' not in vocabulary" % artist) 150 | if not mean: 151 | raise ValueError("cannot compute similarity with no input") 152 | mean = matutils.unitvec(array(mean).mean(axis=0)).astype(REAL) 153 | limited = self.artist2vec_model.syn0norm if restrict_vocab is None \ 154 | else self.artist2vec_model.syn0norm[:restrict_vocab] 155 | # limited += self.artist2vec_model.syn0norm if restrict_vocab is None \ 156 | # else self.artist2vec_model.syn0norm[:restrict_vocab] 157 | dists = dot(limited, mean) 158 | if not topn: 159 | return dists 160 | best = matutils.argsort(dists, topn=topn + len(all_words), reverse=True) 161 | # ignore (don't return) words from the input 162 | result = [(self.artist2vec_model.index2word[sim], float(dists[sim])) for sim in best if 163 | sim not in all_words] 164 | return result[:topn] 165 | except Exception, e: 166 | print 'error = %s' % e 167 | raise e 168 | 169 | def cluster_song_in_playlist(self, playlist_id, cluster_n=5, is_detailed=False): 170 | """ 171 | 获取单个歌单内的歌曲聚类信息 172 | Args: 173 | playlist_id: 歌单id 174 | cluster_n:聚类数 175 | is_detailed: 返回的结果是否包含详情 176 | 177 | Returns: 178 | 聚类后的列表 179 | """ 180 | playlist_obj = playlist_detail(playlist_id) 181 | song_list = [] 182 | vec_list = [] 183 | song_info_dict = {} 184 | ap_cluster = AffinityPropagation() 185 | data_process_logger.info('clustering playlist: %s' % playlist_obj['name']) 186 | for item in playlist_obj['tracks']: 187 | song = item['name'].lower() 188 | song_info_dict[song] = { 189 | 'name': song, 190 | 'artist': item['artists'][0]['name'], 191 | 'id': item['id'], 192 | 'album_img_url': item['album']['picUrl'], 193 | 'site_url': 'http://music.163.com/#/song?id=%s' % 
item['id'] 194 | } 195 | # print song 196 | if song not in song_list: 197 | song_list.append(song) 198 | # print self.song2vec_model.vocab.get(song) 199 | # print self.song2vec_model.syn0norm == None 200 | if self.song2vec_model.vocab.get(song) and len(self.song2vec_model.syn0norm): 201 | song_vec = self.song2vec_model.syn0norm[self.song2vec_model.vocab[song].index] 202 | else: 203 | data_process_logger.warn( 204 | 'The song %s of playlist-%s is not in dataset' % (song, playlist_obj['name'])) 205 | song_vec = [0 for i in range(self.song2vec_model.vector_size)] 206 | vec_list.append(song_vec) 207 | # song_list = list(song_list) 208 | if len(vec_list) > 1: 209 | cluster_result = ap_cluster.fit(vec_list, song_list) 210 | cluster_array = [[] for i in range(len(cluster_result.cluster_centers_indices_))] 211 | for i in range(len(cluster_result.labels_)): 212 | label = cluster_result.labels_[i] 213 | index = i 214 | cluster_array[label].append(song_list[i]) 215 | return cluster_array, playlist_obj['name'], song_info_dict 216 | else: 217 | return [song_list], playlist_obj['name'], song_info_dict 218 | 219 | def cluster_artist_in_playlist(self, playlist_id, cluster_n=5, is_detailed=False): 220 | """ 221 | 获取单个歌单内的歌手聚类信息 222 | Args: 223 | playlist_id: 歌单id 224 | cluster_n:聚类数 225 | is_detailed: 是否包含详情信息 226 | 227 | Returns: 228 | 聚类后的列表 229 | """ 230 | playlist_obj = playlist_detail(playlist_id) 231 | artist_list = [] 232 | vec_list = [] 233 | ap_cluster = AffinityPropagation() 234 | data_process_logger.info('clustering playlist: %s' % playlist_obj['name']) 235 | for item in playlist_obj['tracks']: 236 | artist = item['artists'][0]['name'].lower() 237 | # print artist 238 | if artist not in artist_list: 239 | artist_list.append(artist) 240 | # print self.song2vec_model.vocab.get(artist) 241 | # print self.song2vec_model.syn0norm == None 242 | if self.artist2vec_model.vocab.get(artist) and len(self.artist2vec_model.syn0norm): 243 | artist_vec = 
self.artist2vec_model.syn0norm[self.artist2vec_model.vocab[artist].index] 244 | else: 245 | data_process_logger.warn( 246 | 'The artist %s of playlist-%s is not in dataset' % (artist, playlist_obj['name'])) 247 | artist_vec = [0 for i in range(self.artist2vec_model.vector_size)] 248 | vec_list.append(artist_vec) 249 | # artist_list = list(artist_list) 250 | # vec_list = list(vec_list) 251 | if len(vec_list) > 1: 252 | cluster_result = ap_cluster.fit(vec_list, artist_list) 253 | cluster_array = [[] for i in range(len(cluster_result.cluster_centers_indices_))] 254 | for i in range(len(cluster_result.labels_)): 255 | label = cluster_result.labels_[i] 256 | index = i 257 | cluster_array[label].append(artist_list[i]) 258 | return cluster_array, playlist_obj['name'], {} 259 | else: 260 | return [artist_list], playlist_obj['name'], {} 261 | 262 | 263 | if __name__ == '__main__': 264 | s2vo = Song2VecOperator(song2vec_model_path='../datas/[full]50d_20iter_10win_5min_song2vec.model', 265 | artist2vec_model_path='../datas/[full]50d_20iter_10win_5min_artist2vec.model') 266 | # res = s2vo.calc_song_artist_similar(positive_songs=[u'time machine', u'yellow', u'viva la vida'], 267 | # negative_songs=[], 268 | # positive_artists=[], 269 | # negative_artists=[], 270 | # artist_weight=1.0, topn=20) 271 | # for i in res: 272 | # print i[0], i[1] 273 | s2vo.cluster_song_in_playlist('3659853') 274 | # s2vo.cluster_artist_in_playlist('3659853') 275 | -------------------------------------------------------------------------------- /song2vec/rock_gensim.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | """ 4 | Created by jayvee on 17/2/16. 
https://github.com/JayveeHe
"""
import random

import gensim
import pickle
from gensim import corpora
from gensim.models import word2vec

from utils.cloudmusic_dao import CloudMusicDAO
from utils.logger_utils import data_process_logger


def prepare_song_dict(tag=''):
    """
    Iterate over the playlists in the DB to build the song2vec training data.

    Args:
        tag: tag string appended to the output file names.

    Returns:
        The gensim corpora.Dictionary built from the song-title sequences.
        Side effects: writes ../datas/song_dictionary_<tag>.dict and
        ../datas/songs_seq_<tag>.dat.
    """
    playlist_dao_inst = CloudMusicDAO('MusicTaster', 'Playlists')
    print playlist_dao_inst.db_inst.find(
        {'trackCount': {'$gte': 3, '$lte': 1000}, 'playCount': {'$gte': 1}},
        {'tracks': 1, 'name': 1}).limit(100000).count()
    find_result = playlist_dao_inst.db_inst.find(
        {'trackCount': {'$gte': 3, '$lte': 1000}, 'playCount': {'$gte': 1}},
        {'tracks': 1, 'name': 1}).limit(100000)
    # Turn each playlist into a sequence of lower-cased song titles
    # (one playlist == one "sentence" for word2vec).
    total_song_set = []
    count = 0
    for item in find_result:
        data_process_logger.info('No.%s %s' % (count, item['name']))
        # Collect the song-title sequence of this playlist.
        song_seq = []
        for song in item['tracks']:
            sname = song['name']
            song_seq.append(sname.lower())
        total_song_set.append(song_seq)
        count += 1
    data_process_logger.info('start building dictionary')
    song_dictionary = corpora.Dictionary(total_song_set)
    print u'歌单数', song_dictionary.num_docs
    print u'歌曲数', song_dictionary.num_pos
    data_process_logger.info('start saving datas')
    song_dictionary.save('../datas/song_dictionary_%s.dict' % tag)
    pickle.dump(total_song_set, open('../datas/songs_seq_%s.dat' % tag, 'wb'))
    return song_dictionary


def train_song2vec_model(fout_path, input_datas=None, data_path=None, min_count=5, sorted_vocab=1, window=10, size=250,
                         iter_n=50):
    """
    Train the song2vec model and pickle it to fout_path.

    Args:
        fout_path: output path for the pickled model.
        input_datas: pre-loaded list of song-title sequences; when None,
            loaded from data_path.
        data_path: pickle file of sequences (used only if input_datas is None).
        min_count: word2vec min_count.
        sorted_vocab: word2vec sorted_vocab flag.
        window: word2vec context window.
        size: embedding dimensionality.
        iter_n: number of training epochs.

    Returns:
        None (the model is only written to disk).
        NOTE(review): the input is shuffled IN PLACE and truncated to the
        first 45000 playlists before training — confirm this sampling is
        intentional (train_artist2vec_model below does not do it).
    """
    if not input_datas and data_path:
        input_datas = pickle.load(open(data_path, 'rb'))
    data_process_logger.info('start training')
    random.shuffle(input_datas)
    input_datas = input_datas[:45000]
    wv_model = gensim.models.Word2Vec(input_datas, min_count=min_count, sorted_vocab=sorted_vocab, window=window,
                                      size=size, iter=iter_n)
    with open(fout_path, 'wb') as fout:
        data_process_logger.info('start saving model')
        pickle.dump(wv_model, fout)
        print 'model saved'


def prepare_artist_dict(tag=''):
    """
    Iterate over the playlists in the DB to build the artist2vec training data.

    Args:
        tag: tag string appended to the output file names.

    Returns:
        The gensim corpora.Dictionary built from the artist-name sequences.
        Side effects: writes ../datas/artists_dictionary_<tag>.dict and
        ../datas/artists_seq_<tag>.dat.
    """
    playlist_dao_inst = CloudMusicDAO('MusicTaster', 'Playlists')
    # print playlist_dao_inst.db_inst.find(
    #     {'trackCount': {'$gte': 10, '$lte': 600}, 'playCount': {'$gte': 10}},
    #     {'name': 1}).limit(100000).count()
    find_result = playlist_dao_inst.db_inst.find(
        {'trackCount': {'$gte': 10, '$lte': 600}, 'playCount': {'$gte': 5}},
        {'tracks': 1, 'name': 1}).limit(100000)
    # Turn each playlist into a sequence of lower-cased artist names
    # (first artist of every track only).
    total_artists_set = []
    count = 0
    for item in find_result:
        data_process_logger.info('No.%s %s' % (count, item['name']))
        # Collect the artist-name sequence of this playlist.
        artists_seq = []
        for song in item['tracks']:
            sname = song['artists'][0]['name']
            artists_seq.append(sname.lower())
        total_artists_set.append(artists_seq)
        count += 1
    data_process_logger.info('start building dictionary')
    artist_dictionary = corpora.Dictionary(total_artists_set)
    print u'歌单数', artist_dictionary.num_docs
    try:
        print u'歌手数', len(artist_dictionary.token2id)
    except Exception, e:
        print 'error = %s' % e
    data_process_logger.info('start saving datas')
    artist_dictionary.save('../datas/artists_dictionary_%s.dict' % tag)
    pickle.dump(total_artists_set, open('../datas/artists_seq_%s.dat' % tag, 'wb'))
    return artist_dictionary


def train_artist2vec_model(fout_path, input_datas=None, data_path=None, min_count=5, sorted_vocab=1, window=10,
                           size=250,
                           iter_n=50):
    """
    Train the artist2vec model and pickle it to fout_path.

    Parameters mirror train_song2vec_model (no shuffling/truncation here).
    """
    if not input_datas and data_path:
        input_datas = pickle.load(open(data_path, 'rb'))
    data_process_logger.info('start training')
    wv_model = gensim.models.Word2Vec(input_datas, min_count=min_count, sorted_vocab=sorted_vocab, window=window,
                                      size=size, iter=iter_n)
    with open(fout_path, 'wb') as fout:
        data_process_logger.info('start saving model')
        pickle.dump(wv_model, fout)
        print 'model saved'


def train_artistsong2vec_model(fout_path, input_datas=None, data_path=None,
                               min_count=5, sorted_vocab=1, window=10,
                               size=250,
                               iter_n=50):
    """
    Train a combined model where each sequence interleaves the two fields of
    every input pair (presumably (artist, song) tuples — TODO confirm against
    the data producer).

    Parameters mirror train_song2vec_model.
    """
    if not input_datas and data_path:
        input_datas = pickle.load(open(data_path, 'rb'))
    # Flatten each list of pairs into [a0, b0, a1, b1, ...].
    full_data = []
    for i in input_datas:
        tmp = []
        for j in i:
            tmp.append(j[0])
            tmp.append(j[1])
        full_data.append(tmp)
    data_process_logger.info('start training')
    wv_model = gensim.models.Word2Vec(full_data, min_count=min_count, sorted_vocab=sorted_vocab, window=window,
                                      size=size, iter=iter_n)
    with open(fout_path, 'wb') as fout:
        data_process_logger.info('start saving model')
        pickle.dump(wv_model, fout)
        print 'model saved'


def test_song2vec():
    """Manual end-to-end check: train a small song2vec model, reload it and
    print a few similarity queries."""
    tag = 'full'
    # prepare_song_dict(tag=tag)
    min_count = 5
    sorted_vocab = 1
    window = 10
    size = 50
    iter_n = 20
    modelpath = '../datas/[%s_reduced]%sd_%siter_%swin_%smin_song2vec.model' % (tag, size, iter_n, window, min_count)
    train_song2vec_model(fout_path=modelpath, data_path='../datas/songs_seq_%s.dat' % tag,
                         min_count=min_count,
                         sorted_vocab=sorted_vocab, window=window,
                         size=size, iter_n=iter_n)
    print 'model params:\tag: %s\tnmin: %s\twin: %s\tsize: %s\titer_n: %s' % (tag, min_count, window, size, iter_n)
    with open(modelpath, 'rb') as fin:
        data_process_logger.info('loading')
        m = pickle.load(fin)
        data_process_logger.info('start predicting')
        s1, s2 = u'半岛铁盒', u'成都'.lower()
        print u'%s 与 %s 的相似度为: %.4f' % (s1, s2, m.similarity(s1, s2))
        s1, s2 = u'viva la vida', u'yellow'
        print u'%s 与 %s 的相似度为: %.4f' % (s1, s2, m.similarity(s1, s2))
        s1, s2 = u'夜空中最亮的星', u'南山南'
        print u'%s 与 %s 的相似度为: %.4f' % (s1, s2, m.similarity(s1, s2))
        s1, s2 = u'photograph', u'need you now'
        print u'%s 与 %s 的相似度为: %.4f' % (s1, s2, m.similarity(s1, s2))
        print '---------------'
        tsong = u'告白气球'
        print u'%s 最相似的歌曲:' % tsong
        for i in m.most_similar(tsong, topn=20):
            print i[0], i[1]
        print '---------------'
        tsong = u'晴天'.lower()
        print u'%s 最相似的歌曲:' % tsong
        for i in m.most_similar(tsong, topn=20):
            print i[0], i[1]
        print '---------------'
        tsong = u'are you ok'.lower()
        print u'%s 最相似的歌曲:' % tsong
        for i in m.most_similar(tsong, topn=20):
            print i[0], i[1]
        print '---------------'
        tsong = u'How To Save A Life - New Album Version'.lower()
        print u'%s 最相似的歌曲:' % tsong
        for i in m.most_similar(tsong, topn=20):
            print i[0], i[1]
        print '---------------'
        tsong = u'往南'.lower()
        print u'%s 最相似的歌曲:' % tsong
        for i in m.most_similar(tsong, topn=20):
            print i[0], i[1]
        print '=============='
        add_arr = [u'晴天', u'雨天', u'欧若拉']
        minus_arr = [u'说爱你']
        line = '+'.join(add_arr)
        line += '-' + '-'.join(minus_arr)
        print line
        for i in m.most_similar(positive=add_arr, negative=minus_arr):
            print i[0], i[1]


def test_artist2vec():
    """Manual end-to-end check for the artist2vec model (definition continues
    beyond this view)."""
    tag = 'full'
    min_count = 5
    sorted_vocab = 1
    window = 10
    size = 50
    iter_n = 20
    # prepare_artist_dict(tag=tag)
    modelpath = '../datas/[%s]%sd_%siter_%swin_%smin_artist2vec.model' % (tag, size, iter_n, window, min_count)
    print 'model params:\tag: %s\tnmin: %s\twin: %s\tsize: %s\titer_n: %s' % (tag, min_count, window, size, iter_n)
    #
train_artist2vec_model(fout_path=modelpath, data_path='../datas/artists_seq_%s.dat' % tag, 227 | # min_count=min_count, 228 | # sorted_vocab=sorted_vocab, window=window, 229 | # size=size, iter_n=iter_n) 230 | with open(modelpath, 'rb') as fin: 231 | m = pickle.load(fin) 232 | s1, s2 = u'周杰伦', u'王力宏'.lower() 233 | print u'%s 与 %s 的相似度为: %.4f' % (s1, s2, m.similarity(s1, s2)) 234 | s1, s2 = u'蔡依林', u'梁静茹' 235 | print u'%s 与 %s 的相似度为: %.4f' % (s1, s2, m.similarity(s1, s2)) 236 | s1, s2 = u'梁静茹', u'孙燕姿' 237 | print u'%s 与 %s 的相似度为: %.4f' % (s1, s2, m.similarity(s1, s2)) 238 | print '---------------' 239 | tsong = u'老狼' 240 | print u'%s 最相似的歌手:' % tsong 241 | for i in m.most_similar(tsong, topn=20): 242 | print i[0], i[1] 243 | print '---------------' 244 | tsong = u'周杰伦'.lower() 245 | print u'%s 最相似的歌手:' % tsong 246 | for i in m.most_similar(tsong, topn=20): 247 | print i[0], i[1] 248 | print '---------------' 249 | tsong = u'蔡依林'.lower() 250 | print u'%s 最相似的歌手:' % tsong 251 | for i in m.most_similar(tsong, topn=20): 252 | print i[0], i[1] 253 | print '---------------' 254 | tsong = u's.h.e'.lower() 255 | print u'%s 最相似的歌手:' % tsong 256 | for i in m.most_similar(tsong, topn=20): 257 | print i[0], i[1] 258 | print '---------------' 259 | tsong = u'spyair'.lower() 260 | print u'%s 最相似的歌手:' % tsong 261 | for i in m.most_similar(tsong, topn=20): 262 | print i[0], i[1] 263 | print '==============' 264 | # add_arr = [u'晴天', u'布拉格广场', u'去大理'] 265 | # minus_arr = [u'faded'.lower(), u'时间煮雨', u'爱的供养'] 266 | # line = '+'.join(add_arr) 267 | # line += '-' + '-'.join(minus_arr) 268 | # print line 269 | # for i in m.most_similar(positive=add_arr, negative=minus_arr): 270 | # print i[0], i[1] 271 | 272 | 273 | def prepare_song_artist_dict(tag=''): 274 | playlist_dao_inst = CloudMusicDAO('MusicTaster', 'Playlists') 275 | print playlist_dao_inst.db_inst.find( 276 | {'trackCount': {'$gte': 5, '$lte': 1000}, 'playCount': {'$gte': 5}}, 277 | {'tracks': 1, 'name': 
1}).limit(100000).count() 278 | find_result = playlist_dao_inst.db_inst.find( 279 | {'trackCount': {'$gte': 5, '$lte': 1000}, 'playCount': {'$gte': 5}}, 280 | {'tracks': 1, 'name': 1}).limit(100000) 281 | # 将歌单中的歌曲名组合成歌曲名序列 282 | total_song_artist_set = [] 283 | count = 0 284 | for item in find_result: 285 | data_process_logger.info('No.%s %s' % (count, item['name'])) 286 | # 保存歌单中的歌曲序列 287 | song_artist_seq = [] 288 | for song in item['tracks']: 289 | sname = song['name'] 290 | artist = song['artists'][0]['name'].lower() 291 | song_artist_seq.append((sname.lower(), artist)) 292 | total_song_artist_set.append(song_artist_seq) 293 | count += 1 294 | data_process_logger.info('start building dictionary') 295 | # song_dictionary = corpora.Dictionary(total_song_artist_set) 296 | # print u'歌单数', song_dictionary.num_docs 297 | # print u'歌曲数', song_dictionary.num_pos 298 | data_process_logger.info('start saving datas') 299 | # song_dictionary.save('../datas/song_artist_dictionary_%s.dict' % tag) 300 | pickle.dump(total_song_artist_set, open('../datas/songs_artists_seq_%s.dat' % tag, 'wb')) 301 | # return song_dictionary 302 | 303 | 304 | def test_artistsong2vec(): 305 | tag = 'full' 306 | min_count = 5 307 | sorted_vocab = 1 308 | window = 10 309 | size = 50 310 | iter_n = 20 311 | # prepare_artist_dict(tag=tag) 312 | modelpath = '../datas/[%s]%sd_%siter_%swin_%smin_artistsong2vec.model' % (tag, size, iter_n, window, min_count) 313 | print 'model params:\tag: %s\tnmin: %s\twin: %s\tsize: %s\titer_n: %s' % (tag, min_count, window, size, iter_n) 314 | # train_artistsong2vec_model(fout_path=modelpath, data_path='../datas/songs_artists_seq_%s.dat' % tag) 315 | with open(modelpath, 'rb') as fin: 316 | m = pickle.load(fin) 317 | s1, s2 = u'周杰伦', u'蔡依林'.lower() 318 | print u'%s 与 %s 的相似度为: %.4f' % (s1, s2, m.similarity(s1, s2)) 319 | s1, s2 = u'周杰伦', u'东风破' 320 | print u'%s 与 %s 的相似度为: %.4f' % (s1, s2, m.similarity(s1, s2)) 321 | s1, s2 = u'梁静茹', u'孙燕姿' 322 | print u'%s 与 %s 
的相似度为: %.4f' % (s1, s2, m.similarity(s1, s2)) 323 | print '---------------' 324 | tsong = u'你听得到' 325 | print u'%s 最相似的歌手:' % tsong 326 | for i in m.most_similar(tsong, topn=20): 327 | print i[0], i[1] 328 | print '---------------' 329 | tsong = u'周杰伦'.lower() 330 | print u'%s 最相似的歌手:' % tsong 331 | for i in m.most_similar(tsong, topn=20): 332 | print i[0], i[1] 333 | print '---------------' 334 | tsong = u'蔡依林'.lower() 335 | print u'%s 最相似的歌手:' % tsong 336 | for i in m.most_similar(tsong, topn=20): 337 | print i[0], i[1] 338 | print '---------------' 339 | tsong = u'雷军'.lower() 340 | print u'%s 最相似的歌手:' % tsong 341 | for i in m.most_similar(tsong, topn=20): 342 | print i[0], i[1] 343 | print '---------------' 344 | tsong = u'王力宏'.lower() 345 | print u'%s 最相似的歌手:' % tsong 346 | for i in m.most_similar_cosmul(tsong, topn=20): 347 | print i[0], i[1] 348 | print '==============' 349 | add_arr = [u'周杰伦', u'王力宏', u'王力宏'] 350 | minus_arr = [u'晴天', u'回到过去'] 351 | line = '+'.join(add_arr) 352 | line += '-' + '-'.join(minus_arr) 353 | print line 354 | for i in m.most_similar(positive=add_arr, negative=minus_arr): 355 | print i[0], i[1] 356 | 357 | 358 | if __name__ == '__main__': 359 | test_song2vec() 360 | # test_artist2vec() 361 | # prepare_song_artist_dict('full') 362 | # test_artistsong2vec() 363 | -------------------------------------------------------------------------------- /api_server/templates/demo.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | MusicTaster 6 | 7 | 8 | 9 | {# #} 10 | {# #} 11 | {# #} 12 | {# #} 13 | {# #} 14 | {# #} 15 | 16 | 17 | 18 | {# #} 19 | 20 | 33 | 34 | 35 | 36 |
37 |
38 |
39 |

40 | MusicTaster 41 |

42 | 43 |

44 | A demo based on Song2Vec and Artist2Vec 45 |

46 |

47 | 详情见博客《MusicTaster——一种Song2Vec和Artist2Vec的实践》 49 |

50 |

51 | 欢迎Star@Github https://github.com/JayveeHe/MusicTaster 53 |

54 |
55 |
56 |
57 | {#
#} 58 | {#
#} 59 | {#
#} 60 | {# 摘要句数:#} 61 | {#
#} 62 | {#
#} 63 | {# #} 64 | {#
#} 65 | {#
#} 66 | {# #} 67 | {#
#} 68 | {#
#} 69 | {#
#} 70 |
71 |
72 |
73 |
74 |
75 | 歌单聚类: 76 |
77 | 请输入网易云音乐的歌单地址 78 |
79 | 81 | 82 |
83 |
84 | 85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 | 歌曲风格: 97 |
98 | 找出与给定歌曲(可多个)的风格近似的其他歌曲 99 |
100 |
101 |
102 |
103 | 105 | 结果数: 106 |
107 | 108 |
109 |
110 |
111 |
112 | 145 | 176 |
177 | 392 | 441 | 442 | 443 | --------------------------------------------------------------------------------