├── README.md ├── bin ├── POM_function.py ├── POM_function.pyc ├── config_sample.py ├── config_sample.pyc ├── safe_math.py ├── safe_math.pyc ├── wc_click_model_inference_by_id.py ├── wc_common.py └── wc_common.pyc ├── relevance_test_dataset ├── data │ ├── model_result │ │ ├── DBN_result.txt │ │ ├── POM_result.txt │ │ ├── PSCM_result.txt │ │ ├── TCM_result.txt │ │ ├── THCM_result.txt │ │ └── UBM_result.txt │ └── relevance_label.txt └── readme.txt ├── sample ├── .DS_Store ├── output │ ├── _DBN.model │ ├── _DBN.model.perplexity │ ├── _DBN.model.relevance │ ├── _DBNModel.model │ ├── _DBNModel.model.perplexity │ ├── _DBNModel.model.relevance │ ├── _POM.model │ ├── _POM.model.perplexity │ ├── _POM.model.relevance │ ├── _PSCM.model │ ├── _PSCM.model.perplexity │ ├── _PSCM.model.relevance │ ├── _TCM.model │ ├── _TCM.model.perplexity │ ├── _TCM.model.relevance │ ├── _THCMOri.model │ ├── _THCMOri.model.perplexity │ ├── _THCMOri.model.relevance │ ├── _UBM.model │ ├── _UBM.model.perplexity │ └── _UBM.model.relevance ├── query_class ├── query_id ├── test_data ├── train_data └── url_id └── user_preference_test_dataset ├── query_result_page ├── css │ ├── custom.css │ ├── sessions.txt │ ├── static_pages.css.scss │ ├── tasks.txt │ └── users.css.scss ├── pages │ ├── 一卡通充值.html │ ├── 上海东方电视台直播.html │ ├── 上海卫视在线直播.html │ ├── 下载斗破苍穹小说.html │ ├── 下载跑跑卡丁车.html │ ├── 世友地板.html │ ├── 东方卫视在线直播.html │ ├── 东风集团.html │ ├── 中华军事网新闻.html │ ├── 中华老字号名单.html │ ├── 中央军委四总部领导名单.html │ ├── 中央台直播.html │ ├── 中央财经频道直播.html │ ├── 中文国际在线直播.html │ ├── 中文网海贼王.html │ ├── 中顾集团.html │ ├── 九把刀小说.html │ ├── 京能集团.html │ ├── 什么是健康.html │ ├── 价格表模板.html │ ├── 会计实操培训.html │ ├── 佳缘佳信.html │ ├── 侠盗猎车手圣安地列斯存档.html │ ├── 信达集团.html │ ├── 全书免费小说.html │ ├── 全球未解之谜.html │ ├── 刘德华全家照.html │ ├── 北京二手货车.html │ ├── 北京协和医院如何挂号.html │ ├── 北京卡酷少儿频道直播.html │ ├── 北京磁动力新能源汽车.html │ ├── 北京科教频道在线直播.html │ ├── 华宇物流查询单号.html │ ├── 原始点疗法感冒发烧.html │ ├── 口袋妖怪漆黑的魅影4.5.html │ ├── 古代穿越小说.html │ ├── 同声传译培训.html │ ├── 唐家三少全部小说.html │ ├── 
嘉民集团.html │ ├── 国内新闻.html │ ├── 国际足球新闻.html │ ├── 在线制作证件照.html │ ├── 地下城与勇士职业.html │ ├── 塔罗牌在线占卜事业.html │ ├── 夜间版赛尔号.html │ ├── 天天酷跑破解版微信.html │ ├── 天津汽车摇号.html │ ├── 太平保险.html │ ├── 太平洋保险意外险.html │ ├── 好看的奇幻小说.html │ ├── 好看的校园小说.html │ ├── 好看的黑道小说.html │ ├── 如何鉴别乳液的好坏.html │ ├── 姓名及其身份证号.html │ ├── 安卓怎么安装两个微信.html │ ├── 完结的小说.html │ ├── 官网魅族.html │ ├── 家电维修培训.html │ ├── 少年魔王.html │ ├── 山东齐鲁电视台在线直播.html │ ├── 山东齐鲁频道直播.html │ ├── 山竹的营养价值.html │ ├── 巾帼共筑中国梦演讲稿.html │ ├── 常州公共自行车.html │ ├── 常熟叉车培训.html │ ├── 幻影现实.html │ ├── 幽默视频.html │ ├── 广东珠江频道直播.html │ ├── 广州市社会保险.html │ ├── 应聘班级服务员演讲稿.html │ ├── 弓形虫抗体.html │ ├── 志元食品集团.html │ ├── 怎么下载视频.html │ ├── 怎么祛斑小窍门.html │ ├── 怎样下载有声小说.html │ ├── 怎样查黄道吉日.html │ ├── 怎样炸薯条.html │ ├── 我是歌手二季一期直播.html │ ├── 所有特殊符号.html │ ├── 手机无法发送短信.html │ ├── 排列三字谜.html │ ├── 搜狐网新闻.html │ ├── 搞笑生日祝福短信.html │ ├── 敬业与乐业.html │ ├── 新浪围棋.html │ ├── 新浪新闻.html │ ├── 日产汽车.html │ ├── 时尚频道直播.html │ ├── 智联招聘哈尔滨.html │ ├── 有趣新闻.html │ ├── 权健国际自然医学集团.html │ ├── 查快递号.html │ ├── 模具简历模板.html │ ├── 民间偏方大全集锦.html │ ├── 气电两种车.html │ ├── 求职简历模板.html │ ├── 江苏直播.html │ ├── 河北区号.html │ ├── 河北移动彩铃.html │ ├── 河南都市频道在线直播.html │ ├── 泰山牌石膏板.html │ ├── 洋丰集团.html │ ├── 洛克王国时空.html │ ├── 济南影视在线直播.html │ ├── 济南生活频道在线直播.html │ ├── 浙江移动网上选号.html │ ├── 海贼王无尽的冒险.html │ ├── 深圳手机水货报价单.html │ ├── 渤海集团.html │ ├── 游戏赛车.html │ ├── 游民星空.html │ ├── 湖北恒安纸业.html │ ├── 湖南申湘汽车集团.html │ ├── 演讲稿.html │ ├── 烟台化工企业.html │ ├── 烧腊培训.html │ ├── 焊工培训.html │ ├── 熊猫视频直播.html │ ├── 狂野飙车.html │ ├── 王濛和周洋的关系.html │ ├── 环保产业.html │ ├── 珠江直播.html │ ├── 珠海新闻.html │ ├── 生肖与楼层的关系.html │ ├── 申通快递单.html │ ├── 申通快递号.html │ ├── 申通查单.html │ ├── 电子书少年药王.html │ ├── 电工培训.html │ ├── 电脑上下载微信.html │ ├── 电脑微信.html │ ├── 电脑组装配置单.html │ ├── 白木耳的营养价值.html │ ├── 百家姓.html │ ├── 皇家凯旋木门.html │ ├── 皮卡改装越野房车.html │ ├── 盗墓传说.html │ ├── 盗墓笔记全集有声小说.html │ ├── 社会与法频道直播.html │ ├── 福彩字谜.html │ ├── 福彩字谜画谜.html │ ├── 移动网上营业.html │ ├── 穿越好看的小说.html │ ├── 笔记本电脑屏幕变暗.html │ ├── 第一财经视频直播.html │ ├── 签证工作证明模板.html │ ├── 精灵传说.html │ ├── 
红豆杉的养殖方法视频.html │ ├── 组织架构图模板.html │ ├── 经典语录说说.html │ ├── 结婚准备清单.html │ ├── 给我一个身份证号.html │ ├── 给我个身份证号.html │ ├── 缤纷百货.html │ ├── 网上缴费充值.html │ ├── 网上订火车.html │ ├── 网络工程师培训.html │ ├── 网页登陆微信.html │ ├── 羽毛球直播.html │ ├── 职业规划模板.html │ ├── 能出没之环球大冒险.html │ ├── 腾讯网新闻.html │ ├── 腾讯视频直播.html │ ├── 自学考试专业.html │ ├── 自驾游朝鲜旅游的见闻.html │ ├── 致富网养殖业.html │ ├── 英国探亲签证材料清单.html │ ├── 英文菜单.html │ ├── 英语作文万能模板.html │ ├── 英雄连勇气传说.html │ ├── 蜜桔种植技术.html │ ├── 装修费用清单.html │ ├── 观音生日.html │ ├── 视频码率与清晰度关系.html │ ├── 请假条.html │ ├── 贵州卫视在线直播回放.html │ ├── 贵阳直播.html │ ├── 赶集网二手车.html │ ├── 足疗师培训.html │ ├── 身份证号码查询手机号.html │ ├── 迅雷免费高清.html │ ├── 邵逸夫医院挂号.html │ ├── 郑州二手货车.html │ ├── 都是地府惹的祸有声小说.html │ ├── 醉红楼.html │ ├── 重庆市委常委名单.html │ ├── 重庆酸辣粉培训.html │ ├── 金光集团.html │ ├── 霸气称号.html │ ├── 青海酿皮的做法视频.html │ ├── 音乐符号.html │ ├── 顺丰快递号.html │ ├── 香芋种植技术.html │ ├── 驾驶证号查询驾照.html │ ├── 鬼吹灯全集小说.html │ ├── 魔兽领主有声小说.html │ ├── 鲷鱼的营养价值.html │ ├── 黑龙江电信.html │ ├── 齐鲁卫视直播.html │ └── 齐鲁台直播.html └── query_id.txt ├── readme.txt └── user_rating ├── 1.txt ├── 10.txt ├── 11.txt ├── 12.txt ├── 13.txt ├── 14.txt ├── 15.txt ├── 16.txt ├── 17.txt ├── 18.txt ├── 19.txt ├── 2.txt ├── 20.txt ├── 21.txt ├── 22.txt ├── 3.txt ├── 4.txt ├── 5.txt ├── 6.txt ├── 7.txt ├── 8.txt └── 9.txt /README.md: -------------------------------------------------------------------------------- 1 | # [PSCMModel](https://github.com/THUIR/PSCMModel) 2 | 3 | PSCMModel is a small set of Python scripts for the user click models based on Yandex version (https://github.com/varepsilon/clickmodels). 4 | 5 | A *Click Model* is a probabilistic graphical model used to predict search engine click data from past observations. 6 | 7 | This project is aimed to implement recently proposed click models and intended to be easy-to-read and easy-to-modify. 
If it's not, please let me know how to improve it :) 8 | 9 | # Models Implemented 10 | - *Partially Sequential Click Model* ( **PSCM** ) model: Chao Wang, Yiqun Liu, Meng Wang, Ke Zhou, Jian-Yun Nie, Shaoping Ma. Incorporating Non-sequential Behavior into Click Models. SIGIR (2015). 11 | - *Temporal Hidden Click Model* ( **THCM** ) model: Danqing Xu, Yiqun Liu, Min Zhang, Shaoping Ma. Incorporating revisiting behaviors into click models. WSDM (2012). 12 | - *Temporal Click Model* ( **TCM** ) model: Wanhong Xu, Eren Manavoglu, Erick Cantú-Paz. Temporal Click Model for Sponsored Search. SIGIR (2010). 13 | - *Partially Observable Markov Model* ( **POM** ) model: Kuansan Wang, Nikolas Gloy, Xiaolong Li. Inferring search behaviors using partially observable markov (pom) model. WSDM (2010). 14 | - *Dynamic Bayesian Network* ( **DBN** ) model: Chapelle, O. and Zhang, Y. 2009. A dynamic bayesian network click model for web search ranking. WWW (2009). (This model is exactly the same implementation as Yandex version) 15 | - *User Browsing Model* ( **UBM** ): Dupret, G. and Piwowarski, B. 2008. A user browsing model to predict search engine click data from past observations. SIGIR (2008). 16 | 17 | # Files 18 | ## README.md 19 | This file. 20 | 21 | ## bin/ 22 | Directory with the scripts. 23 | 24 | ## sample/ 25 | Directory with the sample dataset. 26 | 27 | ## relevance_test_dataset/ 28 | query-result relevance data for PSCM model's paper (Section 5.3) 29 | 30 | ## user_preference_test_dataset/ 31 | user preference test data for PSCM model's paper (Section 5.4) 32 | 33 | # Format of the Input Data 34 | A small example can be found under `sample/` (tab-separated). 5 files are included in this directory: 35 | 36 | - query_id: encode each query into a unique id. 37 | - e.g.: "test 1 10 5" means query ("test") with a unique id (1), 10 sessions are found in search logs and 5 sessions contain click action. 
38 | - query_class: The probability of each search intent for each query. 39 | - e.g.: "test 0.25 0.25 0.25 0.25" means query ("test") has four search intents. Set "query_id 1" for each query if this information is not needed. 40 | - url_id: encode each URL into a unique id. 41 | - train_data, test_data: search logs, in which each line represents one query-session. 10 tab-separated parts are included. The inner separator for each [] is space (" "): 42 | - query_id [url_id * 10] [click * 10] [click_time * 10] [mouse_feature_1 * 10] [mouse_feature_2 * 10] [mouse_feature_3 * 10] [mouse_feature_4 * 10] [mouse_feature_5 * 10] [mouse_feature_6 * 10] 43 | - click: 1 represents click, 0 represents no click 44 | - click_time: >0 represents click time in seconds, -1 represents no click 45 | - mouse_feature_1: The leftmost position the mouse ever reaches in the result's display area 46 | - mouse_feature_2: The user's total rightward movement length in the result's display area 47 | - mouse_feature_3: The total dwell time that the cursor spends in the result's display area, neglecting its horizontal coordinate 48 | - mouse_feature_4: The total dwell time that the cursor spends in the result's display area 49 | - mouse_feature_5: The total time that the cursor hovers over the result's display area 50 | - mouse_feature_6: The number of cursor actions (scroll, text select, move times) that appear in the result's display area 51 | - Ps: Just set the mouse features to 0 if your search logs do not contain mouse movement information. 
# (README.md, continued)
#
#
#
# # Usage
# in bin/config_sample.py: select click models (e.g.: TEST_MODELS = ['PSCM', 'UBM', 'DBN', 'POM', 'TCM', 'THCM'])
#
# in bin/ : python wc_click_model_inference_by_id.py ../sample
#
# # Output
# in target data directory, "/output" directory will be automatically generated, in which model results will be logged:
# - model_name.model: Parameters of this model generated from train_data
# - model_name.model.perplexity: Perplexity metrics of this model tested on test_data
# - model_name.model.relevance: Query-URL-Relevance generated from this model
#
# --------------------------------------------------------------------------------
# /bin/POM_function.py:
# --------------------------------------------------------------------------------
import sys
import os
import re
import urllib
import math
import random

try:
    import safe_math
except ImportError:
    # safe_math is a sibling project module that this file imports but never
    # uses; the name is kept for `from POM_function import *` users, and the
    # guard lets this module be loaded on its own.
    safe_math = None


class Path:
    """One candidate visit path together with its probability.

    Attributes:
        prob: probability of the path (written by compute_P_given_A).
        v_list: list of visit element index.
        s_list: list of 0/1 skip flags, parallel to v_list
            (presumably 1 = visited but skipped -- TODO confirm with caller).
    """

    # Class-level defaults kept for backward compatibility. __init__ always
    # rebinds all three names on the instance.
    prob = 0.0
    v_list = []
    s_list = []

    def __init__(self, prob, v_list, s_list):
        self.prob = prob
        self.v_list = v_list
        self.s_list = s_list

    def tostring(self):
        """Return "prob<TAB>v(s)<TAB>v(s)..." for logging."""
        ret = str(self.prob)
        for i in range(0, len(self.v_list)):
            ret = ret + "\t" + str(self.v_list[i]) + "(" + str(self.s_list[i]) + ")"
        return ret


def _skip_factor(position, skip_flag, param_S, position_limit):
    """Return the factor one path element contributes through param_S.

    An element equal to position_limit contributes nothing (factor 1.0);
    otherwise the factor is param_S[position] when skip_flag is 1 and
    1 - param_S[position] for any other flag value.
    """
    if position == position_limit:
        return 1.0
    if skip_flag == 1:
        return param_S[position]
    return 1.0 - param_S[position]


def compute_P_given_A(path, param_V, param_S, param_first_click, first_flag, position_limit):
    """Compute the probability of ``path``, store it in ``path.prob`` and return it.

    Args:
        path: Path whose v_list / s_list describe the visit sequence.
        param_V: 2-D table indexed as param_V[previous][current].
        param_S: per-position table used for the skip factor.
        param_first_click: per-position table used for the first element.
        first_flag: 0 starts the product at 1.0; any other value starts it
            from param_first_click (or 0.0 for an empty path).
        position_limit: position value for which param_S is not applied.

    NOTE(review): the source was recovered from a whitespace-collapsed dump;
    the transition loop is placed at function level (it also runs when
    first_flag == 0) -- confirm against the original file.
    """
    v_list = path.v_list
    s_list = path.s_list
    if first_flag == 0:
        prob = 1.0
    elif len(s_list) == 0:
        prob = 0.0
    else:
        prob = param_first_click[v_list[0]]
        prob = prob * _skip_factor(v_list[0], s_list[0], param_S, position_limit)
    for i in range(1, len(s_list)):
        prob = prob * param_V[v_list[i - 1]][v_list[i]]
        prob = prob * _skip_factor(v_list[i], s_list[i], param_S, position_limit)
    path.prob = prob
    return prob


def copy_path(v_list, s_list):
    """Return a new Path (prob 0.0) holding copies of the two lists.

    Only the first len(v_list) entries of s_list are copied.
    """
    new_v_list = [v_list[j] for j in range(0, len(v_list))]
    new_s_list = [s_list[j] for j in range(0, len(v_list))]
    return Path(0.0, new_v_list, new_s_list)


def insert_path_list(list, p, max_node_num):
    """Insert ``p`` into ``list`` (kept in descending ``prob`` order, at most
    ``max_node_num`` entries) and report whether it was inserted.

    Returns False, leaving ``list`` untouched, when the list is already full
    and ``p`` is less probable than every stored path.

    Fix: the insert position used to default to len(list), so a path at least
    as probable as every stored path was appended at the tail (breaking the
    ordering) or rejected outright when the list was full. It now defaults to
    the head of the list.
    """
    insert_pos = 0
    # Scan from the least probable end for the first strictly better entry;
    # the new path goes right after it.
    for i in range(0, len(list)):
        if p.prob < list[len(list) - 1 - i].prob:
            insert_pos = len(list) - i
            break
    if insert_pos == max_node_num:
        return False
    list.insert(insert_pos, p)
    # Drop whatever now exceeds the capacity.
    for i in range(max_node_num, len(list)):
        list.pop()
    return True


def add_Qk_list(insert_interval, interval_num, path_list, v_list, s_list, param_V, param_S, param_first_click, first_flag, max_qk_length, max_insert_num, position_limit, max_node_num, basic_flag):
    """Recursively enumerate paths obtained by inserting extra elements
    (with skip flag 1) into ``v_list`` / ``s_list`` and collect them,
    best first, in ``path_list``.

    ``v_list`` and ``s_list`` are modified in place during the search and
    restored before returning. ``interval_num`` is accepted but not used.

    Args:
        insert_interval: index of the interval (counted by elements whose
            flag is 0, starting from position 1) to insert into.
        basic_flag: 1 to first score the current lists and offer them to
            ``path_list``; the branch is abandoned if that insert is rejected.
        max_qk_length: upper bound on (interval length + 2) for an insertion.
        max_insert_num: remaining number of insertions allowed.
        max_node_num: capacity of ``path_list``.

    NOTE(review): indentation was reconstructed from a collapsed dump; the
    final recursive call is placed after the position loop -- confirm.
    """
    if len(s_list) == 0:
        return
    in_sert_flag = True
    if basic_flag == 1:
        basic_path = copy_path(v_list, s_list)
        compute_P_given_A(basic_path, param_V, param_S, param_first_click, first_flag, position_limit)
        in_sert_flag = insert_path_list(path_list, basic_path, max_node_num)
    if in_sert_flag == False:
        return
    if max_insert_num <= 0:
        return
    # Locate the boundaries of the requested interval.
    insert_begin = 0
    insert_end = 0
    interval_index = 0
    insert_flag = 0
    for i in range(1, len(s_list)):
        if s_list[i] == 0:
            insert_begin = insert_end
            insert_end = i
            if interval_index == insert_interval:
                insert_flag = 1
                break
            interval_index = interval_index + 1
    if insert_flag == 0:
        return
    for i in range(0, position_limit):
        # Skip positions already present in the interval.
        occur = 0
        for j in range(insert_begin, insert_end + 1):
            if i == v_list[j]:
                occur = 1
        if occur == 1 or insert_end - insert_begin + 2 > max_qk_length:
            continue
        v_list.insert(insert_end, i)
        s_list.insert(insert_end, 1)
        add_Qk_list(insert_interval, interval_num, path_list, v_list, s_list, param_V, param_S, param_first_click, first_flag, max_qk_length, max_insert_num - 1, position_limit, max_node_num, 1)
        # Undo the insertion so the caller's lists are restored.
        del v_list[insert_end]
        del s_list[insert_end]
    add_Qk_list(insert_interval + 1, interval_num, path_list, v_list, s_list, param_V, param_S, param_first_click, first_flag, max_qk_length, max_insert_num, position_limit, max_node_num, 0)

# --------------------------------------------------------------------------------
# /bin/POM_function.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/THUIR/PSCMModel/fa86b44d5030148b314268a5f311a49067d44330/bin/POM_function.pyc
# --------------------------------------------------------------------------------
# /bin/config_sample.py:
# --------------------------------------------------------------------------------
MAX_ITERATIONS = 40
MAX_ITERATION_POM = 5 #poor performance on large iteration rounds
MAX_ITERATION_THCM = 10 #poor performance on large iteration rounds
DEBUG = False
PRETTY_LOG = True
CLASS_K = 1
DAY_D = 1
TEST_MODELS = ['PSCM', 'UBM', 'DBN','THCM','TCM', 'POM']


MIN_DOCS_PER_QUERY = 10
MAX_DOCS_PER_QUERY = 10

SERP_SIZE = 10
EXTENDED_LOG_FORMAT = False

TRANSFORM_LOG = False
QUERY_INDEPENDENT_PAGER = False

TRAIN_FOR_METRIC = False
PRINT_EBU_STATS = True

MAX_INSERT_NUM = 3
MAX_QK_LENGTH = 2
MAX_TOP_N = 50
MAX_POM_CHAIN = 5



# --------------------------------------------------------------------------------
# /bin/config_sample.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/THUIR/PSCMModel/fa86b44d5030148b314268a5f311a49067d44330/bin/config_sample.pyc
# --------------------------------------------------------------------------------
# /bin/safe_math.py:
# --------------------------------------------------------------------------------
import sys, os, re, math, random


def safe_log(num, base):
    """Return log(num, base) clamped to the range [-20.0, 0.0].

    Inputs <= 1e-6 give -20.0 and inputs >= 1.0 give 0.0, whatever the base.
    """
    if num <= 1e-6:
        return -20.0
    if num >= 1.0:
        return 0.0
    return math.log(num, base)

# --------------------------------------------------------------------------------
# /bin/safe_math.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/THUIR/PSCMModel/fa86b44d5030148b314268a5f311a49067d44330/bin/safe_math.pyc
# --------------------------------------------------------------------------------
# /bin/wc_common.py:
# --------------------------------------------------------------------------------
import math


def arr_string(arr, sep="\t"):
    """Return the items of ``arr`` converted with str() and joined by ``sep``."""
    return sep.join([str(item) for item in arr])


def string_arr(str, sep, process_function):
    """Split the stripped string on ``sep`` and convert each field.

    ``process_function`` is "int", "float" (empty fields become 0.0) or
    anything else to keep the raw strings.
    """
    fields = str.strip().split(sep)
    if process_function == "int":
        return [int(field) for field in fields]
    if process_function == "float":
        return [0.0 if field == "" else float(field) for field in fields]
    return [field for field in fields]


def matrix_string(arr, inner_sep="\t", outer_sep="\n"):
    """Render each row with arr_string and terminate every row with ``outer_sep``."""
    return "".join([arr_string(row, inner_sep) + outer_sep for row in arr])


def arr_string_index(arr, index, sep="\t"):
    """Like arr_string, but renders ``item[index]`` of every item."""
    return sep.join([str(item[index]) for item in arr])


def matrix_string_index(arr, index, inner_sep="\t", outer_sep="\n"):
    """Like matrix_string, but renders ``item[index]`` of every cell."""
    return "".join([arr_string_index(row, index, inner_sep) + outer_sep for row in arr])


class Rect:
    """Axis-aligned rectangle; x1/y1 are inclusive and x2/y2 exclusive in in_rect."""

    x1 = 0
    x2 = 0
    y1 = 0
    y2 = 0

    def __init__(self, x1, y1, x2, y2):
        self.x1 = x1
        self.x2 = x2
        self.y1 = y1
        self.y2 = y2

    def in_rect(self, x, y):
        """Return True when (x, y) lies inside the rectangle."""
        return x >= self.x1 and x < self.x2 and y >= self.y1 and y < self.y2

    def to_string(self):
        """Return "x1,x2,y1,y2" (note the order differs from __init__)."""
        return str(self.x1) + "," + str(self.x2) + "," + str(self.y1) + "," + str(self.y2)


def init_22():
    """Return a fresh 2x2 matrix of zeros."""
    return [[0, 0], [0, 0]]


def print_22(title, row, column, matrix):
    """Print a labelled 2x2 matrix (rows = ``row``, columns = ``column``)."""
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(title + "\t" + column + "=0\t" + column + "=1")
    print(row + "=0\t" + str(matrix[0][0]) + "\t" + str(matrix[0][1]))
    print(row + "=1\t" + str(matrix[1][0]) + "\t" + str(matrix[1][1]))


def print_22_info(title, row, column, matrix):
    """Return the text print_22 would print, each line ending with a newline."""
    info = ""
    info += title + "\t" + column + "=0\t" + column + "=1" + '\n'
    info += row + "=0\t" + str(matrix[0][0]) + "\t" + str(matrix[0][1]) + '\n'
    info += row + "=1\t" + str(matrix[1][0]) + "\t" + str(matrix[1][1]) + '\n'
    return info


def print_22_trans(title, row, column, matrix):
    """Print the transposed 2x2 matrix without row labels (``column`` is unused)."""
    print(title + "\t" + row + "=0\t" + row + "=1")
    print(str(matrix[0][0]) + "\t" + str(matrix[1][0]))
    print(str(matrix[0][1]) + "\t" + str(matrix[1][1]))


def set_string(arr, list):
    """Return ``arr[i]`` for every index ``i`` in ``list``, tab-separated.

    Fix: the loop used to run over len(arr) while indexing ``list``, which
    raised IndexError or produced the wrong number of fields whenever the two
    lengths differed.
    """
    return "\t".join([str(arr[index]) for index in list])


def get_index(name, arr):
    """Return the first position of ``name`` in ``arr``.

    When ``name`` is missing an error line is printed and 0 is returned.
    """
    for i in range(0, len(arr)):
        if name == arr[i]:
            return i
    print("Get index error: " + str(name))
    return 0


def load_valid_user(filename):
    """Load a tab-separated user file, skipping its first (header) line.

    Returns (user_set, user_list): user_set maps the first column to the
    remaining columns; user_list keeps the first column in file order.
    Lines whose first column is empty are ignored.
    """
    user_set = {}
    user_list = []
    with open(filename) as in_file:
        in_file.readline()
        line_list = in_file.readlines()
    for line in line_list:
        arr = line.strip().split("\t")
        user = arr[0]
        if user != "":
            user_set[user] = arr[1:]
            user_list.append(user)
    return (user_set, user_list)


def _index_mouse_feature_rows(line_list, sep, mouse_feature_name_list):
    """Build {user_id: {index: {rank: row}}} from raw data lines.

    Rows whose field count differs from the number of column names are skipped.
    """
    user_index = get_index("user_id", mouse_feature_name_list)
    page_index = get_index("index", mouse_feature_name_list)
    result_index_index = get_index("rank", mouse_feature_name_list)
    mouse_feature_set = {}
    for line in line_list:
        arr = line.strip().split(sep)
        if len(arr) != len(mouse_feature_name_list):
            continue
        user = arr[user_index]
        index = int(arr[page_index])
        result_index = int(arr[result_index_index])
        mouse_feature_set.setdefault(user, {}).setdefault(index, {})[result_index] = arr
    return mouse_feature_set


def load_mouse_feature(filename):
    """Load a tab-separated mouse-feature table whose first line names the columns.

    Returns (mouse_feature_set, mouse_feature_name_list) where
    mouse_feature_set[user_id][index][rank] is the full row as a list of strings.
    """
    with open(filename) as in_file:
        mouse_feature_name_list = in_file.readline().strip().split("\t")
        line_list = in_file.readlines()
    mouse_feature_set = _index_mouse_feature_rows(line_list, "\t", mouse_feature_name_list)
    return (mouse_feature_set, mouse_feature_name_list)


def load_mouse_feature_arff(filename):
    """Load a mouse-feature table stored as ARFF (comma-separated data rows).

    Column names come from the "@attribute" lines before "@data". Returns the
    same structure as load_mouse_feature.

    Fix: a file without an "@data" line used to loop forever because
    readline() keeps returning "" at end of file; the header scan now stops
    at end of file.
    """
    mouse_feature_name_list = []
    with open(filename) as in_file:
        while True:
            line = in_file.readline()
            if not line or line.startswith("@data"):
                break
            if line.startswith("@attribute"):
                arr = line.strip().split(" ")
                mouse_feature_name_list.append(arr[1])
        line_list = in_file.readlines()
    mouse_feature_set = _index_mouse_feature_rows(line_list, ",", mouse_feature_name_list)
    return (mouse_feature_set, mouse_feature_name_list)


def load_result_coordinate(result_coordinate_file_name):
    """Load each result's display area.

    The first two lines of the file are skipped. Every following
    "index<TAB>result_num" line is followed by result_num lines of
    "x<TAB>y<TAB>width<TAB>height". Returns
    (index_coordinate_set, index_result_num_set): a list of Rect per index and
    the result count per index.

    NOTE(review): indentation was reconstructed from a collapsed dump; the
    result count is recorded when an index is first seen -- confirm.
    """
    index_coordinate_set = {}
    index_result_num_set = {}
    with open(result_coordinate_file_name) as result_coordinate_file:
        result_coordinate_file.readline()
        result_coordinate_file.readline()
        while True:
            line = result_coordinate_file.readline()
            if not line:
                break
            fields = line.strip().split("\t")
            if len(fields) != 2:
                continue
            index = int(fields[0])
            result_num = int(fields[1])
            if index not in index_coordinate_set:
                index_coordinate_set[index] = []
                index_result_num_set[index] = result_num
            for i in range(0, result_num):
                rect_list = result_coordinate_file.readline().strip().split("\t")
                left = int(rect_list[0])
                top = int(rect_list[1])
                result_rect = Rect(left, top, left + int(rect_list[2]), top + int(rect_list[3]))
                index_coordinate_set[index].append(result_rect)
    return (index_coordinate_set, index_result_num_set)


def load_human_relevance_binary_label(filename):#median label
    """Load per-annotator relevance labels and binarise their median.

    The header line fixes the column count; columns 0 and 1 are the page index
    and result index, the rest one label per annotator. A median label > 2
    maps to 1, anything else to 0. Returns {index: {result_index: 0 or 1}}.

    Fix: the median position used "/", which is a float (TypeError as a list
    index) on Python 3; "//" gives the same integer on Python 2 and 3.
    """
    relevance_set = {}
    with open(filename) as in_file:
        info_list = in_file.readline().strip().split("\t")
        line_list = in_file.readlines()
    people_num = len(info_list) - 2
    for line in line_list:
        arr = line.strip().split("\t")
        if len(arr) != len(info_list):
            continue
        index = int(arr[0])
        result_index = int(arr[1])
        label_list = sorted([int(arr[2 + i]) for i in range(0, people_num)])
        final_label = label_list[people_num // 2]
        if final_label > 2:
            final_label = 1
        else:
            final_label = 0
        relevance_set.setdefault(index, {})[result_index] = final_label
    return relevance_set


def _ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    if denominator == 0:
        return 0.0
    return numerator / denominator


def compute_ROC(check_p_eye_p, check_p_eye_n, check_n_eye_p, check_n_eye_n):
    """Compare predicted examination (check) with eye-tracking ground truth.

                        eyetracking
                        p(examine)          n(not examine)
        check   p'      check_p_eye_p(TP)   check_p_eye_n(FP)
                n'      check_n_eye_p(FN)   check_n_eye_n(TN)

    Returns a multi-line report with the confusion matrix, accuracy,
    precision, recall, F measure, MCC and Kappa.

    Fix: degenerate matrices (an empty row or column) used to raise
    ZeroDivisionError; undefined metrics are now reported as 0.0, as
    compute_ALL already does.
    """
    TP = float(check_p_eye_p)
    TN = float(check_n_eye_n)
    FP = float(check_p_eye_n)
    FN = float(check_n_eye_p)
    P = TP + FN
    N = TN + FP
    total = P + N
    accuracy = _ratio(TP + TN, total)
    precision = _ratio(TP, TP + FP)
    recall = _ratio(TP, P)
    F = _ratio(2.0 * precision * recall, precision + recall)
    MCC = _ratio(TP * TN - FP * FN, math.sqrt(TP + FP) * math.sqrt(TP + FN) * math.sqrt(TN + FP) * math.sqrt(TN + FN))
    PRa = accuracy
    PRe = 0.0
    if total > 0:
        PRe = ((TP + FP) / total) * ((TP + FN) / total) + ((FP + TN) / total) * ((FN + TN) / total)
    Kappa = _ratio(PRa - PRe, 1.0 - PRe)
    report_lines = [
        "--ROC--",
        "\t\teyetracking",
        "\t\tp(examine)\tn(not examine)",
        "check\tp'\t" + str(check_p_eye_p) + "\t" + str(check_p_eye_n),
        "\tn'\t" + str(check_n_eye_p) + "\t" + str(check_n_eye_n),
        "accuracy\t" + str(accuracy),
        "precision\t" + str(precision),
        "recall\t" + str(recall),
        "F Measure\t" + str(F),
        "MCC\t" + str(MCC),
        "Kappa\t" + str(Kappa),
    ]
    return "".join([text + "\n" for text in report_lines])


def compute_ALL(TN, FP, FN, TP):
    """Return (accuracy, precision, recall, F, Kappa, FPR) for a confusion matrix.

    Any metric whose denominator is zero is left at its zero default.
    MCC is not computed.
    """
    TP = float(TP)
    TN = float(TN)
    FP = float(FP)
    FN = float(FN)
    P = TP + FN
    N = TN + FP
    FPR = 0.0
    if FP + TN > 0:
        FPR = FP / (FP + TN)
    accuracy = 0.0
    if (P + N) > 0:
        accuracy = (TP + TN) / (P + N)
    precision = 0
    if TP + FP > 0:
        precision = (TP) / (TP + FP)
    recall = 0
    if P > 0:
        recall = (TP) / P
    F = 0.0
    if precision + recall > 0:
        F = (2.0 * precision * recall) / (precision + recall)
    Kappa = 0.0
    if P + N > 0:
        PRa = (TP + TN) / (P + N)
        PRe = ((TP + FP) / (P + N)) * ((TP + FN) / (P + N)) + ((FP + TN) / (P + N)) * ((FN + TN) / (P + N))
        if abs(1.0 - PRe) > 0:
            Kappa = (PRa - PRe) / (1.0 - PRe)
    return (accuracy, precision, recall, F, Kappa, FPR)


def compute_avg_var(list):
    """Return (mean, population variance) of ``list``; (0.0, 0.0) when empty."""
    avg = 0.0
    var = 0.0
    N = len(list)
    if N > 0:
        for i in range(0, N):
            avg += list[i]
        avg /= N
        for i in range(0, N):
            var += (list[i] - avg) * (list[i] - avg)
        var /= N
    return (avg, var)


def compute_correlation(x, y, max_len):
    """Return the Pearson correlation of the first min(max_len, len(x), len(y)) pairs.

    Returns 0.0 when there are no pairs or either series has zero variance.
    """
    length = min(max_len, len(x), len(y))
    if length == 0:
        return 0.0
    x_avg = 0.0
    y_avg = 0.0
    for i in range(0, length):
        x_avg = x_avg + x[i]
        y_avg = y_avg + y[i]
    x_avg = x_avg / length
    y_avg = y_avg / length
    a = 0.0
    b = 0.0
    c = 0.0
    for i in range(0, length):
        a = a + (x[i] - x_avg) * (y[i] - y_avg)
        b = b + (x[i] - x_avg) * (x[i] - x_avg)
        c = c + (y[i] - y_avg) * (y[i] - y_avg)
    if b <= 0 or c <= 0:
        return 0.0
    return (a / math.sqrt(b * c))


def load_arff_line(file_name):
    """Split an ARFF file into (head_list, data_list) of raw lines.

    Lines before the first "@data" line go to head_list; every later line,
    except lines that themselves start with "@data", goes to data_list.
    """
    with open(file_name) as in_file:
        line_list = in_file.readlines()
    head_list = []
    data_list = []
    head_flag = 1
    for line in line_list:
        if line.startswith("@data"):
            head_flag = 0
            continue
        if head_flag == 1:
            head_list.append(line)
        else:
            data_list.append(line)
    return (head_list, data_list)

# --------------------------------------------------------------------------------
# /bin/wc_common.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/THUIR/PSCMModel/fa86b44d5030148b314268a5f311a49067d44330/bin/wc_common.pyc
# --------------------------------------------------------------------------------
# /relevance_test_dataset/data/model_result/DBN_result.txt:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/THUIR/PSCMModel/fa86b44d5030148b314268a5f311a49067d44330/relevance_test_dataset/data/model_result/DBN_result.txt
# --------------------------------------------------------------------------------
# /relevance_test_dataset/data/model_result/POM_result.txt:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/THUIR/PSCMModel/fa86b44d5030148b314268a5f311a49067d44330/relevance_test_dataset/data/model_result/POM_result.txt
# --------------------------------------------------------------------------------
# /relevance_test_dataset/data/model_result/PSCM_result.txt:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUIR/PSCMModel/fa86b44d5030148b314268a5f311a49067d44330/relevance_test_dataset/data/model_result/PSCM_result.txt -------------------------------------------------------------------------------- /relevance_test_dataset/data/model_result/TCM_result.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUIR/PSCMModel/fa86b44d5030148b314268a5f311a49067d44330/relevance_test_dataset/data/model_result/TCM_result.txt -------------------------------------------------------------------------------- /relevance_test_dataset/data/model_result/THCM_result.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUIR/PSCMModel/fa86b44d5030148b314268a5f311a49067d44330/relevance_test_dataset/data/model_result/THCM_result.txt -------------------------------------------------------------------------------- /relevance_test_dataset/data/model_result/UBM_result.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUIR/PSCMModel/fa86b44d5030148b314268a5f311a49067d44330/relevance_test_dataset/data/model_result/UBM_result.txt -------------------------------------------------------------------------------- /relevance_test_dataset/data/relevance_label.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUIR/PSCMModel/fa86b44d5030148b314268a5f311a49067d44330/relevance_test_dataset/data/relevance_label.txt -------------------------------------------------------------------------------- /relevance_test_dataset/readme.txt: -------------------------------------------------------------------------------- 1 | 2 | 1. data/relevance_label.txt : human labeled query-relevance file 3 | format: query url relevance 4 | 2. 
data/model_result/ : each model's output relevance 5 | format: query url model_output_relevance_score relevance_label_in_relevance_label.txt 6 | 7 | 8 | -------------------------------------------------------------------------------- /sample/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUIR/PSCMModel/fa86b44d5030148b314268a5f311a49067d44330/sample/.DS_Store -------------------------------------------------------------------------------- /sample/output/_DBN.model: -------------------------------------------------------------------------------- 1 | DBN 2 | LogLikelihood -0.268750970587 3 | Perplexity 1.33332917852 4 | positionPerplexity 1.92859781235 1.65805856396 1.5011797714 1.40660536996 1.23900212094 1.19437064713 1.13020308874 1.10040835275 1.09001822271 1.08484783526 5 | positionPerplexitySkip 1.93565931479 1.34177315361 1.27300991973 1.20212029385 1.10714735702 1.07984293302 1.04744970827 1.031658275 1.02147622384 1.0162527429 6 | positionPerplexityClick 1.92121658227 4.23212196391 4.62189139303 5.68891807783 7.98446502426 9.50664880751 12.4592594355 14.2559551109 25.8494730509 35.2015180249 7 | 8 | +++++DBN: 9 | -------------------------------------------------------------------------------- /sample/output/_DBNModel.model: -------------------------------------------------------------------------------- 1 | DBNModel 2 | LogLikelihood -0.268750970587 3 | Perplexity 1.33332917852 4 | positionPerplexity 1.92859781235 1.65805856396 1.5011797714 1.40660536996 1.23900212094 1.19437064713 1.13020308874 1.10040835275 1.09001822271 1.08484783526 5 | positionPerplexitySkip 1.93565931479 1.34177315361 1.27300991973 1.20212029385 1.10714735702 1.07984293302 1.04744970827 1.031658275 1.02147622384 1.0162527429 6 | positionPerplexityClick 1.92121658227 4.23212196391 4.62189139303 5.68891807783 7.98446502426 9.50664880751 12.4592594355 14.2559551109 25.8494730509 35.2015180249 7 | 8 | +++++DBN: 
9 | -------------------------------------------------------------------------------- /sample/output/_POM.model: -------------------------------------------------------------------------------- 1 | POM 2 | LogLikelihood -0.34534060654 3 | Perplexity 1.50782923987 4 | positionPerplexity 3.45677206192 1.71008654133 1.48664861466 1.38853142982 1.24626719247 1.21494220648 1.16754703405 1.14710177215 1.13553669444 1.12485885138 5 | positionPerplexitySkip 1.08865023879 1.08747302944 1.08332845055 1.08290128912 1.07975766254 1.0783787359 1.07708682623 1.07419708415 1.07320691768 1.06968399493 6 | positionPerplexityClick 11.6182401136 12.6887286756 12.8746957832 12.6742938908 13.3960048828 14.13153201 14.8849984937 15.5559108162 17.8007995209 16.3949198413 7 | 8 | +++++V: 9 | 0.0 8.90096758955e-06 4.04286637603e-07 5.26813806918e-08 2.25213046034e-09 3.14483663058e-10 1.16415847287e-10 1.76371470641e-12 7.67335045744e-11 2.5962942876e-13 10 | 3.44226897874e-05 0.0 2.6057573024e-05 1.79116353869e-06 2.29998590313e-07 2.34221812048e-08 4.67479758842e-10 1.84474938174e-10 5.61802811654e-11 2.77364522734e-10 11 | 1.85557727267e-06 6.66468271701e-08 0.0 7.99764757849e-06 5.71768579521e-07 1.24710396478e-08 2.48262468405e-09 1.28393961255e-08 1.02388737797e-09 3.11277559679e-12 12 | 1.87550728e-06 1.05150543054e-08 3.07496399744e-08 0.0 3.79329879259e-06 2.67812482506e-07 2.89594176666e-09 1.40750584769e-07 1.71658585903e-09 1.76020212045e-08 13 | 4.21192682267e-07 5.53136644668e-09 3.66549057179e-10 1.43686981229e-07 0.0 1.90534908183e-05 7.69977061153e-07 7.50226734625e-07 4.21536708111e-19 5.87742585676e-20 14 | 4.75207611416e-09 2.80501854151e-09 2.66675771729e-28 4.08963204197e-07 5.50636306184e-09 0.0 1.89617524312e-06 9.19260484075e-06 2.77421231745e-07 2.52208247854e-28 15 | 2.42165460341e-08 2.2862244432e-07 7.5949246253e-09 7.53922938399e-09 2.81879289582e-06 8.63372959395e-07 0.0 1.64077999211e-05 1.39130606525e-07 1.28913826358e-07 16 | 1.88020965097e-08 
2.3091807689e-38 1.01311536604e-21 2.45872880055e-43 1.19297070359e-07 1.38688704043e-08 1.35325549445e-08 0.0 1.0020429948e-07 4.63794611207e-06 17 | 2.93594378441e-06 1.00455500175e-07 3.16848690599e-20 8.01966072008e-29 9.40697038512e-08 7.35098591587e-38 1.07576274248e-07 1.65657669235e-07 0.0 1.04052579643e-06 18 | 3.10365258071e-07 4.30598663257e-18 7.09519818798e-22 9.49021990158e-47 1.57285023164e-57 7.25895146543e-08 2.10352901329e-16 3.25851159562e-07 3.37619379128e-21 0.0 19 | -------------------------------------------------------------------------------- /sample/output/_PSCM.model: -------------------------------------------------------------------------------- 1 | PSCM 2 | LogLikelihood -0.204372996957 3 | Perplexity 1.23005190288 4 | positionPerplexity 1.38257668755 1.35908356213 1.32075807637 1.27105926762 1.2097732569 1.18684544533 1.15172492754 1.14048985036 1.13394937407 1.14425858092 5 | positionPerplexitySkip 1.58729526072 1.28074315885 1.23961424442 1.18610476846 1.13832057875 1.12309306364 1.1107702391 1.10820589308 1.10852432685 1.11769456075 6 | positionPerplexityClick 1.19610786214 1.76766299012 2.03540173018 2.35178106586 3.31519605452 3.69683150317 3.61137145109 3.56652766194 3.42535026038 3.99909227932 7 | 8 | +++++Gamma: 9 | i : 0 10 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 11 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 12 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 13 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 14 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 15 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 16 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 17 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 18 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 19 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 20 | 0.999651689307 0.00214854655344 0.00348010693311 0.00551857920922 0.0130711077906 0.0190011434586 0.0289296436761 0.0323399369852 0.0527245197121 0.0545254077439 0.5 21 | i : 1 22 | 0.5 0.997674418605 0.01111217743 
0.01603341848 0.0281247889163 0.0477396610901 0.0597454448968 0.109929034465 0.0625763789712 0.16641096036 0.5 23 | 0.0190141455896 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 24 | 0.0469856143265 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 25 | 0.059938364581 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 26 | 0.112111763086 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 27 | 0.20987080967 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 28 | 0.20653042629 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 29 | 0.160318186598 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 30 | 0.1966303068 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 31 | 0.177637356221 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 32 | 0.5 0.998613037448 0.00530869944092 0.0078643752273 0.0170655518098 0.0263057366406 0.0334362419999 0.037555261513 0.0595264158238 0.0679172300793 0.5 33 | i : 2 34 | 0.5 0.5 0.994949494949 0.0165220417152 0.0301177303221 0.0528204972604 0.0584539583267 0.116507628374 0.0598228065664 0.186186474157 0.5 35 | 0.5 0.5 0.995515695067 0.0178077916033 0.0390908363474 0.0521736796366 0.127319433968 0.159621292879 0.215662221658 0.126587858295 0.5 36 | 0.0395454743077 0.0462835205326 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 37 | 0.0629610991972 0.0865669037496 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 38 | 0.149191093813 0.156981061621 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 39 | 0.207087571877 0.249878640381 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 40 | 0.246292011336 0.292977767228 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 41 | 0.214793647298 0.39620399563 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 42 | 0.185538983362 0.24182047196 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 43 | 0.220085206397 0.350706809015 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 44 | 0.5 0.5 0.997613365155 0.00813670450286 0.0176130812444 0.0267717299555 0.035540402083 0.0394506197544 0.0659031297076 0.0779423755478 0.5 45 | i : 3 46 | 0.5 0.5 0.5 0.992857142857 0.0335009566065 0.05747645313 0.0703636903794 0.114038062935 0.0660368513853 0.191444890285 0.5 47 | 0.5 0.5 0.5 0.9921875 0.0431111305154 
0.0597743720934 0.134602926657 0.167742230353 0.215701675403 0.132115520212 0.5 48 | 0.5 0.5 0.5 0.992805755396 0.038128632001 0.0678381440224 0.0696667369902 0.0968956460386 0.177900808021 0.255966690866 0.5 49 | 0.0617688453888 0.077902601455 0.0727568271826 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 50 | 0.156180280677 0.188320543228 0.208167981368 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 51 | 0.253026784816 0.266811274569 0.257363644853 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 52 | 0.292023899901 0.343881773893 0.40400818707 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 53 | 0.20506814124 0.366134702762 0.421424806993 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 54 | 0.234733377543 0.316453092248 0.359170344176 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 55 | 0.256733457543 0.376221179396 0.336078794632 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 56 | 0.5 0.5 0.5 0.996336996337 0.018734676248 0.0298208209919 0.0386238971547 0.0408210158332 0.0704224369159 0.0781353584502 0.5 57 | i : 4 58 | 0.5 0.5 0.5 0.5 0.986111111111 0.063599549244 0.0773353730052 0.134267939849 0.0744278927094 0.226600047073 0.5 59 | 0.5 0.5 0.5 0.5 0.981132075472 0.0639062296604 0.146392525096 0.18161197252 0.230115245013 0.140637858712 0.5 60 | 0.5 0.5 0.5 0.5 0.984848484848 0.0740265327795 0.0785658567108 0.104928708572 0.165102075581 0.300103373929 0.5 61 | 0.5 0.5 0.5 0.5 0.990196078431 0.0468722495219 0.119423552148 0.10400496628 0.210311029866 0.130850820814 0.5 62 | 0.153187661437 0.164376479554 0.205202802288 0.138506254331 0.5 0.5 0.5 0.5 0.5 0.5 0.5 63 | 0.281412516073 0.377381871593 0.270455410313 0.224731628973 0.5 0.5 0.5 0.5 0.5 0.5 0.5 64 | 0.343794276672 0.33841688628 0.430185214708 0.450182508644 0.5 0.5 0.5 0.5 0.5 0.5 0.5 65 | 0.251749918694 0.366215568119 0.454057449828 0.376981629891 0.5 0.5 0.5 0.5 0.5 0.5 0.5 66 | 0.248344239827 0.33502288213 0.416014747387 0.370733909154 0.5 0.5 0.5 0.5 0.5 0.5 0.5 67 | 0.30736495145 0.396521836978 0.395768954375 0.3889864697 0.5 0.5 0.5 0.5 0.5 0.5 0.5 68 | 0.5 0.5 0.5 0.5 0.992063492063 0.0349449638523 
0.0427049525226 0.0434189817975 0.0730677609593 0.0845428529341 0.5 69 | i : 5 70 | 0.5 0.5 0.5 0.5 0.5 0.976744186047 0.0796838192452 0.144612271143 0.0748346639971 0.214763071625 0.5 71 | 0.5 0.5 0.5 0.5 0.5 0.975609756098 0.158585244897 0.164317709365 0.23308888768 0.138486306488 0.5 72 | 0.5 0.5 0.5 0.5 0.5 0.972222222222 0.0806875027455 0.107494124586 0.181421048582 0.296762576505 0.5 73 | 0.5 0.5 0.5 0.5 0.5 0.983870967742 0.122137330609 0.102338053212 0.205140118698 0.137568382803 0.5 74 | 0.5 0.5 0.5 0.5 0.5 0.985507246377 0.0870888456755 0.112392924003 0.197451512082 0.214931231045 0.5 75 | 0.249208495813 0.342138263545 0.25626921009 0.160491045057 0.179681487292 0.5 0.5 0.5 0.5 0.5 0.5 76 | 0.396938104535 0.364005836017 0.427952276304 0.416137106191 0.231455657105 0.5 0.5 0.5 0.5 0.5 0.5 77 | 0.24975703776 0.366866867582 0.482810368569 0.375384255519 0.337911795639 0.5 0.5 0.5 0.5 0.5 0.5 78 | 0.247397134416 0.330204375452 0.41188785209 0.425200146919 0.346275920376 0.5 0.5 0.5 0.5 0.5 0.5 79 | 0.353673724991 0.398411143313 0.384874861566 0.359262120936 0.365945551172 0.5 0.5 0.5 0.5 0.5 0.5 80 | 0.5 0.5 0.5 0.5 0.5 0.9875 0.0430939884466 0.0450410585043 0.0739409124186 0.090870067628 0.5 81 | i : 6 82 | 0.5 0.5 0.5 0.5 0.5 0.5 0.971428571429 0.13343382814 0.0713814370954 0.23301188585 0.5 83 | 0.5 0.5 0.5 0.5 0.5 0.5 0.941176470588 0.177560138246 0.230443918202 0.145902784638 0.5 84 | 0.5 0.5 0.5 0.5 0.5 0.5 0.96875 0.106133295244 0.177502176749 0.298597657054 0.5 85 | 0.5 0.5 0.5 0.5 0.5 0.5 0.958333333333 0.100860085018 0.225165246083 0.141527598006 0.5 86 | 0.5 0.5 0.5 0.5 0.5 0.5 0.969696969697 0.118429459072 0.216555348267 0.214829885798 0.5 87 | 0.5 0.5 0.5 0.5 0.5 0.5 0.980392156863 0.0952755111414 0.0930277498643 0.367414279809 0.5 88 | 0.360769111415 0.290486119962 0.402677934762 0.411509127803 0.210828922289 0.213835587255 0.5 0.5 0.5 0.5 0.5 89 | 0.243399436057 0.398377249587 0.490601524738 0.383577976047 0.35166146247 0.27941354888 0.5 0.5 
0.5 0.5 0.5 90 | 0.278199264095 0.340674931892 0.394711759459 0.428517101601 0.382099952444 0.381817594202 0.5 0.5 0.5 0.5 0.5 91 | 0.331006795893 0.407490163895 0.391925975984 0.359262120936 0.366637013722 0.464510725891 0.5 0.5 0.5 0.5 0.5 92 | 0.5 0.5 0.5 0.5 0.5 0.5 0.983050847458 0.0450214616231 0.0740684597255 0.09149146547 0.5 93 | i : 7 94 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.941176470588 0.0757838917927 0.231983859217 0.5 95 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.916666666667 0.236683970098 0.149961557246 0.5 96 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.952380952381 0.185764542719 0.301808058289 0.5 97 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.954545454545 0.220363305896 0.13528243736 0.5 98 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.964285714286 0.21132140631 0.21129982169 0.5 99 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.967741935484 0.0957822151716 0.382698277371 0.5 100 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.978260869565 0.101328594127 0.108812380671 0.5 101 | 0.215840692652 0.339683057338 0.493074794784 0.329641920317 0.272606084927 0.26705408963 0.225800003501 0.5 0.5 0.5 0.5 102 | 0.263752968138 0.328481055755 0.400595325616 0.436461660299 0.376993936811 0.37111309738 0.321789792364 0.5 0.5 0.5 0.5 103 | 0.363965660252 0.378489028801 0.393956862738 0.389495459512 0.366718120366 0.462018756659 0.268899959098 0.5 0.5 0.5 0.5 104 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.981818181818 0.0747140262912 0.0862560388423 0.5 105 | i : 8 106 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.96875 0.232118553393 0.5 107 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.888888888889 0.149204092626 0.5 108 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.923076923077 0.3124118341 0.5 109 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.923076923077 0.14106000881 0.5 110 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.928571428571 0.215020358271 0.5 111 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.962962962963 0.383186878901 0.5 112 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.961538461538 0.106029036554 0.5 113 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.975 0.102682325264 0.5 114 | 0.229638978328 0.264783835887 
0.398866592572 0.376901006542 0.327201451266 0.35499233302 0.307292930776 0.213716902224 0.5 0.5 0.5 115 | 0.373560030825 0.410595898915 0.394891717748 0.411570231207 0.371201427318 0.466109668735 0.331657920179 0.246898454645 0.5 0.5 0.5 116 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.96875 0.0896842433044 0.5 117 | i : 9 118 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.909090909091 0.5 119 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.928571428571 0.5 120 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.875 0.5 121 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.933333333333 0.5 122 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.888888888889 0.5 123 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.833333333333 0.5 124 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.954545454545 0.5 125 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.96 0.5 126 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.967741935484 0.5 127 | 0.305366521591 0.351374802145 0.33603745001 0.359262120936 0.339683057338 0.4166411194 0.255235837726 0.197239226301 0.221295517334 0.5 0.5 128 | 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.965517241379 0.5 129 | -------------------------------------------------------------------------------- /sample/output/_TCM.model: -------------------------------------------------------------------------------- 1 | TCM 2 | LogLikelihood -0.412542848671 3 | Perplexity 1.75989467278 4 | positionPerplexity 5.62152735478 1.96205258439 1.60779794696 1.46500594252 1.25663292419 1.21119243018 1.14706552893 1.12286411062 1.10554909283 1.09925881246 5 | positionPerplexitySkip 1.03093768332 1.03002917611 1.03095865931 1.03104391349 1.03094432531 1.0309862092 1.03093666373 1.03095807117 1.03093752498 1.03096182478 6 | positionPerplexityClick 33.3209493236 34.0198292373 33.3127257107 33.3337810658 33.3291195423 33.3332721401 33.3242091473 33.328658631 33.33728031 33.5071987899 7 | 8 | ++++alpha beta gamma log_likelihood: 9 | -0.32 0.915767617962 0.915767617962 -8382.33034452 10 | 0.32 0.337336644547 0.337336644547 -5085.17764209 11 | 0.25 0.167324838171 
0.167324838171 -3028.13779854 12 | 0.35 0.102593331433 0.102593331433 -2099.49387753 13 | 0.24 0.0737694906974 0.0737694906974 -1588.76908179 14 | -------------------------------------------------------------------------------- /sample/output/_THCMOri.model: -------------------------------------------------------------------------------- 1 | THCMOri 2 | LogLikelihood -0.328242137981 3 | Perplexity 1.41580284817 4 | positionPerplexity 2.17279318697 1.68485281534 1.5413429223 1.44481316736 1.30055113125 1.26085628264 1.20975347583 1.1896571129 1.17604999545 1.17735839162 5 | positionPerplexitySkip 2.38258797742 1.44376253685 1.36859731942 1.27927708335 1.1907598345 1.1691813315 1.14414138951 1.14199218961 1.13325147051 1.13852474065 6 | positionPerplexityClick 1.97250616375 3.33796919191 3.46754203029 4.26469371258 5.602530856 5.96072565202 7.03200556261 6.0328399176 7.16598810369 7.02895035281 7 | 8 | alpha: 0.955427126886 9 | gamma: 0.747801604862 10 | -------------------------------------------------------------------------------- /sample/output/_UBM.model: -------------------------------------------------------------------------------- 1 | UBM 2 | LogLikelihood -0.250621373177 3 | Perplexity 1.30759891966 4 | positionPerplexity 1.92011283673 1.59308806521 1.43154195657 1.35026357599 1.20989689037 1.17346235985 1.1266509292 1.09974678291 1.08889463464 1.08233116511 5 | positionPerplexitySkip 1.93680283669 1.22504756035 1.15391760848 1.11727528781 1.06291811092 1.04850727669 1.03482967798 1.03029320497 1.02511692014 1.0187943291 6 | positionPerplexityClick 1.90275629277 5.09721924516 6.22939290265 7.2796237367 10.3313775342 11.9042395266 16.4867379906 14.6621111659 20.646340699 27.1661996686 7 | 8 | +++++Gamma: 9 | M : 0 10 | 0.96317196559 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 11 | 0.417058357466 0.390035767419 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 12 | 0.532508718924 0.174829104072 0.221289685013 0.5 0.5 0.5 0.5 0.5 0.5 0.5 13 | 0.478976785828 0.320012246925 
0.112044455348 0.159537601372 0.5 0.5 0.5 0.5 0.5 0.5 14 | 0.471333680745 0.258655397628 0.137343889733 0.0710589946175 0.0982413538764 0.5 0.5 0.5 0.5 0.5 15 | 0.49992810215 0.292195651926 0.110970328432 0.0988110850954 0.0353910292879 0.0601630919786 0.5 0.5 0.5 0.5 16 | 0.546386396482 0.26857792627 0.118296749132 0.154700813816 0.0484557486255 0.0346881905088 0.0382008282062 0.5 0.5 0.5 17 | 0.651198264213 0.277696122466 0.204379812944 0.143029567371 0.0758470242375 0.0293622902401 0.0142086747104 0.0427363387438 0.5 0.5 18 | 0.61363214542 0.316586991058 0.253603755957 0.115768574586 0.0728879680697 0.0612626122735 0.0377192783729 0.0285534987096 0.0213795456865 0.5 19 | 0.558525389346 0.344346236266 0.269308647727 0.073561241644 0.0681998969163 0.0727148034139 0.0334268719148 0.0304247627397 0.00952522081619 0.0183754636613 20 | -------------------------------------------------------------------------------- /sample/query_id: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUIR/PSCMModel/fa86b44d5030148b314268a5f311a49067d44330/sample/query_id -------------------------------------------------------------------------------- /user_preference_test_dataset/query_result_page/css/sessions.txt: -------------------------------------------------------------------------------- 1 | // Place all the styles related to the Sessions controller here. 2 | // They will automatically be included in application.css. 3 | // You can use Sass (SCSS) here: http://sass-lang.com/ 4 | -------------------------------------------------------------------------------- /user_preference_test_dataset/query_result_page/css/static_pages.css.scss: -------------------------------------------------------------------------------- 1 | // Place all the styles related to the StaticPages controller here. 2 | // They will automatically be included in application.css. 
3 | // You can use Sass (SCSS) here: http://sass-lang.com/ 4 | -------------------------------------------------------------------------------- /user_preference_test_dataset/query_result_page/css/tasks.txt: -------------------------------------------------------------------------------- 1 | // Place all the styles related to the Tasks controller here. 2 | // They will automatically be included in application.css. 3 | // You can use Sass (SCSS) here: http://sass-lang.com/ 4 | -------------------------------------------------------------------------------- /user_preference_test_dataset/query_result_page/css/users.css.scss: -------------------------------------------------------------------------------- 1 | // Place all the styles related to the Users controller here. 2 | // They will automatically be included in application.css. 3 | // You can use Sass (SCSS) here: http://sass-lang.com/ 4 | -------------------------------------------------------------------------------- /user_preference_test_dataset/query_result_page/pages/中顾集团.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 |54 | 搜索内容为 55 | 中顾集团 56 |
57 |PSCM Model
64 |UBM Model
119 | 中顾集团 120 | 121 |54 | 搜索内容为 55 | 佳缘佳信 56 |
57 |PSCM Model
64 |UBM Model
127 | 佳缘佳信_世纪佳缘官方客户端软件,免费下载,轻松和TA聊起来! 128 | 129 |54 | 搜索内容为 55 | 少年魔王 56 |
57 |PSCM Model
64 |UBM Model
127 | 少年魔王最新章节_少年魔王全文阅读_免费小说网 128 | 129 |54 | 搜索内容为 55 | 弓形虫抗体 56 |
57 |PSCM Model
64 |UBM Model
127 | 弓形虫抗体_化验检查 _寻医问药_xywy.com 128 | 129 |54 | 搜索内容为 55 | 搜狐网新闻 56 |
57 |PSCM Model
64 |UBM Model
135 | 搜狐新闻-搜狐 136 | 137 |54 | 搜索内容为 55 | 游民星空 56 |
57 |PSCM Model
64 |UBM Model
119 | 游民星空 - 大型单机游戏媒体 提供最具特色单机游戏资讯、下载 120 | 121 |54 | 搜索内容为 55 | 湖北恒安纸业 56 |
57 |PSCM Model
64 |UBM Model
127 | 恒安(湖北)心相印纸制品有限公司怎么样?(公司评价、点评、印象... 128 | 129 |54 | 搜索内容为 55 | 湖南申湘汽车集团 56 |
57 |PSCM Model
64 |UBM Model
119 | 首页/湖南申湘汽车销售有限责任公司/别克经销商官方网站/上海通用别克 120 | 121 |54 | 搜索内容为 55 | 英文菜单 56 |
57 |PSCM Model
64 |UBM Model
127 | 英文菜单_英文菜单价格_英文菜单厂家_第1页_世界工厂网 128 | 129 |54 | 搜索内容为 55 | 观音生日 56 |
57 |PSCM Model
64 |UBM Model
127 | 观音生日~~~-喀秋莎-搜狐博客 128 | 129 |54 | 搜索内容为 55 | 视频码率与清晰度关系 56 |
57 |PSCM Model
64 |UBM Model
111 | 谈谈码率,帧率,分辨率和清晰度的关系 - Premiere应用与解疑 - DV视... 112 | 113 |54 | 搜索内容为 55 | 鲷鱼的营养价值 56 |
57 |PSCM Model
64 |UBM Model
119 | 鲷鱼的营养价值_好处_营养成分 - 大众养生网 120 | 121 |