"
1014 | ],
1015 | "text/plain": [
1016 | " searched_for hashtags \\\n",
1017 | "5643 #人工知能 トルコ ドローン 神風ドローン 顔認証 ai drone 人工知能 \n",
1018 | "2944 #python 駆け出しエンジニアとつながりたい python プログラミング初心者 \n",
1019 | "210 #kaggle kaggle 機械学習 データサイエンティスト \n",
1020 | "2115 #python 駆け出しエンジニアと繋がりたい プログラミング python プログラミング学習 プログラミ... \n",
1021 | "1275 #データサイエンティスト 統計学 統計学専攻 データサイエンティスト 統計 外資系 外資系企業 \n",
1022 | "... ... ... \n",
1023 | "1666 #python python 機械学習 回帰分析 \n",
1024 | "6165 #人工知能 makeinglandscape deeplearning nowlearning ai g... \n",
1025 | "5210 #python オホーツク fswebcam python イマソラ 北海道 photo raspberrypi \n",
1026 | "873 #人工知能 ai 人工知能 \n",
1027 | "3881 #人工知能 セルフブランディング ai ルーチンワーク 人工知能 \n",
1028 | "\n",
1029 | " hashtag_len \n",
1030 | "5643 7 \n",
1031 | "2944 3 \n",
1032 | "210 3 \n",
1033 | "2115 5 \n",
1034 | "1275 6 \n",
1035 | "... ... \n",
1036 | "1666 3 \n",
1037 | "6165 6 \n",
1038 | "5210 7 \n",
1039 | "873 2 \n",
1040 | "3881 4 \n",
1041 | "\n",
1042 | "[100 rows x 3 columns]"
1043 | ]
1044 | },
1045 | "execution_count": 28,
1046 | "metadata": {},
1047 | "output_type": "execute_result"
1048 | }
1049 | ],
1050 | "source": [
1051 | "df.sample(100)"
1052 | ]
1053 | },
1054 | {
1055 | "cell_type": "code",
1056 | "execution_count": 26,
1057 | "metadata": {},
1058 | "outputs": [],
1059 | "source": [
1060 | "df.to_csv('twitter_post.csv', index=False)"
1061 | ]
1062 | },
1063 | {
1064 | "cell_type": "code",
1065 | "execution_count": null,
1066 | "metadata": {},
1067 | "outputs": [],
1068 | "source": []
1069 | }
1070 | ],
1071 | "metadata": {
1072 | "kernelspec": {
1073 | "display_name": "Python 3",
1074 | "language": "python",
1075 | "name": "python3"
1076 | },
1077 | "language_info": {
1078 | "codemirror_mode": {
1079 | "name": "ipython",
1080 | "version": 3
1081 | },
1082 | "file_extension": ".py",
1083 | "mimetype": "text/x-python",
1084 | "name": "python",
1085 | "nbconvert_exporter": "python",
1086 | "pygments_lexer": "ipython3",
1087 | "version": "3.7.2"
1088 | }
1089 | },
1090 | "nbformat": 4,
1091 | "nbformat_minor": 4
1092 | }
1093 |
--------------------------------------------------------------------------------
/nlp/twitter_analytics_using_nlplot/wordcloud.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/takapy0210/geek_blog/e36604f01d26f4d14bdacc6bb2995c929d49fdfa/nlp/twitter_analytics_using_nlplot/wordcloud.png
--------------------------------------------------------------------------------
/other/hatenablog_css/design.css:
--------------------------------------------------------------------------------
1 | /* */
2 | @import "https://blog.hatena.ne.jp/-/theme/8599973812270629022.css";
3 |
4 | /* ブログタイトル文字サイズ変更 */
5 | @media screen and (max-width: 640px){
6 | #blog-title #title {
7 | font-size: 20px !important;
8 | }
9 | }
10 |
11 | /* タイトル下のブログ説明部分 (設定 > 基本設定 > ブログの説明) */
12 | h2#blog-description {
13 | font-size: 12px !important; /* 文字サイズ変更 */
14 | margin: 1px; /* 上下左右に余白追加 */
15 | }
16 |
17 | /* 【タブレット、PC限定】 */
18 | @media screen and (min-width:641px) {
19 | h2#blog-description {
20 | font-size: 18px !important; /* 文字サイズ変更 */
21 | margin: 5px; /* 上下左右に余白追加 */
22 | }
23 | }
24 |
25 | /* */
26 |
27 | /* ### ヘッダーの背景色 ### */
28 | #blog-title{
29 | background: #f8f8ff !important;
30 | }
31 | /* ### Blog title text color ### */
32 | #title a{
33 | color: #カラーコード !important; /* NOTE(review): "#カラーコード" ("color code") is an unfilled placeholder, not a valid hex color, so browsers discard this declaration - replace it with a real value */
34 | }
35 | /* ### Blog description text color ### */
36 | #blog-description{
37 | color: #カラーコード !important; /* NOTE(review): same unfilled placeholder as above; this declaration currently has no effect */
38 | }
39 |
40 | /*
41 | 枠 (div) の設定 (画面幅、境界線、背景色)
42 | -------------------------------------------------------*/
43 | /* [Tablet and PC only] */
44 | @media screen and (min-width:641px) {
45 | /* Remove the left/right margins */
46 | #container {
47 | width: 80%; /* width shared by the main area and the sidebar. NOTE(review): the original comment said "stretch to fill the screen", but the value is 80% - confirm intent */
48 | }
49 |
50 | /* Entire main (article) area */
51 | #main {
52 | background-color: #ffffff; /* background color of the main area */
53 | /*border: 1px solid #bde6f2;*/ /* border of the main area */
54 | border-radius: 5px; /* round the corners */
55 | float: left; /* floated left so the sidebar can float left next to it */
56 | margin: 30px 2% 0 8%;
57 | padding: 30px; /* spacing between the main-area text and its border */
58 | width: 50%; /* 50% width */
59 | }
60 |
61 | /* Sidebar */
62 | #box2 {
63 | float: left; /* pack it against the main area */
64 | margin: 30px 3% 0 2%;
65 | width: 22%; /* 22% width; some slack is left so the layout is less likely to break when zoomed */
66 | }
67 | }
68 |
69 | /*
70 | 基本的なフォント設定 (見出しの設定は、「記事エリアの設定」で行う)
71 | -------------------------------------------------------*/
72 | /* 全体のフォントファミリー指定 */
73 | /* ★ WEBフォントの追加設定 */
74 | .entry-content, .entry-title, body {
75 | font-family: 'Noto Sans', 'Noto Sans JP', 'Hiragino Kaku Gothic ProN', メイリオ, Meiryo, sans-serif;
76 | /*letter-spacing: 0.0005em;*/ /* 字間を0.05字分空けて読みやすくする */
77 | }
78 |
79 | /*
80 | 見出しの設定
81 | -------------------------------------------------------*/
82 | /* 記事タイトルのフォント */
83 | h1.entry-title a {
84 | font-size: 15px;
85 | }
86 |
87 | /* 【タブレット、PC限定】 */
88 | @media screen and (min-width:641px) {
89 | h1.entry-title a {
90 | font-size: 22px;
91 | }
92 | }
93 |
94 | /* 見出し1の設定 */
95 | .entry-content h1 {
96 | padding: 0.6em 0.6em;/*上下 左右の余白*/
97 | border-left: solid 2.5px #ffa8a8;/*左線*/
98 | font-size: 22px;
99 | background: #f9fcff;/*背景色*/
100 | /*padding: 10px 20px 15px 20px;*/
101 | }
102 |
103 | /* 見出し2の設定 */
104 | .entry-content h2 {
105 | padding: 0.2em 0.4em;/*上下 左右の余白*/
106 | color: #494949;/*文字色*/
107 | background: transparent;/*背景透明に*/
108 | border-bottom: solid 1.5px #d3d3d3;
109 | font-size: 20px;
110 | }
111 |
112 | /* 見出し3の設定 */
113 | .entry-content h3 {
114 | padding: 0.2em 0.2em;/*上下 左右の余白*/
115 | color: #494949;/*文字色*/
116 | font-size: 18px;
117 | }
118 |
119 | /*
120 | 強調文字列部分の設定 (マークダウンでいう右記の部分: ** 文字列 **)
121 | -------------------------------------------------------*/
122 | /* 蛍光ペンの設定 */
123 | /* (補足) rgbaの4番目の引数は透明度を表す。0で透明、1で完全に塗りつぶす。transparentは一部ブラウザで黒と解釈されるので使わない */
124 | /* (補足) 60%の数値を両方増やすと蛍光ペンが細くなる。両方減らすと太くなる */
125 | /* (補足) 前半を20%、後半を80%とすると、上から20%の位置を透明、80%の位置を色付きとし、間はグラデーションになる */
126 | /* ■蛍光ペンのデザイン:https://naifix.com/strong-css-sample/ */
127 | .entry-content strong {
128 | background: linear-gradient(rgba(246, 210, 139, 0) 60%, rgba(246, 210, 139, 1) 60%);
129 | border-radius: 2px; /* 角を丸める */
130 | }
131 |
132 |
133 | /* 行間の設定 */
134 | /*.entry-content p {margin:0.1}*/
135 | .entry-content p{
136 | line-height: 1.8em; /* 行間の幅調整*/
137 | letter-spacing:0.4pt; /* 文字の間隔調整*/
138 | /*font-size:15px;*/
139 | }
140 |
141 | /*
142 | 目次の設定
143 | -------------------------------------------------------*/
144 | /* 見出し1に対応する目次の余白調整、および自動採番用の変数セット */
145 | ul.table-of-contents > li {
146 | margin-top: 0.1em;
147 | list-style-type: none;
148 | counter-increment: mokuji-1; /* mokuji-1という変数に1を足す */
149 | counter-reset: mokuji-2; /* mokuji-2という変数の値を0に戻す */
150 | line-height:132%;
151 | }
152 |
153 | /* 見出し1に対応する目次の自動採番 */
154 | ul.table-of-contents > li::before{
155 | content: counter(mokuji-1) ". "; /* 文字列挿入。"1. " のような形式 */
156 | }
157 |
158 | /* 見出し2に対応する目次の余白調整、および自動採番用の変数セット */
159 | ul.table-of-contents ul > li {
160 | list-style-type: none;
161 | margin-top: 0;
162 | counter-increment: mokuji-2;
163 | line-height:132%;
164 | }
165 |
166 | /* 見出し2に対応する目次の自動採番 */
167 | ul.table-of-contents ul > li::before {
168 | content: counter(mokuji-1) "." counter(mokuji-2) ". "; /* 文字列挿入。"1.1. " のような形式 */
169 | }
170 |
171 | /* 見出し3以降に対応する目次を非表示にする */
172 | ul.table-of-contents ul ul {
173 | display: none;
174 | line-height:132%;
175 | }
176 |
177 | .entry-content .table-of-contents {
178 | /*margin: 2em 2em;*/ /*目次上下の余白*/
179 | padding: 3em 0 2em 2em; /*目次下内部余白 上,右,下,左*/
180 | /*margin: 0 0 0 3.5em;*/ /*左側余白*/
181 | /*padding: 0.01em 0 0 0 !important;*/ /*行間余白*/
182 | /*border: 1px solid #ddd;*/ /*枠線のスタイル*/
183 | /*background-color: #fff;*/ /*目次内背景色*/
184 | /*font-size: 0.95em;*/ /*文字サイズ*/
185 | /*font-weight: normal;*/ /*文字太さ*/
186 | border-radius: 5px; /*角を丸める*/
187 | }
188 |
189 | ul.table-of-contents > li a:link{
190 | color:#ad8383; /*色はここを変更*/
191 | text-decoration: none;
192 | }
193 |
194 | .entry-content .table-of-contents a:link{
195 | color:#ad8383; /*色はここを変更*/
196 | text-decoration: none;
197 | }
198 |
199 | ul.table-of-contents > li a:visited{
200 | color:#8c6a6a; /*色はここを変更*/
201 | }
202 |
203 | .entry-content .table-of-contents a:visited{
204 | color: #8c6a6a; /*色はここを変更*/
205 | }
206 |
207 | .entry-content .table-of-contents a:hover{
208 | text-decoration:underline; /*下線をつける*/
209 | }
210 |
211 | /*
212 | ソースコードのシンタックスハイライト (Syntax Highlighting) の書式
213 | -------------------------------------------------------*/
214 | /* 文字サイズ変更 */
215 | .entry-content pre.code {
216 | font-size:90%;
217 | line-height:150%;
218 | }
219 |
220 | /* コードの背景色変更 */
221 | .entry-content pre.code {
222 | background-color: #3F3F3F;
223 | color: #DCDCDC;
224 | }
225 | /*
226 | pre.code ol{
227 | margin-top: 0;
228 | margin-bottom: 0;
229 | }
230 | pre.code .code-list{
231 | border-left: 1px solid #999999; *縦線*
232 | padding-left:6px;
233 | }
234 | pre.code .code-list:nth-child(2n+1) {
235 | background-color: #424242; *奇数行の背景色*
236 | }
237 | */
238 | .synSpecial { color: #cc9393; }
239 | .synType { color: #E3CEAB; }
240 | .synComment { color: #7A987A; }
241 | .synPreProc { color: #8c8cb4; }
242 | .synIdentifier { color: #6e96be; }
243 | .synConstant { color: #cc9393; }
244 | .synStatement { color: #efc986; }
245 |
246 |
247 | /*
248 | 追尾する目次
249 | -------------------------------------------------------*/
250 | #stoc-module {
251 | backface-visibility: hidden;
252 | }
253 | #stoc-module.tracking {
254 | margin-bottom: 0;
255 | }
256 | #stoc-module.fixed {
257 | position: fixed;
258 | }
259 | #stoc-module.absolute {
260 | position: absolute;
261 | }
262 | #stoc-module.sticky {
263 | position: -webkit-sticky;
264 | position: sticky;
265 | }
266 | #stoc-module.fade-in {
267 | animation: fadeIn 300ms;
268 | }
269 | @keyframes fadeIn {
270 | 0% {opacity: 0}
271 | 100% {opacity: 1}
272 | }
273 |
274 | #stoc {
275 | overflow-y: auto;
276 | }
277 | #stoc.shadow {
278 | /* Shadows */
279 | background:
280 | radial-gradient(farthest-side at top, rgba(0,0,0,.17), transparent) top / 100% 11px,
281 | radial-gradient(farthest-side at bottom, rgba(0,0,0,.17), transparent) bottom / 100% 11px;
282 | background-repeat: no-repeat;
283 | background-attachment: scroll;
284 | }
285 | #stoc ol {
286 | margin: 0;
287 | padding: 0 0 0 1em;
288 | list-style-type: none;
289 | }
290 | #stoc > ol {
291 | padding-left: 0;
292 | }
293 | #stoc.shadow > ol {
294 | /* Shadow covers */
295 | background:
296 | linear-gradient(#fff 30%, transparent) top / 100% 40px,
297 | linear-gradient(transparent, #fff 70%) bottom / 100% 40px;
298 | background-repeat: no-repeat;
299 | background-attachment: local;
300 | }
301 | #stoc a {
302 | padding: 2px 2px 2px 6px;
303 | display: block;
304 | text-decoration: none;
305 | }
306 | #stoc:not(.touch) a:hover {
307 | background-color: rgba(0,0,0,.04);
308 | text-decoration: underline;
309 | }
310 | #stoc .active {
311 | background-color: rgba(0,0,0,.04);
312 | }
313 |
314 | #stoc::-webkit-scrollbar {
315 | width: 8px;
316 | background: #ececec;
317 | }
318 | #stoc::-webkit-scrollbar-button {
319 | display: none;
320 | }
321 | #stoc::-webkit-scrollbar-thumb {
322 | background: #b1b1b1;
323 | }
324 |
325 |
326 | /*
327 | Categoryをタイル表示に変更(Innocent)
328 | うまく動かない
329 | -------------------------------------------------------*/
330 | /*
331 | .hatena-module-category .hatena-urllist {
332 | margin: 0 0 -6px;
333 | padding: 0;
334 | }
335 | .hatena-module-category .hatena-urllist li::before {
336 | content: none;
337 | }
338 | .hatena-module-category .hatena-urllist li {
339 | border-top: 0;
340 | display: block;
341 | float: left;
342 | margin: 0 6px 6px 0;
343 | padding: 0;
344 | }
345 | .hatena-module-category .hatena-urllist li a {
346 | border: 1px solid #e6e6e6;
347 | border-radius: 2px;
348 | display: block;
349 | font-size: 0.8667em;
350 | line-height: 32px;
351 | padding: 0 12px;
352 | }
353 | .hatena-module-category .hatena-urllist li a:hover {
354 | background-color: #f6f6f6;
355 | color: #333;
356 | }
357 | */
--------------------------------------------------------------------------------
/recommendation/graph/ml-latest-small/README.txt:
--------------------------------------------------------------------------------
1 | Summary
2 | =======
3 |
4 | This dataset (ml-latest-small) describes 5-star rating and free-text tagging activity from [MovieLens](http://movielens.org), a movie recommendation service. It contains 100836 ratings and 3683 tag applications across 9742 movies. These data were created by 610 users between March 29, 1996 and September 24, 2018. This dataset was generated on September 26, 2018.
5 |
6 | Users were selected at random for inclusion. All selected users had rated at least 20 movies. No demographic information is included. Each user is represented by an id, and no other information is provided.
7 |
8 | The data are contained in the files `links.csv`, `movies.csv`, `ratings.csv` and `tags.csv`. More details about the contents and use of all these files follow.
9 |
10 | This is a *development* dataset. As such, it may change over time and is not an appropriate dataset for shared research results. See available *benchmark* datasets if that is your intent.
11 |
12 | This and other GroupLens data sets are publicly available for download at <http://grouplens.org/datasets/>.
13 |
14 |
15 | Usage License
16 | =============
17 |
18 | Neither the University of Minnesota nor any of the researchers involved can guarantee the correctness of the data, its suitability for any particular purpose, or the validity of results based on the use of the data set. The data set may be used for any research purposes under the following conditions:
19 |
20 | * The user may not state or imply any endorsement from the University of Minnesota or the GroupLens Research Group.
21 | * The user must acknowledge the use of the data set in publications resulting from the use of the data set (see below for citation information).
22 | * The user may redistribute the data set, including transformations, so long as it is distributed under these same license conditions.
23 | * The user may not use this information for any commercial or revenue-bearing purposes without first obtaining permission from a faculty member of the GroupLens Research Project at the University of Minnesota.
24 | * The executable software scripts are provided "as is" without warranty of any kind, either expressed or implied, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose. The entire risk as to the quality and performance of them is with you. Should the program prove defective, you assume the cost of all necessary servicing, repair or correction.
25 |
26 | In no event shall the University of Minnesota, its affiliates or employees be liable to you for any damages arising out of the use or inability to use these programs (including but not limited to loss of data or data being rendered inaccurate).
27 |
28 | If you have any further questions or comments, please email <grouplens-info@umn.edu>.
29 |
30 |
31 | Citation
32 | ========
33 |
34 | To acknowledge use of the dataset in publications, please cite the following paper:
35 |
36 | > F. Maxwell Harper and Joseph A. Konstan. 2015. The MovieLens Datasets: History and Context. ACM Transactions on Interactive Intelligent Systems (TiiS) 5, 4: 19:1–19:19.
37 |
38 |
39 | Further Information About GroupLens
40 | ===================================
41 |
42 | GroupLens is a research group in the Department of Computer Science and Engineering at the University of Minnesota. Since its inception in 1992, GroupLens's research projects have explored a variety of fields including:
43 |
44 | * recommender systems
45 | * online communities
46 | * mobile and ubiquitous technologies
47 | * digital libraries
48 | * local geographic information systems
49 |
50 | GroupLens Research operates a movie recommender based on collaborative filtering, MovieLens, which is the source of these data. We encourage you to visit <http://movielens.org> to try it out! If you have exciting ideas for experimental work to conduct on MovieLens, send us an email at <grouplens-info@cs.umn.edu> - we are always interested in working with external collaborators.
51 |
52 |
53 | Content and Use of Files
54 | ========================
55 |
56 | Formatting and Encoding
57 | -----------------------
58 |
59 | The dataset files are written as [comma-separated values](http://en.wikipedia.org/wiki/Comma-separated_values) files with a single header row. Columns that contain commas (`,`) are escaped using double-quotes (`"`). These files are encoded as UTF-8. If accented characters in movie titles or tag values (e.g. Misérables, Les (1995)) display incorrectly, make sure that any program reading the data, such as a text editor, terminal, or script, is configured for UTF-8.
60 |
61 |
62 | User Ids
63 | --------
64 |
65 | MovieLens users were selected at random for inclusion. Their ids have been anonymized. User ids are consistent between `ratings.csv` and `tags.csv` (i.e., the same id refers to the same user across the two files).
66 |
67 |
68 | Movie Ids
69 | ---------
70 |
71 | Only movies with at least one rating or tag are included in the dataset. These movie ids are consistent with those used on the MovieLens web site (e.g., id `1` corresponds to the URL <https://movielens.org/movies/1>). Movie ids are consistent between `ratings.csv`, `tags.csv`, `movies.csv`, and `links.csv` (i.e., the same id refers to the same movie across these four data files).
72 |
73 |
74 | Ratings Data File Structure (ratings.csv)
75 | -----------------------------------------
76 |
77 | All ratings are contained in the file `ratings.csv`. Each line of this file after the header row represents one rating of one movie by one user, and has the following format:
78 |
79 | userId,movieId,rating,timestamp
80 |
81 | The lines within this file are ordered first by userId, then, within user, by movieId.
82 |
83 | Ratings are made on a 5-star scale, with half-star increments (0.5 stars - 5.0 stars).
84 |
85 | Timestamps represent seconds since midnight Coordinated Universal Time (UTC) of January 1, 1970.
86 |
87 |
88 | Tags Data File Structure (tags.csv)
89 | -----------------------------------
90 |
91 | All tags are contained in the file `tags.csv`. Each line of this file after the header row represents one tag applied to one movie by one user, and has the following format:
92 |
93 | userId,movieId,tag,timestamp
94 |
95 | The lines within this file are ordered first by userId, then, within user, by movieId.
96 |
97 | Tags are user-generated metadata about movies. Each tag is typically a single word or short phrase. The meaning, value, and purpose of a particular tag are determined by each user.
98 |
99 | Timestamps represent seconds since midnight Coordinated Universal Time (UTC) of January 1, 1970.
100 |
101 |
102 | Movies Data File Structure (movies.csv)
103 | ---------------------------------------
104 |
105 | Movie information is contained in the file `movies.csv`. Each line of this file after the header row represents one movie, and has the following format:
106 |
107 | movieId,title,genres
108 |
109 | Movie titles are entered manually or imported from <https://www.themoviedb.org/>, and include the year of release in parentheses. Errors and inconsistencies may exist in these titles.
110 |
111 | Genres are a pipe-separated list, and are selected from the following:
112 |
113 | * Action
114 | * Adventure
115 | * Animation
116 | * Children's
117 | * Comedy
118 | * Crime
119 | * Documentary
120 | * Drama
121 | * Fantasy
122 | * Film-Noir
123 | * Horror
124 | * Musical
125 | * Mystery
126 | * Romance
127 | * Sci-Fi
128 | * Thriller
129 | * War
130 | * Western
131 | * (no genres listed)
132 |
133 |
134 | Links Data File Structure (links.csv)
135 | ---------------------------------------
136 |
137 | Identifiers that can be used to link to other sources of movie data are contained in the file `links.csv`. Each line of this file after the header row represents one movie, and has the following format:
138 |
139 | movieId,imdbId,tmdbId
140 |
141 | movieId is an identifier for movies used by <https://movielens.org>. E.g., the movie Toy Story has the link <https://movielens.org/movies/1>.
142 |
143 | imdbId is an identifier for movies used by <http://www.imdb.com>. E.g., the movie Toy Story has the link <http://www.imdb.com/title/tt0114709/>.
144 |
145 | tmdbId is an identifier for movies used by <https://www.themoviedb.org>. E.g., the movie Toy Story has the link <https://www.themoviedb.org/movie/862>.
146 |
147 | Use of the resources listed above is subject to the terms of each provider.
148 |
149 |
150 | Cross-Validation
151 | ----------------
152 |
153 | Prior versions of the MovieLens dataset included either pre-computed cross-folds or scripts to perform this computation. We no longer bundle either of these features with the dataset, since most modern toolkits provide this as a built-in feature. If you wish to learn about standard approaches to cross-fold computation in the context of recommender systems evaluation, see [LensKit](http://lenskit.org) for tools, documentation, and open-source code examples.
154 |
--------------------------------------------------------------------------------
/recommendation/matrix_factorization/data/ml-25m/README.txt:
--------------------------------------------------------------------------------
1 | Summary
2 | =======
3 |
4 | This dataset (ml-25m) describes 5-star rating and free-text tagging activity from [MovieLens](http://movielens.org), a movie recommendation service. It contains 25000095 ratings and 1093360 tag applications across 62423 movies. These data were created by 162541 users between January 09, 1995 and November 21, 2019. This dataset was generated on November 21, 2019.
5 |
6 | Users were selected at random for inclusion. All selected users had rated at least 20 movies. No demographic information is included. Each user is represented by an id, and no other information is provided.
7 |
8 | The data are contained in the files `genome-scores.csv`, `genome-tags.csv`, `links.csv`, `movies.csv`, `ratings.csv` and `tags.csv`. More details about the contents and use of all these files follow.
9 |
10 | This and other GroupLens data sets are publicly available for download at <http://grouplens.org/datasets/>.
11 |
12 |
13 | Usage License
14 | =============
15 |
16 | Neither the University of Minnesota nor any of the researchers involved can guarantee the correctness of the data, its suitability for any particular purpose, or the validity of results based on the use of the data set. The data set may be used for any research purposes under the following conditions:
17 |
18 | * The user may not state or imply any endorsement from the University of Minnesota or the GroupLens Research Group.
19 | * The user must acknowledge the use of the data set in publications resulting from the use of the data set (see below for citation information).
20 | * The user may not redistribute the data without separate permission.
21 | * The user may not use this information for any commercial or revenue-bearing purposes without first obtaining permission from a faculty member of the GroupLens Research Project at the University of Minnesota.
22 | * The executable software scripts are provided "as is" without warranty of any kind, either expressed or implied, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose. The entire risk as to the quality and performance of them is with you. Should the program prove defective, you assume the cost of all necessary servicing, repair or correction.
23 |
24 | In no event shall the University of Minnesota, its affiliates or employees be liable to you for any damages arising out of the use or inability to use these programs (including but not limited to loss of data or data being rendered inaccurate).
25 |
26 | If you have any further questions or comments, please email <grouplens-info@umn.edu>.
27 |
28 |
29 | Citation
30 | ========
31 |
32 | To acknowledge use of the dataset in publications, please cite the following paper:
33 |
34 | > F. Maxwell Harper and Joseph A. Konstan. 2015. The MovieLens Datasets: History and Context. ACM Transactions on Interactive Intelligent Systems (TiiS) 5, 4: 19:1–19:19.
35 |
36 |
37 | Further Information About GroupLens
38 | ===================================
39 |
40 | GroupLens is a research group in the Department of Computer Science and Engineering at the University of Minnesota. Since its inception in 1992, GroupLens's research projects have explored a variety of fields including:
41 |
42 | * recommender systems
43 | * online communities
44 | * mobile and ubiquitous technologies
45 | * digital libraries
46 | * local geographic information systems
47 |
48 | GroupLens Research operates a movie recommender based on collaborative filtering, MovieLens, which is the source of these data. We encourage you to visit <http://movielens.org> to try it out! If you have exciting ideas for experimental work to conduct on MovieLens, send us an email at <grouplens-info@cs.umn.edu> - we are always interested in working with external collaborators.
49 |
50 |
51 | Content and Use of Files
52 | ========================
53 |
54 | Verifying the Dataset Contents
55 | ------------------------------
56 |
57 | We encourage you to verify that the dataset you have on your computer is identical to the ones hosted at [grouplens.org](http://grouplens.org). This is an important step if you downloaded the dataset from a location other than [grouplens.org](http://grouplens.org), or if you wish to publish research results based on analysis of the MovieLens dataset.
58 |
59 | We provide an [MD5 checksum](http://en.wikipedia.org/wiki/Md5sum) with the same name as the downloadable `.zip` file, but with a `.md5` file extension. To verify the dataset:
60 |
61 | # on linux
62 | md5sum ml-25m.zip; cat ml-25m.zip.md5
63 |
64 | # on OSX
65 | md5 ml-25m.zip; cat ml-25m.zip.md5
66 |
67 | # windows users can download a tool from Microsoft (or elsewhere) that verifies MD5 checksums
68 |
69 | Check that the two lines of output contain the same hash value.
70 |
71 |
72 | Formatting and Encoding
73 | -----------------------
74 |
75 | The dataset files are written as [comma-separated values](http://en.wikipedia.org/wiki/Comma-separated_values) files with a single header row. Columns that contain commas (`,`) are escaped using double-quotes (`"`). These files are encoded as UTF-8. If accented characters in movie titles or tag values (e.g. Misérables, Les (1995)) display incorrectly, make sure that any program reading the data, such as a text editor, terminal, or script, is configured for UTF-8.
76 |
77 |
78 | User Ids
79 | --------
80 |
81 | MovieLens users were selected at random for inclusion. Their ids have been anonymized. User ids are consistent between `ratings.csv` and `tags.csv` (i.e., the same id refers to the same user across the two files).
82 |
83 |
84 | Movie Ids
85 | ---------
86 |
87 | Only movies with at least one rating or tag are included in the dataset. These movie ids are consistent with those used on the MovieLens web site (e.g., id `1` corresponds to the URL <https://movielens.org/movies/1>). Movie ids are consistent between `ratings.csv`, `tags.csv`, `movies.csv`, and `links.csv` (i.e., the same id refers to the same movie across these four data files).
88 |
89 |
90 | Ratings Data File Structure (ratings.csv)
91 | -----------------------------------------
92 |
93 | All ratings are contained in the file `ratings.csv`. Each line of this file after the header row represents one rating of one movie by one user, and has the following format:
94 |
95 | userId,movieId,rating,timestamp
96 |
97 | The lines within this file are ordered first by userId, then, within user, by movieId.
98 |
99 | Ratings are made on a 5-star scale, with half-star increments (0.5 stars - 5.0 stars).
100 |
101 | Timestamps represent seconds since midnight Coordinated Universal Time (UTC) of January 1, 1970.
102 |
103 |
104 | Tags Data File Structure (tags.csv)
105 | -----------------------------------
106 |
107 | All tags are contained in the file `tags.csv`. Each line of this file after the header row represents one tag applied to one movie by one user, and has the following format:
108 |
109 | userId,movieId,tag,timestamp
110 |
111 | The lines within this file are ordered first by userId, then, within user, by movieId.
112 |
113 | Tags are user-generated metadata about movies. Each tag is typically a single word or short phrase. The meaning, value, and purpose of a particular tag are determined by each user.
114 |
115 | Timestamps represent seconds since midnight Coordinated Universal Time (UTC) of January 1, 1970.
116 |
117 |
118 | Movies Data File Structure (movies.csv)
119 | ---------------------------------------
120 |
121 | Movie information is contained in the file `movies.csv`. Each line of this file after the header row represents one movie, and has the following format:
122 |
123 | movieId,title,genres
124 |
125 | Movie titles are entered manually or imported from <https://www.themoviedb.org/>, and include the year of release in parentheses. Errors and inconsistencies may exist in these titles.
126 |
127 | Genres are a pipe-separated list, and are selected from the following:
128 |
129 | * Action
130 | * Adventure
131 | * Animation
132 | * Children's
133 | * Comedy
134 | * Crime
135 | * Documentary
136 | * Drama
137 | * Fantasy
138 | * Film-Noir
139 | * Horror
140 | * Musical
141 | * Mystery
142 | * Romance
143 | * Sci-Fi
144 | * Thriller
145 | * War
146 | * Western
147 | * (no genres listed)
148 |
149 |
150 | Links Data File Structure (links.csv)
151 | ---------------------------------------
152 |
153 | Identifiers that can be used to link to other sources of movie data are contained in the file `links.csv`. Each line of this file after the header row represents one movie, and has the following format:
154 |
155 | movieId,imdbId,tmdbId
156 |
157 | movieId is an identifier for movies used by <https://movielens.org>. E.g., the movie Toy Story has the link <https://movielens.org/movies/1>.
158 |
159 | imdbId is an identifier for movies used by <http://www.imdb.com>. E.g., the movie Toy Story has the link <http://www.imdb.com/title/tt0114709/>.
160 |
161 | tmdbId is an identifier for movies used by <https://www.themoviedb.org>. E.g., the movie Toy Story has the link <https://www.themoviedb.org/movie/862>.
162 |
163 | Use of the resources listed above is subject to the terms of each provider.
164 |
165 |
166 | Tag Genome (genome-scores.csv and genome-tags.csv)
167 | -------------------------------------------------
168 |
169 | This data set includes a current copy of the Tag Genome.
170 |
171 | [genome-paper]: http://files.grouplens.org/papers/tag_genome.pdf
172 |
173 | The tag genome is a data structure that contains tag relevance scores for movies. The structure is a dense matrix: each movie in the genome has a value for *every* tag in the genome.
174 |
175 | As described in [this article][genome-paper], the tag genome encodes how strongly movies exhibit particular properties represented by tags (atmospheric, thought-provoking, realistic, etc.). The tag genome was computed using a machine learning algorithm on user-contributed content including tags, ratings, and textual reviews.
176 |
177 | The genome is split into two files. The file `genome-scores.csv` contains movie-tag relevance data in the following format:
178 |
179 | movieId,tagId,relevance
180 |
181 | The second file, `genome-tags.csv`, provides the tag descriptions for the tag IDs in the genome file, in the following format:
182 |
183 | tagId,tag
184 |
185 | The `tagId` values are generated when the data set is exported, so they may vary from version to version of the MovieLens data sets.
186 |
187 | Please include the following citation if referencing tag genome data:
188 |
189 | > Jesse Vig, Shilad Sen, and John Riedl. 2012. The Tag Genome: Encoding Community Knowledge to Support Novel Interaction. ACM Trans. Interact. Intell. Syst. 2, 3: 13:1–13:44.
190 |
191 |
192 | Cross-Validation
193 | ----------------
194 |
195 | Prior versions of the MovieLens dataset included either pre-computed cross-folds or scripts to perform this computation. We no longer bundle either of these features with the dataset, since most modern toolkits provide this as a built-in feature. If you wish to learn about standard approaches to cross-fold computation in the context of recommender systems evaluation, see [LensKit](http://lenskit.org) for tools, documentation, and open-source code examples.
196 |
--------------------------------------------------------------------------------
/streamlit/sample.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | import pandas as pd
3 | import numpy as np
4 |
5 | st.title('streamlitのサンプルだお')
6 |
7 | DATE_COLUMN = 'date/time'
8 | DATA_URL = ('https://s3-us-west-2.amazonaws.com/'
9 | 'streamlit-demo-data/uber-raw-data-sep14.csv.gz')
10 |
11 |
12 | @st.cache
13 | def load_data(nrows):
14 | data = pd.read_csv(DATA_URL, nrows=nrows)
15 | lowercase = lambda x: str(x).lower()
16 | data.rename(lowercase, axis='columns', inplace=True)
17 | data[DATE_COLUMN] = pd.to_datetime(data[DATE_COLUMN])
18 | return data
19 |
20 | # Create a text element and let the reader know the data is loading.
21 | data_load_state = st.text('Loading data...')
22 | # Load 10,000 rows of data into the dataframe.
23 | data = load_data(10000)
24 | # Notify the reader that the data was successfully loaded.
25 | data_load_state.text('Loading data...done!')
26 |
27 | if st.checkbox('Show raw data'):
28 | st.subheader('Raw data')
29 | st.write(data)
30 |
31 |
32 |
33 | st.subheader('Number of pickups by hour')
34 | hist_values = np.histogram(data[DATE_COLUMN].dt.hour, bins=24, range=(0,24))[0]
35 | st.bar_chart(hist_values)
36 |
37 |
38 |
39 | # Some number in the range 0-23
40 | hour_to_filter = st.slider('hour', 0, 23, 17)
41 | filtered_data = data[data[DATE_COLUMN].dt.hour == hour_to_filter]
42 |
43 | st.write(hour_to_filter)
44 |
45 | st.text('This is some text.')
46 |
47 | #ボタン処理
48 | if st.button('ボタンの処理も作れる'):
49 | #ボタン押された
50 | st.write('Why hello there')
51 | else:
52 | st.write('Goodbye')
53 |
54 | #テキスト入力欄も作れるよ
55 | st.text_area('labelだお', value="")
--------------------------------------------------------------------------------