├── .gitignore
├── DEV_README.md
├── LICENSE
├── README.md
├── b2
│   ├── __init__.py
│   ├── algebra
│   │   ├── __init__.py
│   │   ├── context.py
│   │   ├── data_types.py
│   │   ├── dataframe.py
│   │   └── selection.py
│   ├── b2.py
│   ├── b2_magic.py
│   ├── config.py
│   ├── constants.py
│   ├── context_types.py
│   ├── showme.py
│   ├── state_types.py
│   ├── ui_comm.py
│   ├── util
│   │   ├── __init__.py
│   │   ├── data_processing.py
│   │   ├── errors.py
│   │   ├── instructions.py
│   │   └── utils.py
│   └── vis_types.py
├── docs
│   ├── Tutorial.ipynb
│   ├── data8_instrumentation.md
│   └── logging.md
├── notebooks
│   ├── data
│   │   ├── cars.json
│   │   └── pitchfork.csv
│   └── v1
│       ├── Basic Demo.ipynb
│       └── EndToEndTest.ipynb
├── package.json
├── requirements.txt
├── setup.cfg
├── setup.py
├── src
│   ├── CellManager.ts
│   ├── charts
│   │   └── vegaGen.ts
│   ├── codefolding.ts
│   ├── comm.ts
│   ├── components
│   │   ├── ChartsViewLangingPage.tsx
│   │   ├── CloseButton.tsx
│   │   ├── ColumnItem.tsx
│   │   ├── EditableText.tsx
│   │   ├── MidasContainer.tsx
│   │   ├── MidasElement.tsx
│   │   ├── MidasSidebar.tsx
│   │   ├── ProfileShelfLandingPage.tsx
│   │   ├── ProfilerShelf.tsx
│   │   ├── SelectionItem.tsx
│   │   ├── SelectionShelf.tsx
│   │   └── SelectionShelfLandingPage.tsx
│   ├── config.ts
│   ├── constants.ts
│   ├── elements.css
│   ├── external
│   │   └── Jupyter.d.ts
│   ├── index.tsx
│   ├── logging.ts
│   ├── setup.tsx
│   ├── types.ts
│   └── utils.ts
├── tsconfig.json
├── tslint.json
└── webpack.config.js
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | *.zip
3 | *.key
4 | b2/scrap/
5 | *old.py
6 | midasenv/
7 | b2/widget/
8 | b2/static/
9 | b2/algebra/tests/
10 | sandbox/
11 | *.json
12 | notebooks/
13 | # this is generated by webpack
14 | *.scrap
15 | *.js
16 | .idea
17 | .scrap
18 | *.map
19 | # Byte-compiled / optimized / DLL files
20 | __pycache__/
21 | *.py[cod]
22 |
23 | # C extensions
24 | *.so
25 |
26 | # Distribution / packaging
27 | .Python
28 | env/
29 | build/
30 | develop-eggs/
31 | dist/
32 | downloads/
33 | eggs/
34 | .eggs/
35 | lib/
36 | lib64/
37 | parts/
38 | sdist/
39 | var/
40 | *.egg-info/
41 | .installed.cfg
42 | *.egg
43 |
44 | # PyInstaller
45 | # Usually these files are written by a python script from a template
46 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
47 | *.manifest
48 | *.spec
49 |
50 | # Installer logs
51 | pip-log.txt
52 | pip-delete-this-directory.txt
53 |
54 | # Unit test / coverage reports
55 | htmlcov/
56 | .tox/
57 | .coverage
58 | .coverage.*
59 | .cache
60 | nosetests.xml
61 | coverage.xml
62 | *,cover
63 |
64 | # Translations
65 | *.mo
66 | *.pot
67 |
68 | # Django stuff:
69 | *.log
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | .ipynb_checkpoints/
78 | node_modules/
79 | .vscode
80 |
81 | .mypy*
82 |
--------------------------------------------------------------------------------
/DEV_README.md:
--------------------------------------------------------------------------------
1 | # Development Guide
2 |
3 | Install requirements: `pip install -r requirements.txt`
4 |
5 | Symlink files instead of copying files:
6 |
7 | ```sh
8 | python setup.py develop
9 | yarn install
10 | npm run build # yarn watch if in dev mode for continuous update
11 | jupyter nbextension install --py --symlink b2
12 | ```
13 |
14 | You will need to understand how [git submodules](https://git-scm.com/book/en/v2/Git-Tools-Submodules) work.
15 |
16 | To test the Python pieces: `pytest -q ./tests/test_rendering.py`.
17 |
18 | Publish a new version to PyPI with `python3 setup.py sdist upload`.
19 |
20 | We also recommend installing Pyright if you use the VS Code editor; PyCharm comes with type checking built in.
21 |
22 | When you change the JS code, run `npm run watch` so the TypeScript rebuilds, then refresh the notebook you have open.
23 |
24 | When you change the Python code, add the following to the beginning of your notebook; you might need to rerun the cells that load the library (and the cells that depend on it), and restart the kernel if that does not work.
25 |
26 | ```python
27 | %load_ext autoreload
28 | %autoreload 2
29 | ```
30 |
31 | In addition, note that if the JavaScript side triggers any computation in Python, the print messages will not surface. For better testing, use the `comm` to send a debug message (in place of printing) for the entry call; you can then mock the input by running code directly, which behaves like a normal execution and prints normally.
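
A minimal sketch of that pattern, assuming you have a handle on the `UiComm` instance (the `send_debug_msg` call mirrors the commented-out usage in `b2/b2_magic.py`):

```python
# instead of print(...), which is swallowed for JS-triggered calls,
# surface the message through the comm so it reaches the front end
ui_comm.send_debug_msg(f"entered selection handler for df: {df_name}")
```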
32 |
33 | ## Code Notes
34 |
35 | Code marked with `#REDZONE` is brittle; assumptions there might be broken.
36 |
37 | ## Architecture
38 |
39 | ### Asynchrony
40 |
41 | As soon as the user interacts, the pane is set to busy and further interactions are disabled. When the last ack comes back from the Python side (`after_selection`), the pane is released.
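
A hypothetical sketch of the release step from the Python side (the message shape here is illustrative, not the actual protocol):

```python
def after_selection(ui_comm, df_name: str):
    # ... selection-triggered computation runs here ...
    # final ack back to the front end; the pane is re-enabled on receipt
    ui_comm.comm.send({"type": "after_selection", "dfName": df_name})
```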
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # B2
2 |
3 | B2 is a Jupyter extension that augments your programming experience with interactive visualizations for data analysis. You can find the talk and demo [here](https://www.youtube.com/watch?v=wWz7R6RI0z8&ab_channel=YifanWu).
4 |
5 | **Load dataframes to visualizations in a dashboard area that you can interact with**
6 |
7 |
8 |
9 | **Using interactions to drive cell computation using _reactive cells_**
10 |
11 |
12 |
13 | And more---see [here](https://github.com/yifanwu/midas-exp-pub) for tutorials. If you run into problems, feel free to [open an issue](https://github.com/yifanwu/b2/issues/new/choose), email Yifan directly at yifanwu@berkeley.edu, or reach out [via Twitter](https://twitter.com/yifanwu).
14 |
15 | ## Installation
16 |
17 | ```sh
18 | pip install b2-ext
19 | ```
20 |
21 | ## Development
22 |
23 | To build your own JS bundle:
24 |
25 | ```sh
26 | pip install -r requirements.txt
27 | python setup.py develop
28 | jupyter nbextension install --py --symlink b2
29 | ```
30 |
31 | ```sh
32 | npm install
33 | npm run watch
34 | ```
35 |
36 | ## Deployment
37 |
38 | ```sh
39 | npm run build
40 | python setup.py sdist
41 | twine upload dist/*
42 | ```
43 |
44 | You may need to `pip install twine` if you do not have it already, as well as a PyPI account with the appropriate permissions.
45 |
46 | ## People
47 |
48 | B2 is developed by [Yifan](http://yifanwu.net/) at UC Berkeley, with advising from [Joe](https://www2.eecs.berkeley.edu/Faculty/Homepages/hellerstein.html), [Arvind](https://arvindsatya.com/), and others.
49 |
50 | If you are interested in using B2 or participating in our user study, please send Yifan a message at yifanwu@berkeley.edu.
51 |
--------------------------------------------------------------------------------
/b2/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from warnings import warn
3 |
4 | from .b2 import B2
5 | from .algebra.dataframe import MidasDataFrame
6 |
7 | __all__ = ['B2', 'MidasDataFrame']
8 |
9 | __version__ = '0.0.5'
10 |
11 |
12 | def _jupyter_nbextension_paths():
13 | """Return metadata for the nbextension."""
14 | return [dict(
15 | section="notebook",
16 |         # the path is relative to the `b2` directory
17 | src="static",
18 | # directory in the `nbextension/` namespace
19 | dest="b2",
20 | # _also_ in the `nbextension/` namespace
21 | require="b2/index")]
22 |
23 |
24 | def find_static_assets():
25 | warn("""To use the nbextension, you'll need to update
26 | the Jupyter notebook to version 4.2 or later.""")
27 | return []
28 |
--------------------------------------------------------------------------------
/b2/algebra/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yifanwu/b2/3dc11605a365a28ed07c8a2b253ff87fa43e549d/b2/algebra/__init__.py
--------------------------------------------------------------------------------
/b2/algebra/context.py:
--------------------------------------------------------------------------------
1 | from copy import deepcopy, copy
2 | from typing import List, Dict, Optional, cast, Tuple
3 | from collections import defaultdict
4 | # for development
5 | from IPython.core.debugger import set_trace
6 |
7 | from b2.util.errors import InternalLogicalError, debug_log
8 | from b2.state_types import DFName
9 | from b2.constants import ISDEBUG
10 |
11 | from .dataframe import RelationalOpType, MidasDataFrame, BaseOp, RelationalOp, DFInfo, VisualizedDFInfo, Where, JoinInfo, Select, create_predicate, Join
12 | from .selection import SelectionValue
13 |
14 | class Context(object):
15 | # dfs: Dict[DFName, DFInfo]
16 | join_info: Dict[Tuple[DFName, DFName], JoinInfo]
17 | # store it for easier df gen...
18 |
19 | def __init__(self, df_info_store: Dict[DFName, DFInfo], new_df_from_ops):
20 | """[summary]
21 |
22 | Arguments:
23 | get_dfs {Callable[[], List[MidasDataFrame]]} -- passed from state so that they share access to the up tp date state
24 | """
25 | self.df_info_store = df_info_store
26 | self.join_info = {}
27 | self.new_df_from_ops = new_df_from_ops
28 |
29 |
30 | def get_df(self, df_name: DFName) -> MidasDataFrame:
31 | found = self.df_info_store[df_name]
32 | if found:
33 | if isinstance(found, VisualizedDFInfo):
34 | return found.original_df
35 | else:
36 | return found.df
37 | else:
38 | raise InternalLogicalError(f"DF {df_name} not found")
39 |
40 |
41 | # takes in joinable columns on a left and right df, adds this information into our join_info dictionary
42 | # consider making it easier to enter
43 | def add_join_info(self, joins: JoinInfo):
44 | left_df = joins.left_df
45 | right_df = joins.right_df
46 | if left_df.df_name is not None and right_df.df_name is not None: # type: ignore
47 | self.join_info[(left_df.df_name, right_df.df_name)] = joins # type: ignore
48 | self.join_info[(right_df.df_name, left_df.df_name)] = joins.swap_left_right() # type: ignore
49 | else:
50 | raise InternalLogicalError("The DFs with join info should have df_names")
51 |
52 |
53 | # takes in a left_df and right_df, and returns all possible column names for them to be joined on
54 | # def get_join_info(self, left_df: MidasDataFrame, right_df: MidasDataFrame):
55 | # if left_df.df_name is not None and right_df.df_name is not None:
56 | # return
57 | # else:
58 | # raise InternalLogicalError("should have df_names")
59 |
60 |
61 | def apply_join_selection(self, join_info: JoinInfo, selections: List[SelectionValue]) -> RelationalOp:
62 | """
63 | Arguments:
64 | join_info {JoinInfo} -- Note that the left one is the "original base" and the right is the one joined
65 | selections {List[SelectionValue]} -- [description]
66 |
67 | Raises:
68 |             NotImplementedError: if there is more than one column; not handled for now.
69 |
70 | Returns:
71 | RelationalOp -- [description]
72 | """
73 | # if ISDEBUG: set_trace()
74 |         # we know that the join info's right df must start from a BaseOp
75 | new_ops = cast(BaseOp, deepcopy(join_info.right_df._ops)) # type: ignore
76 | filtered_join_df = apply_non_join_selection(new_ops, selections)
77 | if len(join_info.columns) > 1:
78 | raise NotImplementedError("Currently not supported")
79 | base_col = join_info.columns[0].left_col.col_name
80 | join_col = join_info.columns[0].right_col.col_name
81 | index_column_df = self.new_df_from_ops(Select(join_col, filtered_join_df))
82 | # we must assign something
83 | selection_columns = "_".join([s.column.col_name for s in selections])
84 | index_column_df._suggested_df_name = f"{new_ops.df_name}_filtered_{selection_columns}"
85 | # then do the join
86 | # TODO: better to do the "in" operations
87 | new_base = deepcopy(join_info.left_df._ops) # type: ignore
88 | final_ops = Join(base_col, index_column_df, join_col, new_base)
89 | # if ISDEBUG: set_trace()
90 | return final_ops
91 |
92 |     # get a bunch of bases and decide where the column comes from;
93 |     # we might have two here, and we need to decide which one to pick
94 |     # based on the column names. this would be easy, except that
95 |     # Joins have certain behaviors in datascience that
96 |     # make it hard to reason about which column things came from,
97 |     # so for now, let's just give up :( maybe @shloak can take a stab? #TODO/FIXME
98 | def get_base_df_selection(self, s: SelectionValue) -> Optional[SelectionValue]:
99 | # look up df
100 | df = self.get_df(s.column.df_name)
101 | bases = find_all_baseops(df._ops)
102 |
103 | def find_base_with_column(bases: List[BaseOp]):
104 | for b in bases:
105 | a_df = self.get_df(b.df_name)
106 |
107 | # LEAKY abstraction --- indirectly depending on tables...
108 | if s.column.col_name in a_df.table.labels:
109 | # we are done
110 | return b
111 | base_op = find_base_with_column(bases)
112 | new_selection = deepcopy(s)
113 | if (base_op):
114 | new_selection.column.df_name = base_op.df_name
115 | return new_selection
116 | else:
117 | return None
118 | # raise InternalLogicalError(f"base selection should have been found for {s.column.df_name}filtered by columns {s.column.col_name}")
119 |
120 |
121 | def apply_selection(self, target_df: MidasDataFrame, selections: List[SelectionValue], is_union=False) -> Optional[MidasDataFrame]:
122 | if len(selections) == 0:
123 | return None
124 | # before we do any of that, just check to see if the filter is directly on the target_df itself?
125 | selections_on_base = map(self.get_base_df_selection, selections)
126 | selections_by_df = defaultdict(list)
127 | for s in selections_on_base:
128 | # note that if something is not found, we simply ignore it
129 | # this sometimes happens when we miscategorize.
130 | if s is not None:
131 | selections_by_df[s.column.df_name].append(s)
132 |
133 | if target_df.df_name in selections_by_df:
134 | raise InternalLogicalError(f"Shouldn't be using context to do the filter if the two DFs are the same, we got {target_df.df_name} as target, which is in {selections_by_df.keys()}")
135 |
136 | new_ops = target_df._ops
137 | # it doesn't really matter what order we apply these in
138 | for df_name in selections_by_df.keys():
139 | new_ops = self.apply_selection_from_single_df(new_ops, df_name, selections_by_df[df_name]) # type: ignore
140 | new_df = target_df.new_df_from_ops(new_ops) # type: ignore
141 | return new_df
142 |
143 |
144 | def find_joinable_base(self, current_bases: List[BaseOp], selection_base_df: DFName) -> Optional[JoinInfo]:
145 | """
146 | note that the current_base is left_df, and the base to join with is right_df
147 | """
148 | for b in current_bases:
149 | r = self.join_info.get((b.df_name, selection_base_df))
150 | if r is not None:
151 | return r
152 | return None
153 |
154 |
155 | def apply_selection_from_single_df(self, ops: RelationalOp, df_name: DFName, selections: List[SelectionValue]) -> RelationalOp:
156 | # here we can assume that all the selections have the same df
157 | bases = find_all_baseops(ops)
158 | # see if the selection list has anything in the bases
159 | non_join_base_list = list(filter(lambda b: b.df_name == df_name, bases))
160 | if len(non_join_base_list) > 0:
161 | non_join_base = non_join_base_list[0]
162 | local_base_df_name = non_join_base.df_name
163 | replacement_op = apply_non_join_selection(non_join_base, selections)
164 | else:
165 | # search for which one we can actually join with
166 | r = self.find_joinable_base(bases, df_name)
167 | if r:
168 | # it's always the right one (by construct)
169 | local_base_df_name = r.left_df.df_name
170 | replacement_op = self.apply_join_selection(r, selections)
171 | else:
172 | # NO OP
173 | if ISDEBUG:
174 | debug_log(f"No op for {df_name} selection because no join was found")
175 | return ops
176 |
177 | # 2. apply the replacement
178 | if replacement_op and local_base_df_name:
179 | return set_if_eq(deepcopy(ops), replacement_op, local_base_df_name)
180 | raise InternalLogicalError("Replacement Op is not set or the df_name is not set")
181 |
182 |
183 | ####################################
184 | ######## helper funcs ########
185 | ####################################
186 |
187 | def set_if_eq(original: RelationalOp, replacement: RelationalOp, df_name: DFName):
188 |     should_return_replacement = False
189 |     def _helper(op: RelationalOp, new_op: RelationalOp, parent_op: Optional[RelationalOp]):
190 |         nonlocal should_return_replacement  # without this, the assignment below would only rebind a local
191 | if (op.op_type == RelationalOpType.base):
192 | base_op = cast(BaseOp, op)
193 | if (base_op.df_name == df_name):
194 | # if parent_op is not defined, then we are literally replacing
195 | if parent_op is None:
196 | should_return_replacement = True
197 | return
198 | else:
199 | parent_op.child = new_op
200 | return
201 | elif (op.has_child()):
202 | return _helper(op.child, new_op, op)
203 | else:
204 | raise InternalLogicalError("Should either have child or be of base type")
205 |
206 | _helper(original, replacement, None)
207 | if should_return_replacement:
208 | return replacement
209 | else:
210 | return original
211 |
212 |
213 |
214 | def find_all_baseops(op: RelationalOp) -> List[BaseOp]:
215 | """takes the source op and returns all the baseops
216 | e.g. given that df and df2 are loaded in as external data,
217 | then the op representing `df.join("id", df2, "id")select(["sales"])`
218 | will return df and df2's respective `baseop`s.
219 |
220 | Arguments:
221 | op {RelationalOp} -- [description]
222 |
223 | Returns:
224 | List[BaseOp] -- [description]
225 | """
226 | if (op.op_type == RelationalOpType.base):
227 | base_op = cast(BaseOp, op)
228 | return [base_op]
229 | if (op.op_type == RelationalOpType.join):
230 | join_op = cast(Join, op)
231 | b1 = find_all_baseops(op.child)
232 | b2 = find_all_baseops(join_op.other._ops)
233 | return b1 + b2
234 | if (op.has_child()):
235 | return find_all_baseops(op.child)
236 | else:
237 | return []
238 |
239 |
240 | def apply_non_join_selection(ops: BaseOp, selections: List[SelectionValue]) -> RelationalOp:
241 | # it has to be BaseOp because it's used to generate the df to be replaced
242 | executable_predicates = list(map(create_predicate, selections))
243 | new_ops = copy(ops)
244 | for p in executable_predicates:
245 | new_ops = Where(p, new_ops)
246 | return new_ops
247 |
248 |
--------------------------------------------------------------------------------
/b2/algebra/data_types.py:
--------------------------------------------------------------------------------
1 | from typing import NewType, Dict, List, Any
2 |
3 | DFId = NewType('DFId', str)
4 |
5 | # Record = Dict[str, Any]
6 | # Relation = List[Record]
7 |
--------------------------------------------------------------------------------
/b2/algebra/selection.py:
--------------------------------------------------------------------------------
1 | from typing import List, cast, Set
2 | from enum import Enum
3 | import json
4 |
5 | from b2.util.utils import FG_BLUE, RESET_PRINT
6 | from b2.util.errors import InternalLogicalError
7 | from b2.state_types import DFName
8 |
9 | class ColumnRef(object):
10 | def __init__(self, col_name: str, df_name: DFName):
11 | self.col_name = col_name
12 | self.df_name = df_name
13 |
14 | def __eq__(self, other: 'ColumnRef'):
15 | if self.col_name != other.col_name:
16 | return False
17 | if self.df_name != other.df_name:
18 | return False
19 | return True
20 |
21 | def __repr__(self):
22 |         return f"{{col_name: '{self.col_name}', df_name: '{self.df_name}'}}"
23 |
24 |
25 | class SelectionType(Enum):
26 | single_value = "single_value"
27 | numeric_range = "numeric_range"
28 | string_set = "string_set"
29 | empty = "empty"
30 |
31 |
32 | class SelectionValue(object):
33 | """
34 |     column [ColumnRef]
35 | selection_type [SelectionType]
36 | """
37 | column: ColumnRef
38 | selection_type: SelectionType
39 | def __init__(self):
40 | raise InternalLogicalError("SelectionValue is abstract and should not be instantiated")
41 |
42 | def to_str(self):
43 | raise InternalLogicalError("SelectionValue is abstract and should not be instantiated")
44 |
45 |
46 | class EmptySelection(SelectionValue):
47 | def __init__(self, column: ColumnRef):
48 | self.column = column
49 | self.selection_type = SelectionType.empty
50 |
51 | def __eq__(self, other: SelectionValue):
52 | if other.selection_type != SelectionType.empty:
53 | return False
54 | if self.column != other.column:
55 | return False
56 | return True
57 |
58 | def to_str(self):
59 | raise InternalLogicalError("Should not try to make empty selections into strings")
60 |
61 | def __repr__(self):
62 | return f"{{column: {self.column}, val: {FG_BLUE}None{RESET_PRINT}, minVal: None, maxVal: None}}"
63 |
64 |
65 | class NumericRangeSelection(SelectionValue):
66 | def __init__(self, column: ColumnRef, minVal: float, maxVal: float):
67 | self.selection_type = SelectionType.numeric_range
68 | self.column = column
69 | self.minVal = minVal
70 | self.maxVal = maxVal
71 |
72 | def __eq__(self, other: SelectionValue):
73 | if other.selection_type != SelectionType.numeric_range:
74 | return False
75 | m_other = cast(NumericRangeSelection, other)
76 | if self.column != m_other.column:
77 | return False
78 | if self.minVal != m_other.minVal:
79 | return False
80 | if self.maxVal != m_other.maxVal:
81 | return False
82 | return True
83 |
84 | def __repr__(self):
85 | return f"{{column: {self.column}, minVal: {self.minVal}, maxVal: {self.maxVal}}}"
86 |
87 | def __str__(self) -> str:
88 | return self.__repr__()
89 |
90 | def to_str(self):
91 | return f'{{"{self.column.df_name}": {{"{self.column.col_name}": [{self.minVal}, {self.maxVal}]}}}}'
92 |
93 |
94 |
95 | class SetSelection(SelectionValue):
96 | def __init__(self, column: ColumnRef, val: Set):
97 | self.selection_type = SelectionType.string_set
98 | self.column = column
99 | self.val = val
100 |
101 | def __eq__(self, other: SelectionValue):
102 | if other.selection_type != SelectionType.string_set:
103 | return False
104 | if self.column != other.column:
105 | return False
106 |
107 | s_other = cast(SetSelection, other)
108 | # python has convenient set operations...
109 | if self.val != s_other.val:
110 | return False
111 | return True
112 |
113 | def __repr__(self):
114 | return f"{{column: {self.column}, val: {FG_BLUE}{self.val}{RESET_PRINT}}}"
115 |
116 | def __str__(self) -> str:
117 | return self.__repr__()
118 |
119 | def to_str(self):
120 |         return f'{{"{self.column.df_name}": {{"{self.column.col_name}": {json.dumps(list(self.val))}}}}}'  # list() because sets are not JSON serializable
121 |
122 |
123 | def diff_selection_value(new_selection: List[SelectionValue], old_selection: List[SelectionValue])-> List[SelectionValue]:
124 | """returns the difference between the values
125 | Arguments:
126 | new_selection {List[SelectionValue]} -- one selection
127 | old_selection {List[SelectionValue]} -- another selection
128 | Returns:
129 |     returns
130 |     - an empty list if there are no changes
131 | - an empty selection if the selection is removed
132 | - all the new diffs as selections
133 | """
134 | def find_selection(a_selection: SelectionValue, selections: List[SelectionValue]):
135 | for s in selections:
136 | if s == a_selection:
137 | return True
138 | return False
139 |
140 | def find_df(df: ColumnRef, selections: List[SelectionValue]):
141 | for s in selections:
142 | if s.column == df:
143 | return True
144 | return False
145 |
146 | diff = []
147 | for s in new_selection:
148 | if not find_selection(s, old_selection):
149 | diff.append(s)
150 | for s in old_selection:
151 | if not find_df(s.column, new_selection):
152 | # this means that this item has been removed
153 | diff.append(EmptySelection(s.column))
154 | return diff
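# For example, if old_selection holds a numeric range [0, 10] on a column and
# new_selection narrows it to [0, 5], the diff is just the new range; if
# new_selection drops that column entirely, the diff is [EmptySelection(column)].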
155 |
156 |
157 | def find_selections_with_df_name(current_selection: List[SelectionValue], df_name):
158 | r = []
159 | for s in current_selection:
160 | if s.column.df_name == df_name:
161 | r.append(s.column)
162 | return r
163 |
--------------------------------------------------------------------------------
/b2/b2_magic.py:
--------------------------------------------------------------------------------
1 | from IPython import get_ipython # type: ignore
2 | from IPython.core.magic import (Magics, magics_class, line_magic, cell_magic, line_cell_magic) # type: ignore
3 | from IPython.core.magic_arguments import (argument, magic_arguments, parse_argstring) # type: ignore
4 |
5 | from .ui_comm import UiComm
6 |
7 | @magics_class
8 | class B2Magic(Magics):
9 | ui_comm: UiComm
10 | def __init__(self, shell, ui_comm: UiComm):
11 | super(B2Magic, self).__init__(shell)
12 | self.ui_comm = ui_comm
13 |
14 | @cell_magic
15 | @magic_arguments()
16 | @argument('-disable', action="store_true", help="disable this reactive cell")
17 |     @argument('-df', action='store', help='the name of the df; do not set this flag if you wish the cell to be run for all interactions')
18 | # add another argument such that they can use append based
19 | # @argument('--append', action="store")
20 | def reactive(self, line: str, cell: str):
21 | args = parse_argstring(self.reactive, line)
22 | # do_append = "append" in args
23 | if args.disable:
24 | # self.ui_comm.send_debug_msg("disabled")
25 | self.ui_comm.remove_reactive_cell()
26 | # do NOT execute the cell
27 | return
28 | if args.df:
29 | # self.ui_comm.send_debug_msg(f"NEW cell magic with shell: {args.df} with cell: {cell}")
30 | self.ui_comm.add_reactive_cell(args.df)
31 | else:
32 | self.ui_comm.add_reactive_cell("")
33 |
34 | shell = get_ipython().get_ipython()
35 | shell.run_cell(cell)
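# Hypothetical notebook usage of the magic above (the flag names match the
# @argument declarations; the df name and cell body are illustrative):
#
#   %%reactive -df cars
#   cars.group('mpg')
#
# The cell then re-executes whenever a selection involving `cars` is made.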
--------------------------------------------------------------------------------
/b2/config.py:
--------------------------------------------------------------------------------
1 | class MidasConfig(object):
2 | def __init__(self, linked: bool):
3 | self.linked = linked
4 |
5 |
6 | IS_DEBUG = True
--------------------------------------------------------------------------------
/b2/constants.py:
--------------------------------------------------------------------------------
1 | ISDEBUG = False
2 | STUB_DISTRIBUTION_BIN = 10  # numeric, so that bound comparisons in the binning code work
3 | MIDAS_CELL_COMM_NAME = "midas-cell-comm"
4 | MIDAS_RECOVERY_COMM_NAME = "midas-recovery-comm"
5 | DATA_SOURCE = "table"
6 | # the values will either be 0 (filtered) or 1 (original)
7 | IS_OVERVIEW_FIELD_NAME = "is_overview"
8 | COUNT_COL_NAME = "count"
9 | # to accommodate the number of US states
10 | MAX_BINS = 100
11 | # note that the max generated bins is much smaller than the allowed max, to make the charts we create look a little nicer
12 | MAX_GENERATED_BINS = 20
13 | MAX_DOTS = 10000
--------------------------------------------------------------------------------
/b2/context_types.py:
--------------------------------------------------------------------------------
1 | from typing import NamedTuple, List
2 |
3 | class JoinInfo(NamedTuple):
4 | dfs: List[str]
5 | join_colums: List[str]
--------------------------------------------------------------------------------
/b2/showme.py:
--------------------------------------------------------------------------------
1 | from functools import reduce
2 | # from midas.midas_algebra.dataframe import MidasDataFrame, RelationalOpType
3 | from typing import Optional, Dict, cast
4 | from typing_extensions import Literal
5 | from datascience.tables import Table
6 | from pandas.api.types import is_string_dtype, is_numeric_dtype, is_datetime64_any_dtype
7 | from IPython.core.debugger import set_trace
8 |
9 | from .util.errors import type_check_with_warning, InternalLogicalError
10 | from .vis_types import EncodingSpec
11 |
12 |
13 | def toggle_x_y(selection_dimensions: Literal["", "x", "y", "xy"]):
14 | if selection_dimensions == "x":
15 | return "y"
16 | elif selection_dimensions == "y":
17 | return "x"
18 | else:
19 | return selection_dimensions
20 |
21 |
22 | def infer_encoding_helper(df: Table, selectable, is_groupby: bool):
23 | """infers encoding, subject to more parameters, ideally we pass in all of the operations, but for now, we just need is_groupby, which affects the encoding choices.
24 |
25 | Arguments:
26 | df {Table} -- [description]
27 | selectable {[type]} -- [description]
28 | is_groupby {bool} -- whether the ops were groupby
29 | """
30 | df_len = len(df.columns)
31 | if df_len == 2:
32 | first_col = df.labels[0]
33 | second_col = df.labels[1]
34 | selection_dimensions = ""
35 | if len(selectable) == 2:
36 | selection_dimensions = "xy"
37 | elif len(selectable) == 1 and next(iter(selectable)) == first_col:
38 | selection_dimensions = "x"
39 | elif len(selectable) == 1 and next(iter(selectable)) == second_col:
40 | selection_dimensions = "y"
41 | elif len(selectable) == 0:
42 | selection_dimensions = ""
43 | # check if there was a groupby, special case
44 | if is_groupby:
45 | # then the results have to be ordinal
46 | # whether it's multiclick or brush would depend on whether the value is numeric
47 |             # if it's a groupby, we can make the assumption that the first column is the ordinal value and the second is the quantitative value
48 | selection_type = "brush"
49 | sort = ""
50 |
51 | if is_string_dtype(df[first_col]):
52 | selection_type = "multiclick"
53 | # we will arrange it such that it's the second one that's numeric
54 | sort = "-y"
55 |
56 | return EncodingSpec("bar", first_col, "ordinal", second_col, "quantitative", selection_type, selection_dimensions, sort)
57 | if is_string_dtype(df[first_col]) and is_numeric_dtype(df[second_col]):
58 | return EncodingSpec("bar", first_col, "ordinal", second_col, "quantitative", "multiclick", selection_dimensions)
59 | elif is_numeric_dtype(df[first_col]) and is_string_dtype(df[second_col]):
60 | selection_dimensions = toggle_x_y(selection_dimensions)
61 | return EncodingSpec("bar", second_col, "ordinal", first_col, "quantitative", "multiclick", selection_dimensions)
62 | elif is_numeric_dtype(df[first_col]) and is_numeric_dtype(df[second_col]):
63 | return EncodingSpec("circle", first_col, "quantitative", second_col, "quantitative", "brush", selection_dimensions)
64 | elif is_datetime64_any_dtype(df[first_col]) and is_numeric_dtype(df[second_col]):
65 | return EncodingSpec("line", first_col, "temporal", second_col, "quantitative", "brush", selection_dimensions)
66 | elif is_numeric_dtype(df[first_col]) and is_datetime64_any_dtype(df[second_col]):
67 | selection_dimensions = toggle_x_y(selection_dimensions)
68 | return EncodingSpec("line", second_col, "temporal", first_col, "quantitative", "brush", selection_dimensions)
69 | raise InternalLogicalError(f"Corner case in spec gen")
70 | else:
71 | raise InternalLogicalError(f"Midas only supports visualization of two dimensional data for now")
72 |
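# For example (hypothetical data): a two-column Table with a string column and a
# numeric column falls into the first non-groupby branch above and yields
# EncodingSpec("bar", <string col>, "ordinal", <numeric col>, "quantitative", "multiclick", ...).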
--------------------------------------------------------------------------------
/b2/state_types.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 | from typing import NamedTuple, Callable, List, NewType, Union, Any, Optional, Dict, Tuple
3 |
4 |
5 | DFName = NewType('DFName', str)
6 |
--------------------------------------------------------------------------------
/b2/util/__init__.py:
--------------------------------------------------------------------------------
1 | # the util methods should not depend on anything outside of the util directory
--------------------------------------------------------------------------------
/b2/util/data_processing.py:
--------------------------------------------------------------------------------
1 | from datascience import Table
2 | import json
3 | import numpy as np
4 | from math import log10, pow, floor
5 | from pandas import notnull
6 | from typing import Tuple
7 | from IPython.core.debugger import set_trace
8 |
9 | from b2.constants import IS_OVERVIEW_FIELD_NAME, MAX_BINS, STUB_DISTRIBUTION_BIN, MAX_GENERATED_BINS
10 | from b2.vis_types import FilterLabelOptions, EncodingSpec
11 |
12 | from .errors import InternalLogicalError
13 | from .utils import sanitize_string_for_var_name
14 |
15 |
16 | def get_chart_title(df_name: str):
17 | # one level of indirection in case we need to change in the future
18 | return df_name
19 |
20 |
21 | DATE_HIERARCHY = [
22 | ("Y", "year"),
23 | ("M", "month"),
24 | ("D", "day")
25 | ]
26 |
27 |
28 | # B2DataFrame
29 | def static_vega_gen(encoding: EncodingSpec, df):
30 | records = dataframe_to_dict(df, FilterLabelOptions.none)
31 | # data = json.dumps(records)
32 | if encoding.mark == "bar":
33 | barSpec = {
34 | "$schema": "https://vega.github.io/schema/vega-lite/v4.json",
35 | "height": 200,
36 | "data": {"values": records},
37 | "mark": "bar",
38 | "encoding": {
39 | "x": {
40 | "field": encoding.x,
41 | "type": encoding.x_type
42 | },
43 | "y": {
44 | "field": encoding.y,
45 | "type": encoding.y_type,
46 | },
47 | }
48 | }
49 | if encoding.sort != "":
50 | barSpec["encoding"]["x"]["sort"] = encoding.sort
51 | return barSpec
52 | elif encoding.mark == "circle":
53 | scatterSpec = {
54 | "$schema": "https://vega.github.io/schema/vega-lite/v4.json",
55 | "data": { "values": records },
56 | "mark": {"type": "point", "tooltip": 1},
57 | "encoding": {
58 | "x": {
59 | "field": encoding.x,
60 | "type": encoding.x_type,
61 | "scale": {"zero": 0}
62 | },
63 | "y": {"field": encoding.y, "type": encoding.y_type},
64 | }
65 | }
66 | return scatterSpec
67 | elif encoding.mark == "line":
68 | lineSpec = {
69 | "$schema": "https://vega.github.io/schema/vega-lite/v4.json",
70 | "data": { "values": records },
71 | "mark": "line",
72 | "encoding": {
73 | "x": {"field": encoding.x, "type": encoding.x_type},
74 | "y": {"field": encoding.y, "type": encoding.y_type},
75 | },
76 | }
77 | return lineSpec
78 | else:
79 | return None
80 |
81 |
82 | def get_basic_group_vis(new_name, df_name, col_name):
83 | return f"{new_name} = {df_name}.group('{col_name}')\n{new_name}.vis()"
84 |
85 |
86 | def create_binning_code(bound, col_name, df_name, new_name, midas_reference_name):
87 | bin_column_name = f"{col_name}_bin"
88 | # lambda n: int(n/5) * 5
89 | if bound < 1:
90 | round_num = -1 * floor(log10(bound))
91 | binning_lambda = f"lambda x: 'null' if {midas_reference_name}.np.isnan(x) else round(int(x/{bound}) * {bound}, {round_num})"
92 | else:
93 | binning_lambda = f"lambda x: 'null' if {midas_reference_name}.np.isnan(x) else int(x/{bound}) * {bound}"
94 | bin_transform = f"{df_name}['{bin_column_name}'] = {df_name}.apply({binning_lambda}, '{col_name}')"
95 | grouping_transform = get_basic_group_vis(new_name, df_name, bin_column_name)
96 | code = f"{bin_transform}\n{grouping_transform}"
97 | return code
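# For example (hypothetical names), create_binning_code(5, "hp", "cars", "cars_hp_dist", "m")
# generates code along the lines of:
#   cars['hp_bin'] = cars.apply(lambda x: 'null' if m.np.isnan(x) else int(x/5) * 5, 'hp')
#   cars_hp_dist = cars.group('hp_bin')
#   cars_hp_dist.vis()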
98 |
99 |
100 | def try_parsing_date_time_level(ref, col_value, col_name, df_name):
101 | parsed = col_value.astype(f'datetime64[{ref[0]}]')
102 | count = len(np.unique(parsed))
103 | new_col_name = sanitize_string_for_var_name(f"{col_name}_{ref[1]}")
104 | if count > 1:
105 | new_column = f"{df_name}['{col_name}_{ref[1]}'] = {df_name}['{col_name}'].astype('datetime64[{ref[0]}]')"
106 | new_name = f"{df_name}_{new_col_name}_dist"
107 | if count > MAX_BINS:
108 | bound = snap_to_nice_number(count/MAX_BINS)
109 | binning_lambda = f"lambda x: 'null' if np.isnan(x) else int(x/{bound}) * {bound}"
110 | bin_column_name = f"{new_col_name}_bin"
111 | bin_transform = f"{df_name}['{bin_column_name}'] = {df_name}.apply({binning_lambda}, '{col_name}')"
112 |             grouping = get_basic_group_vis(new_name, df_name, bin_column_name)  # group by the binned column, not the raw one
113 | code = f"{new_column}\n{bin_transform}\n{grouping}"
114 | return code
115 | else:
116 | grouping = get_basic_group_vis(new_name, df_name, new_col_name)
117 | code = f"{new_column}\n{grouping}"
118 | return code
119 | else:
120 | return None
121 |
122 |
123 | # MidasDataFrame
124 | def get_datetime_distribution_code(col_name, df):
125 | col_value = df.table.column(col_name)
126 | for h in DATE_HIERARCHY:
127 | r = try_parsing_date_time_level(h, col_value, col_name, df.df_name)
128 | if r:
129 | return (r, True, "")
130 | return ("", False, "Cannot parse the date time column")
131 |
132 |
133 | def get_numeric_distribution_code(current_max_bins, unique_vals, col_name, df_name, new_name, reference_name) -> Tuple[str, bool, str]:
134 | d_max = unique_vals[-1]
135 | d_min = unique_vals[0]
136 | min_bucket_size = (d_max - d_min) / MAX_GENERATED_BINS
137 | # imports = "import numpy as np"
138 | try:
139 | bound = snap_to_nice_number(min_bucket_size)
140 | code = create_binning_code(bound, col_name, df_name, new_name, reference_name)
141 | return (code, True, "")
142 | except InternalLogicalError as e:
143 |         # let's still give them stub code
144 | code = create_binning_code(STUB_DISTRIBUTION_BIN, col_name, df_name, new_name, reference_name)
145 | return (f"# Please fix the following \n{code}", False, f"We were not able to create a distribution for column {col_name}, df {df_name}, because of error: {e}.")
146 |
147 |
148 | def snap_to_nice_number(n: float):
149 | if n == np.inf:
150 | raise InternalLogicalError("Should not have gotten infinity")
151 | if n <= 0:
152 | raise InternalLogicalError(f"Got {n}")
153 | if (n <= 1):
154 | zeroes = pow(10, abs(int(log10(n))) + 1)
155 | new_num = snap_to_nice_number(n * zeroes)
156 | return new_num/zeroes
157 | # if it's less than 1, make it as big as one and then call the same function and return
158 | if (n <= 2):
159 | return 2
160 | elif (n <= 5):
161 | return 5
162 | elif (n <= 10):
163 | return 10
164 | # bigger than 10, just zero out the digits
165 | zeroes = pow(10, int(log10(n)))
166 | return (int(n / zeroes) + 1) * zeroes
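# Worked examples: snap_to_nice_number(37) zeroes out the trailing digit and
# rounds up to 40; snap_to_nice_number(0.23) scales up by 10 to 2.3, snaps
# that to 5, and returns 5 / 10 = 0.5.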
167 |
168 |
169 | # taken from ipyvega
170 | def sanitize_dataframe(df: Table):
171 | """Sanitize a DataFrame to prepare it for serialization.
172 |
173 | copied from the ipyvega project
174 | * Make a copy
175 | * Convert categoricals to strings.
176 | * Convert np.bool_ dtypes to Python bool objects
177 | * Convert np.int dtypes to Python int objects
178 | * Convert floats to objects and replace NaNs/infs with None.
179 | * Convert DateTime dtypes into appropriate string representations
180 | """
181 | import numpy as np
182 |
183 | if df is None:
184 | return None
185 | # raise InternalLogicalError("Cannot sanitize empty df")
186 |
187 | df = df.copy()
188 |
189 | def to_list_if_array(val):
190 | if isinstance(val, np.ndarray):
191 | return val.tolist()
192 | else:
193 | return val
194 |
195 | for col_name in df.labels:
196 | dtype = df.column(col_name).dtype
197 | if str(dtype) == 'category':
198 | # XXXX: work around bug in to_json for categorical types
199 | # https://github.com/pydata/pandas/issues/10778
200 | df[col_name] = df[col_name].astype(str)
201 | elif str(dtype) == 'bool':
202 | # convert numpy bools to objects; np.bool is not JSON serializable
203 | df[col_name] = df[col_name].astype(object)
204 | elif np.issubdtype(dtype, np.integer):
205 | # convert integers to objects; np.int is not JSON serializable
206 | df[col_name] = df[col_name].astype(object)
207 | elif np.issubdtype(dtype, np.floating):
208 | # For floats, convert to Python float: np.float is not JSON serializable
209 | # Also convert NaN/inf values to null, as they are not JSON serializable
210 | col = df[col_name]
211 | bad_values = np.isnan(col) | np.isinf(col)
212 | df[col_name] = np.where(bad_values, None, col).astype(object)
213 | # col.astype(object)[~bad_values]= None
214 | elif str(dtype).startswith('datetime'):
215 | # Convert datetimes to strings
216 | # astype(str) will choose the appropriate resolution
217 | new_column = df[col_name].astype(str)
218 | new_column[new_column == 'NaT'] = ''
219 | df[col_name] = new_column
220 | elif dtype == object:
221 | # Convert numpy arrays saved as objects to lists
222 | # Arrays are not JSON serializable
223 | col = np.vectorize(to_list_if_array)(df[col_name])
224 | df[col_name] = np.where(notnull(col), col, None).astype(object)
225 | return df
226 |
227 | # B2DataFrame
228 | def dataframe_to_dict(df, include_filter_label: FilterLabelOptions):
229 | """[summary]
230 |
231 | Keyword Arguments:
232 | include_filter_label {bool} -- whether we should insert another column indicating (default: {False})
233 | """
234 | clean_df = sanitize_dataframe(df.table)
235 | if clean_df is None:
236 | return []
237 |
238 | def s(x):
239 | k = {}
240 | for i, v in enumerate(x):
241 | k[clean_df.labels[i]] = v
242 | if include_filter_label != FilterLabelOptions.none:
243 | k[IS_OVERVIEW_FIELD_NAME] = include_filter_label.value
244 | return k
245 | return list(map(s, clean_df.rows))
246 |
--------------------------------------------------------------------------------
/b2/util/errors.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Optional
2 | from b2.config import IS_DEBUG
3 |
4 | class bcolors:
5 | HEADER = '\033[95m'
6 | BLUE = '\033[94m'
7 | GREEN = '\033[92m'
8 | WARNING = '\033[93m'
9 | GREY = '\033[37m'
10 | FAIL = '\033[91m'
11 | ENDC = '\033[0m'
12 | BOLD = '\033[1m'
13 | UNDERLINE = '\033[4m'
14 |
15 |
16 | class MockComm(object):
17 | def __init__(self):
18 | pass
19 | def send(self, obj):
20 | print(bcolors.GREY + "sending", obj, bcolors.ENDC)
21 |
22 |
23 | class NullValueError(Exception):
24 | def __init__(self, message):
25 | super().__init__(message)
26 |
27 |
28 | class DebugException(Exception):
29 | def __init__(self, message):
30 | super().__init__(message)
31 |
32 |
33 | class WrongTypeError(Exception):
34 | def __init__(self, message):
35 | super().__init__(message)
36 |
37 |
38 | class DfNotFoundError(Exception):
39 | def __init__(self, message):
40 | super().__init__(message)
41 |
42 |
43 | class NotInRuntimeError(Exception):
44 | def __init__(self, message):
45 | super().__init__(message)
46 |
47 | class InternalLogicalError(Exception):
48 | def __init__(self, message):
49 | super().__init__(message)
50 |
51 |
52 | class UserError(Exception):
53 | def __init__(self, message):
54 | super().__init__(message)
55 |
56 |
57 | class TempDebuggingError(Exception):
58 | def __init__(self, message):
59 | super().__init__(message)
60 |
61 |
62 | class NotAllCaseHandledError(Exception):
63 | def __init__(self, message):
64 | super().__init__(message)
65 |
66 |
67 | def check_not_null(val: Any, err_msg: Optional[str]=None):
68 |     if val is None:
69 | raise NullValueError(err_msg)
70 |
71 |
72 | def type_check_with_warning(val: Any, t: Any):
73 | if not (isinstance(val, t)):
74 | err_msg = f"expected variable to be {t} but got {val} instead"
75 | raise WrongTypeError(err_msg)
76 |
77 |
78 | def report_error_to_user(msg: str):
79 | print(bcolors.WARNING + "[Warning] " + msg + bcolors.ENDC)
80 |
81 |
82 | def logging(function: str, msg: str):
83 | if IS_DEBUG:
84 | print(bcolors.GREEN + f"[{function}]\t\t" + msg + bcolors.ENDC)
85 |
86 |
87 | def debug_log(msg: str):
88 | if IS_DEBUG:
89 | print(bcolors.WARNING + msg + bcolors.ENDC)
90 |
--------------------------------------------------------------------------------
/b2/util/instructions.py:
--------------------------------------------------------------------------------
1 | HELP_INSTRUCTION = """
2 | # TODO
3 | """
--------------------------------------------------------------------------------
/b2/util/utils.py:
--------------------------------------------------------------------------------
1 | import random
2 | import string
3 | import codecs
4 | from os import path
5 | import traceback
6 | import ast
7 | from re import sub
8 | import requests
9 | from pathlib import Path
10 | import time
11 | import numpy as np
12 |
13 | from typing import Tuple, List
14 | from IPython import get_ipython # type: ignore
15 | from IPython.core.debugger import set_trace
16 |
17 | from b2.constants import ISDEBUG
18 | from b2.util.errors import UserError, InternalLogicalError
19 |
20 | FG_BLUE = "\x1b[34m"
21 | RESET_PRINT = "\x1b[0m"
22 | FG_PURPLE = "\x1b[035m"
23 |
24 | def plot(v, center, zoom_start, radius):
25 | import folium
26 | import folium.plugins
27 | locs = v.to_numpy()
28 | us_map = folium.Map(location=center, zoom_start = zoom_start)
29 | heatmap = folium.plugins.HeatMap(locs.tolist(), radius = radius)
30 | us_map.add_child(heatmap)
31 | return us_map
32 |
33 | def plot_heatmap(locs_df, zoom_start=12, radius=12):
34 | """Plots a heatmap using the Folium library
35 |
36 | Arguments:
37 |         locs_df {MidasDataFrame} -- Should contain lat, lon (in that order)
38 |
39 | Keyword Arguments:
40 | zoom_start {int} -- the higher the value, the more zoomed out (default: {12})
41 | radius {int} -- how to aggregate the heatmap (default: {12})
42 | """
43 | # basic data cleaning
44 | # compute the center
45 | center_lat = np.average(locs_df[locs_df.labels[0]])
46 | center_lon = np.average(locs_df[locs_df.labels[1]])
47 | if np.isnan(center_lat):
48 | filtered = locs_df.where(locs_df.labels[0], lambda x: not np.isnan(x))
49 | diff_len = len(locs_df) - len(filtered)
50 | center_lat = np.average(filtered[filtered.labels[0]])
51 | center_lon = np.average(filtered[filtered.labels[1]])
52 | print(f"{FG_PURPLE}[Notification] Filtered out {diff_len} NaN values.{RESET_PRINT}")
53 | return plot(filtered, [center_lat, center_lon], zoom_start, radius)
54 | else:
55 | return plot(locs_df, [center_lat, center_lon], zoom_start, radius)
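# Hypothetical usage, assuming a dataframe whose first two columns are lat/lon:
#   plot_heatmap(trips.select(["lat", "lon"]), zoom_start=10, radius=15)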
56 |
57 |
58 |
59 |
60 | def fetch_and_cache(data_url, file, data_dir="data", force=False):
61 | """
62 | Download and cache a url and return the file object.
63 |
64 | data_url: the web address to download
65 | file: the file in which to save the results.
66 | data_dir: (default="data") the location to save the data
67 | force: if true the file is always re-downloaded
68 |
69 | return: The pathlib.Path object representing the file.
70 | """
71 |
72 | data_dir = Path(data_dir)
73 |     data_dir.mkdir(exist_ok=True)
74 | file_path = data_dir / Path(file)
75 | # If the file already exists and we want to force a download then
76 | # delete the file first so that the creation date is correct.
77 | if force and file_path.exists():
78 | file_path.unlink()
79 | if force or not file_path.exists():
80 | print('Downloading...', end=' ')
81 | resp = requests.get(data_url)
82 | with file_path.open('wb') as f:
83 | f.write(resp.content)
84 | print('Done!')
85 | last_modified_time = time.ctime(file_path.stat().st_mtime)
86 | else:
87 | last_modified_time = time.ctime(file_path.stat().st_mtime)
88 |         print("Using cached version that was downloaded:", last_modified_time)
89 | return file_path
90 |
91 |
92 | def isnotebook():
93 | try:
94 | shell = get_ipython().__class__.__name__
95 | if shell == 'ZMQInteractiveShell':
96 | return True # Jupyter notebook or qtconsole
97 | elif shell == 'TerminalInteractiveShell':
98 | return False # Terminal running IPython
99 | else:
100 | return False # Other type (?)
101 | except NameError:
102 | return False # Probably standard Python interpreter
103 |
104 |
105 | def red_print(m):
106 | print(f"\x1b[31m{m}\x1b[0m")
107 |
108 | LOG_SQL_SETUP_LOG = """
109 | CREATE TABLE log (
110 | session_id TEXT,
111 | action TEXT,
112 | seconds_since_start INTEGER,
113 | optional_metadata TEXT
114 | );
115 | """
116 |
117 | LOG_SQL_SETUP_SESSION = """
118 | CREATE TABLE session (
119 | user_id TEXT,
120 | task_id TEXT,
121 | session_id TEXT,
122 | start_time TEXT
123 | );
124 | """
125 |
126 |
127 | def abs_path(p: str):
128 | """Make path absolute."""
129 | return path.join(path.dirname(path.abspath(__file__)), p)
130 |
131 |
132 | def check_path(p: str):
133 | if not path.exists(p):
134 |         raise UserWarning(f"The path you provided, {p}, does not exist")
135 |
136 |
137 | def sanitize_string_for_var_name(p: str):
138 | return sub('[^0-9a-zA-Z]+', '_', p)
139 |
140 |
141 |
142 |
143 | def get_content(path):
144 | """Get content of file."""
145 | with codecs.open(abs_path(path), encoding='utf-8') as f:
146 | return f.read()
147 |
148 |
149 | def get_random_string(stringLength=10):
150 |     """Generate a random string of fixed length."""
151 |     letters = string.ascii_lowercase
152 |     return ''.join(random.choice(letters) for _ in range(stringLength))
153 |
154 |
155 | def _get_first_target_from_prev_line(stack):
156 | try:
157 | prev_line = stack[-3]
158 | code = prev_line.splitlines()[1]
159 | body = ast.parse(code.strip()).body[0]
160 | first_target = body.targets[0] # type: ignore
161 | return first_target
162 | except:
163 | return None
164 |
165 |
166 | def find_tuple_name():
167 | try:
168 | stack = traceback.format_stack()
169 | first_target = _get_first_target_from_prev_line(stack)
170 | a = first_target.elts[0].id # type: ignore
171 | b = first_target.elts[1].id # type: ignore
172 | return a, b
173 | except:
174 | return None
175 |
176 |
177 | def find_name(throw_error=False):
178 | try:
179 | stack = traceback.format_stack()
180 | first_target = _get_first_target_from_prev_line(stack)
181 | a = first_target.id # type: ignore
182 | if throw_error and (a is None):
183 | raise InternalLogicalError("We did not get a name when expected!")
184 | return a
185 | except:
186 | if throw_error:
187 | raise UserError("We expect you to assign this compute to a variable")
188 | return None
189 |
190 |
191 | ifnone = lambda a, b: b if a is None else a
192 |
193 |
194 | def get_min_max_tuple_from_list(values: List[float]) -> Tuple[float, float]:
195 |     """Computes a (min, max) tuple from the given values
196 | 
197 |     Arguments:
198 |         values {List[float]} -- the values to scan
199 | 
200 |     Returns:
201 |         a (min, max) tuple
202 |     """
203 | return (min(values), max(values))
204 |
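205 | # Minimal usage sketch for `fetch_and_cache` (illustrative only); the URL is
206 | # the Vega datasets mirror that the tutorial notebook also loads from.
207 | if __name__ == "__main__":
208 |     csv_path = fetch_and_cache(
209 |         "https://vega.github.io/vega-datasets/data/disasters.csv",
210 |         "disasters.csv")  # downloads into ./data/ on the first call
211 |     fetch_and_cache(
212 |         "https://vega.github.io/vega-datasets/data/disasters.csv",
213 |         "disasters.csv", force=True)  # force=True always re-downloads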
--------------------------------------------------------------------------------
/b2/vis_types.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 | from b2.util.errors import UserError
3 | from typing import List, Any, Dict, Optional
4 | from typing_extensions import Literal
5 |
6 | class ChartType(Enum):
7 | bar_categorical = "bar_categorical"
8 | bar_linear = "bar_linear"
9 | scatter = "scatter"
10 | line = "line"
11 |
12 |
13 | # values are set to booleans/small ints (rather than strings) to save space.
14 | class FilterLabelOptions(Enum):
15 | filtered=False
16 | unfiltered=True
17 | none=2
18 |
19 |
20 | class Channel(Enum):
21 | x = "x"
22 | y = "y"
23 | color = "color"
24 |
25 |
26 | # class SelectionEvent(object):
27 | # def __init__(self, interaction_time: datetime, predicate: List[SelectionValue], df_name: DFName):
28 | # self.interaction_time = interaction_time
29 | # self.predicate = predicate
30 | # self.df_name = df_name
31 | # self.id = get_random_string(5)
32 |
33 | # def __repr__(self):
34 | # return f"df: {self.df_name}\n predicates: {self.predicate}"
35 |
36 |
37 | # basic stub for Vega typing
38 | VegaSpecType = Dict[str, Any]
39 |
40 | ENCODING_COUNT = 8
41 |
42 | class EncodingSpec(object):
43 | # note that this is synced with the vegaGen.ts file
44 | def __init__(self,
45 | mark: Literal["bar", "circle", "line"],
46 | x: str,
47 | x_type: Literal["ordinal", "quantitative", "temporal"],
48 | y: str,
49 | y_type: Literal["ordinal", "quantitative", "temporal"],
50 | selection_type: Literal["none", "multiclick", "brush"],
51 | selection_dimensions: Literal["", "x", "y", "xy"],
52 | sort: Literal["x", "y", "-y", "-x", ""] = ""
53 | ):
54 | """EncodingSpec object used for B2 to generate Vega-Lite specifications
55 |
56 | Arguments:
57 | mark {str} -- "bar" | "circle" | "line"
58 | x {str} -- column for x axis
59 | x_type {str} -- "ordinal" | "quantitative" | "temporal"
60 | y {str} -- column for y axis
61 | y_type {str} -- "ordinal" | "quantitative" | "temporal"
62 | selection_type {str} -- "none", "multiclick", "brush"
63 | selection_dimensions {str} -- "", "x", "y", "xy"
64 | sort optional{str} -- "", "x", "y", "-x", "-y"
65 | """
66 | self.mark = mark
67 | self.x = x
68 | self.x_type = x_type
69 | self.y = y
70 | self.y_type = y_type
71 | self.selection_dimensions = selection_dimensions
72 | self.selection_type = selection_type
73 | self.sort = sort
74 |
75 |
76 | def __eq__(self, other: 'EncodingSpec'):
77 | return self.to_json() == other.to_json()
78 |
79 |
80 | def __ne__(self, other: 'EncodingSpec'):
81 | return not self.__eq__(other)
82 |
83 |
84 | def __repr__(self):
85 | # FIXME: not sure why we have a "!r" here...
86 | # despite reading... https://stackoverflow.com/questions/38418070/what-does-r-do-in-str-and-repr
87 | return f"EncodingSpec({self.mark!r}, {self.x!r}, {self.x_type}, {self.y!r}, {self.y_type}, {self.selection_dimensions}, {self.selection_type!r}, {self.sort})"
88 |
89 | def to_hash(self):
90 | return f'{self.mark}_{self.x}_{self.x_type}_{self.y}_{self.y_type}_{self.selection_dimensions}_{self.selection_type}_{self.sort}'
91 |
92 |
93 | def to_args(self):
94 | return f'{{"mark"="{self.mark}", "x"="{self.x}", "x_type"="{self.x_type}", "y"="{self.y}", "y_type"="{self.y_type}", "selection_dimensions"="{self.selection_dimensions}", "selection_type"="{self.selection_type}", "sort"="{self.sort}"}}'
95 |
96 |
97 | def to_json(self):
98 | return f'{{"mark": "{self.mark}", "x": "{self.x}", "xType": "{self.x_type}", "y": "{self.y}", "yType": "{self.y_type}", "selectionDimensions": "{self.selection_dimensions}", "selectionType": "{self.selection_type}", "sort": "{self.sort}"}}'
99 |
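100 | # Minimal sketch of constructing an EncodingSpec by hand (the column names are
101 | # illustrative); this mirrors the kind of bar-chart spec B2 generates.
102 | if __name__ == "__main__":
103 |     spec = EncodingSpec(
104 |         mark="bar",
105 |         x="Year_bin", x_type="quantitative",
106 |         y="count", y_type="quantitative",
107 |         selection_type="brush",
108 |         selection_dimensions="x")
109 |     print(spec.to_json())  # camelCased keys, consumed by src/charts/vegaGen.ts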
--------------------------------------------------------------------------------
/docs/Tutorial.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Midas Tutorial\n",
8 | "\n",
9 | "Hello! Please follow the tutorial to learn the basics of Midas. Be sure to play around until you are comfortable. You will have about 20 minutes. Should you have any questions, please feel free to ask Yifan, who will be present during the entire session.\n",
10 | "\n",
11 | "## Introduction\n",
12 |     "Midas is a Jupyter notebook library/extension that aids data exploration by providing relevant static visualizations. The key idea of Midas is that **the operations you perform in the interactive visualization space are also reflected in code space**---you will see what this means if you run the code cells below!\n",
13 | "\n",
14 | "## Dataframe Operations\n",
15 |     "Midas provides a special dataframe whose syntax follows that of the [datascience module](http://data8.org/datascience/) from Data 8. The following are common operations that might be useful for querying (a minimal example cell follows below):\n",
16 |     "\n",
17 |     "* SELECT: `df.select(['col_name', 'more_col_name'])` --- Note that columns are referenced as strings.\n",
18 |     "* WHERE: `df.where('col_name', predicate)` -- the predicates are functions provided in the [`are`](http://data8.org/datascience/predicates.html) library, such as `are.above(8)` (as opposed to the operator overloading seen in pandas, like `df[df['a']>8]`). If you wish to compare two columns, you can use `df.where('col1', predicate, 'col2')`, such as `marbles.where(\"Price\", are.above, \"Amount\")`.\n",
19 |     "* GROUP BY: `df.group('col_name', agg_fun)` -- the default aggregation for `group` is count, but you can also supply the aggregation using existing functions such as Python's built-in `sum`, `min`, `max` (or any of the `numpy` aggregation methods that work on arrays). The aggregation is applied to all the columns that are not being grouped on.\n",
20 |     "* Apply general methods: `df.apply(map_fun, 'col_name')` -- for instance, to derive a new column that is the original column plus 1, call `df.apply(lambda x: x + 1, 'col_name')`, which returns an array that you can then attach with `append_column` (e.g., under the name \"incremented\").\n",
21 | "\n",
22 | "The following are useful for data modification:\n",
23 |     "* `append_column(label, values)` appends a new column; note that the values must be created via `make_array` (so that they are numpy-compliant)\n",
24 | "* `append(array_of_new_values)` appends a new row\n",
25 | "\n",
26 | "Note that you can also access the columns as numpy arrays by using `df['col_name']`, which can be handy to use methods like `np.average(df['col_name'])`."
27 | ]
28 | },
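29 |   {
30 |    "cell_type": "code",
31 |    "execution_count": null,
32 |    "metadata": {},
33 |    "outputs": [],
34 |    "source": [
35 |     "# A minimal sketch of the operations above on a plain datascience Table;\n",
36 |     "# the columns and values are illustrative, not from a real dataset.\n",
37 |     "from datascience import Table, make_array\n",
38 |     "from datascience.predicates import are\n",
39 |     "t = Table().with_columns('Price', make_array(8, 9, 10, 11), 'Amount', make_array(1, 12, 3, 5))\n",
40 |     "t.where('Price', are.above(9)).select(['Price'])\n",
41 |     "t.group('Amount', sum)\n",
42 |     "t.append_column('incremented', t.apply(lambda x: x + 1, 'Price'))"
43 |    ]
44 |   },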
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {},
32 | "source": [
33 | "## Initiate Midas\n",
34 |     "Import the library and create an instance, `m = Midas()`; we call `m` the Midas runtime variable. Each notebook can have only one Midas instance.\n",
35 |     "Then a dashboard-like area pops up to the right. It contains three areas: the data pane (yellow), which shows the dataframes with their accompanying columns, and the others, which show the charts."
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 2,
41 | "metadata": {},
42 | "outputs": [],
43 | "source": [
44 | "from midas import Midas\n",
45 | "m = Midas()\n",
46 | "\n",
47 | "# other utility libraries\n",
48 | "import numpy as np\n",
49 | "from datascience import Table, make_array\n",
50 | "from datascience.predicates import are"
51 | ]
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "metadata": {},
56 | "source": [
57 | "## Load data\n",
58 |     "Midas takes in data through a few APIs, such as `read_table`, used below, which loads a table from a file path or URL.\n",
59 |     "Note that you can also use `from_df`, which loads from an existing pandas dataframe."
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": 3,
65 | "metadata": {},
66 | "outputs": [],
67 | "source": [
68 | "disaster_df = m.read_table('https://vega.github.io/vega-datasets/data/disasters.csv')"
69 | ]
70 | },
71 | {
72 | "cell_type": "markdown",
73 | "metadata": {},
74 | "source": [
75 | "## Seeing data\n",
76 | "\n",
77 |     "Since a lot of basic visualization is highly predictable, Midas attempts to visualize the basics for you directly. However, you may sometimes want to change the encoding, which is also easy to do in Midas---just specify `mark`, `x`, `y`, and if you have three columns, specify the third column for `color` or `size`."
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": 4,
83 | "metadata": {},
84 | "outputs": [],
85 | "source": [
86 | "# 🟡 05:15 PM 🟡\n",
87 | "disaster_df.append_column('Year_bin', disaster_df.apply(lambda x: int(x/20.0) * 20.0, 'Year'))\n",
88 | "Year_distribution = disaster_df.group('Year_bin')"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": null,
94 | "metadata": {},
95 | "outputs": [],
96 | "source": [
97 | "# 🟡 04:04 PM 🟡\n",
98 | "disaster_df.append_column('Deaths_bin', disaster_df.apply(lambda x: int(x/200000.0) * 200000.0, 'Deaths'))\n",
99 | "Deaths_distribution = disaster_df.group('Deaths_bin')"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": null,
105 | "metadata": {},
106 | "outputs": [],
107 | "source": [
108 | "# 🟡 03:55 PM 🟡\n",
109 | "disaster_df.append_column('Year_bin', disaster_df.apply(lambda x: int(x/20.0) * 20.0, 'Year'))\n",
110 | "Year_distribution = disaster_df.group('Year_bin')"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": null,
116 | "metadata": {},
117 | "outputs": [],
118 | "source": [
119 | "Entity_distribution = disaster_df.group('Entity')"
120 | ]
121 | },
122 | {
123 | "cell_type": "markdown",
124 | "metadata": {},
125 | "source": [
126 | "## Getting distribution from clicking on the columns pane\n",
127 | "Go ahead and click on the columns. After you click, two effects take place:\n",
128 |     "1. a cell will be created that contains the dataframe calls that derive the new filtered values, as well as the visualization calls. You will see that they carry colored emoji such as 🟠, which are indicators to help you visually navigate.\n",
129 |     "2. a chart visualizing the newly derived data is created in the pane on the right-hand side\n",
130 |     "\n",
131 |     "If the chart has the wrong encoding, or if the grouping query is inaccurate, feel free to modify the code. You can click on the 📊 icon to copy the chart's current definition to your clipboard. Paste the code into a cell, and the results will be reflected in the chart automatically."
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": null,
137 | "metadata": {},
138 | "outputs": [],
139 | "source": [
140 | "# 🟡 03:37 PM 🟡\n",
141 | "disaster_df.append_column('Year_bin', disaster_df.apply(lambda x: int(x/20.0) * 20.0, 'Year'))\n",
142 | "Year_distribution = disaster_df.group('Year_bin')"
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": null,
148 | "metadata": {},
149 | "outputs": [],
150 | "source": [
151 | "# 🟡 03:37 PM 🟡\n",
152 | "disaster_df.append_column('Deaths_bin', disaster_df.apply(lambda x: int(x/200000.0) * 200000.0, 'Deaths'))\n",
153 | "Deaths_distribution = disaster_df.group('Deaths_bin')"
154 | ]
155 | },
156 | {
157 | "cell_type": "markdown",
158 | "metadata": {},
159 | "source": [
160 | "## Accessing code with \"📋\"\n",
161 | "\n",
162 |     "If you want the code with the selection applied, click on the 📋 icon and the code will be copied to your clipboard --- use it however you want!"
163 | ]
164 | },
165 | {
166 | "cell_type": "markdown",
167 | "metadata": {},
168 | "source": [
169 | "## Snapshot of the current state with \"📷\"\n",
170 | "Clicking on 📷 will insert a new cell with the current chart you see."
171 | ]
172 | },
173 | {
174 | "cell_type": "markdown",
175 | "metadata": {},
176 | "source": [
177 | "## Making selections\n",
178 | "All the existing visualizations are equipped with the ability to **select**.\n",
179 | "\n",
180 | "* With scatter plots, you can **brush** select on both the x and y axis.\n",
181 | "* With bar charts, you can either brush to select the x axis items or click.\n",
182 | "* With line charts, you can brush to select a range on the x axis.\n",
183 | "\n",
184 |     "When you perform a selection, you will observe two effects:\n",
185 |     "1. the charts will be filtered with the new data\n",
186 |     "2. a cell will be generated with the selections you have made---newly generated cells keep appending to the document after the previously executed cell, and as you keep interacting, the old interactions are commented out and the new selection is appended."
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": null,
192 | "metadata": {},
193 | "outputs": [],
194 | "source": []
195 | },
196 | {
197 | "cell_type": "code",
198 | "execution_count": null,
199 | "metadata": {},
200 | "outputs": [],
201 | "source": [
202 | "# reset selections\n",
203 | "m.make_selections([])"
204 | ]
205 | },
206 | {
207 | "cell_type": "markdown",
208 | "metadata": {},
209 | "source": [
210 | "## Navigating selections\n",
211 | "\n",
212 | "You will see that your selections are shown in the selection pane (blue). You can rename and click on the selections to make the selections again."
213 | ]
214 | },
215 | {
216 | "cell_type": "markdown",
217 | "metadata": {},
218 | "source": [
219 | "## Accessing selections programmatically\n",
220 | "\n",
221 | "Access selection in **predicate** form from the Midas runtime variable, `m` (you can assign it other names if you wish).\n",
222 | "- most recent selection: `m.current_selection`\n",
223 | "- all selections made in the past: `m.selection_history`\n",
224 | "\n",
225 | "Access selection results in **data** form, you have the following options:\n",
226 | "- access specific charts by the `.filtered_value`\n"
227 | ]
228 | },
229 | {
230 | "cell_type": "code",
231 | "execution_count": null,
232 | "metadata": {},
233 | "outputs": [],
234 | "source": [
235 | "m.current_selection"
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": null,
241 | "metadata": {},
242 | "outputs": [],
243 | "source": [
244 | "m.selection_history"
245 | ]
246 | },
247 | {
248 | "cell_type": "code",
249 | "execution_count": null,
250 | "metadata": {},
251 | "outputs": [],
252 | "source": []
253 | },
254 | {
255 | "cell_type": "markdown",
256 | "metadata": {},
257 | "source": [
258 |     "## 🚧 Cleaning Data and Reactive State 🚧 (under development)\n",
259 |     "Often, the data requires some trimming and modification for the analysis to continue. For instance, from the distribution of fires, you might notice that only a couple of fire sizes are extreme outliers, and decide to ignore those points.\n",
260 |     "\n",
261 |     "However, you might want to keep the previous visualizations and selections; for this, you can use the `update` method to **synchronize state**, where the charts directly reflect the result of the changes. In cases where the selections are no longer relevant, such as when the relevant column is deleted, the charts will be deleted, but the cells will remain. You can of course create a new dataframe from which to derive charts, in order to preserve the old ones. Note that you cannot update derived dataframes, so in this tutorial only `disaster_df` can be updated."
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": null,
267 | "metadata": {},
268 | "outputs": [],
269 | "source": []
270 | },
271 | {
272 | "cell_type": "markdown",
273 | "metadata": {},
274 | "source": [
275 |     "## 🚧 Reactive Cells and Custom Visualizations 🚧 (under development)\n",
276 |     "\n",
277 |     "A reactive cell is one that Midas re-runs after interactions.\n",
278 |     "Reactive cells can be used to inspect state or computation related to the selection events.\n",
279 |     "The APIs are currently not stable, so they are not exposed here!"
280 | ]
281 | },
282 | {
283 | "cell_type": "code",
284 | "execution_count": null,
285 | "metadata": {},
286 | "outputs": [],
287 | "source": [
288 |     "%%reactive\n",
289 |     "# more interesting examples to come! (cell magics must be on the first line)\n",
290 |     "print(m.current_selection)"
291 | ]
292 | },
293 | {
294 | "cell_type": "markdown",
295 | "metadata": {},
296 | "source": [
297 | "## Using Joins for Analysis\n",
298 | "\n",
299 |     "When performing analysis, we often want to connect different sources of information. For instance, in this analysis, we might be interested in whether the number of fires is related to average rainfall or temperature.\n",
300 |     "\n",
301 |     "Even with joins, Midas can help you \"link\" the relevant tables together, provided that you supply the information for how the two tables can be joined, using the API `a_df.can_join(another_df, 'column_name')`, where the two dataframes share the same column name."
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "execution_count": null,
307 | "metadata": {},
308 | "outputs": [],
309 | "source": [
310 | "# load data from a csv file\n",
311 | "stocks_df = m.read_table(\"https://vega.github.io/vega-datasets/data/sp500.csv\")\n",
312 | "# you can perform basic data cleaning \n",
313 |     "stocks_df.append_column('year', stocks_df.apply(lambda x: x[-4:], 'date'))"
314 | ]
315 | },
316 | {
317 | "cell_type": "code",
318 | "execution_count": null,
319 | "metadata": {},
320 | "outputs": [],
321 | "source": [
322 | "# providing Midas with join information.\n",
323 | "disaster_df.can_join(stocks_df, 'year')"
324 | ]
325 | }
326 | ],
327 | "metadata": {
328 | "kernelspec": {
329 | "display_name": "Python 3",
330 | "language": "python",
331 | "name": "python3"
332 | },
333 | "language_info": {
334 | "codemirror_mode": {
335 | "name": "ipython",
336 | "version": 3
337 | },
338 | "file_extension": ".py",
339 | "mimetype": "text/x-python",
340 | "name": "python",
341 | "nbconvert_exporter": "python",
342 | "pygments_lexer": "ipython3",
343 | "version": "3.7.4"
344 | }
345 | },
346 | "nbformat": 4,
347 | "nbformat_minor": 2
348 | }
349 |
--------------------------------------------------------------------------------
/docs/data8_instrumentation.md:
--------------------------------------------------------------------------------
1 | # Data8 datascience module instrumentation
2 |
3 | ## loader functions
4 |
5 | These are fine
6 |
7 | - `from_rows`
8 | - `from_records`
9 | - `from_file`
10 | - `from_df`
11 | - `from_array`
12 |
13 | ## queries
14 |
15 | These should also be fine, but some of them return a single value --- should those be visualized?
16 |
17 | More primitive SQL operators
18 |
19 | - `apply`
20 | - `copy`
21 | - `drop` (the complement of select)
22 | - `sort`
23 | - `pivot_bin` (no idea)
24 | - `stack` (no idea)
25 |
26 | Shortcuts to SQL functions --- for all of these, we are just going to return normal tables?
27 |
28 | - `num_rows`
29 | - `first`
30 | - `stats`
31 | - `percentile`
32 | - `bin`
33 |
34 | **Queries that are usually done once**
35 | These do not tend to participate in further queries, since you probably do not want to run them many times, so they are not instrumented. Their results are returned as normal, un-instrumented tables; if the user wants to do something with such a result, they have to register it back in again (see the sketch at the end of this note).
36 | > maybe talk to joe/arvind about this.
37 |
38 | - `sample`
39 | - `shuffle`
40 | - `sample_from_distribution`
41 | - `split`
42 |
43 | ## Accessors
44 |
45 | These we support on the API level, but are not part of our context
46 |
47 | - `rows`
48 | - `row`
49 | - `labels`
50 | - `num_columns`
51 | - `column`
52 | - `values`
53 | - `column_index`
54 |
55 | - `plot`
56 | - `bar`
57 | - `group_bar`
58 | - `barh`
59 | - `group_barh`
60 | - `scatter`
61 | - `hist`
62 | - `hist_of_counts`
63 | - `boxplot`
64 |
65 | ## Mutations
66 |
67 | - `append`
68 | - `append_column`
69 | - `remove` (this removes rows)
70 | - `relabel`
71 | - `move_to_start`
72 | - `move_to_end`
73 |
74 |
75 | **Mutations that have nothing to do with the core data**
76 |
77 | - `set_format`
78 |
79 |
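80 | To make the "register it back in" note above concrete, here is a minimal sketch (hypothetical names, not the actual B2 implementation) of how the one-off queries can simply forward to the wrapped `datascience` table and hand back its plain, un-instrumented result:
81 | 
82 | ```python
83 | from datascience import Table
84 | 
85 | 
86 | def passthrough(method_name):
87 |     """Build a method that forwards to the wrapped Table and returns the
88 |     raw, un-instrumented result."""
89 |     def wrapper(self, *args, **kwargs):
90 |         return getattr(self.table, method_name)(*args, **kwargs)
91 |     return wrapper
92 | 
93 | 
94 | class InstrumentedTable:
95 |     """Toy stand-in for the instrumented dataframe; wraps a plain Table."""
96 |     def __init__(self, table: Table):
97 |         self.table = table
98 | 
99 |     # the one-off queries are deliberately left un-instrumented
100 |     sample = passthrough("sample")
101 |     shuffle = passthrough("shuffle")
102 |     split = passthrough("split")
103 | ```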
--------------------------------------------------------------------------------
/docs/logging.md:
--------------------------------------------------------------------------------
1 | # Experiment Logging Design
2 |
3 | There are three ways to perform the analysis:
4 |
5 | - M1: selections vs. coding
6 | - M2: what methods were invoked
7 | - M3: insight
8 |
9 | The schema for the log will contain the following columns:
10 |
11 | - `action`
12 | - `seconds_since_start`
13 | - `optional_metadata`
14 |
15 | In order to perform M1, look for the following two values in `action`:
16 |
17 | - coding: `code_execution`
18 | - ui: all of the UI operations, or focus on `ui_selection` (a concrete query sketch is at the end of this note)
19 |
20 | M2 offers more detailed information, and contains the following functions:
21 |
22 | API
23 |
24 | - `load_data`
25 | - `add_join_info`
26 |
27 | UI
28 |
29 | - `snapshot_single`
30 | - `snapshot_all`
31 | - `move_chart`
32 | - `resize_midas_area`
35 | - `hide_columns_pane`
36 | - `show_columns_pane`
37 | - `show_chart` (previously, `toggle_chart`)
38 | - `hide_chart`
39 | - `column_click`
40 | - `ui_selection`
41 | - `get_code`
42 | - `remove_df`
43 | - `navigate_to_definition_cell` (previously, `change_visual`)
44 | - `hide_midas` (previously, `toggle_midas`)
45 | - `show_midas`
46 | - `show_selection_cells`
47 | - `hide_selection_cells`
48 | - `navigate_to_original_cell`
49 |
50 | Note that we will also have `code_selection` calls. All `ui_selection`s are followed by a `code_selection`, but not all `code_selection`s are preceded by a `ui_selection`; in that case, the selection was triggered by the user from code.
51 |
52 | M3 is pretty straightforward: it is captured as a markdown cell being run,
53 |
54 | - `markdown-rendered`
55 |
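56 | As a concrete example of M1, here is a minimal sketch (assuming the `log` table defined by `LOG_SQL_SETUP_LOG` in `b2/util/utils.py`, persisted in a SQLite file whose name here is illustrative) that counts coding versus UI-selection actions per session:
57 | 
58 | ```python
59 | import sqlite3
60 | 
61 | # log schema: session_id, action, seconds_since_start, optional_metadata
62 | conn = sqlite3.connect("log.db")
63 | rows = conn.execute("""
64 |     SELECT session_id,
65 |            SUM(action = 'code_execution') AS coding,
66 |            SUM(action = 'ui_selection')   AS ui_selections
67 |     FROM log
68 |     GROUP BY session_id
69 | """).fetchall()
70 | for session_id, coding, ui_selections in rows:
71 |     print(session_id, coding, ui_selections)
72 | conn.close()
73 | ```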
--------------------------------------------------------------------------------
/notebooks/data/pitchfork.csv:
--------------------------------------------------------------------------------
1 | reviewid,title,artist,url,score,best_new_music,author,author_type,pub_date,pub_weekday,pub_day,pub_month,pub_year
2 | "22703","mezzanine","massive attack","http://pitchfork.com/reviews/albums/22703-mezzanine/","9.3","0","nate patrin","contributor","2017-01-08","6","8","1","2017"
3 | "22721","prelapsarian","krallice","http://pitchfork.com/reviews/albums/22721-prelapsarian/","7.9","0","zoe camp","contributor","2017-01-07","5","7","1","2017"
4 | "22659","all of them naturals","uranium club","http://pitchfork.com/reviews/albums/22659-all-of-them-naturals/","7.3","0","david glickman","contributor","2017-01-07","5","7","1","2017"
5 | "22661","first songs","kleenex, liliput","http://pitchfork.com/reviews/albums/22661-first-songs/","9.0","1","jenn pelly","associate reviews editor","2017-01-06","4","6","1","2017"
6 | "22725","new start","taso","http://pitchfork.com/reviews/albums/22725-new-start/","8.1","0","kevin lozano","tracks coordinator","2017-01-06","4","6","1","2017"
7 | "22722","insecure (music from the hbo original series)","various artists","http://pitchfork.com/reviews/albums/22722-insecure-music-from-the-hbo-original-series/","7.4","0","vanessa okoth-obbo","contributor","2017-01-05","3","5","1","2017"
8 | "22704","stillness in wonderland","little simz","http://pitchfork.com/reviews/albums/22704-little-simz-stillness-in-wonderland/","7.1","0","katherine st. asaph","contributor","2017-01-05","3","5","1","2017"
9 | "22694","tehillim","yotam avni","http://pitchfork.com/reviews/albums/22694-tehillim/","7.0","0","andy beta","contributor","2017-01-05","3","5","1","2017"
10 | "22714","reflection","brian eno","http://pitchfork.com/reviews/albums/22714-reflection/","7.7","0","andy beta","contributor","2017-01-04","2","4","1","2017"
11 | "22724","filthy america its beautiful","the lox","http://pitchfork.com/reviews/albums/22724-filthy-america-its-beautiful/","5.3","0","ian cohen","contributor","2017-01-04","2","4","1","2017"
12 | "22715","clear sounds/perfetta","harry bertoia","http://pitchfork.com/reviews/albums/22715-clear-soundsperfetta/","8.0","0","marc masters","contributor","2017-01-04","2","4","1","2017"
13 | "22745","run the jewels 3","run the jewels","http://pitchfork.com/reviews/albums/22745-run-the-jewels-3/","8.6","1","sheldon pearce","associate staff writer","2017-01-03","1","3","1","2017"
14 | "22700","nadir","steven warwick","http://pitchfork.com/reviews/albums/22700-nadir/","7.6","0","thea ballard","contributor","2017-01-03","1","3","1","2017"
15 | "22720","december 99th","yasiin bey","http://pitchfork.com/reviews/albums/22720-december-99th/","3.5","0","marcus j. moore","contributor","2017-01-02","0","2","1","2017"
16 | "22699","don't smoke rock","smoke dza, pete rock","http://pitchfork.com/reviews/albums/22699-dont-smoke-rock/","7.4","0","dean van nguyen","contributor","2017-01-02","0","2","1","2017"
17 | "22665","punk45: les punks: the french connection (the first wave of french punk 1977-80)","various artists","http://pitchfork.com/reviews/albums/22665-punk45-les-punks-the-french-connection-the-first-wave-of-french-punk-1977-80/","6.6","0","louis pattison","contributor","2017-01-02","0","2","1","2017"
18 | "22666","brnshj (puncak)","senyawa","http://pitchfork.com/reviews/albums/22666-brnshj-puncak/","7.4","0","philip sherburne","contributing editor","2017-01-02","0","2","1","2017"
19 | "22719","merry christmas lil mama","chance the rapper, jeremih","http://pitchfork.com/reviews/albums/22719-merry-christmas-lil-mama/","8.1","0","sheldon pearce","associate staff writer","2016-12-30","4","30","12","2016"
20 | "22667","///// effectual","jamire williams","http://pitchfork.com/reviews/albums/22667-effectual/","7.2","0","benjamin scheim","contributor","2016-12-30","4","30","12","2016"
21 | "22691","love you to death","the-dream","http://pitchfork.com/reviews/albums/22691-love-you-to-death/","7.0","0","rebecca haithcoat","contributor","2016-12-29","3","29","12","2016"
--------------------------------------------------------------------------------
/notebooks/v1/Basic Demo.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Demo Scenario\n",
8 | "We are going to be analyzing some rental data"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 25,
14 | "metadata": {},
15 | "outputs": [
16 | {
17 | "name": "stdout",
18 | "output_type": "stream",
19 | "text": [
20 | "\u001b[93mfound name m\u001b[0m\n"
21 | ]
22 | }
23 | ],
24 | "source": [
25 | "from midas import Midas\n",
26 | "m = Midas()"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {},
32 | "source": [
33 | "## Basic Profiling and Visualizations\n",
34 | "When loading data into Midas, we show a basic profiler on the side."
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 26,
40 | "metadata": {},
41 | "outputs": [
42 | {
43 | "name": "stdout",
44 | "output_type": "stream",
45 | "text": [
46 | "\u001b[93mfound name contacts_df\u001b[0m\n",
47 | "\u001b[93m+ Addign df contacts_df\u001b[0m\n"
48 | ]
49 | }
50 | ],
51 | "source": [
52 | "contacts_df = m.read_table(\"/Users/yifanwu/Dev/midas/notebooks/rental/contacts.csv\")"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 24,
58 | "metadata": {},
59 | "outputs": [
60 | {
61 | "data": {
62 | "text/plain": [
63 | "{}"
64 | ]
65 | },
66 | "execution_count": 24,
67 | "metadata": {},
68 | "output_type": "execute_result"
69 | }
70 | ],
71 | "source": [
72 | "contacts_df.table._formats"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": 16,
78 | "metadata": {},
79 | "outputs": [
80 | {
81 | "name": "stdout",
82 | "output_type": "stream",
83 | "text": [
84 | "\u001b[93mfound name contacts_df_id_host_anon\u001b[0m\n",
85 | "\u001b[93m+ Addign df contacts_df_id_host_anon\u001b[0m\n"
86 | ]
87 | }
88 | ],
89 | "source": [
90 | "# [MIDAS] You selected the following projection on contacts_df at time 11/29/2019, 6:21:26 PM\n",
91 | "contacts_df_id_host_anon = contacts_df.select(['id_host_anon'])"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": 11,
97 | "metadata": {},
98 | "outputs": [
99 | {
100 | "name": "stdout",
101 | "output_type": "stream",
102 | "text": [
103 | "\u001b[93mfound name contacts_df_id_host_anon\u001b[0m\n",
104 | "\u001b[93m+ Addign df contacts_df_id_host_anon\u001b[0m\n"
105 | ]
106 | }
107 | ],
108 | "source": [
109 | "# [MIDAS] You selected the following projection on contacts_df at time 11/29/2019, 6:14:12 PM\n",
110 | "contacts_df_id_host_anon = contacts_df.select(['id_host_anon'])"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": 3,
116 | "metadata": {},
117 | "outputs": [
118 | {
119 | "name": "stdout",
120 | "output_type": "stream",
121 | "text": [
122 | "\u001b[93mfound name contacts_df_id_guest_anon\u001b[0m\n",
123 | "\u001b[93m+ Addign df contacts_df_id_guest_anon\u001b[0m\n"
124 | ]
125 | }
126 | ],
127 | "source": [
128 | "# [MIDAS] You selected the following projection on contacts_df at time 11/29/2019, 5:06:02 PM\n",
129 | "contacts_df_id_guest_anon = contacts_df.select(['id_guest_anon'])"
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": 20,
135 | "metadata": {},
136 | "outputs": [
137 | {
138 | "ename": "NameError",
139 | "evalue": "name 'contacts_df_id_guest_anon' is not defined",
140 | "output_type": "error",
141 | "traceback": [
142 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
143 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
144 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcontacts_df_id_guest_anon\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_formats\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
145 | "\u001b[0;31mNameError\u001b[0m: name 'contacts_df_id_guest_anon' is not defined"
146 | ]
147 | }
148 | ],
149 | "source": [
150 | "contacts_df_id_guest_anon._formats"
151 | ]
152 | },
153 | {
154 | "cell_type": "code",
155 | "execution_count": 19,
156 | "metadata": {},
157 | "outputs": [
158 | {
159 | "data": {
160 | "text/plain": [
161 | "id_host_anon\n",
162 | "5426897d-960d-4013-9e38-606ae746793c\n",
163 | "f30417c5-6df4-45ac-bfc2-6ad1cce398ab\n",
164 | "13cbf50a-3272-45d4-9866-a06b6ea1b99a\n",
165 | "01614601-d5a4-4776-ab9b-c10d3b865bf0\n",
166 | "f2fed6f3-4c5c-453d-9e64-37c62b8bd06d\n",
167 | "90334ef3-f489-45a0-89e0-d18d370e4a1c\n",
168 | "10dc3fc4-694f-44b2-aaac-9dbace0ebabd\n",
169 | "7e0696cb-6321-4d60-a8e9-793ee14724fd\n",
170 | "1c990fca-5b09-4eca-bdcf-d9e923112f04\n",
171 | "2bd81140-3d71-4fd9-add3-5998f9263c57\n",
172 | "... (27877 rows omitted)"
173 | ]
174 | },
175 | "execution_count": 19,
176 | "metadata": {},
177 | "output_type": "execute_result"
178 | }
179 | ],
180 | "source": [
181 | "m.dfs['contacts_df_id_host_anon'].df"
182 | ]
183 | },
184 | {
185 | "cell_type": "markdown",
186 | "metadata": {},
187 | "source": [
188 | "## Interactive Charts are Generated"
189 | ]
190 | },
191 | {
192 | "cell_type": "code",
193 | "execution_count": null,
194 | "metadata": {},
195 | "outputs": [],
196 | "source": [
197 | "# now let's say that there is an interesting chart the user wants to look at\n",
198 | "contact_channel_first_df = contacts_df.select([\"contact_channel_first\"])"
199 | ]
200 | },
201 | {
202 | "cell_type": "code",
203 | "execution_count": null,
204 | "metadata": {
205 | "scrolled": true
206 | },
207 | "outputs": [],
208 | "source": [
209 | "interactions_df = contacts_df.select([\"m_interactions\"])"
210 | ]
211 | },
212 | {
213 | "cell_type": "markdown",
214 | "metadata": {},
215 | "source": [
216 |     "### Observation\n",
217 |     "From the chart, we can see that there must be some data cleaning issues."
218 | ]
219 | },
220 | {
221 | "cell_type": "markdown",
222 | "metadata": {},
223 | "source": [
224 | "## Access Selections Stream"
225 | ]
226 | },
227 | {
228 | "cell_type": "code",
229 | "execution_count": null,
230 | "metadata": {},
231 | "outputs": [],
232 | "source": [
233 | "contact_channel_stream = contact_channel_first_df.get_stream()\n",
234 | "contact_channel_stream"
235 | ]
236 | },
237 | {
238 | "cell_type": "code",
239 | "execution_count": null,
240 | "metadata": {},
241 | "outputs": [],
242 | "source": [
243 | "contact_channel_stream.current"
244 | ]
245 | },
246 | {
247 | "cell_type": "code",
248 | "execution_count": null,
249 | "metadata": {},
250 | "outputs": [],
251 | "source": [
252 | "contacts_df.project([contacts_df.ts_booking_at]).pandas_value"
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": null,
258 | "metadata": {},
259 | "outputs": [],
260 | "source": [
261 | "room_type_df = listings_df.project([listings_df.room_type]).assign(\"room_type_df\")"
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": null,
267 | "metadata": {},
268 | "outputs": [],
269 | "source": [
270 | "# note the code below does not yet work!"
271 | ]
272 | },
273 | {
274 | "cell_type": "code",
275 | "execution_count": null,
276 | "metadata": {},
277 | "outputs": [],
278 | "source": [
279 | "total_reviews_df = listings_df.project([listings_df.total_reviews]).assign(\"total_reviews_df\")\n"
280 | ]
281 | },
282 | {
283 | "cell_type": "markdown",
284 | "metadata": {},
285 | "source": [
286 | "## Reactive Cells\n",
287 |     "Cells that depend on state that changes will be reactively re-run"
288 | ]
289 | },
290 | {
291 | "cell_type": "code",
292 | "execution_count": null,
293 | "metadata": {},
294 | "outputs": [],
295 | "source": [
296 | "def transform(predicate):\n",
297 | " m.ui_comm.send_debug_msg(f\"Transform {predicate}\")\n",
298 | " new_mdf = contacts_df.apply_selection(contact_channel_stream.current)\n",
299 | " new_mdf.project([new_mdf.m_guests]).assign(\"m_guests\")\n",
300 | "\n",
301 | "contact_channel_stream.add_callback(transform)"
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "execution_count": null,
307 | "metadata": {},
308 | "outputs": [],
309 | "source": [
310 | "new_mdf = contacts_df.apply_selection(contact_channel_stream.current)\n",
311 | "m_guests = new_mdf.project([new_mdf.m_guests]).assign(\"m_guests\")\n",
312 | "m_guests.pandas_value"
313 | ]
314 | },
315 | {
316 | "cell_type": "code",
317 | "execution_count": null,
318 | "metadata": {},
319 | "outputs": [],
320 | "source": [
321 | "contact_channel_stream.ref_to_predicate_list"
322 | ]
323 | },
324 | {
325 | "cell_type": "code",
326 | "execution_count": null,
327 | "metadata": {},
328 | "outputs": [],
329 | "source": [
330 | "m.event_loop.tick_funcs['contact_channel_first_df'] = []"
331 | ]
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": null,
336 | "metadata": {},
337 | "outputs": [],
338 | "source": [
339 | "p = contact_channel_stream.current\n",
340 | "m.event_loop.tick('contact_channel_first_df', p)"
341 | ]
342 | },
343 | {
344 | "cell_type": "code",
345 | "execution_count": null,
346 | "metadata": {},
347 | "outputs": [],
348 | "source": [
349 | "predicate = contact_channel_stream.current\n",
350 | "new_mdf = contacts_df.apply_selection(contact_channel_stream.current)\n",
351 | "m_guests = new_mdf.project([new_mdf.m_guests]).assign(\"m_guests\") \n"
352 | ]
353 | },
354 | {
355 | "cell_type": "markdown",
356 | "metadata": {},
357 | "source": [
358 | "## Working with more than one table"
359 | ]
360 | },
361 | {
362 | "cell_type": "code",
363 | "execution_count": null,
364 | "metadata": {},
365 | "outputs": [],
366 | "source": [
367 | "listings = m.read_table(\"/Users/yifanwu/Dev/midas/notebooks/rental/listings.csv\")\n",
368 | "users = m.read_table(\"/Users/yifanwu/Dev/midas/notebooks/rental/users.csv\")"
369 | ]
370 | }
371 | ],
372 | "metadata": {
373 | "kernelspec": {
374 | "display_name": "Python 3",
375 | "language": "python",
376 | "name": "python3"
377 | },
378 | "language_info": {
379 | "codemirror_mode": {
380 | "name": "ipython",
381 | "version": 3
382 | },
383 | "file_extension": ".py",
384 | "mimetype": "text/x-python",
385 | "name": "python",
386 | "nbconvert_exporter": "python",
387 | "pygments_lexer": "ipython3",
388 | "version": "3.7.4"
389 | }
390 | },
391 | "nbformat": 4,
392 | "nbformat_minor": 2
393 | }
394 |
--------------------------------------------------------------------------------
/notebooks/v1/EndToEndTest.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "%load_ext autoreload\n",
10 | "%autoreload 2 "
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": null,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "from midas import Midas\n",
20 | "m = Midas()"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": null,
26 | "metadata": {},
27 | "outputs": [],
28 | "source": [
29 | "raw = [\n",
30 | " 'a', [1,2,3,4,5,6,7,8,9,10],\n",
31 | " 'b', [10,20,30,40,50,60,70,80,90,100],\n",
32 | " 'c', [100,200,300,400,500,600,700,800,900,1000]\n",
33 | "]"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": null,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": [
42 | "sales = [\n",
43 | " 'a', [1,2,3,4,5,6],\n",
44 | " 'quality', ['high', 'low', 'high', 'high', 'high', 'low']\n",
45 | "]"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": null,
51 | "metadata": {},
52 | "outputs": [],
53 | "source": [
54 | "raw_df = m.with_columns(raw)"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": null,
60 | "metadata": {},
61 | "outputs": [],
62 | "source": [
63 | "sales_df = m.with_columns(sales)"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": [
72 | "sales_df.can_join(raw_df, 'a')"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": null,
78 | "metadata": {},
79 | "outputs": [],
80 | "source": [
81 | "a_df = raw_df.select(['a']) "
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": null,
87 | "metadata": {},
88 | "outputs": [],
89 | "source": [
90 | "m.dfs['sales_df'].df.ops.child"
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": null,
96 | "metadata": {},
97 | "outputs": [],
98 | "source": [
99 | "m.dfs['sales_df'].df.ops.other"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": null,
105 | "metadata": {},
106 | "outputs": [],
107 | "source": [
108 | "a = '1'"
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": null,
114 | "metadata": {},
115 | "outputs": [],
116 | "source": [
117 | "from typing import cast\n",
118 | "type(cast(int, a))"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": null,
124 | "metadata": {},
125 | "outputs": [],
126 | "source": [
127 | "m.context.join_info.get(('sales_df','raw_df')).left_df.ops\n"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": null,
133 | "metadata": {},
134 | "outputs": [],
135 | "source": [
136 | "# [MIDAS] You selected the following from a_df at time 11/28/2019, 12:20:57 PM\n",
137 | "m.add_selection_by_interaction(\"a_df\", {\"x\":[1,2]})"
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": null,
143 | "metadata": {},
144 | "outputs": [],
145 | "source": [
146 | "b_df = raw_df.select(['b'])\n",
147 | "c_df = raw_df.select(['c'])"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": null,
153 | "metadata": {},
154 | "outputs": [],
155 | "source": [
156 | "m.dfs[\"b_df\"].df"
157 | ]
158 | },
159 | {
160 | "cell_type": "code",
161 | "execution_count": null,
162 | "metadata": {},
163 | "outputs": [],
164 | "source": [
165 | "a_stream = a_df.get_stream()"
166 | ]
167 | },
168 | {
169 | "cell_type": "code",
170 | "execution_count": null,
171 | "metadata": {},
172 | "outputs": [],
173 | "source": [
174 | "a_stream.current"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": null,
180 | "metadata": {},
181 | "outputs": [],
182 | "source": [
183 | "# add join info\n",
185 | "b_df.add_join_info()"
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": null,
191 | "metadata": {},
192 | "outputs": [],
193 | "source": [
194 | "raw_df.join(sales_df)"
195 | ]
196 | },
197 | {
198 | "cell_type": "code",
199 | "execution_count": null,
200 | "metadata": {},
201 | "outputs": [],
202 | "source": [
203 | "from datascience import are\n",
204 | "t2 = raw_df.where('b', are.below_or_equal_to(60)).select(['a'])"
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": null,
210 | "metadata": {},
211 | "outputs": [],
212 | "source": [
213 |     "a_stream = t2.get_stream()"
214 | ]
215 | },
216 | {
217 | "cell_type": "code",
218 | "execution_count": null,
219 | "metadata": {},
220 | "outputs": [],
221 | "source": [
222 | "a_stream.current"
223 | ]
224 | },
225 | {
226 | "cell_type": "code",
227 | "execution_count": null,
228 | "metadata": {},
229 | "outputs": [],
230 | "source": [
231 | "def transform(predicate):\n",
232 | " m.ui_comm.send_debug_msg(f\"Transform {predicate}\")\n",
233 | " new_mdf = all_df.apply_selection(predicate)\n",
234 | " new_mdf.project([new_mdf.c]).assign(\"c_df\")\n",
235 | "\n",
236 | "a_stream.add_callback(transform)"
237 | ]
238 | }
239 | ],
240 | "metadata": {
241 | "kernelspec": {
242 | "display_name": "Python 3",
243 | "language": "python",
244 | "name": "python3"
245 | },
246 | "language_info": {
247 | "codemirror_mode": {
248 | "name": "ipython",
249 | "version": 3
250 | },
251 | "file_extension": ".py",
252 | "mimetype": "text/x-python",
253 | "name": "python",
254 | "nbconvert_exporter": "python",
255 | "pygments_lexer": "ipython3",
256 | "version": "3.7.4"
257 | }
258 | },
259 | "nbformat": 4,
260 | "nbformat_minor": 2
261 | }
262 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "b2",
3 | "version": "0.0.5",
4 | "description": "Reification of interactions in computational notebooks",
5 | "repository": {
6 | "type": "git",
7 | "url": "git+https://github.com/yifanwu/b2.git"
8 | },
9 | "author": "",
10 | "license": "BSD-3-Clause",
11 | "bugs": {
12 | "url": "https://github.com/yifanwu/b2/issues"
13 | },
14 | "homepage": "https://github.com/yifanwu/b2#readme",
15 | "scripts": {
16 | "format": "prettier --write 'src/*.ts'",
17 | "lint": "prettier --check 'src/*.ts'",
18 | "build": "webpack --mode production",
19 | "watch": "webpack --mode development --watch"
20 | },
21 | "dependencies": {
22 | "@jupyter-widgets/base": "2.0.1",
23 | "@nteract/data-explorer": "^7.1.2",
24 | "@types/jqueryui": "^1.12.7",
25 | "@types/react": "^16.9.1",
26 | "@types/react-addons-update": "^0.14.20",
27 | "@types/react-dom": "^16.8.5",
28 | "@types/react-editext": "^3.1.0",
29 | "@types/webpack-env": "*",
30 | "array-move": "^2.2.0",
31 | "jquery": "^3.4.1",
32 | "jqueryui": "^1.11.1",
33 | "react": "^16.10.2",
34 | "react-addons-update": "^15.6.2",
35 | "react-dom": "^16.9.0",
36 | "react-editext": "^3.8.0",
37 | "react-grid-layout": "^0.16.6",
38 | "react-sortable-hoc": "^1.10.1",
39 | "react-transition-group": "^4.2.2",
40 | "style-loader": "^1.0.0",
41 | "styled-components": "^4.4.0",
42 | "tslint": "^5.18.0",
43 | "tslint-react": "^4.0.0",
44 | "vega": "^5.10.0",
45 | "vega-embed": "^4.2.5",
46 | "vega-lite": "^4.6.0"
47 | },
48 | "devDependencies": {
49 | "css-loader": "^3.1.0",
50 | "prettier": "^1.18.2",
51 | "ts-loader": "^6.0.4",
52 | "typescript": "^3.5.2",
53 | "webpack": "^4.35.2",
54 | "webpack-cli": "^3.3.5"
55 | },
56 | "private": true
57 | }
58 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | typing_extensions
2 | jupyter
3 | pandas
4 | datascience
5 | asttokens
6 | pyperclip
7 | vega
8 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 | license_file = LICENSE
4 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | LONG_DESCRIPTION = """
2 | B2
3 | ============
4 |
5 | Reification of interactions in Jupyter Notebook.
6 |
7 | For more information, see https://github.com/yifanwu/b2.
8 | """
9 |
10 | DESCRIPTION = "B2: easy EDA in Jupyter Notebook"
11 | NAME = "b2-ext"
12 | PACKAGE_DATA = {'b2': ['static/*.js',
13 | 'static/*.js.map',
14 | 'static/*.html']}
15 | AUTHOR = 'Yifan Wu'
16 | AUTHOR_EMAIL = 'yifanwu@berkeley.edu'
17 | URL = 'http://github.com/yifanwu/b2'
18 | DOWNLOAD_URL = 'http://github.com/yifanwu/b2'
19 | LICENSE = 'BSD 3-clause'
20 | DATA_FILES = [
21 | ('share/jupyter/nbextensions/b2', [
22 | 'b2/static/index.js',
23 | 'b2/static/index.js.map'
24 | ]),
25 | ('etc/jupyter/nbconfig/notebook.d' , ['b2.json'])
26 | ]
27 | # EXTRAS_REQUIRE = {'foldcode': ['codemirror/addon/fold/foldcode']}
28 |
29 |
30 | import io
31 | import os
32 | import re
33 |
34 | try:
35 | from setuptools import setup, find_packages
36 | except ImportError:
37 | from distutils.core import setup
38 |
39 |
40 | def read(path, encoding='utf-8'):
41 | path = os.path.join(os.path.dirname(__file__), path)
42 | with io.open(path, encoding=encoding) as fp:
43 | return fp.read()
44 |
45 |
46 | def version(path):
47 |     """Obtain the package version from a python file, e.g. pkg/__init__.py
50 | """
51 | version_file = read(path)
52 | version_match = re.search(r"""^__version__ = ['"]([^'"]*)['"]""",
53 | version_file, re.M)
54 | if version_match:
55 | return version_match.group(1)
56 | raise RuntimeError("Unable to find version string.")
57 |
58 |
59 | VERSION = version('b2/__init__.py')
60 |
61 |
62 | setup(name=NAME,
63 | version=VERSION,
64 | description=DESCRIPTION,
65 | long_description=LONG_DESCRIPTION,
66 | author=AUTHOR,
67 | author_email=AUTHOR_EMAIL,
68 | url=URL,
69 | download_url=DOWNLOAD_URL,
70 | license=LICENSE,
71 | packages=find_packages(),
72 | package_data=PACKAGE_DATA,
73 | data_files=DATA_FILES,
74 | # extras_require=EXTRAS_REQUIRE,
75 | include_package_data=True,
76 | classifiers=[
77 | 'Development Status :: 2 - Pre-Alpha',
78 | 'Environment :: Other Environment',
79 | 'Intended Audience :: Science/Research',
80 |           'License :: OSI Approved :: BSD License',
81 | 'Natural Language :: English',
82 | 'Programming Language :: Python',
83 | 'Programming Language :: Python :: 3.7'],
84 | )
85 |
--------------------------------------------------------------------------------
/src/CellManager.ts:
--------------------------------------------------------------------------------
1 | import { LogDebug, commentUncommented, LogSteps, getEmojiEnnotatedComment, foldCode, showOrHideSelectionCells, findQueryCell, selectCell, deleteAllSelectionCells } from "./utils";
2 | import { MIDAS_SELECTION_FUN, CELL_METADATA_FUN_TYPE, MIDAS_COLAPSE_CELL_CLASS, MIDAS_CURRENT_CLASS } from "./constants";
3 | import { FunKind } from "./types";
4 | import { LoggerFunction, LogEntryBase } from "./logging";
5 |
6 | interface SingleCell {
7 | code: string;
8 | cell: any;
9 | time: Date;
10 | step: number;
11 | funKind: FunKind;
12 | }
13 |
14 |
15 | export default class CellManager {
16 | currentStep: number;
17 | cellsCreated: SingleCell[];
18 | midasInstanceName: string;
19 | prevFocus?: string;
20 | currentFocus?: string;
21 | lastExecutedCell?: any;
22 | lastExecutedCellPos?: number;
23 |   reactiveCells: Map<string, Set<number>>;
24 |   reactiveCellsReverse: Map<number, string>;
25 | showSelectionCells: boolean;
26 | logger: LoggerFunction;
27 |
28 | constructor(midasInstanceName: string, logger: LoggerFunction) {
29 | this.recordReactiveCell = this.recordReactiveCell.bind(this);
30 | this.toggleSelectionCells = this.toggleSelectionCells.bind(this);
31 |
32 | this.currentStep = 0;
33 | this.cellsCreated = [];
34 | this.midasInstanceName = midasInstanceName;
35 | this.prevFocus = undefined;
36 | this.currentFocus = undefined;
37 | this.lastExecutedCell = null;
38 | this.lastExecutedCellPos = null;
39 | this.reactiveCells = new Map();
40 | this.reactiveCellsReverse = new Map();
41 | this.showSelectionCells = true;
42 | this.logger = logger;
43 |
44 |     // make sure that there is currently no highlighted cell
45 | const allCells = Jupyter.notebook.get_cells();
46 | allCells.forEach((c: any) => {
47 | c.element.removeClass(MIDAS_CURRENT_CLASS);
48 | });
49 | }
50 |
51 | setFocus(dfName?: string) {
52 | this.prevFocus = this.currentFocus;
53 | this.currentFocus = dfName;
54 | }
55 |
56 | /**
57 | * called by snapshot features.
58 | * @param div
59 | * @param comments
60 | */
61 | executeCapturedCells(div: string, comments: string) {
62 | this.createCell(`#${comments}\nfrom IPython.display import HTML, display\ndisplay(HTML("""${div}"""))`, "chart", true);
63 | }
64 |
65 | runReactiveCells(dfName: string) {
66 | // "" is for all reactive cells
67 | function getCell(c: number) {
68 | const cIdxMsg = Jupyter.notebook.get_msg_cell(c);
69 | if (cIdxMsg) {
70 | const idx = Jupyter.notebook.find_cell_index(cIdxMsg);
71 | if (idx > -1) {
72 | LogDebug(`Found cell for ${dfName} with ${c}`);
73 | return idx;
74 | }
75 | }
76 | LogDebug(`One of the cells is no longer found for ${c}`);
77 | }
78 |
79 |     function processCells(cells: Set<number>) {
80 |       let cellIdxs: number[] = [];
81 |       let newCells = new Set<number>();
82 | cells.forEach((c) => {
83 | const r = getCell(c);
84 | if (r) {
85 | cellIdxs.push(r);
86 | newCells.add(c);
87 | }
88 | });
89 | LogSteps(`[${dfName}] Reactively executing cells ${cellIdxs}`);
90 | Jupyter.notebook.execute_cells(cellIdxs);
91 | return newCells;
92 | }
93 |
94 | // processed separately to ensure that the splicing would work correctly
95 | const allCells = this.reactiveCells.get("");
96 | const dfCells = this.reactiveCells.get(dfName);
97 | if (allCells) {
98 | const newSet = processCells(allCells);
99 | // TODO: update the newSet
100 | }
101 | if (dfCells) processCells(dfCells);
102 | }
103 |
104 |
105 | recordReactiveCell(dfName: string, cellId: number) {
106 | if (!this.reactiveCells.has(dfName)) {
107 | this.reactiveCells.set(dfName, new Set());
108 | }
109 | this.reactiveCells.get(dfName).add(cellId);
110 | this.reactiveCellsReverse.set(cellId, dfName);
111 | }
112 |
113 | removeReactiveCell(cellId: number) {
114 | const dfName = this.reactiveCellsReverse.get(cellId);
115 | const cellSet = this.reactiveCells.get(dfName);
116 | cellSet.delete(cellId);
117 | }
118 |
119 | /**
120 | * This is triggered by the interactions
121 | * TODO: rename to indicate that this is used just by the interactions
122 | * @param funName
123 | * @param params
124 | */
125 | executeFunction(funName: string, params: string) {
126 | const text = `${this.midasInstanceName}.${funName}(${params})`;
127 | if ((funName === MIDAS_SELECTION_FUN) && this.prevFocus && this.currentFocus) {
128 | const cell = this.cellsCreated[this.cellsCreated.length - 1].cell;
129 | const oldCode = cell.get_text();
130 |
131 | const emojiComment = getEmojiEnnotatedComment("interaction");
132 | const newCode = commentUncommented(oldCode, text);
133 |       // now make sure the code is folded!
134 | const newText = emojiComment + "\n" + newCode.join("\n");
135 | cell.set_text(newText);
136 | this.executeCell(cell, "interaction");
137 | // 1 because we want to leave the emoji
138 | // -1 because the last line is the line that executes
139 | foldCode(cell.code_mirror, 1, newCode.length - 1);
140 | } else {
141 | this.createCell(text, "interaction", true);
142 | }
143 | return;
144 | }
145 |
146 | getLastExecutedCellIdx() {
147 | if (this.lastExecutedCell) {
148 | const idx = Jupyter.notebook.find_cell_index(this.lastExecutedCell);
149 | if (idx !== null) {
150 | return idx;
151 | }
152 | return this.lastExecutedCellPos;
153 | }
154 | }
155 |
156 | /**
157 | * note that we chose not to scroll for this
158 |    * because if we had competing scrolls (e.g., w/ a reactive cell), then the experience may get confusing.
159 | * @param code
160 | * @param funKind
161 | */
162 | createCell(code: string, funKind: FunKind, shouldExecute: boolean) {
163 |     // check if this has already been executed before
164 | if (funKind === "query") {
165 | const foundCell = findQueryCell(code);
166 | if (foundCell) {
167 | if (shouldExecute) {
168 | this.executeCell(foundCell, funKind);
169 | } else {
170 | // just scroll to it
171 | selectCell(foundCell, true);
172 | }
173 | return;
174 | }
175 | }
176 | // actually create if needed
177 | let cell;
178 | const idx = this.getLastExecutedCellIdx();
179 | if (idx) {
180 | cell = Jupyter.notebook.insert_cell_at_index("code", idx + 1);
181 | } else {
182 | LogDebug("Last executed cell not found!");
183 | const allCells = Jupyter.notebook.get_cells();
184 | const insertIdx = allCells.length;
185 | cell = Jupyter.notebook.insert_cell_at_index("code", insertIdx);
186 | }
187 | cell.metadata[CELL_METADATA_FUN_TYPE] = funKind;
188 |
189 | // modify content
190 | const comment = getEmojiEnnotatedComment(funKind);
191 | cell.set_text(comment + "\n" + code);
192 |
193 | // if we need to hide it
194 | if ((funKind === "interaction") && (!this.showSelectionCells)) {
195 | cell.element.addClass(MIDAS_COLAPSE_CELL_CLASS);
196 | }
197 |
198 | // update internal state
199 | this.cellsCreated.push({
200 | code,
201 | funKind,
202 | cell,
203 | step: this.currentStep,
204 | time: new Date()
205 | });
206 |
207 | if (shouldExecute) {
208 | this.executeCell(cell, funKind);
209 | } else {
210 | // FIXME: this shouldn't even happen...
211 | selectCell(cell, false);
212 | }
213 | return cell;
214 | }
215 |
216 | toggleSelectionCells() {
217 | this.showSelectionCells = !this.showSelectionCells;
218 | showOrHideSelectionCells(this.showSelectionCells);
219 | const action = this.showSelectionCells ? "show_selection_cells" : "hide_selection_cells";
220 | const entry: LogEntryBase = {
221 | action,
222 | actionKind: "ui_control",
223 | };
224 | this.logger(entry);
225 | }
226 |
227 | deleteAllSelectionCells() {
228 | if (confirm(`Are you sure you want to remove all selection cells so far? This cannot be undone.`)) {
229 | deleteAllSelectionCells();
230 | }
231 | }
232 |
233 | /**
234 | * we can use one of the following two:
235 | * - Jupyter.notebook.insert_cell_at_index(type, index);
236 | * - Jupyter.notebook.insert_cell_above("code");
237 | *
238 | * we are going to try with inserting at a fixed place
239 | */
240 | executeCell(cell: any, funKind: FunKind) {
241 | cell.execute();
242 | this.currentStep += 1;
243 | if (funKind === "query" || funKind === "chart") {
244 | selectCell(cell, true);
245 | }
246 | return cell.cell_id;
247 | }
248 |
249 | /**
250 | * To be called by the event listener to update CellManager
251 | * @param cell
252 | */
253 | updateLastExecutedCell(cell: any) {
254 | // remove prev
255 | if (this.lastExecutedCell) {
256 | this.lastExecutedCell.element.removeClass(MIDAS_CURRENT_CLASS);
257 | }
258 | this.lastExecutedCell = cell;
259 | // add new
260 | cell.element.addClass(MIDAS_CURRENT_CLASS);
261 | }
262 | updateLastExecutedCellPos(idx: number) {
263 | this.lastExecutedCellPos = idx;
264 | }
265 | }
--------------------------------------------------------------------------------
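The reactive-cell bookkeeping in `CellManager` pairs a forward map (dataframe name to cell ids, with `""` as the catch-all key) with a reverse map (cell id to dataframe name). A minimal standalone sketch of that contract, with hypothetical names, might look like this:

```ts
// Illustrative sketch of the index behind recordReactiveCell /
// removeReactiveCell above; the class and method names are made up.
class ReactiveCellIndex {
  private byDf = new Map<string, Set<number>>();  // dfName -> cell ids
  private byCell = new Map<number, string>();     // cell id -> dfName

  record(dfName: string, cellId: number): void {
    if (!this.byDf.has(dfName)) {
      this.byDf.set(dfName, new Set<number>());
    }
    this.byDf.get(dfName)!.add(cellId);
    this.byCell.set(cellId, dfName);
  }

  remove(cellId: number): void {
    const dfName = this.byCell.get(cellId);
    if (dfName === undefined) return;  // unknown cell: nothing to delete
    this.byDf.get(dfName)?.delete(cellId);
    this.byCell.delete(cellId);
  }

  cellsFor(dfName: string): Set<number> {
    // "" is the catch-all bucket for cells reactive to every dataframe
    return this.byDf.get(dfName) ?? new Set<number>();
  }
}
```

Unlike this sketch, `removeReactiveCell` above does not guard against a cell id that was never recorded; in that case `this.reactiveCells.get(dfName)` is `undefined` and `cellSet.delete` would throw.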
/src/charts/vegaGen.ts:
--------------------------------------------------------------------------------
1 | import { IS_OVERVIEW_FIELD_NAME, CHART_HEIGHT } from "../constants";
2 | import { LogInternalError } from "../utils";
3 |
4 | type SelectionType = "multiclick" | "brush" | "none";
5 | export type SelectionDimensions = "" | "x" | "y" | "xy";
6 | type SortType = "x" | "-x" | "y" | "-y" | "";
7 |
8 | // note that this is synced with the vis_types.py file
9 | export interface EncodingSpec {
10 | mark: "bar" | "circle" | "line";
11 | x: string;
12 | xType: "ordinal" | "quantitative" | "temporal";
13 | y: string;
14 | yType: "ordinal" | "quantitative" | "temporal";
15 | selectionType: SelectionType;
16 | selectionDimensions: SelectionDimensions;
17 | sort: SortType;
18 | size?: string;
19 | }
20 |
21 | export function multiSelectedField(e: EncodingSpec) {
22 | if ((e.selectionDimensions === "x") || (e.selectionDimensions === "y")) {
23 | return e[e.selectionDimensions];
24 | }
25 | return LogInternalError("cannot call multiSelectedField on such spec");
26 | }
27 |
28 | const colorSpec = {
29 | "field": IS_OVERVIEW_FIELD_NAME, "type": "nominal",
30 | "scale": {"range": ["#003E6B", "#9FB3C8"], "domain": [false, true]},
31 | // @ts-ignore
32 | "legend": null
33 | };
34 |
35 | const selectedColorSpec = {
36 | "field": IS_OVERVIEW_FIELD_NAME, "type": "nominal",
37 | "scale": {"range": ["#fd8d3c", "#fdae6b"], "domain": [false, true]},
38 | // @ts-ignore
39 | "legend": null
40 | };
41 |
42 | // for the field "zoom", under top-level "selection"
43 | const zoomSelection = {
44 | "type": "interval",
45 | "bind": "scales",
46 | "translate": "[mousedown[!event.shiftKey], window:mouseup] > window:mousemove!",
47 | "zoom": "wheel!"
48 | };
49 |
50 | // for the field "brush", under top-level "selection"
51 | function brushSelection(selectionKind: SelectionDimensions) {
52 | let result = {
53 | "type": "interval",
54 | // "resolve": "union",
55 | // "on": "[mousedown[event.shiftKey], window:mouseup] > window:mousemove!",
56 | // "translate": "[mousedown[event.shiftKey], window:mouseup] > window:mousemove!",
57 | // @ts-ignore
58 | // "zoom": null
59 | // "empty": "none" is needed so an empty brush does not select every mark in the filtered overlay layer
60 | "empty": "none"
61 | };
62 | if (selectionKind === "x") {
63 | result["encodings"] = ["x"];
64 | } else if (selectionKind === "y") {
65 | result["encodings"] = ["y"];
66 | }
67 | return result;
68 | }
69 |
70 | function getSelectionDimensionsToArray(s: SelectionDimensions) {
71 | if (s === "") {
72 | LogInternalError("Should only be called if there are selection dimensions");
73 | }
74 | return s.split("");
75 | }
76 |
77 | function genSelection(selectionType: SelectionType, selectionDimensions: SelectionDimensions) {
78 | if (selectionDimensions === "") {
79 | return {
80 | // "zoom": zoomSelection
81 | };
82 | }
83 | if (selectionType === "multiclick") {
84 | return {
85 | // "zoom": zoomSelection,
86 | "select": {
87 | "type": "multi",
88 | "encodings": getSelectionDimensionsToArray(selectionDimensions),
89 | // note that this empty is important for the selections to work
90 | "empty": "none"
91 | }
92 | };
93 | }
94 | if (selectionType === "brush") {
95 | return {
96 | // "zoom": zoomSelection,
97 | "brush": brushSelection(selectionDimensions)
98 | };
99 | }
100 | LogInternalError(`Only two selection types are supported, but you specified ${selectionType}`);
101 | // roll with it?
102 | return {
103 | // "zoom": zoomSelection
104 | };
105 | }
106 |
107 | function genSelectionReference(selectionType: SelectionType) {
108 | if (selectionType === "multiclick") {
109 | return "select";
110 | }
111 | return "brush";
112 | }
113 |
114 | function modifySpecForSort(spec: any, sort: SortType, x: string, y: string) {
115 |
116 | if ((sort === "y") || (sort === "-y")) {
117 | spec["transform"] = [{
118 | "calculate": `datum.is_overview ? datum['${y}'] : null`,
119 | "as": "sort_order"
120 | }];
121 | const order = (sort === "-y")
122 | ? "descending"
123 | : "ascending"
124 | ;
125 | spec["encoding"]["x"]["sort"] = {"field": "sort_order", "order": order};
126 |
127 | } else if ((sort === "x") || (sort === "-x")) {
128 | spec["transform"] = [{
129 | "calculate": `datum.is_overview ? datum['${x}'] : null`,
130 | "as": "sort_order"
131 | }];
132 | const order = (sort === "-x")
133 | ? "descending"
134 | : "ascending"
135 | ;
136 | spec["encoding"]["y"]["sort"] = {"field": "sort_order", "order": order};
137 | }
138 | // modify in place
139 | return;
140 | }
141 |
142 |
143 | export function genVegaSpec(encoding: EncodingSpec, dfName: string, data: any[]) {
144 | switch (encoding.mark) {
145 | case "bar": {
146 | let barSpec = {
147 | "$schema": "https://vega.github.io/schema/vega-lite/v4.json",
148 | "description": `Midas Generated Visualization of dataframe ${dfName}`,
149 | "height": CHART_HEIGHT,
150 | "data": {
151 | "values": data
152 | },
153 | "encoding": {
154 | "x": {
155 | "field": encoding.x,
156 | "type": encoding.xType
157 | },
158 | "y": {
159 | "field": encoding.y,
160 | "type": encoding.yType,
161 | // @ts-ignore
162 | "stack": null
163 | },
164 | "opacity": {
165 | "value": 0.5
166 | },
167 | },
168 | };
169 | if (encoding.selectionDimensions === "") {
170 | barSpec["mark"] = "bar";
171 | barSpec["encoding"]["color"] = colorSpec;
172 | } else {
173 | barSpec["layer"] = [
174 | {
175 | "mark": {"type": "bar", "tooltip": true},
176 | "encoding": {
177 | "color": colorSpec
178 | },
179 | "selection": genSelection(encoding.selectionType, encoding.selectionDimensions),
180 | },
181 | {
182 | "mark": {"type": "bar", "tooltip": true},
183 | "transform": [
184 | {
185 | "filter": {
186 | "selection": genSelectionReference(encoding.selectionType)
187 | }
188 | }
189 | ],
190 | "encoding": {
191 | "color": selectedColorSpec
192 | }
193 | }
194 | ];
195 | barSpec["resolve"] = {"scale": {"color": "independent"}};
196 | }
197 | modifySpecForSort(barSpec, encoding.sort, encoding.x, encoding.y);
198 | return barSpec;
199 | }
200 | case "circle": {
201 | const circleSpec = {
202 | "$schema": "https://vega.github.io/schema/vega-lite/v4.json",
203 | "height": CHART_HEIGHT,
204 | "description": `Midas for ${dfName}`,
205 | "data": { "values": data },
206 | "selection": genSelection(encoding.selectionType, encoding.selectionDimensions),
207 | "mark": {"type": "point", "tooltip": true},
208 | "encoding": {
209 | "x": {
210 | "field": encoding.x,
211 | "type": encoding.xType,
212 | "scale": {"zero": false}
213 | },
214 | "y": {"field": encoding.y, "type": encoding.yType},
215 | "color": colorSpec,
216 | "opacity": {"value": 0.5}
217 | }
218 | };
219 | // if this is click, then we need to add extra highlighting
220 | if (encoding.selectionType === "multiclick") {
221 | circleSpec["encoding"]["fill"] = {
222 | "condition": [{"test": {"selection": "select"}, "value": "red"}],
223 | "value": "none"
224 | };
225 | }
226 |
227 | modifySpecForSort(circleSpec, encoding.sort, encoding.x, encoding.y);
228 | return circleSpec;
229 | }
230 | case "line": {
231 | const lineSpec = {
232 | "$schema": "https://vega.github.io/schema/vega-lite/v4.json",
233 | "description": dfName,
234 | "height": CHART_HEIGHT,
235 | "data": { "values": data },
236 | "selection": genSelection(encoding.selectionType, encoding.selectionDimensions),
237 | "mark": "line",
238 | "encoding": {
239 | "x": {"field": encoding.x, "type": encoding.xType},
240 | "y": {"field": encoding.y, "type": encoding.yType},
241 | "color": colorSpec,
242 | },
243 | "opacity": {"value": 0.5}
244 | };
245 | modifySpecForSort(lineSpec, encoding.sort, encoding.x, encoding.y);
246 | return lineSpec;
247 | }
248 | default:
249 | throw Error(`${encoding.mark} not handled`);
250 | }
251 | }
--------------------------------------------------------------------------------
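Since `genVegaSpec` returns a plain Vega-Lite v4 spec object, it can be rendered by any Vega-Lite runtime. A hedged usage sketch, assuming the `vega-embed` package (not part of this repo) and a hypothetical `#chart` container; the encoding fields and data are made up, though `is_overview` matches the field name used in `modifySpecForSort`:

```ts
import vegaEmbed from "vega-embed";
import { genVegaSpec, EncodingSpec } from "./charts/vegaGen";

// Illustrative encoding for a selectable bar chart.
const encoding: EncodingSpec = {
  mark: "bar",
  x: "origin",
  xType: "ordinal",
  y: "count",
  yType: "quantitative",
  selectionType: "multiclick",
  selectionDimensions: "x",
  sort: "",
};

const data = [
  { origin: "USA", count: 10, is_overview: true },
  { origin: "Japan", count: 7, is_overview: true },
];

// Build the layered bar spec and hand it to the embedder.
const spec = genVegaSpec(encoding, "cars", data);
vegaEmbed("#chart", spec as any).catch(console.error);
```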
/src/codefolding.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * All the following code is taken from
3 | *
4 | * https://github.com/ipython-contrib/jupyter_contrib_nbextensions/blob/master/src/jupyter_contrib_nbextensions/nbextensions/codefolding/
5 | */
6 |
7 | import { INTERACT_EMOJI } from "./constants";
8 |
9 | function restoreFolding(cell: any, codecell: any) {
10 | if (cell.metadata.code_folding === undefined || !(cell instanceof codecell.CodeCell)) {
11 | return;
12 | }
13 | // visit in reverse order, as otherwise nested folds un-fold outer ones
14 | let lines = cell.metadata.code_folding.slice().sort(function (a: number, b: number) { return a - b; }); // numeric sort, not lexicographic
15 | for (let idx = lines.length - 1; idx >= 0; idx--) {
16 | let line = lines[idx];
17 | let opts = cell.code_mirror.state.foldGutter.options;
18 | let linetext = cell.code_mirror.getLine(line);
19 | if (linetext !== undefined) {
20 | cell.code_mirror.foldCode(CodeMirror.Pos(line, 0), opts.rangeFinder);
21 | }
22 | else {
23 | // the line doesn't exist, so we should remove it from metadata
24 | cell.metadata.code_folding = lines.slice(0, idx);
25 | }
26 | cell.code_mirror.refresh();
27 | }
28 | }
29 |
30 | function activate_cm_folding (cm: any) {
31 | let gutters = cm.getOption("gutters").slice();
32 | if ($.inArray("CodeMirror-foldgutter", gutters) < 0) {
33 | gutters.push("CodeMirror-foldgutter");
34 | cm.setOption("gutters", gutters);
35 | }
36 |
37 | /* set indent or brace folding */
38 | let opts: any = true;
39 | if (Jupyter.notebook) {
40 | opts = {
41 | rangeFinder: new CodeMirror.fold.combine(
42 | CodeMirror.fold.firstline,
43 | CodeMirror.fold.magic,
44 | CodeMirror.fold.blockcomment,
45 | cm.getMode().fold === "indent" ? CodeMirror.fold.indent : CodeMirror.fold.brace
46 | )
47 | };
48 | }
49 | cm.setOption("foldGutter", opts);
50 | }
51 |
52 | function updateMetadata (cm: any) {
53 | let list = cm.getAllMarks();
54 | let lines = [];
55 | for (let i = 0; i < list.length; i++) {
56 | if (list[i].__isFold) {
57 | let range = list[i].find();
58 | lines.push(range.from.line);
59 | }
60 | }
61 | /* User can click on gutter of unselected cells, so make sure we store metadata in the correct cell */
62 | let cell = Jupyter.notebook.get_selected_cell();
63 | if (cell.code_mirror !== cm) {
64 | let cells = Jupyter.notebook.get_cells();
65 | let ncells = Jupyter.notebook.ncells();
66 | for (let k = 0; k < ncells; k++) {
67 | let _cell = cells[k];
68 | if (_cell.code_mirror === cm ) { cell = _cell; break; }
69 | }
70 | }
71 | cell.metadata.code_folding = lines;
72 | }
73 |
74 | function regFoldHelper() {
75 | CodeMirror.registerHelper("fold", "firstline", function(cm: any, start: any) {
76 | let mode = cm.getMode(), Token = mode.lineComment;
77 | if (start.line === 0) {
78 | let lineText = cm.getLine(start.line);
79 | let found = lineText.lastIndexOf(Token, 0);
80 | if (found === 0) {
81 | // the following is customization
82 | // if there is a blue emoji then do not comment the last line out
83 | // ideally we can access the metadata, but hack works for now
84 | const end = lineText.includes(INTERACT_EMOJI)
85 | ? cm.lastLine() - 1
86 | : cm.lastLine()
87 | ;
88 | return {
89 | from: CodeMirror.Pos(start.line, null),
90 | to: CodeMirror.Pos(end, null)
91 | };
92 | }
93 | }
94 | return null;
95 | });
96 | }
97 |
98 | function regMagicHelper() {
99 | CodeMirror.registerHelper("fold", "magic", function(cm: any, start: any) {
100 | let mode = cm.getMode(), Token = "%%";
101 | if (start.line === 0) {
102 | let lineText = cm.getLine(start.line);
103 | let found = lineText.lastIndexOf(Token, 0);
104 | if (found === 0) {
105 | const end = cm.lastLine();
106 | return {
107 | from: CodeMirror.Pos(start.line, null),
108 | to: CodeMirror.Pos(end, null)
109 | };
110 | }
111 | }
112 | return null;
113 | });
114 | }
115 |
116 | function regBlockHelper() {
117 | CodeMirror.registerHelper("fold", "blockcomment", function(cm: any, start: any) {
118 | let mode = cm.getMode(), Token = mode.lineComment;
119 | let lineText = cm.getLine(start.line);
120 | let found = lineText.lastIndexOf(Token, 0);
121 | if (found === 0) { // current line is a comment
122 | if (start.line === 0) {
123 | found = -1;
124 | } else {
125 | lineText = cm.getLine(start.line - 1);
126 | found = lineText.lastIndexOf(Token, 0);
127 | }
128 | if (start.line === 0 || found !== 0) { // no previous comment line
129 | let end = start.line;
130 | for (let i = start.line + 1; i <= cm.lastLine(); ++i) { // final comment line
131 | lineText = cm.getLine(i);
132 | found = lineText.lastIndexOf(Token, 0);
133 | if (found === 0) {
134 | end = i;
135 | } else {
136 | break;
137 | }
138 | }
139 | if (end > start.line) {
140 | return {from: CodeMirror.Pos(start.line, null),
141 | to: CodeMirror.Pos(end, null)};
142 | }
143 | }
144 | }
145 | return null;
146 | });
147 | }
148 |
149 | function toggleFolding () {
150 | let cm;
151 | let pos = {line: 0, ch: 0, xRel: 0};
152 | if (Jupyter.notebook !== undefined) {
153 | cm = Jupyter.notebook.get_selected_cell().code_mirror;
154 | if (Jupyter.notebook.mode === "edit") {
155 | pos = cm.getCursor();
156 | }
157 | }
158 | else {
159 | cm = Jupyter.editor.codemirror;
160 | pos = cm.getCursor();
161 | }
162 | let opts = cm.state.foldGutter.options;
163 | cm.foldCode(pos, opts.rangeFinder);
164 | }
165 |
166 |
167 |
168 | export function setUpCodeFolding(codecell: any, requirejs: any, configmod: any) {
169 |
170 | // putting here for scoping
171 | function initExistingCells() {
172 | let cells = Jupyter.notebook.get_cells();
173 | let ncells = Jupyter.notebook.ncells();
174 | for (let i = 0; i < ncells; i++) {
175 | let cell = cells[i];
176 | if ((cell instanceof codecell.CodeCell)) {
177 | activate_cm_folding(cell.code_mirror);
178 | /* restore folding state if previously saved */
179 | restoreFolding(cell, codecell);
180 | cell.code_mirror.on("fold", updateMetadata);
181 | cell.code_mirror.on("unfold", updateMetadata);
182 | }
183 | }
184 |
185 | // REDZONE: if anything else listens to create.Cell, gotta watch out
186 | Jupyter.notebook.events.unbind("create.Cell");
187 | Jupyter.notebook.events.on("create.Cell", createCell);
188 | }
189 |
190 | function createCell(_: any, nbcell: any) {
191 | const cell = nbcell.cell;
192 | if ((cell instanceof codecell.CodeCell)) {
193 | activate_cm_folding(cell.code_mirror);
194 | cell.code_mirror.on("fold", updateMetadata);
195 | cell.code_mirror.on("unfold", updateMetadata);
196 | // queue restoring folding, to run once metadata is set, hopefully.
197 | // This can be useful if cells are un-deleted, for example.
198 | setTimeout(function () { restoreFolding(cell, codecell); }, 500);
199 | }
200 | }
201 |
202 | function on_config_loaded () {
203 | if (Jupyter.notebook !== undefined) {
204 | // register actions with ActionHandler instance
205 | let prefix = "auto";
206 | let name = "toggle-codefolding";
207 | let action = {
208 | icon: "fa-comment-o",
209 | help : "Toggle codefolding",
210 | help_index : "ec",
211 | id : "toggle_codefolding",
212 | handler : toggleFolding
213 | };
214 | let action_full_name = Jupyter.keyboard_manager.actions.register(action, name, prefix);
215 |
216 | // define keyboard shortcuts
217 | let edit_mode_shortcuts = {};
218 | edit_mode_shortcuts[params.codefolding_hotkey] = action_full_name;
219 |
220 | // register keyboard shortcuts with keyboard_manager
221 | Jupyter.notebook.keyboard_manager.edit_shortcuts.add_shortcuts(edit_mode_shortcuts);
222 | Jupyter.notebook.keyboard_manager.command_shortcuts.add_shortcuts(edit_mode_shortcuts);
223 | }
224 | else {
225 | // we're in edit view
226 | let extraKeys = Jupyter.editor.codemirror.getOption("extraKeys");
227 | extraKeys[params.codefolding_hotkey] = toggleFolding;
228 | CodeMirror.normalizeKeyMap(extraKeys);
229 | console.log("[codefolding] binding hotkey", params.codefolding_hotkey);
230 | Jupyter.editor.codemirror.setOption("extraKeys", extraKeys);
231 | }
232 | }
233 |
234 | let params = {
235 | codefolding_hotkey : "Alt-f",
236 | init_delay : 1000
237 | };
238 |
239 | // updates default params with any specified in the provided config data
240 | let update_params = function (config_data: any) {
241 | for (let key in params) {
242 | if (config_data.hasOwnProperty(key)) {
243 | params[key] = config_data[key];
244 | }
245 | }
246 | };
247 |
248 | let conf_sect: any;
249 | if (Jupyter.notebook) {
250 | // we're in notebook view
251 | conf_sect = Jupyter.notebook.config;
252 | }
253 | else if (Jupyter.editor) {
254 | // we're in file-editor view
255 | conf_sect = new configmod.ConfigSection("notebook", {base_url: Jupyter.editor.base_url});
256 | conf_sect.load();
257 | }
258 | else {
259 | // we're some other view like dashboard, terminal, etc, so bail now
260 | return;
261 | }
262 |
263 | conf_sect.loaded
264 | .then(function () { update_params(conf_sect.data); })
265 | .then(on_config_loaded);
266 |
267 | if (Jupyter.notebook) {
268 | regFoldHelper();
269 | regMagicHelper();
270 | regBlockHelper();
271 | /* require our additional custom codefolding modes before initialising fully */
272 | if (Jupyter.notebook._fully_loaded) {
273 | setTimeout(function () {
274 | console.log("Codefolding: Wait for", params.init_delay, "ms");
275 | initExistingCells();
276 | }, params.init_delay);
277 | }
278 | else {
279 | Jupyter.notebook.events.one("notebook_loaded.Notebook", initExistingCells);
280 | }
281 | }
282 | else {
283 | activate_cm_folding(Jupyter.editor.codemirror);
284 | setTimeout(function () {
285 | console.log("Codefolding: Wait for", params.init_delay, "ms");
286 | Jupyter.editor.codemirror.refresh();
287 | }, params.init_delay);
288 | }
289 | }
--------------------------------------------------------------------------------
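The folding state round trip above is the heart of this file: `updateMetadata` persists the first line of every fold marker into `cell.metadata.code_folding`, and `restoreFolding` replays those lines sorted and back-to-front so re-folding an inner region never unfolds an enclosing one. A small sketch of just the pure part, with hypothetical names:

```ts
// "FoldRange" is a stand-in for the marker ranges that CodeMirror's
// getAllMarks()/find() return; only from.line matters here.
interface FoldRange { from: { line: number }; }

// What updateMetadata persists: the starting line of each fold.
function foldedLines(marks: FoldRange[]): number[] {
  return marks.map((m) => m.from.line);
}

// The order restoreFolding replays them in: ascending sort,
// then visited back-to-front, i.e. highest line first.
function replayOrder(lines: number[]): number[] {
  return lines.slice().sort((a, b) => a - b).reverse();
}

console.log(replayOrder(foldedLines([
  { from: { line: 3 } }, { from: { line: 10 } }, { from: { line: 0 } },
]))); // => [10, 3, 0]
```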
/src/comm.ts:
--------------------------------------------------------------------------------
1 | /// <reference path="./external/Jupyter.d.ts" />
2 | import { MIDAS_CELL_COMM_NAME, MIDAS_RECOVERY_COMM_NAME, MIDAS_SELECTION_FUN } from "./constants";
3 | import { LogSteps, LogDebug, LogInternalError, setupCellManagerUIChanges, getContainerFunctions, setupJupyterEvents, enableMidasInteractions, createMenuBtnGroup } from "./utils";
4 | import { createMidasComponent } from "./setup";
5 | import { AlertType, FunKind } from "./types";
6 | import { MidasSidebar } from "./components/MidasSidebar";
7 | import CellManager from "./CellManager";
8 | import { setupLogger, LoggerFunction, LogTask, LogDataframeInteraction } from "./logging";
9 |
10 | type CommandLoad = { type: string };
11 | type BasicLoad = { type: string; value: string };
12 |
13 | type InitLoad = {
14 | type: string; // initialize
15 | name: string;
16 | loggerId: string;
17 | };
18 |
19 | type ExecuteCodeLoad = {
20 | type: string;
21 | funKind: FunKind;
22 | code: string;
23 | shouldRun: boolean;
24 | };
25 |
26 | type ExecuteSelectionLoad = {
27 | type: string;
28 | params: string;
29 | dfname: string;
30 | };
31 |
32 | type ExecuteFunCallLoad = {
33 | type: string;
34 | funName: string;
35 | params: string;
36 | };
37 |
38 | type NotificationCommLoad = {
39 | type: string;
40 | style: string;
41 | value: string;
42 | // note that optional might have json issues
43 | dfName?: string;
44 | columnName?: string;
45 | };
46 |
47 | type SynchronizeSelectionLoad = {
48 | type: string;
49 | dfName: string;
50 | selection: any;
51 | };
52 |
53 | type UpdateCommLoad = {
54 | type: string;
55 | dfName: string;
56 | newData: any;
57 | code: string;
58 | };
59 |
60 | type AddReactiveCell = {
61 | type: string;
62 | dfName: string;
63 | };
64 |
65 | type ProfilerComm = {
66 | type: string;
67 | dfName: string;
68 | columns: string; // decoded to ProfilerColumns
69 | };
70 |
71 | type ChartRenderComm = {
72 | type: string;
73 | dfName: string;
74 | data: string;
75 | encoding: string;
76 | code: string;
77 | hashVal: string;
78 | };
79 |
80 | type MidasCommLoad = CommandLoad
81 | | BasicLoad
82 | | InitLoad
83 | | AddReactiveCell
84 | | ExecuteFunCallLoad
85 | | ExecuteCodeLoad
86 | | NotificationCommLoad
87 | | ProfilerComm
88 | | ChartRenderComm
89 | | UpdateCommLoad
90 | | SynchronizeSelectionLoad
91 | | ExecuteSelectionLoad;
92 |
93 | export function openRecoveryComm() {
94 | const comm = Jupyter.notebook.kernel.comm_manager.new_comm(MIDAS_RECOVERY_COMM_NAME);
95 | LogDebug("Sending recovery message...");
96 | comm.send({});
97 | }
98 |
99 | /**
100 | * Makes the comm responsible for discovery of which visualization
101 | * corresponds to which cell, accomplished through inspecting the
102 | * metadata of the message sent.
103 | *
104 |  * We need to keep track of whether this is the first time because
105 |  * we do not want to add the event listener on Jupyter more than once;
106 |  * the event listeners somehow persist even across page refreshes, so
107 |  * makeComm should be idempotent to page refresh.
108 | */
109 | export function makeComm(is_first_time = true) {
110 | LogSteps("makeComm");
111 |
112 | Jupyter.notebook.kernel.comm_manager.register_target(MIDAS_CELL_COMM_NAME,
113 | function (comm: any, msg: any) {
114 | const set_on_msg = (onMessage: (r: MidasSidebar) => void ) => {
115 | comm.on_msg(onMessage);
116 | };
117 |
118 | /**
119 | * initialization
120 | * - set up cellManager
121 | * - set up MidasSidebar
122 | * - set up ProfilerShelf
123 |        * as well as all their required comm messages and controls (e.g., to CellManager functions)
124 | */
125 | comm.on_msg((msg: any) => {
126 | const load = msg.content.data as InitLoad;
127 | const midasInstanceName = load.name;
128 | const loggerId = load.loggerId;
129 | if (load.type !== "initialize") {
130 |           throw LogInternalError("Should send initialize message first!");
131 | }
132 | // modify notebook css
133 | const notebookDiv = document.getElementById("notebook");
134 | // Set Style / Append Style
135 | notebookDiv.style.display = "flex";
136 | notebookDiv.style.flexDirection = "row";
137 | notebookDiv.style.maxWidth = "100%";
138 | const notebookContainerDiv = document.getElementById("notebook-container");
139 | // Set Style / Append Style
140 | notebookContainerDiv.style.flexGrow = "1";
141 | notebookContainerDiv.style.marginLeft = "10pt";
142 | notebookContainerDiv.style.marginRight = "0pt";
143 | notebookContainerDiv.style.width = "10%";
144 |
145 | const logger = setupLogger(loggerId);
146 | createMenuBtnGroup();
147 |
148 | const cellManager = new CellManager(midasInstanceName, logger);
149 | const setUIItxFocus = cellManager.setFocus.bind(cellManager);
150 | const executeCapturedCells = cellManager.executeCapturedCells.bind(cellManager);
151 | setupCellManagerUIChanges(cellManager);
152 | const containerFunctions = getContainerFunctions(comm, logger, setUIItxFocus, executeCapturedCells);
153 |
154 | const columnSelectMsg = (column: string, tableName: string) => {
155 | const payload = {
156 | command: "column-selected",
157 | column,
158 | df_name: tableName,
159 | };
160 | const entry: LogDataframeInteraction = {
161 | action: "column_click",
162 | actionKind: "selection",
163 | dfName: tableName,
164 | };
165 | logger(entry);
166 | comm.send(payload);
167 | };
168 |
169 |
170 | const ref = createMidasComponent(
171 | columnSelectMsg,
172 | logger,
173 | containerFunctions
174 | );
175 |
176 | const on_msg = makeOnMsg(ref, cellManager, logger);
177 | set_on_msg(on_msg);
178 |
179 | if (is_first_time) {
180 | setupJupyterEvents(cellManager, logger);
181 | }
182 | });
183 |
184 | comm.on_close(function (msg: any) {
185 | LogSteps(`CommClose`, msg);
186 | });
187 | });
188 | }
189 |
190 |
191 | function makeOnMsg(refToSidebar: MidasSidebar, cellManager: CellManager, logger: LoggerFunction) {
192 | let refToMidas = refToSidebar.getMidasContainerRef();
193 | let refToProfilerShelf = refToSidebar.getProfilerShelfRef();
194 |
195 | return function on_msg(msg: any) {
196 | const load = msg.content.data as MidasCommLoad;
197 | switch (load.type) {
198 | case "notification": {
199 | const errorLoad = load as NotificationCommLoad;
200 | const alertType = AlertType[errorLoad.style];
201 | refToMidas.addAlert(errorLoad.value, alertType);
202 | if (alertType === AlertType.error && errorLoad.dfName && errorLoad.dfName !== "" && errorLoad.columnName) {
203 | refToProfilerShelf.markAsErrored(errorLoad.columnName, errorLoad.dfName);
204 | }
205 | return;
206 | }
207 | case "after_selection": {
208 | const selectionLoad = load as SynchronizeSelectionLoad;
209 | refToMidas.drawBrush(selectionLoad.selection);
210 | cellManager.runReactiveCells(selectionLoad.dfName);
211 | // now release
212 | enableMidasInteractions();
213 | return;
214 | }
215 | case "task-start": {
216 | const loggerLoad = load as BasicLoad;
217 | const entry: LogTask = {
218 | action: "task_start",
219 | actionKind: "task_start",
220 | taskId: loggerLoad.value,
221 | };
222 | logger(entry);
223 | return;
224 | }
225 | case "reactive": {
226 | const cellId = msg.parent_header.msg_id;
227 | const reactiveLoad = load as AddReactiveCell;
228 | cellManager.recordReactiveCell(reactiveLoad.dfName, cellId);
229 | LogDebug(`Success adding cell to ${reactiveLoad.dfName} for cell ${cellId}`);
230 | return;
231 | }
232 | case "deactive": {
233 | const cellId = msg.parent_header.msg_id;
234 |         cellManager.removeReactiveCell(cellId); return; // return so we do not fall through into "execute_selection"
235 |       }
236 | case "execute_selection": {
237 | // note that this case is a special case of the execute_fun
238 | const selectionLoad = load as ExecuteSelectionLoad;
239 | cellManager.executeFunction(MIDAS_SELECTION_FUN, selectionLoad.params);
240 | return;
241 | }
242 | case "execute_fun": {
243 | const executeLoad = load as ExecuteFunCallLoad;
244 | cellManager.executeFunction(executeLoad.funName, executeLoad.params);
245 | return;
246 | }
247 | case "create_cell": {
248 | const cellLoad = load as ExecuteCodeLoad;
249 | cellManager.createCell(cellLoad.code, cellLoad.funKind, cellLoad.shouldRun);
250 | return;
251 | }
252 | case "profiler": {
253 | const cellId = msg.parent_header.msg_id;
254 | const dataLoad = load as ProfilerComm;
255 | LogSteps("Profiler", dataLoad.dfName);
256 | const tableName = dataLoad.dfName;
257 | const columnItems = JSON.parse(dataLoad.columns);
258 | refToProfilerShelf.addOrReplaceTableItem(tableName, columnItems, cellId);
259 | return;
260 | }
261 |
262 | case "chart_render": {
263 | const chartRenderLoad = load as ChartRenderComm;
264 | LogSteps("Chart", chartRenderLoad.dfName);
265 | const cellId = msg.parent_header.msg_id;
266 | const encoding = JSON.parse(chartRenderLoad.encoding);
267 | const data = JSON.parse(chartRenderLoad.data);
268 | refToMidas.addDataFrame(
269 | chartRenderLoad.dfName,
270 | encoding,
271 | data,
272 | cellId,
273 | chartRenderLoad.code,
274 | chartRenderLoad.hashVal);
275 | return;
276 | }
277 | case "chart_update_data": {
278 | // note that unlike chart_render, updates should NOT scroll!
279 | const updateLoad = load as UpdateCommLoad;
280 | refToMidas.replaceData(updateLoad.dfName, updateLoad.newData, updateLoad.code);
281 | return;
282 | }
283 | }
284 | };
285 | }
286 |
--------------------------------------------------------------------------------
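Every comm message is a tagged union discriminated on `type`, and the heavier payloads arrive with their structured fields JSON-encoded as strings. A hedged sketch of what a `chart_render` load might look like on the wire; the shape mirrors the `ChartRenderComm` type above, but all values below are made up for illustration:

```ts
// Illustrative "chart_render" payload as dispatched in makeOnMsg.
const chartRenderLoad = {
  type: "chart_render",
  dfName: "cars",
  encoding: JSON.stringify({
    mark: "bar", x: "origin", xType: "ordinal",
    y: "count", yType: "quantitative",
    selectionType: "multiclick", selectionDimensions: "x", sort: "",
  }),
  data: JSON.stringify([{ origin: "USA", count: 10, is_overview: true }]),
  code: "cars.vis()",
  hashVal: "a1b2c3",
};

// The handler recovers the structured values before handing them to the UI:
const encoding = JSON.parse(chartRenderLoad.encoding);
const data = JSON.parse(chartRenderLoad.data);
console.log(encoding.mark, data.length); // "bar" 1
```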
/src/components/ChartsViewLangingPage.tsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import { SYS_NAME } from "../constants";
3 |
4 | export const ChartsViewLandingPage: React.StatelessComponent<{}> = () => {
5 | return <>
6 |
7 |
8 | <h3>Load Data</h3>
9 | <p>To load data, use .from_file("path/to/your_data.csv"); the columns will show up to the right.</p>
10 | <h3>Load Charts</h3>
11 | <p>For any dataframe, you can visualize it here with .vis().</p>
12 | <h3>Making Interactions</h3>
13 | <p>
14 | All loaded charts are augmented with interactivity by default.
15 | You can select a subset of the data by shift-clicking on a bar chart, or by shift-dragging to draw a brush on a scatter plot or line chart.
16 | </p>
17 | <h3>Recording/Restoring Interactions</h3>
18 | <p>
19 | By default, your interactions are executed via a "log" in a code cell to the left. You can read the code to get a sense of what you have interacted with, and you can re-execute it by uncommenting the relevant selections.
20 | </p>
21 | <h3>Toggle and Resize Panes</h3>
22 | <p>
23 | To toggle this pane (restoring the traditional notebook view), click Toggle {SYS_NAME} in the menu bar at the top. Similarly, you can click Toggle Column Shelf to hide just the pane on the right.
24 | </p>
25 | <p>To resize, you can also drag the left edge of the main (blue) pane; the resizer is highlighted with a darker shade of blue when you hover over it.