├── DataFrame_HiveContext.ipynb ├── ETL_SPARK_PYTHON └── project │ ├── Spark_ETL.py │ ├── lib │ └── __init__.py │ ├── main.py │ ├── task_2.py │ ├── task_3.py │ └── test │ └── __init__.py ├── README.md ├── Untitled-Copy1.ipynb ├── dataFrame_basics.ipynb ├── dataFrame_complexDataType.ipynb ├── dataFrame_joins.ipynb ├── dataFrame_sql_functions.ipynb ├── dataFrame_storageLevels.ipynb ├── dataFrame_transformationVSactions.ipynb ├── dataFrame_window.ipynb ├── dataFrane_SQL - Databricks_files ├── Databricks_Logo_Side_Nav_Gray.svg ├── basic.css ├── bootstrap.min.css ├── feedback.min.css ├── jquery-ui.min.css ├── main.css ├── main.js.download ├── print.css ├── source_code_pro.css ├── spinner.gif └── spinner.svg ├── dataFrane_SQL.ipynb ├── dataframe_API.ipynb ├── pics ├── Spark2_Structured_Streaming_myLearning_infoGraphics.jpg ├── Spark2_myLearning_inforGraphics.jpg ├── dag_crossJoin.JPG ├── dag_crossJoin_2.JPG ├── dag_filter.JPG ├── dag_filter_2.JPG ├── dag_groupBy.JPG ├── dag_innerJoin.JPG ├── dag_innerJoin_2.JPG ├── dag_leftJoin.JPG ├── dag_sort.JPG ├── dag_sort_2.JPG ├── explain_plan.jpg ├── explain_plan_detail.jpg ├── explain_plan_groupBy.jpg ├── explain_plan_joinHint.jpg ├── explain_plan_joins.jpg ├── explain_plan_physical.jpg ├── explain_plan_sql_vs_dataFrame.jpg ├── partition_rePartition.jpg ├── queryOptimization_hint.jpg ├── spark-yarn-client.png ├── spark-yarn-table.png ├── spark_cluster.png ├── spark_local.png ├── streaming_1.JPG ├── streaming_2.JPG ├── streaming_3.JPG ├── streaming_steps_to_start_pipeline.JPG ├── streaming_steps_to_start_pipeline_1.JPG ├── streaming_steps_to_start_pipeline_2.JPG ├── streaming_steps_to_start_pipeline_3.JPG ├── streaming_windowing.JPG ├── streaming_windowing_lateEvents.JPG ├── streaming_windowing_steps.JPG ├── streaming_windowing_watermarking_append.JPG ├── streaming_windowing_watermarking_update.JPG ├── transformations_narrowVSwide.jpg └── yarn-cluster.png ├── sparkStreaming.ipynb ├── sparkStreaming_basic_1.ipynb ├── 
sparkStreaming_basic_2.ipynb ├── spark_explain_plan.ipynb ├── streaming_basics.ipynb ├── test_1.ipynb └── test_data ├── ctry_summary.csv ├── global_flow_of_people.csv ├── retail_data.csv └── testFile_4.csv /DataFrame_HiveContext.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"code","source":["#HiveContext\nfrom pyspark.sql import HiveContext\nhiveContext = HiveContext(sc)\nhiveContext.sql(\"show schemas\").show()"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["#functions for sqlContext and hiveContext are the same"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"code","source":["tmp_list = [{'name':'vivek','number':100}]\ndf = spark.createDataFrame(tmp_list)\ndf.show()"],"metadata":{},"outputs":[],"execution_count":3},{"cell_type":"code","source":["hiveContext.registerDataFrameAsTable(df,\"myTable\")"],"metadata":{},"outputs":[],"execution_count":4},{"cell_type":"code","source":["hiveContext.sql(\"show tables\").show()\nhiveContext.sql(\"select * from myTable\").show()"],"metadata":{},"outputs":[],"execution_count":5}],"metadata":{"name":"DataFrame_HiveContext","notebookId":95839911839169},"nbformat":4,"nbformat_minor":0} 2 | -------------------------------------------------------------------------------- /ETL_SPARK_PYTHON/project/Spark_ETL.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ETL pipeline using Apache Spark! 3 | MySQL -> Spark-DataFrame(HiveContext) -> HIVE(hdfs) 4 | 5 | todo 6 | 4. create new partitions each time 7 | 6. mode ETL, like cleanning rejecting record & scd2 ? 
import datetime
import getopt
import sys

try:
    import configparser  # Python 3 module name
except ImportError:
    import ConfigParser as configparser  # Python 2 name of the same module

from pyspark import SparkContext, SparkConf, HiveContext


def getMysqlConn(url, driver, dbtable, user, password):
    """Load one table over JDBC and return it as a Spark DataFrame.

    Despite its name, this returns the loaded DataFrame, not a connection.
    It reads through the module-level ``sqlContext``, which must have been
    created before the first call.

    Args:
        url: JDBC connection URL.
        driver: JDBC driver class name.
        dbtable: Name of the table to read.
        user: Database user name.
        password: Database password.

    Returns:
        The DataFrame produced by the JDBC reader's ``load()``.
    """
    print (" ############ inside getMysqlConn" )
    return sqlContext.read.format("jdbc").options(
        url=url,
        driver=driver,
        dbtable=dbtable,
        user=user,
        password=password
    ).load()


try:
    conf = SparkConf().setAppName("Spark_ETL")
    sc = SparkContext(conf=conf)
    sqlContext = HiveContext(sc)

    # Use a separate name so the SparkConf above is not shadowed by the
    # settings parser.
    config = configparser.ConfigParser()
    config.read("param.config")

    url = config.get("MySQL", "url")
    driver = config.get("MySQL", "driver")
    dbtable_A = config.get("MySQL", "dbtable_A")
    dbtable_B = config.get("MySQL", "dbtable_B")
    user = config.get("MySQL", "user")
    password = config.get("MySQL", "password")
    HiveSchema = config.get("HiveSchema", "schema")

    # The password is deliberately left out of this diagnostic output.
    print (url, driver, dbtable_A, dbtable_B, user)

    # Optional single argument: the week to load; otherwise load everything.
    strWeek = sys.argv[1] if len(sys.argv) == 2 else "all_weeks"

    df_dataset_A = getMysqlConn(url, driver, dbtable_A, user, password)
    df_dataset_B = getMysqlConn(url, driver, dbtable_B, user, password)

    df_dataset_A.registerTempTable("dataset_A")
    df_dataset_B.registerTempTable("dataset_B")

    df_dataset_A_filtered = sqlContext.sql("select * from dataset_A")
    if strWeek != "all_weeks":
        # Compare against the argument as a literal value instead of
        # splicing it into the SQL text, so quotes in the argument cannot
        # change the query.
        df_dataset_A_filtered = df_dataset_A_filtered.filter(
            df_dataset_A_filtered["week"] == strWeek
        )
    df_dataset_B_filtered = sqlContext.sql("select * from dataset_B")

    # Both tables have a "price" column; rename B's before joining on productid.
    df_join = df_dataset_A_filtered.join(
        df_dataset_B_filtered.withColumnRenamed("price", "price_B"),
        ['productid']
    )

    # Timestamped target table so each run writes a new table.
    target_table = (
        HiveSchema
        + ".table_A_Joined_B_"
        + datetime.datetime.now().strftime('%Y_%m_%d_%H%M%S')
    )
    df_join.write.saveAsTable(target_table)

except Exception as e:
    print (" ### Exception ###: ", e)
    # Re-raise so a failed run is reported as failed instead of exiting
    # successfully with nothing written.
    raise
# Expose both JDBC DataFrames to Spark SQL under fixed temp-table names.
df_dataset_A.registerTempTable("dataset_A")
df_dataset_B.registerTempTable("dataset_B")

# Target table names carry the requested week ("-" is not valid in a name).
# NOTE(review): strWeek comes straight from the command line and is embedded
# in the table names (and the DROP statements) unvalidated - confirm callers
# only pass week identifiers.
week_suffix = strWeek.replace("-", "_")
table_a = "vivekb123.table_a_" + week_suffix
table_b = "vivekb123.table_b_" + week_suffix

# Overwrite: drop any table left by a previous run for the same week.
sqlContext.sql("DROP TABLE IF EXISTS " + table_a)
sqlContext.sql("DROP TABLE IF EXISTS " + table_b)

# Restrict dataset_A to the requested week unless the "all_weeks" default is
# in effect. The value is compared as a literal rather than concatenated into
# the SQL text, so quotes in the argument cannot change the query.
df_dataset_A_filtered = sqlContext.sql("select * from dataset_A")
if strWeek != "all_weeks":
    df_dataset_A_filtered = df_dataset_A_filtered.filter(
        df_dataset_A_filtered["week"] == strWeek
    )
df_dataset_B_filtered = sqlContext.sql("select * from dataset_B")

# Dump the data. The *_filtered names stay bound to the DataFrames
# (saveAsTable itself returns None).
df_dataset_A_filtered.write.saveAsTable(table_a)
df_dataset_B_filtered.write.saveAsTable(table_b)
22 | df_join2 = df_table_a.join(df_table_b.withColumnRenamed("price","price_B"),['productid']) 23 | 24 | 25 | #dump the data 26 | df_join2.saveAsTable("vivekb123.table_A_able_B_2016_10_16") 27 | 28 | 29 | -------------------------------------------------------------------------------- /ETL_SPARK_PYTHON/project/test/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Apache Spark with Python 2 | > Follow me on [LinkedIn](https://www.linkedin.com/in/vivek-bombatkar/) and [Github](https://github.com/vivek-bombatkar) 3 | 4 | 5 | ## My Spark practice notes. 6 | Learning is a continuous process. Though I have been using Spark for quite a long time now, I never noted down my practice exercises. With this repo, I am documenting them! 7 | 8 | I have used the Databricks free community cloud for these exercises, link: 9 | https://community.cloud.databricks.com/login.html 10 | 11 | ![Spark2_Structured_Streaming_myLearning_infoGraphics](https://github.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/blob/master/pics/Spark2_Structured_Streaming_myLearning_infoGraphics.jpg) 12 | 13 | ![Spark2_myLearning_inforGraphics](https://github.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/blob/master/pics/Spark2_myLearning_inforGraphics.jpg) 14 | 15 | ### spark_explain_plan 16 | [spark_explain_plan notebook](https://github.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/blob/master/spark_explain_plan.ipynb) 17 | 18 | ### DAG and explain plan 19 | https://www.tutorialkart.com/apache-spark/dag-and-physical-execution-plan/ 20 | 21 | ***How Apache Spark builds a DAG and Physical Execution Plan?*** 22 | 1. User submits a spark application to the Apache Spark. 23 | 2. Driver is the module that takes in the application from Spark side. 
24 | 3. Driver identifies transformations and actions present in the spark application. These identifications are the tasks. 25 | 4. Based on the flow of program, these tasks are arranged in a graph like structure with directed flow of execution from task to task forming no loops in the graph (also called DAG). DAG is pure logical. 26 | 5. This logical DAG is converted to Physical Execution Plan. Physical Execution Plan contains stages. 27 | 6. Some of the subsequent tasks in DAG could be combined together in a single stage. 28 | Based on the nature of transformations, Driver sets stage boundaries. 29 | 7. There are two transformations, namely 30 | a. narrow transformations : Transformations like Map and Filter that does not require the data to be shuffled across the partitions. 31 | b. wide transformations : Transformations like ReduceByKey that does require the data to be shuffled across the partitions. 32 | 8. Transformation that requires data shuffling between partitions, i.e., a wide transformation results in stage boundary. 33 | 9. DAG Scheduler creates a Physical Execution Plan from the logical DAG. Physical Execution Plan contains tasks and are bundled to be sent to nodes of cluster. 34 | 35 | 36 | ### Catalyst optimizer 37 | http://data-informed.com/6-steps-to-get-top-performance-from-the-changes-in-spark-2-0/ 38 | 39 | What is Catalyst? Catalyst is the name of Spark’s integral query optimizer and execution planner for Dataset/DataFrame. 40 | 41 | Catalyst is where most of the “magic” happens to improve the execution speed of your code. But in any complex system, “magic” is unfortunately not good enough to always guarantee optimal performance. Just as with relational databases, it is valuable to learn a bit about exactly how the optimizer works in order to understand its planning and tune your applications. 42 | 43 | In particular, Catalyst can perform sophisticated refactors of complex queries. 
However, almost all of its optimizations are qualitative and rule-based rather than quantitative and statistics-based. For example, Spark knows how and when to do things like combine filters, or move filters before joins. Spark 2.0 even allows you to define, add, and test out your own additional optimization rules at runtime. [1][2] 44 | 45 | On the other hand, Catalyst is not designed to perform many of the common optimizations that RDBMSs have performed for decades, and that takes some understanding and getting used to. 46 | 47 | For example, Spark doesn’t “own” any storage, so it does not build on-disk indexes, B-Trees, etc. (although its parquet file support, if used well, can get you some related features). Spark has been optimized for the volume, variety, etc. of big data – so, traditionally, it has not been designed to maintain and use statistics about a stable dataset. E.g., where an RDBMS might know that a specific filter will eliminate most records, and apply it early in the query, Spark 2.0 does not know this fact and won’t perform that optimization. 48 | 49 | 50 | ### Catalyst, the optimizer and Tungsten, the execution engine! 51 | https://db-blog.web.cern.ch/blog/luca-canali/2016-09-spark-20-performance-improvements-investigated-flame-graphs 52 | 53 | **Note** in particular the steps marked with (\*): they are optimized with whole-stage code generation. 54 | 55 | Code generation is the key 56 | The key to understand the improved performance is with the new features in Spark 2.0 for whole-stage code generation. 57 | 58 | 59 | ### Deep dive into the new Tungsten execution engine 60 | https://databricks.com/blog/2016/05/23/apache-spark-as-a-compiler-joining-a-billion-rows-per-second-on-a-laptop.html 61 | 62 | 1. The explain() function in the expression below has been extended for whole-stage code generation. In the explain output, when an operator has a star around it (*), whole-stage code generation is enabled. 
In the following case, Range, Filter, and the two Aggregates are both running with whole-stage code generation. Exchange, however, does not implement whole-stage code generation because it is sending data across the network. 63 | 64 | ```python 65 | spark.range(1000).filter("id > 100").selectExpr("sum(id)").explain() 66 | 67 | == Physical Plan == 68 | *Aggregate(functions=[sum(id#201L)]) 69 | +- Exchange SinglePartition, None 70 | +- *Aggregate(functions=[sum(id#201L)]) 71 | +- *Filter (id#201L > 100) 72 | +- *Range 0, 1, 3, 1000, [id#201L] 73 | ``` 74 | 75 | 2. Vectorization 76 | The idea here is that instead of processing data one row at a time, the engine batches multiples rows together in a columnar format, and each operator uses simple loops to iterate over data within a batch. Each next() call would thus return a batch of tuples, amortizing the cost of virtual function dispatches. These simple loops would also enable compilers and CPUs to execute more efficiently with the benefits mentioned earlier. 77 | 78 | 79 | ### Catalyst Optimizer 80 | https://data-flair.training/blogs/spark-sql-optimization-catalyst-optimizer/ 81 | 82 | 1. Fundamentals of Catalyst Optimizer 83 | In the depth, Catalyst contains the tree and the set of rules to manipulate the tree. 84 | Trees 85 | A tree is the main data type in the catalyst. A tree contains node object. For each node, there is a node 86 | Rules 87 | We can manipulate tree using rules. We can define rules as a function from one tree to another tree. 88 | 2. 89 | a. Analysis - Spark SQL Optimization starts from relation to be computed. It is computed either from abstract syntax tree (AST) returned by SQL parser or dataframe object created using API. 90 | b. Logical Optimization - In this phase of Spark SQL optimization, the standard rule-based optimization is applied to the logical plan. It includes constant folding, predicate pushdown, projection pruning and other rules. 91 | c. 
Physical Planning - In this phase, one or more physical plans are formed from the logical plan, using physical operators that match the Spark execution engine. The plan to run is then selected using the cost model. 92 | d. Code Generation - It involves generating Java bytecode to run on each machine. Catalyst uses the special feature of Scala language, “Quasiquotes” to make code generation easier because it is very tough to build code generation engines. 93 | 94 | 95 | ### cost based optimization 96 | https://databricks.com/blog/2017/08/31/cost-based-optimizer-in-apache-spark-2-2.html 97 | 98 | ***Query Benchmark and Analysis*** 99 | We took a non-intrusive approach while adding these cost-based optimizations to Spark by adding a global config spark.sql.cbo.enabled to enable/disable this feature. In Spark 2.2, this parameter is set to false by default. 100 | 1. At its core, Spark’s Catalyst optimizer is a general library for representing query plans as trees and sequentially applying a number of optimization rules to manipulate them. 101 | 2. A majority of these optimization rules are based on heuristics, i.e., they only account for a query’s structure and ignore the properties of the data being processed. 102 | 3. ANALYZE TABLE command 103 | CBO relies on detailed statistics to optimize a query plan. To collect these statistics, users can issue these new SQL commands described below: 104 | ANALYZE TABLE table_name COMPUTE STATISTICS 105 | 106 | 107 | ### sigmod_spark_sql 108 | http://people.csail.mit.edu/matei/papers/2015/sigmod_spark_sql.pdf 109 | ``` 110 | 111 | ``` 112 | 113 | ### working-with-udfs-in-apache-spark 114 | https://blog.cloudera.com/blog/2017/02/working-with-udfs-in-apache-spark/ 115 | 116 | - It’s important to understand the performance implications of Apache Spark’s UDF features. 
Python UDFs for example (such as our CTOF function) result in data being serialized between the executor JVM and the Python interpreter running the UDF logic – this significantly reduces performance as compared to UDF implementations in Java or Scala. Potential solutions to alleviate this serialization bottleneck include: 117 | 118 | - Accessing a Hive UDF from PySpark as discussed in the previous section. The Java UDF implementation is accessible directly by the executor JVM. Note again that this approach only provides access to the UDF from the Apache Spark’s SQL query language. 119 | Making use of the approach also shown to access UDFs implemented in Java or Scala from PySpark, as we demonstrated using the previously defined Scala UDAF example. 120 | 121 | - Another important component of Spark SQL to be aware of is the Catalyst query optimizer. Its capabilities are expanding with every release and can often provide dramatic performance improvements to Spark SQL queries; however, arbitrary UDF implementation code may not be well understood by Catalyst (although future features[3] which analyze bytecode are being considered to address this). 
As such, using Apache Spark’s built-in SQL query functions will often lead to the best performance and should be the first approach considered whenever introducing a UDF can be avoided 122 | 123 | 124 | ### spark-functions-vs-udf-performance 125 | https://stackoverflow.com/questions/38296609/spark-functions-vs-udf-performance 126 | 127 | 128 | ![explain_plan_physical](https://github.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/blob/master/pics/explain_plan_physical.jpg) 129 | 130 | ![explain_plan_sql_vs_dataFrame](https://github.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/blob/master/pics/explain_plan_sql_vs_dataFrame.jpg) 131 | 132 | ![queryOptimization_hint](https://github.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/blob/master/pics/queryOptimization_hint.jpg) 133 | 134 | ![explain_plan_joins](https://github.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/blob/master/pics/explain_plan_joins.jpg) 135 | 136 | ![explain_plan_joinHint](https://github.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/blob/master/pics/explain_plan_joinHint.jpg) 137 | 138 | ![explain_plan_groupBy](https://github.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/blob/master/pics/explain_plan_groupBy.jpg) 139 | 140 | ![partition_rePartition](https://github.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/blob/master/pics/partition_rePartition.jpg) 141 | 142 | ![transformations_narrowVSwide](https://github.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/blob/master/pics/transformations_narrowVSwide.jpg) 143 | 144 | 145 | ### spark-submit 146 | > https://spark.apache.org/docs/2.2.0/submitting-applications.html 147 | > https://www.cloudera.com/documentation/enterprise/5-4-x/topics/cdh_ig_running_spark_on_yarn.html 148 | > https://jaceklaskowski.gitbooks.io/mastering-apache-spark/yarn/ 149 | > https://blog.cloudera.com/blog/2014/05/apache-spark-resource-management-and-yarn-app-models/ 150 | 151 | 
```bash 152 | ./bin/spark-submit \ 153 | --class <main-class> \ 154 | --master <master-url> \ 155 | --deploy-mode <deploy-mode> \ 156 | --conf <key>=<value> \ 157 | ... # other options 158 | <application-jar> \ 159 | [application-arguments] 160 | 161 | --class: The entry point for your application (e.g. org.apache.spark.examples.SparkPi) 162 | --master: The master URL for the cluster (e.g. spark://23.195.26.187:7077) 163 | --deploy-mode: Whether to deploy your driver on the worker nodes (cluster) or locally as an external client (client) (default: client) † 164 | --conf: Arbitrary Spark configuration property in key=value format. For values that contain spaces wrap “key=value” in quotes (as shown). 165 | application-jar: Path to a bundled jar including your application and all dependencies. The URL must be globally visible inside of your cluster, for instance, an hdfs:// path or a file:// path that is present on all nodes. 166 | application-arguments: Arguments passed to the main method of your main class, if any 167 | ``` 168 | 169 | #### java vs python code execution 170 | 171 | | java class | python script | 172 | | --- | --- | 173 | | --class 'class path of java main application' | (at the end of spark-submit) 'fully qualified path of the main python script' | 174 | | ex. --class com.abc.project1.Main | /opt/src/project1/module1/main.py 'pass the parameters' | 175 | | --jars 'assembly jar (or “uber” jar) containing your code and its dependencies, to be distributed with your application' | --py-files 'add .py, .zip or .egg files to be distributed with your application.' | 176 | 177 | #### master URL options 178 | 179 | | local | local[n] | local[n,f] | yarn | 180 | | --- | --- | --- | --- | 181 | | Run locally with one worker thread, no parallelism | Run locally with n worker threads; set this to the number of cores. local[*] runs with as many worker threads as logical cores | Run Spark locally with n worker threads and f maxFailures | Connect to a YARN cluster in client or cluster mode depending on the value of --deploy-mode. 
The cluster location will be found based on the HADOOP_CONF_DIR or YARN_CONF_DIR variable | 182 | 183 | #### YARN client vs cluster 184 | > Deploy modes are all about where the Spark driver runs. 185 | 186 | | YARN client | YARN cluster | 187 | | --- | --- | 188 | | driver runs on the host where the job is submitted | the driver runs in the ApplicationMaster on a cluster host chosen by YARN. | 189 | | client that launches the application needs to be alive | client doesn't need to continue running for the entire lifetime of the application | 190 | | | | 191 | 192 | 193 | | Spark local mode | Spark Cluster mode | 194 | | --- | --- | 195 | | | | 196 | 197 | 198 | 199 | 200 | 201 | ### Drivers and Executors 202 | 203 | | IMP Concepts | | 204 | |--- |--- | 205 | | **Application** | single job, a sequence of jobs, a long-running service issuing new commands as needed or an interactive exploration session.| 206 | | **Spark Driver** | driver is the process running the spark context. This driver is responsible for converting the application to a directed graph of individual steps to execute on the cluster. There is one driver per application. | 207 | | **Spark Application Master** | responsible for negotiating resource requests made by the driver with YARN and finding a suitable set of hosts/containers in which to run the Spark applications. There is one Application Master per application. | 208 | | Spark Executor | A single JVM instance on a node that serves a single Spark application. An executor runs multiple tasks over its lifetime, and multiple tasks concurrently. A node may have several Spark executors and there are many nodes running Spark Executors for each client application. | 209 | | Spark Task | represents a unit of work on a partition of a distributed dataset. 
| 210 | 211 | 212 | ### Dataframe operation on multiple columns 213 | > https://medium.com/@mrpowers/performing-operations-on-multiple-columns-in-a-pyspark-dataframe-36e97896c378 214 | 215 | > ***'Parsed Logical Plan' --> 'Analyzed Logical Plan' --> 'Optimized Logical Plan' --> 'Physical Plan'*** 216 | 217 | > Spark is smart enough to optimize (in the Physical Plan) multiple operations applied to a dataframe in a for loop 218 | 219 | #### The 2 code snippets below will produce a similar Physical Plan 220 | 221 | ```python 222 | for col in data_frame.columns: 223 | df_res= data_frame.withColumn() \ 224 | .withColumn() 225 | 226 | ``` 227 | 228 | ```python 229 | df_res= data_frame.select(*(when(col(c) ... ,...).otherwise(col(c)).alias(c) for c in data_frame.columns )) 230 | 231 | ``` 232 | 233 | 234 | 235 | ### Spark job monitoring 236 | > https://databricks.com/blog/2015/06/22/understanding-your-spark-application-through-visualization.html 237 | 238 | Spark History Server web UI 239 | 240 | #### a. Event timeline of spark events 241 | > The ability to view Spark events in a timeline is useful for identifying the bottlenecks in an application. 242 | 243 | - Event timeline available in three levels 244 | - across all jobs 245 | - within one job 246 | - within one stage. 247 | 248 | 249 | 250 | #### b. 
DAG 251 | 252 | 253 | 254 | 255 | -------------------------------------------------------------------------------- /Untitled-Copy1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "hahaha\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "print (\"hahaha\")" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": { 26 | "collapsed": false 27 | }, 28 | "outputs": [ 29 | { 30 | "ename": "TypeError", 31 | "evalue": "'builtin_function_or_method' object has no attribute '__getitem__'", 32 | "output_type": "error", 33 | "traceback": [ 34 | "\u001b[1;31m\u001b[0m", 35 | "\u001b[1;31mTypeError\u001b[0mTraceback (most recent call last)", 36 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;36m10\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[1;32mprint\u001b[0m \u001b[1;34m\"hihi\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 37 | "\u001b[1;31mTypeError\u001b[0m: 'builtin_function_or_method' object has no attribute '__getitem__'" 38 | ] 39 | } 40 | ], 41 | "source": [ 42 | "for i in range[1:10]:\n", 43 | " print \"hihi\"" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": { 50 | "collapsed": true 51 | }, 52 | "outputs": [], 53 | "source": [] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "collapsed": true 60 | }, 61 | "outputs": [], 62 | "source": [] 63 | } 64 | ], 65 | "metadata": { 66 | "kernelspec": { 67 | 
"display_name": "Python 2", 68 | "language": "python", 69 | "name": "python2" 70 | }, 71 | "language_info": { 72 | "codemirror_mode": { 73 | "name": "ipython", 74 | "version": 2 75 | }, 76 | "file_extension": ".py", 77 | "mimetype": "text/x-python", 78 | "name": "python", 79 | "nbconvert_exporter": "python", 80 | "pygments_lexer": "ipython2", 81 | "version": "2.7.12" 82 | } 83 | }, 84 | "nbformat": 4, 85 | "nbformat_minor": 1 86 | } 87 | -------------------------------------------------------------------------------- /dataFrame_basics.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"code","source":["tmp_list_noSchema = [('A',1),('B',2)]\ndf = spark.createDataFrame(tmp_list,['ID','NO'])\ndf.collect()"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["#{}, infer schema\ntmp_list = [{'ID': 'A','NO': 1}]\nspark.createDataFrame(tmp_list).collect()"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"code","source":["#row object\n#parallelize\nfrom pyspark.sql import Row\ntmp_row = Row('ID','NO')\nrdd = sc.parallelize(tmp_list_noSchema)\nnew_rdd = rdd.map(lambda x: tmp_row(*x))\ndf = spark.createDataFrame(new_rdd)\ndf.collect()"],"metadata":{},"outputs":[],"execution_count":3},{"cell_type":"code","source":["#pandas\nimport pandas as pd\n#pd_df = pd.DataFrame({'ID': 'AA','NO': 10})\npd_df = pd.DataFrame([['A',10],['B',20]])\ndf = spark.createDataFrame(pd_df)\ndf.collect()"],"metadata":{},"outputs":[],"execution_count":4},{"cell_type":"code","source":["#registerDataFrameAsTable\ndf.createOrReplaceTempView(\"tbl1\")\nspark.sql(\"select * from tbl1\").show()\nsqlContext.registerDataFrameAsTable(df,\"tbl_final\")\n#sqlContext.createExternalTable(\"tbl1\")\nsqlContext.sql(\"show tables\").show()\nsqlContext.sql(\"select * from tbl_final\").show()"],"metadata":{},"outputs":[],"execution_count":5},{"cell_type":"code","source":["#registerFunction\n#udf.register\nfrom 
pyspark.sql.types import IntegerType\ndef strLen(x):\n return len(x)\n\n#sqlContext.registerFunction(\"strLen\", strLen, IntegerType())\nsqlContext.udf.register(\"strLen\", strLen, IntegerType())\nsqlContext.sql(\"select strLen('vivek_bombatkar')\").show()\nsqlContext.sql(\"select strLen(ID) from tbl1\").show()"],"metadata":{},"outputs":[],"execution_count":6},{"cell_type":"code","source":["df.show()\ndf.groupBy().avg().show()"],"metadata":{},"outputs":[],"execution_count":7},{"cell_type":"code","source":["df.withColumn(\"NO_2\",df.NO + 10).show()\ndf.withColumnRenamed(\"NO\",\"NO_2\").show()"],"metadata":{},"outputs":[],"execution_count":8},{"cell_type":"code","source":["#pivot\ndf.groupBy(\"ID\").pivot(\"ID\").sum(\"NO\").show()"],"metadata":{},"outputs":[],"execution_count":9},{"cell_type":"code","source":[""],"metadata":{},"outputs":[],"execution_count":10}],"metadata":{"name":"dataFrame_basics","notebookId":4072594792063499},"nbformat":4,"nbformat_minor":0} 2 | -------------------------------------------------------------------------------- /dataFrame_complexDataType.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"code","source":["\ndf = spark.createDataFrame([{'name':'vivek B','id':100},{'name':'poll S','id':200},{'name':'peter M','id':150}])\ndf.show()"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["#Structs\nfrom pyspark.sql.functions import struct, col\ndf_struct = 
df.select(struct(\"id\",\"name\").alias(\"struct\"))\ndf_struct.show()"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"code","source":["df_struct.createOrReplaceTempView(\"vw_struct\")\ndf_struct.select(\"struct.name\").show()\ndf_struct.select(col(\"struct\").getField(\"id\")).show()"],"metadata":{},"outputs":[],"execution_count":3},{"cell_type":"code","source":["df_struct.select(\"struct.*\").show()"],"metadata":{},"outputs":[],"execution_count":4},{"cell_type":"code","source":["#Arrays\nfrom pyspark.sql.functions import split, size, array_contains\ndf.select(split(\"name\", \" \").alias(\"array_name\")).show()\ndf.select(split(\"name\", \" \").alias(\"array_name\")).selectExpr(\"array_name[1]\").show()\ndf.select(size(split(col(\"name\"), \" \"))).show()"],"metadata":{},"outputs":[],"execution_count":5},{"cell_type":"code","source":["df.select(array_contains(split(col(\"name\"), \" \"), \"vivek\")).show()"],"metadata":{},"outputs":[],"execution_count":6},{"cell_type":"code","source":["#Explode\nfrom pyspark.sql.functions import explode\ndf.withColumn(\"splited\" , split(\"name\", \" \")).withColumn(\"exploded\", explode(\"splited\")).select(\"ID\", \"exploded\").show()"],"metadata":{},"outputs":[],"execution_count":7},{"cell_type":"code","source":["#Maps\nfrom pyspark.sql.functions import create_map\ndf.select(create_map(\"id\",\"name\")).show()"],"metadata":{},"outputs":[],"execution_count":8},{"cell_type":"code","source":["df.select(create_map(\"id\",\"name\").alias(\"mapping_col\")).selectExpr(\"mapping_col[150]\").show()"],"metadata":{},"outputs":[],"execution_count":9}],"metadata":{"name":"dataFrame_complexDataType","notebookId":573656066332560},"nbformat":4,"nbformat_minor":0} 2 | -------------------------------------------------------------------------------- /dataFrame_joins.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"code","source":["list_A = 
[{'name':'A','ID':10},{'name':'B','ID':20}]\nlist_B = [{'ID':10,'ADDR':'DE'},{'ID':30,'ADDR':'IND'}]\ndf_A = spark.createDataFrame(list_A)\ndf_B = spark.createDataFrame(list_B)"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["df_A.crossJoin(df_B).show()\ndf_A.crossJoin(df_B.select(\"ADDR\")).select(\"ID\",\"name\",\"ADDR\").show()"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"code","source":["#explain(True)\n#join : inner, cross, outer, full, full_outer, left, left_outer, right, right_outer, left_semi, and left_anti\ndf_A.join(df_B,df_A.ID == df_B.ID, 'inner').show()\ndf_A.join(df_B,df_A.ID == df_B.ID, 'leftouter').show()\ndf_A.join(df_B,df_A.ID == df_B.ID, 'leftouter').explain(True)"],"metadata":{},"outputs":[],"execution_count":3},{"cell_type":"code","source":["#hint\ndf_A.join(df_B.hint(\"broadcast\"),\"ID\").explain()\ndf_A.join(df_B,df_A.ID == df_B.ID, 'inner').explain()"],"metadata":{},"outputs":[],"execution_count":4},{"cell_type":"code","source":["df_A.intersect(df_B).show()"],"metadata":{},"outputs":[],"execution_count":5},{"cell_type":"code","source":["#repartition\ndf_res = df_A.crossJoin(df_B.select(\"ADDR\")).select(\"ID\",\"name\",\"ADDR\")\ndf_res.show()\ndf_res.repartition(2,\"ADDR\").show()\ndf_res.repartition(\"ADDR\").show()"],"metadata":{},"outputs":[],"execution_count":6},{"cell_type":"code","source":["#crosstab\ndf_A.crosstab(\"ID\",\"name\").show()\ndf_B.crosstab(\"ID\",\"ADDR\").show()"],"metadata":{},"outputs":[],"execution_count":7},{"cell_type":"code","source":["#cube & 
rollup\n#https://stackoverflow.com/questions/37975227/what-is-the-difference-between-cube-and-groupby-operators\ndf_A.cube(\"ID\",\"name\").count().show()\ndf_A.rollup(\"ID\",\"name\").count().show()"],"metadata":{},"outputs":[],"execution_count":8},{"cell_type":"code","source":["#describe\ndf_A.describe().show()"],"metadata":{},"outputs":[],"execution_count":9},{"cell_type":"code","source":["df_A.cube(\"ID\",\"name\").count().show()\ndf_A.cube(\"ID\",\"name\").count().fillna(100).show()"],"metadata":{},"outputs":[],"execution_count":10},{"cell_type":"code","source":["def printIt(df):\n print(df.ID)\n \ndf_A.foreach(printIt)"],"metadata":{},"outputs":[],"execution_count":11}],"metadata":{"name":"dataFrame_joins","notebookId":690121464804339},"nbformat":4,"nbformat_minor":0} 2 | -------------------------------------------------------------------------------- /dataFrame_sql_functions.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"code","source":["from pyspark.sql import functions"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["#TODO"],"metadata":{},"outputs":[],"execution_count":2}],"metadata":{"name":"dataFrame_sql_functions","notebookId":71081247235839},"nbformat":4,"nbformat_minor":0} 2 | -------------------------------------------------------------------------------- /dataFrame_storageLevels.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"code","source":["tmp_list = [('A',10),('B',20)]\ndf = spark.createDataFrame(tmp_list)"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["df.storageLevel"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"code","source":["df.cache().storageLevel"],"metadata":{},"outputs":[],"execution_count":3},{"cell_type":"code","source":["from pyspark.storagelevel import StorageLevel 
\n\ndf.persist(StorageLevel.DISK_ONLY_2).storageLevel"],"metadata":{},"outputs":[],"execution_count":4},{"cell_type":"code","source":["df.persist(StorageLevel.MEMORY_ONLY).storageLevel"],"metadata":{},"outputs":[],"execution_count":5}],"metadata":{"name":"dataFrame_storageLevels","notebookId":2944671690971069},"nbformat":4,"nbformat_minor":0} 2 | -------------------------------------------------------------------------------- /dataFrame_transformationVSactions.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"code","source":["#Transformations are lazy\ndf_num_range = spark.range(100).toDF(\"nums\")"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["\n#Narrow Transformations\ndf_narrow = df_num_range.where(\"nums % 2 = 0\")\n#Dataframes are immutable\n#Actions\ndf_new.take(5)"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"code","source":[""],"metadata":{},"outputs":[],"execution_count":3},{"cell_type":"code","source":["#http://backtobazics.com/big-data/spark/apache-spark-flatmap-example/"],"metadata":{},"outputs":[],"execution_count":4},{"cell_type":"code","source":["#Wide Transformations (shuffles)\ndf_wide = df_num_range.where(\"nums % 2 = 0\").count()\nprint df_wide"],"metadata":{},"outputs":[],"execution_count":5},{"cell_type":"code","source":["df_ctry_summary = spark.read.csv(\"/FileStore/tables/ctry_summary.csv\")\ndf_ctry_summary.printSchema()"],"metadata":{},"outputs":[],"execution_count":6},{"cell_type":"code","source":["#get data schema from file \n#and ignore header\ndf_ctry_summary = spark.read.option(\"inferSchema\", \"true\").option(\"header\",\"true\").csv(\"/FileStore/tables/ctry_summary.csv\")\ndf_ctry_summary.printSchema()"],"metadata":{},"outputs":[],"execution_count":7},{"cell_type":"code","source":["#Explain plans are a bit arcane\ndf_grp_ctry = 
df_ctry_summary.sort(\"count\").explain()"],"metadata":{},"outputs":[],"execution_count":8},{"cell_type":"code","source":["# By default, a Spark shuffle outputs 200 shuffle partitions\n\nspark.conf.set(\"spark.sql.shuffel.partitions\", \"5\")\ndf_grp_ctry = df_ctry_summary.sort(\"count\").explain()"],"metadata":{},"outputs":[],"execution_count":9}],"metadata":{"name":"01012018_1","notebookId":3023601934734119},"nbformat":4,"nbformat_minor":0} 2 | -------------------------------------------------------------------------------- /dataFrame_window.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"code","source":["df_people_flow = spark.read.option(\"inferSchema\",\"true\").option(\"header\",\"true\").csv(\"/FileStore/tables/global_flow_of_people.csv\")\n"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["df_people_flow.printSchema()"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"code","source":["from pyspark.sql import Window\nfrom pyspark.sql.functions import rank, min\nwindow = Window.partitionBy(\"country_orig\").orderdBy(\"regionflow_1990\").rowsBetween(-1, 1)\ndf_people_flow.select(rank().over(window), min(\"regionflow_1990\").over(window))\n"],"metadata":{},"outputs":[],"execution_count":3}],"metadata":{"name":"dataFrame_window","notebookId":2944671690971079},"nbformat":4,"nbformat_minor":0} 2 | -------------------------------------------------------------------------------- /dataFrane_SQL - Databricks_files/Databricks_Logo_Side_Nav_Gray.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Databricks_Logo_Side_Nav_Orange 5 | Created with Sketch Beta. 
6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /dataFrane_SQL - Databricks_files/basic.css: -------------------------------------------------------------------------------- 1 | 2 | 3 | .dropzoneJarFiles { 4 | width: 490px; 5 | height: 210px; 6 | } 7 | 8 | .dropzoneDocFile { 9 | width: 200px; 10 | height: 210px; 11 | } 12 | 13 | /* The MIT License */ 14 | .dropzone, 15 | .dropzone *, 16 | .dropzone-previews, 17 | .dropzone-previews * { 18 | -webkit-box-sizing: border-box; 19 | -moz-box-sizing: border-box; 20 | box-sizing: border-box; 21 | } 22 | .dropzone { 23 | position: relative; 24 | border: 1px solid rgba(0,0,0,0.08); 25 | background: rgba(0,0,0,0.02); 26 | padding: 1em; 27 | } 28 | .dropzone.dz-clickable { 29 | cursor: pointer; 30 | } 31 | .dropzone.dz-clickable .dz-message, 32 | .dropzone.dz-clickable .dz-message span { 33 | cursor: pointer; 34 | } 35 | .dropzone.dz-clickable * { 36 | cursor: default; 37 | } 38 | .dropzone .dz-message { 39 | opacity: 1; 40 | -ms-filter: none; 41 | filter: none; 42 | } 43 | .dropzone.dz-drag-hover { 44 | border-color: rgba(0,0,0,0.15); 45 | background: rgba(0,0,0,0.04); 46 | } 47 | .dropzone.dz-started .dz-message { 48 | display: none; 49 | } 50 | .dropzone .dz-preview, 51 | .dropzone-previews .dz-preview { 52 | background: rgba(255,255,255,0.8); 53 | position: relative; 54 | display: inline-block; 55 | margin: 17px; 56 | vertical-align: top; 57 | border: 1px solid #acacac; 58 | padding: 6px 6px 6px 6px; 59 | } 60 | .dropzone .dz-preview.dz-file-preview [data-dz-thumbnail], 61 | .dropzone-previews .dz-preview.dz-file-preview [data-dz-thumbnail] { 62 | display: none; 63 | } 64 | .dropzone .dz-preview .dz-details, 65 | .dropzone-previews .dz-preview .dz-details { 66 | width: 100px; 67 | height: 100px; 68 | position: relative; 69 | background: #ebebeb; 70 | padding: 5px; 71 | margin-bottom: 22px; 72 | } 73 | .dropzone 
.dz-preview .dz-details .dz-filename, 74 | .dropzone-previews .dz-preview .dz-details .dz-filename { 75 | overflow: hidden; 76 | height: 100%; 77 | } 78 | .dropzone .dz-preview .dz-details img, 79 | .dropzone-previews .dz-preview .dz-details img { 80 | position: absolute; 81 | top: 0; 82 | left: 0; 83 | width: 100px; 84 | height: 100px; 85 | } 86 | .dropzone .dz-preview .dz-details .dz-size, 87 | .dropzone-previews .dz-preview .dz-details .dz-size { 88 | position: absolute; 89 | bottom: -28px; 90 | left: 3px; 91 | height: 28px; 92 | line-height: 28px; 93 | } 94 | .dropzone .dz-preview.dz-error .dz-error-mark, 95 | .dropzone-previews .dz-preview.dz-error .dz-error-mark { 96 | display: block; 97 | } 98 | .dropzone .dz-preview.dz-success .dz-success-mark, 99 | .dropzone-previews .dz-preview.dz-success .dz-success-mark { 100 | display: block; 101 | } 102 | 103 | .dropzone .dz-preview.dz-error .dz-error-mark svg, 104 | .dropzone-previews .dz-preview.dz-error .dz-error-mark svg, 105 | .dropzone .dz-preview.dz-success .dz-success-mark svg, 106 | .dropzone-previews .dz-preview.dz-success .dz-success-mark svg { 107 | display: none; 108 | } 109 | 110 | .dropzone .dz-preview:hover .dz-details img, 111 | .dropzone-previews .dz-preview:hover .dz-details img { 112 | display: none; 113 | } 114 | .dropzone .dz-preview .dz-success-mark, 115 | .dropzone-previews .dz-preview .dz-success-mark, 116 | .dropzone .dz-preview .dz-error-mark, 117 | .dropzone-previews .dz-preview .dz-error-mark { 118 | display: none; 119 | position: absolute; 120 | width: 40px; 121 | height: 40px; 122 | font-size: 30px; 123 | text-align: center; 124 | right: -10px; 125 | top: -10px; 126 | } 127 | .dropzone .dz-preview .dz-success-mark, 128 | .dropzone-previews .dz-preview .dz-success-mark { 129 | color: #8cc657; 130 | } 131 | .dropzone .dz-preview .dz-error-mark, 132 | .dropzone-previews .dz-preview .dz-error-mark { 133 | color: #ee162d; 134 | } 135 | .dropzone .dz-preview .dz-progress, 136 | 
.dropzone-previews .dz-preview .dz-progress { 137 | position: absolute; 138 | top: 100px; 139 | left: 6px; 140 | right: 6px; 141 | height: 6px; 142 | background: #d7d7d7; 143 | display: none; 144 | } 145 | .dropzone .dz-preview .dz-progress .dz-upload, 146 | .dropzone-previews .dz-preview .dz-progress .dz-upload { 147 | position: absolute; 148 | top: 0; 149 | bottom: 0; 150 | left: 0; 151 | width: 0%; 152 | background-color: #8cc657; 153 | } 154 | .dropzone .dz-preview.dz-processing .dz-progress, 155 | .dropzone-previews .dz-preview.dz-processing .dz-progress { 156 | display: block; 157 | } 158 | .dropzone .dz-preview .dz-error-message, 159 | .dropzone-previews .dz-preview .dz-error-message { 160 | display: none; 161 | position: absolute; 162 | top: -5px; 163 | left: -20px; 164 | background: rgba(245,245,245,0.8); 165 | padding: 8px 10px; 166 | color: #800; 167 | min-width: 140px; 168 | max-width: 500px; 169 | z-index: 500; 170 | } 171 | .dropzone .dz-preview:hover.dz-error .dz-error-message, 172 | .dropzone-previews .dz-preview:hover.dz-error .dz-error-message { 173 | display: block; 174 | } 175 | 176 | 177 | 178 | .dropzone .dz-preview.dz-success .dz-success-mark, 179 | .dropzone-previews .dz-preview.dz-success .dz-success-mark { 180 | opacity: 1; 181 | -ms-filter: none; 182 | filter: none; 183 | } 184 | .dropzone .dz-preview.dz-error .dz-error-mark, 185 | .dropzone-previews .dz-preview.dz-error .dz-error-mark { 186 | opacity: 1; 187 | -ms-filter: none; 188 | filter: none; 189 | } 190 | .dropzone .dz-preview.dz-error .dz-progress .dz-upload, 191 | .dropzone-previews .dz-preview.dz-error .dz-progress .dz-upload { 192 | background: #ee1e2d; 193 | } 194 | .dropzone .dz-preview .dz-error-mark, 195 | .dropzone-previews .dz-preview .dz-error-mark, 196 | .dropzone .dz-preview .dz-success-mark, 197 | .dropzone-previews .dz-preview .dz-success-mark { 198 | display: block; 199 | opacity: 0; 200 | -ms-filter: "progid:DXImageTransform.Microsoft.Alpha(Opacity=0)"; 201 | 
filter: alpha(opacity=0); 202 | -webkit-transition: opacity 0.4s ease-in-out; 203 | -moz-transition: opacity 0.4s ease-in-out; 204 | -o-transition: opacity 0.4s ease-in-out; 205 | -ms-transition: opacity 0.4s ease-in-out; 206 | transition: opacity 0.4s ease-in-out; 207 | background-image: url("../images/spritemap.png"); 208 | background-repeat: no-repeat; 209 | } 210 | @media all and (-webkit-min-device-pixel-ratio:1.5),(min--moz-device-pixel-ratio:1.5),(-o-min-device-pixel-ratio:1.5/1),(min-device-pixel-ratio:1.5),(min-resolution:138dpi),(min-resolution:1.5dppx) { 211 | .dropzone .dz-preview .dz-error-mark, 212 | .dropzone-previews .dz-preview .dz-error-mark, 213 | .dropzone .dz-preview .dz-success-mark, 214 | .dropzone-previews .dz-preview .dz-success-mark { 215 | background-image: url("../images/spritemap@2x.png"); 216 | -webkit-background-size: 428px 406px; 217 | -moz-background-size: 428px 406px; 218 | background-size: 428px 406px; 219 | } 220 | } 221 | .dropzone .dz-preview .dz-error-mark span, 222 | .dropzone-previews .dz-preview .dz-error-mark span, 223 | .dropzone .dz-preview .dz-success-mark span, 224 | .dropzone-previews .dz-preview .dz-success-mark span { 225 | display: none; 226 | } 227 | .dropzone .dz-preview .dz-error-mark, 228 | .dropzone-previews .dz-preview .dz-error-mark { 229 | background-position: -268px -123px; 230 | } 231 | .dropzone .dz-preview .dz-success-mark, 232 | .dropzone-previews .dz-preview .dz-success-mark { 233 | background-position: -268px -163px; 234 | } 235 | .dropzone .dz-preview .dz-progress .dz-upload, 236 | .dropzone-previews .dz-preview .dz-progress .dz-upload { 237 | -webkit-animation: loading 0.4s linear infinite; 238 | -moz-animation: loading 0.4s linear infinite; 239 | -o-animation: loading 0.4s linear infinite; 240 | -ms-animation: loading 0.4s linear infinite; 241 | animation: loading 0.4s linear infinite; 242 | -webkit-transition: width 0.3s ease-in-out; 243 | -moz-transition: width 0.3s ease-in-out; 244 | 
-o-transition: width 0.3s ease-in-out; 245 | -ms-transition: width 0.3s ease-in-out; 246 | transition: width 0.3s ease-in-out; 247 | -webkit-border-radius: 2px; 248 | border-radius: 2px; 249 | position: absolute; 250 | top: 0; 251 | left: 0; 252 | width: 0%; 253 | height: 100%; 254 | background-image: url("../images/spritemap.png"); 255 | background-repeat: repeat-x; 256 | background-position: 0px -400px; 257 | } 258 | @media all and (-webkit-min-device-pixel-ratio:1.5),(min--moz-device-pixel-ratio:1.5),(-o-min-device-pixel-ratio:1.5/1),(min-device-pixel-ratio:1.5),(min-resolution:138dpi),(min-resolution:1.5dppx) { 259 | .dropzone .dz-preview .dz-progress .dz-upload, 260 | .dropzone-previews .dz-preview .dz-progress .dz-upload { 261 | background-image: url("../images/spritemap@2x.png"); 262 | -webkit-background-size: 428px 406px; 263 | -moz-background-size: 428px 406px; 264 | background-size: 428px 406px; 265 | } 266 | } 267 | .dropzone .dz-preview.dz-success .dz-progress, 268 | .dropzone-previews .dz-preview.dz-success .dz-progress { 269 | display: block; 270 | opacity: 0; 271 | -ms-filter: "progid:DXImageTransform.Microsoft.Alpha(Opacity=0)"; 272 | filter: alpha(opacity=0); 273 | -webkit-transition: opacity 0.4s ease-in-out; 274 | -moz-transition: opacity 0.4s ease-in-out; 275 | -o-transition: opacity 0.4s ease-in-out; 276 | -ms-transition: opacity 0.4s ease-in-out; 277 | transition: opacity 0.4s ease-in-out; 278 | } 279 | 280 | -------------------------------------------------------------------------------- /dataFrane_SQL - Databricks_files/feedback.min.css: -------------------------------------------------------------------------------- 1 | .feedback-btn{font-size:12px;position:fixed;bottom:-3px;right:110px;width:auto;} 2 | #feedback-module p{font-size:13px;} 3 | #feedback-note-tmp{width:444px;height:auto;min-height:90px;outline:none;font-family: Helvetica Neue,Helvetica,Arial,sans-serif;padding:4px;} 4 | 
#feedback-note-tmp:focus,#feedback-overview-note:focus{border:1px solid #64b7cc;} 5 | #feedback-canvas{position:absolute;top:0;left:0;} 6 | #feedback-welcome{top:30%;left:50%;margin-left:-270px;display:block;position:fixed;} 7 | .feedback-logo{background:url(images/feedback_icons.png) -0px -0px no-repeat;width:34px;margin-bottom:16px;font-size:16px;font-weight:normal;line-height:32px;padding-left:40px;height:32px;} 8 | .feedback-next-btn{width:72px;height:29px;line-height:27px;float:right;font-size:13px;padding:0 8px;} 9 | .feedback-back-btn{width:72px;height:29px;line-height:27px;float:right;font-size:13px;padding:0 8px;margin-right:20px;} 10 | .feedback-submit-btn{width:72px;height:29px;line-height:27px;float:right;font-size:13px;padding:0 8px;} 11 | .feedback-close-btn{width:72px;height:29px;line-height:27px;float:right;font-size:13px;padding:0 8px;} 12 | .feedback-helper{background:rgba(0,0,0,0);cursor:default;} 13 | .feedback-helper[data-type="highlight"]>.feedback-helper-inner{background:rgba(0,68,255,0.1);} 14 | #feedback-close{cursor:pointer;position:absolute;background:url(images/feedback_icons.png) -0px -64px;width:30px;height:30px;} 15 | .feedback-wizard-close{cursor:pointer;position:absolute;top:2px;right:2px;background:url(images/feedback_icons.png) -0px -34px;width:30px;height:30px;opacity:0.65;} 16 | .feedback-wizard-close:hover{opacity:1;} 17 | #feedback-welcome-error,#feedback-overview-error{display:none;color:#f13e3e;float:right;margin-right:30px;font-size:13px;line-height:29px;} 18 | #feedback-overview-error{margin-top:20px;} 19 | #feedback-highlighter{display:none;bottom:100px;right:100px;position:fixed;width:540px;height:275px;} 20 | #feedback-overview{display:none;top:10%;left:50%;margin-left:-420px;position:fixed;width:840px!important;height:auto;} 21 | #feedback-submit-error,#feedback-submit-success{top:30%;left:50%;margin-left:-300px;display:block;position:fixed;width:600px;height:auto;} 22 | 
.feedback-btn{outline:0;background-clip:padding-box;-webkit-box-shadow:0 4px 16px rgba(0,0,0,.2);-moz-box-shadow:0 4px 16px rgba(0,0,0,.2);box-shadow:0 4px 16px rgba(0,0,0,.2);z-index:40000;} 23 | .feedback-btn-gray{text-align:center;cursor:pointer;font-family:helvetica neue,helvetica,arial;border:1px solid #dcdcdc;border:1px solid rgba(0,0,0,0.1);color:#444;border-radius:2px;background-color:#f5f5f5;background-image:-webkit-linear-gradient(top,#f5f5f5,#f1f1f1);background-image:-moz-linear-gradient(top,#f5f5f5,#f1f1f1);background-image:-ms-linear-gradient(top,#f5f5f5,#f1f1f1);background-image:-o-linear-gradient(top,#f5f5f5,#f1f1f1);background-image:linear-gradient(top,#f5f5f5,#f1f1f1);} 24 | .feedback-btn-gray:hover{color:#333;border:1px solid #c6c6c6;background-color:#f8f8f8;background-image:-webkit-linear-gradient(top,#f8f8f8,#f1f1f1);background-image:-moz-linear-gradient(top,#f8f8f8,#f1f1f1);background-image:-ms-linear-gradient(top,#f8f8f8,#f1f1f1);background-image:-o-linear-gradient(top,#f8f8f8,#f1f1f1);background-image:linear-gradient(top,#f8f8f8,#f1f1f1);} 25 | .feedback-btn-blue{text-align:center;cursor:pointer;font-family:Helvetica Neue,Helvetica,arial;border-radius:2px;background-color:#357ae8;background-image:-webkit-linear-gradient(top,#4d90fe,#357ae8);background-image:-moz-linear-gradient(top,#4d90fe,#357ae8);background-image:-ms-linear-gradient(top,#4d90fe,#357ae8);background-image:-o-linear-gradient(top,#4d90fe,#357ae8);background-image:linear-gradient(top,#4d90fe,#357ae8);border:1px solid #2f5bb7;color:#fff;} 26 | #feedback-note-tmp,#feedback-overview-note{resize:none;} 27 | #feedback-welcome,#feedback-highlighter,#feedback-overview,#feedback-submit-success,#feedback-submit-error{font-family:Helvetica Neue,Helvetica,Arial,sans-serif;z-index:40000;background:#fff;border:1px solid rgba(0,0,0,.333);padding:30px 42px;width:540px;border:1px solid rgba(0,0,0,.333);outline:0;-webkit-box-shadow:0 4px 16px rgba(0,0,0,.2);-moz-box-shadow:0 4px 16px 
rgba(0,0,0,.2);box-shadow:0 4px 16px rgba(0,0,0,.2);background:#fff;background-clip:padding-box;box-sizing: border-box;-moz-box-sizing: border-box;-webkit-box-sizing: border-box;-webkit-transform: translateZ(0);} 28 | .feedback-sethighlight,.feedback-setblackout{-webkit-box-shadow:none;-moz-box-shadow:none;box-shadow:none;background-color:#f5f5f5;background-image:-webkit-linear-gradient(top,#f5f5f5,#f1f1f1);background-image:-moz-linear-gradient(top,#f5f5f5,#f1f1f1);background-image:-ms-linear-gradient(top,#f5f5f5,#f1f1f1);background-image:-o-linear-gradient(top,#f5f5f5,#f1f1f1);background-image:linear-gradient(top,#f5f5f5,#f1f1f1);color:#444;border:1px solid #dcdcdc;border:1px solid rgba(0,0,0,0.1);-webkit-border-radius:2px;-moz-border-radius:2px;border-radius:2px;cursor:default;font-size:11px;font-weight:bold;text-align:center;white-space:nowrap;margin-right:16px;height:30px;line-height:28px;min-width:90px;outline:0;padding:0 8px;display:inline-block;float:left; 29 | } 30 | .feedback-setblackout{margin-top:10px;clear:both;} 31 | .feedback-sethighlight div{background:url(images/feedback_icons.png) 0px -94px;width:16px;height:16px;margin-top:7px;float:left;} 32 | .feedback-setblackout div{background:url(images/feedback_icons.png) -16px -94px;width:16px;height:16px;margin-top:7px;float:left;} 33 | .feedback-sethighlight:hover,.feedback-setblackout:hover{-webkit-box-shadow:none;-moz-box-shadow:none;box-shadow:none;background-color:#f8f8f8;background-image:-webkit-linear-gradient(top,#f8f8f8,#f1f1f1);background-image:-moz-linear-gradient(top,#f8f8f8,#f1f1f1);background-image:-ms-linear-gradient(top,#f8f8f8,#f1f1f1);background-image:-o-linear-gradient(top,#f8f8f8,#f1f1f1);background-image:linear-gradient(top,#f8f8f8,#f1f1f1);border:1px solid #c6c6c6;color:#333;} 34 | .feedback-active{-webkit-box-shadow:inset 0 1px 2px rgba(0,0,0,.1);-moz-box-shadow:inset 0 1px 2px rgba(0,0,0,.1);box-shadow:inset 0 1px 2px 
rgba(0,0,0,.1);background-color:#eee;background-image:-webkit-linear-gradient(top,#eee,#e0e0e0);background-image:-moz-linear-gradient(top,#eee,#e0e0e0);background-image:-ms-linear-gradient(top,#eee,#e0e0e0);background-image:-o-linear-gradient(top,#eee,#e0e0e0);background-image:linear-gradient(top,#eee,#e0e0e0);border:1px solid #ccc;color:#333;} 35 | #feedback-highlighter label {float:left;margin:0 0 0 10px;line-height:30px;font-size:13px;font-weight:normal;} 36 | #feedback-highlighter label.lower{margin-top:10px;} 37 | .feedback-buttons{float:right;margin-top:20px;clear:both;} 38 | #feedback-module h3{font-weight:bold;font-size:15px;margin:8px 0;} 39 | .feedback-additional{margin-bottom:20px!important;} 40 | #feedback-overview-description{float:left;} 41 | #feedback-overview-note{width:314px;padding:4px;height:90px;outline:none;font-family: Helvetica Neue,Helvetica,Arial,sans-serif;} 42 | #feedback-overview-screenshot{float:right;} 43 | .feedback-screenshot{max-width:396px;padding:1px;border:1px solid #adadad;} 44 | #feedback-overview-description-text span{font-size:14px;margin:8px 0;color:#666;padding-left:10px;background:url(images/feedback_icons.png) -30px -34px no-repeat;margin-left:0px;} 45 | #feedback-browser-info,#feedback-page-info,#feedback-page-structure,#feedback-additional-none{margin-top:16px;display:none;} 46 | #feedback-user-info{margin-top:16px;} 47 | -------------------------------------------------------------------------------- /dataFrane_SQL - Databricks_files/jquery-ui.min.css: -------------------------------------------------------------------------------- 1 | /*! 
jQuery UI - v1.11.4 - 2015-03-11 2 | * http://jqueryui.com 3 | * Includes: core.css, accordion.css, autocomplete.css, button.css, datepicker.css, dialog.css, draggable.css, menu.css, progressbar.css, resizable.css, selectable.css, selectmenu.css, slider.css, sortable.css, spinner.css, tabs.css, tooltip.css, theme.css 4 | * To view and modify this theme, visit http://jqueryui.com/themeroller/?ffDefault=Trebuchet%20MS%2CTahoma%2CVerdana%2CArial%2Csans-serif&fwDefault=bold&fsDefault=1.1em&cornerRadius=4px&bgColorHeader=f6a828&bgTextureHeader=gloss_wave&bgImgOpacityHeader=35&borderColorHeader=e78f08&fcHeader=ffffff&iconColorHeader=ffffff&bgColorContent=eeeeee&bgTextureContent=highlight_soft&bgImgOpacityContent=100&borderColorContent=dddddd&fcContent=333333&iconColorContent=222222&bgColorDefault=f6f6f6&bgTextureDefault=glass&bgImgOpacityDefault=100&borderColorDefault=cccccc&fcDefault=1c94c4&iconColorDefault=ef8c08&bgColorHover=fdf5ce&bgTextureHover=glass&bgImgOpacityHover=100&borderColorHover=fbcb09&fcHover=c77405&iconColorHover=ef8c08&bgColorActive=ffffff&bgTextureActive=glass&bgImgOpacityActive=65&borderColorActive=fbd850&fcActive=eb8f00&iconColorActive=ef8c08&bgColorHighlight=ffe45c&bgTextureHighlight=highlight_soft&bgImgOpacityHighlight=75&borderColorHighlight=fed22f&fcHighlight=363636&iconColorHighlight=228ef1&bgColorError=b81900&bgTextureError=diagonals_thick&bgImgOpacityError=18&borderColorError=cd0a0a&fcError=ffffff&iconColorError=ffd27a&bgColorOverlay=666666&bgTextureOverlay=diagonals_thick&bgImgOpacityOverlay=20&opacityOverlay=50&bgColorShadow=000000&bgTextureShadow=flat&bgImgOpacityShadow=10&opacityShadow=20&thicknessShadow=5px&offsetTopShadow=-5px&offsetLeftShadow=-5px&cornerRadiusShadow=5px 5 | * Copyright 2015 jQuery Foundation and other contributors; Licensed MIT */ 6 | 7 | .ui-helper-hidden{display:none}.ui-helper-hidden-accessible{border:0;clip:rect(0 0 0 
0);height:1px;margin:-1px;overflow:hidden;padding:0;position:absolute;width:1px}.ui-helper-reset{margin:0;padding:0;border:0;outline:0;line-height:1.3;text-decoration:none;font-size:100%;list-style:none}.ui-helper-clearfix:before,.ui-helper-clearfix:after{content:"";display:table;border-collapse:collapse}.ui-helper-clearfix:after{clear:both}.ui-helper-clearfix{min-height:0}.ui-helper-zfix{width:100%;height:100%;top:0;left:0;position:absolute;opacity:0;filter:Alpha(Opacity=0)}.ui-front{z-index:100}.ui-state-disabled{cursor:default!important}.ui-icon{display:block;text-indent:-99999px;overflow:hidden;background-repeat:no-repeat}.ui-widget-overlay{position:fixed;top:0;left:0;width:100%;height:100%}.ui-accordion .ui-accordion-header{display:block;cursor:pointer;position:relative;margin:2px 0 0 0;padding:.5em .5em .5em .7em;min-height:0;font-size:100%}.ui-accordion .ui-accordion-icons{padding-left:2.2em}.ui-accordion .ui-accordion-icons .ui-accordion-icons{padding-left:2.2em}.ui-accordion .ui-accordion-header .ui-accordion-header-icon{position:absolute;left:.5em;top:50%;margin-top:-8px}.ui-accordion .ui-accordion-content{padding:1em 2.2em;border-top:0;overflow:auto}.ui-autocomplete{position:absolute;top:0;left:0;cursor:default}.ui-button{display:inline-block;position:relative;padding:0;line-height:normal;margin-right:.1em;cursor:pointer;vertical-align:middle;text-align:center;overflow:visible}.ui-button,.ui-button:link,.ui-button:visited,.ui-button:hover,.ui-button:active{text-decoration:none}.ui-button-icon-only{width:2.2em}button.ui-button-icon-only{width:2.4em}.ui-button-icons-only{width:3.4em}button.ui-button-icons-only{width:3.7em}.ui-button .ui-button-text{display:block;line-height:normal}.ui-button-text-only .ui-button-text{padding:.4em 1em}.ui-button-icon-only .ui-button-text,.ui-button-icons-only .ui-button-text{padding:.4em;text-indent:-9999999px}.ui-button-text-icon-primary .ui-button-text,.ui-button-text-icons .ui-button-text{padding:.4em 1em .4em 
2.1em}.ui-button-text-icon-secondary .ui-button-text,.ui-button-text-icons .ui-button-text{padding:.4em 2.1em .4em 1em}.ui-button-text-icons .ui-button-text{padding-left:2.1em;padding-right:2.1em}input.ui-button{padding:.4em 1em}.ui-button-icon-only .ui-icon,.ui-button-text-icon-primary .ui-icon,.ui-button-text-icon-secondary .ui-icon,.ui-button-text-icons .ui-icon,.ui-button-icons-only .ui-icon{position:absolute;top:50%;margin-top:-8px}.ui-button-icon-only .ui-icon{left:50%;margin-left:-8px}.ui-button-text-icon-primary .ui-button-icon-primary,.ui-button-text-icons .ui-button-icon-primary,.ui-button-icons-only .ui-button-icon-primary{left:.5em}.ui-button-text-icon-secondary .ui-button-icon-secondary,.ui-button-text-icons .ui-button-icon-secondary,.ui-button-icons-only .ui-button-icon-secondary{right:.5em}.ui-buttonset{margin-right:7px}.ui-buttonset .ui-button{margin-left:0;margin-right:-.3em}input.ui-button::-moz-focus-inner,button.ui-button::-moz-focus-inner{border:0;padding:0}.ui-datepicker{width:17em;padding:.2em .2em 0;display:none}.ui-datepicker .ui-datepicker-header{position:relative;padding:.2em 0}.ui-datepicker .ui-datepicker-prev,.ui-datepicker .ui-datepicker-next{position:absolute;top:2px;width:1.8em;height:1.8em}.ui-datepicker .ui-datepicker-prev-hover,.ui-datepicker .ui-datepicker-next-hover{top:1px}.ui-datepicker .ui-datepicker-prev{left:2px}.ui-datepicker .ui-datepicker-next{right:2px}.ui-datepicker .ui-datepicker-prev-hover{left:1px}.ui-datepicker .ui-datepicker-next-hover{right:1px}.ui-datepicker .ui-datepicker-prev span,.ui-datepicker .ui-datepicker-next span{display:block;position:absolute;left:50%;margin-left:-8px;top:50%;margin-top:-8px}.ui-datepicker .ui-datepicker-title{margin:0 2.3em;line-height:1.8em;text-align:center}.ui-datepicker .ui-datepicker-title select{font-size:1em;margin:1px 0}.ui-datepicker select.ui-datepicker-month,.ui-datepicker select.ui-datepicker-year{width:45%}.ui-datepicker 
table{width:100%;font-size:.9em;border-collapse:collapse;margin:0 0 .4em}.ui-datepicker th{padding:.7em .3em;text-align:center;font-weight:bold;border:0}.ui-datepicker td{border:0;padding:1px}.ui-datepicker td span,.ui-datepicker td a{display:block;padding:.2em;text-align:right;text-decoration:none}.ui-datepicker .ui-datepicker-buttonpane{background-image:none;margin:.7em 0 0 0;padding:0 .2em;border-left:0;border-right:0;border-bottom:0}.ui-datepicker .ui-datepicker-buttonpane button{float:right;margin:.5em .2em .4em;cursor:pointer;padding:.2em .6em .3em .6em;width:auto;overflow:visible}.ui-datepicker .ui-datepicker-buttonpane button.ui-datepicker-current{float:left}.ui-datepicker.ui-datepicker-multi{width:auto}.ui-datepicker-multi .ui-datepicker-group{float:left}.ui-datepicker-multi .ui-datepicker-group table{width:95%;margin:0 auto .4em}.ui-datepicker-multi-2 .ui-datepicker-group{width:50%}.ui-datepicker-multi-3 .ui-datepicker-group{width:33.3%}.ui-datepicker-multi-4 .ui-datepicker-group{width:25%}.ui-datepicker-multi .ui-datepicker-group-last .ui-datepicker-header,.ui-datepicker-multi .ui-datepicker-group-middle .ui-datepicker-header{border-left-width:0}.ui-datepicker-multi .ui-datepicker-buttonpane{clear:left}.ui-datepicker-row-break{clear:both;width:100%;font-size:0}.ui-datepicker-rtl{direction:rtl}.ui-datepicker-rtl .ui-datepicker-prev{right:2px;left:auto}.ui-datepicker-rtl .ui-datepicker-next{left:2px;right:auto}.ui-datepicker-rtl .ui-datepicker-prev:hover{right:1px;left:auto}.ui-datepicker-rtl .ui-datepicker-next:hover{left:1px;right:auto}.ui-datepicker-rtl .ui-datepicker-buttonpane{clear:right}.ui-datepicker-rtl .ui-datepicker-buttonpane button{float:left}.ui-datepicker-rtl .ui-datepicker-buttonpane button.ui-datepicker-current,.ui-datepicker-rtl .ui-datepicker-group{float:right}.ui-datepicker-rtl .ui-datepicker-group-last .ui-datepicker-header,.ui-datepicker-rtl .ui-datepicker-group-middle 
.ui-datepicker-header{border-right-width:0;border-left-width:1px}.ui-dialog{overflow:hidden;position:absolute;top:0;left:0;padding:.2em;outline:0}.ui-dialog .ui-dialog-titlebar{padding:.4em 1em;position:relative}.ui-dialog .ui-dialog-title{float:left;margin:.1em 0;white-space:nowrap;width:90%;overflow:hidden;text-overflow:ellipsis}.ui-dialog .ui-dialog-titlebar-close{position:absolute;right:.3em;top:50%;width:20px;margin:-10px 0 0 0;padding:1px;height:20px}.ui-dialog .ui-dialog-content{position:relative;border:0;padding:.5em 1em;background:none;overflow:auto}.ui-dialog .ui-dialog-buttonpane{text-align:left;border-width:1px 0 0 0;background-image:none;margin-top:.5em;padding:.3em 1em .5em .4em}.ui-dialog .ui-dialog-buttonpane .ui-dialog-buttonset{float:right}.ui-dialog .ui-dialog-buttonpane button{margin:.5em .4em .5em 0;cursor:pointer}.ui-dialog .ui-resizable-se{width:12px;height:12px;right:-5px;bottom:-5px;background-position:16px 16px}.ui-draggable .ui-dialog-titlebar{cursor:move}.ui-draggable-handle{-ms-touch-action:none;touch-action:none}.ui-menu{list-style:none;padding:0;margin:0;display:block;outline:none}.ui-menu .ui-menu{position:absolute}.ui-menu .ui-menu-item{position:relative;margin:0;padding:3px 1em 3px .4em;cursor:pointer;min-height:0;list-style-image:url("data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7")}.ui-menu .ui-menu-divider{margin:5px 0;height:0;font-size:0;line-height:0;border-width:1px 0 0 0}.ui-menu .ui-state-focus,.ui-menu .ui-state-active{margin:-1px}.ui-menu-icons{position:relative}.ui-menu-icons .ui-menu-item{padding-left:2em}.ui-menu .ui-icon{position:absolute;top:0;bottom:0;left:.2em;margin:auto 0}.ui-menu .ui-menu-icon{left:auto;right:0}.ui-progressbar{height:2em;text-align:left;overflow:hidden}.ui-progressbar .ui-progressbar-value{margin:-1px;height:100%}.ui-progressbar 
.ui-progressbar-overlay{background:url("data:image/gif;base64,R0lGODlhKAAoAIABAAAAAP///yH/C05FVFNDQVBFMi4wAwEAAAAh+QQJAQABACwAAAAAKAAoAAACkYwNqXrdC52DS06a7MFZI+4FHBCKoDeWKXqymPqGqxvJrXZbMx7Ttc+w9XgU2FB3lOyQRWET2IFGiU9m1frDVpxZZc6bfHwv4c1YXP6k1Vdy292Fb6UkuvFtXpvWSzA+HycXJHUXiGYIiMg2R6W459gnWGfHNdjIqDWVqemH2ekpObkpOlppWUqZiqr6edqqWQAAIfkECQEAAQAsAAAAACgAKAAAApSMgZnGfaqcg1E2uuzDmmHUBR8Qil95hiPKqWn3aqtLsS18y7G1SzNeowWBENtQd+T1JktP05nzPTdJZlR6vUxNWWjV+vUWhWNkWFwxl9VpZRedYcflIOLafaa28XdsH/ynlcc1uPVDZxQIR0K25+cICCmoqCe5mGhZOfeYSUh5yJcJyrkZWWpaR8doJ2o4NYq62lAAACH5BAkBAAEALAAAAAAoACgAAAKVDI4Yy22ZnINRNqosw0Bv7i1gyHUkFj7oSaWlu3ovC8GxNso5fluz3qLVhBVeT/Lz7ZTHyxL5dDalQWPVOsQWtRnuwXaFTj9jVVh8pma9JjZ4zYSj5ZOyma7uuolffh+IR5aW97cHuBUXKGKXlKjn+DiHWMcYJah4N0lYCMlJOXipGRr5qdgoSTrqWSq6WFl2ypoaUAAAIfkECQEAAQAsAAAAACgAKAAAApaEb6HLgd/iO7FNWtcFWe+ufODGjRfoiJ2akShbueb0wtI50zm02pbvwfWEMWBQ1zKGlLIhskiEPm9R6vRXxV4ZzWT2yHOGpWMyorblKlNp8HmHEb/lCXjcW7bmtXP8Xt229OVWR1fod2eWqNfHuMjXCPkIGNileOiImVmCOEmoSfn3yXlJWmoHGhqp6ilYuWYpmTqKUgAAIfkECQEAAQAsAAAAACgAKAAAApiEH6kb58biQ3FNWtMFWW3eNVcojuFGfqnZqSebuS06w5V80/X02pKe8zFwP6EFWOT1lDFk8rGERh1TTNOocQ61Hm4Xm2VexUHpzjymViHrFbiELsefVrn6XKfnt2Q9G/+Xdie499XHd2g4h7ioOGhXGJboGAnXSBnoBwKYyfioubZJ2Hn0RuRZaflZOil56Zp6iioKSXpUAAAh+QQJAQABACwAAAAAKAAoAAACkoQRqRvnxuI7kU1a1UU5bd5tnSeOZXhmn5lWK3qNTWvRdQxP8qvaC+/yaYQzXO7BMvaUEmJRd3TsiMAgswmNYrSgZdYrTX6tSHGZO73ezuAw2uxuQ+BbeZfMxsexY35+/Qe4J1inV0g4x3WHuMhIl2jXOKT2Q+VU5fgoSUI52VfZyfkJGkha6jmY+aaYdirq+lQAACH5BAkBAAEALAAAAAAoACgAAAKWBIKpYe0L3YNKToqswUlvznigd4wiR4KhZrKt9Upqip61i9E3vMvxRdHlbEFiEXfk9YARYxOZZD6VQ2pUunBmtRXo1Lf8hMVVcNl8JafV38aM2/Fu5V16Bn63r6xt97j09+MXSFi4BniGFae3hzbH9+hYBzkpuUh5aZmHuanZOZgIuvbGiNeomCnaxxap2upaCZsq+1kAACH5BAkBAAEALAAAAAAoACgAAAKXjI8By5zf4kOxTVrXNVlv1X0d8IGZGKLnNpYtm8Lr9cqVeuOSvfOW79D9aDHizNhDJidFZhNydEahOaDH6nomtJjp1tutKoNWkvA6JqfRVLHU/QUfau9l2x7G54d1fl995xcIGAdXqMfBNadoYrhH+Mg2KBlpVpbluCiXmMnZ2Sh4GBqJ+ckIOqqJ6LmKSllZmsoq6wpQAAAh+QQJAQABACwAAAAAKAAoAAAClYx/oLvoxuJDkU1a1YUZbJ59nSd2ZXhWqbRa2/gF8Gu2DY3iqs
7yrq+xBYEkYvFSM8aSSObE+ZgRl1BHFZNr7pRCavZ5BW2142hY3AN/zWtsmf12p9XxxFl2lpLn1rseztfXZjdIWIf2s5dItwjYKBgo9yg5pHgzJXTEeGlZuenpyPmpGQoKOWkYmSpaSnqKileI2FAAACH5BAkBAAEALAAAAAAoACgAAAKVjB+gu+jG4kORTVrVhRlsnn2dJ3ZleFaptFrb+CXmO9OozeL5VfP99HvAWhpiUdcwkpBH3825AwYdU8xTqlLGhtCosArKMpvfa1mMRae9VvWZfeB2XfPkeLmm18lUcBj+p5dnN8jXZ3YIGEhYuOUn45aoCDkp16hl5IjYJvjWKcnoGQpqyPlpOhr3aElaqrq56Bq7VAAAOw==");height:100%;filter:alpha(opacity=25);opacity:0.25}.ui-progressbar-indeterminate .ui-progressbar-value{background-image:none}.ui-resizable{position:relative}.ui-resizable-handle{position:absolute;font-size:0.1px;display:block;-ms-touch-action:none;touch-action:none}.ui-resizable-disabled .ui-resizable-handle,.ui-resizable-autohide .ui-resizable-handle{display:none}.ui-resizable-n{cursor:n-resize;height:7px;width:100%;top:-5px;left:0}.ui-resizable-s{cursor:s-resize;height:7px;width:100%;bottom:-5px;left:0}.ui-resizable-e{cursor:e-resize;width:7px;right:-5px;top:0;height:100%}.ui-resizable-w{cursor:w-resize;width:7px;left:-5px;top:0;height:100%}.ui-resizable-se{cursor:se-resize;width:12px;height:12px;right:1px;bottom:1px}.ui-resizable-sw{cursor:sw-resize;width:9px;height:9px;left:-5px;bottom:-5px}.ui-resizable-nw{cursor:nw-resize;width:9px;height:9px;left:-5px;top:-5px}.ui-resizable-ne{cursor:ne-resize;width:9px;height:9px;right:-5px;top:-5px}.ui-selectable{-ms-touch-action:none;touch-action:none}.ui-selectable-helper{position:absolute;z-index:100;border:1px dotted black}.ui-selectmenu-menu{padding:0;margin:0;position:absolute;top:0;left:0;display:none}.ui-selectmenu-menu .ui-menu{overflow:auto;overflow-x:hidden;padding-bottom:1px}.ui-selectmenu-menu .ui-menu .ui-selectmenu-optgroup{font-size:1em;font-weight:bold;line-height:1.5;padding:2px 0.4em;margin:0.5em 0 0 0;height:auto;border:0}.ui-selectmenu-open{display:block}.ui-selectmenu-button{display:inline-block;overflow:hidden;position:relative;text-decoration:none;cursor:pointer}.ui-selectmenu-button 
span.ui-icon{right:0.5em;left:auto;margin-top:-8px;position:absolute;top:50%}.ui-selectmenu-button span.ui-selectmenu-text{text-align:left;padding:0.4em 2.1em 0.4em 1em;display:block;line-height:1.4;overflow:hidden;text-overflow:ellipsis;white-space:nowrap}.ui-slider{position:relative;text-align:left}.ui-slider .ui-slider-handle{position:absolute;z-index:2;width:1.2em;height:1.2em;cursor:default;-ms-touch-action:none;touch-action:none}.ui-slider .ui-slider-range{position:absolute;z-index:1;font-size:.7em;display:block;border:0;background-position:0 0}.ui-slider.ui-state-disabled .ui-slider-handle,.ui-slider.ui-state-disabled .ui-slider-range{filter:inherit}.ui-slider-horizontal{height:.8em}.ui-slider-horizontal .ui-slider-handle{top:-.3em;margin-left:-.6em}.ui-slider-horizontal .ui-slider-range{top:0;height:100%}.ui-slider-horizontal .ui-slider-range-min{left:0}.ui-slider-horizontal .ui-slider-range-max{right:0}.ui-slider-vertical{width:.8em;height:100px}.ui-slider-vertical .ui-slider-handle{left:-.3em;margin-left:0;margin-bottom:-.6em}.ui-slider-vertical .ui-slider-range{left:0;width:100%}.ui-slider-vertical .ui-slider-range-min{bottom:0}.ui-slider-vertical .ui-slider-range-max{top:0}.ui-sortable-handle{-ms-touch-action:none;touch-action:none}.ui-spinner{position:relative;display:inline-block;overflow:hidden;padding:0;vertical-align:middle}.ui-spinner-input{border:none;background:none;color:inherit;padding:0;margin:.2em 0;vertical-align:middle;margin-left:.4em;margin-right:22px}.ui-spinner-button{width:16px;height:50%;font-size:.5em;padding:0;margin:0;text-align:center;position:absolute;cursor:default;display:block;overflow:hidden;right:0}.ui-spinner a.ui-spinner-button{border-top:none;border-bottom:none;border-right:none}.ui-spinner .ui-icon{position:absolute;margin-top:-8px;top:50%;left:0}.ui-spinner-up{top:0}.ui-spinner-down{bottom:0}.ui-spinner .ui-icon-triangle-1-s{background-position:-65px -16px}.ui-tabs{position:relative;padding:.2em}.ui-tabs 
.ui-tabs-nav{margin:0;padding:.2em .2em 0}.ui-tabs .ui-tabs-nav li{list-style:none;float:left;position:relative;top:0;margin:1px .2em 0 0;border-bottom-width:0;padding:0;white-space:nowrap}.ui-tabs .ui-tabs-nav .ui-tabs-anchor{float:left;padding:.5em 1em;text-decoration:none}.ui-tabs .ui-tabs-nav li.ui-tabs-active{margin-bottom:-1px;padding-bottom:1px}.ui-tabs .ui-tabs-nav li.ui-tabs-active .ui-tabs-anchor,.ui-tabs .ui-tabs-nav li.ui-state-disabled .ui-tabs-anchor,.ui-tabs .ui-tabs-nav li.ui-tabs-loading .ui-tabs-anchor{cursor:text}.ui-tabs-collapsible .ui-tabs-nav li.ui-tabs-active .ui-tabs-anchor{cursor:pointer}.ui-tabs .ui-tabs-panel{display:block;border-width:0;padding:1em 1.4em;background:none}.ui-tooltip{padding:8px;position:absolute;z-index:9999;max-width:300px;-webkit-box-shadow:0 0 5px #aaa;box-shadow:0 0 5px #aaa}body .ui-tooltip{border-width:2px}.ui-widget{font-family:Trebuchet MS,Tahoma,Verdana,Arial,sans-serif;font-size:1.1em}.ui-widget .ui-widget{font-size:1em}.ui-widget input,.ui-widget select,.ui-widget textarea,.ui-widget button{font-family:Trebuchet MS,Tahoma,Verdana,Arial,sans-serif;font-size:1em}.ui-widget-content{border:1px solid #ddd;background:#eee url("images/ui-bg_highlight-soft_100_eeeeee_1x100.png") 50% top repeat-x;color:#333}.ui-widget-content a{color:#333}.ui-widget-header{border:1px solid #e78f08;background:#f6a828 url("images/ui-bg_gloss-wave_35_f6a828_500x100.png") 50% 50% repeat-x;color:#fff;font-weight:bold}.ui-widget-header a{color:#fff}.ui-state-default,.ui-widget-content .ui-state-default,.ui-widget-header .ui-state-default{border:1px solid #ccc;background:#f6f6f6 url("images/ui-bg_glass_100_f6f6f6_1x400.png") 50% 50% repeat-x;font-weight:bold;color:#1c94c4}.ui-state-default a,.ui-state-default a:link,.ui-state-default a:visited{color:#1c94c4;text-decoration:none}.ui-state-hover,.ui-widget-content .ui-state-hover,.ui-widget-header .ui-state-hover,.ui-state-focus,.ui-widget-content .ui-state-focus,.ui-widget-header 
.ui-state-focus{border:1px solid #fbcb09;background:#fdf5ce url("images/ui-bg_glass_100_fdf5ce_1x400.png") 50% 50% repeat-x;font-weight:bold;color:#c77405}.ui-state-hover a,.ui-state-hover a:hover,.ui-state-hover a:link,.ui-state-hover a:visited,.ui-state-focus a,.ui-state-focus a:hover,.ui-state-focus a:link,.ui-state-focus a:visited{color:#c77405;text-decoration:none}.ui-state-active,.ui-widget-content .ui-state-active,.ui-widget-header .ui-state-active{border:1px solid #fbd850;background:#fff url("images/ui-bg_glass_65_ffffff_1x400.png") 50% 50% repeat-x;font-weight:bold;color:#eb8f00}.ui-state-active a,.ui-state-active a:link,.ui-state-active a:visited{color:#eb8f00;text-decoration:none}.ui-state-highlight,.ui-widget-content .ui-state-highlight,.ui-widget-header .ui-state-highlight{border:1px solid #fed22f;background:#ffe45c url("images/ui-bg_highlight-soft_75_ffe45c_1x100.png") 50% top repeat-x;color:#363636}.ui-state-highlight a,.ui-widget-content .ui-state-highlight a,.ui-widget-header .ui-state-highlight a{color:#363636}.ui-state-error,.ui-widget-content .ui-state-error,.ui-widget-header .ui-state-error{border:1px solid #cd0a0a;background:#b81900 url("images/ui-bg_diagonals-thick_18_b81900_40x40.png") 50% 50% repeat;color:#fff}.ui-state-error a,.ui-widget-content .ui-state-error a,.ui-widget-header .ui-state-error a{color:#fff}.ui-state-error-text,.ui-widget-content .ui-state-error-text,.ui-widget-header .ui-state-error-text{color:#fff}.ui-priority-primary,.ui-widget-content .ui-priority-primary,.ui-widget-header .ui-priority-primary{font-weight:bold}.ui-priority-secondary,.ui-widget-content .ui-priority-secondary,.ui-widget-header .ui-priority-secondary{opacity:.7;filter:Alpha(Opacity=70);font-weight:normal}.ui-state-disabled,.ui-widget-content .ui-state-disabled,.ui-widget-header .ui-state-disabled{opacity:.35;filter:Alpha(Opacity=35);background-image:none}.ui-state-disabled 
.ui-icon{filter:Alpha(Opacity=35)}.ui-icon{width:16px;height:16px}.ui-icon,.ui-widget-content .ui-icon{background-image:url("images/ui-icons_222222_256x240.png")}.ui-widget-header .ui-icon{background-image:url("images/ui-icons_ffffff_256x240.png")}.ui-state-default .ui-icon{background-image:url("images/ui-icons_ef8c08_256x240.png")}.ui-state-hover .ui-icon,.ui-state-focus .ui-icon{background-image:url("images/ui-icons_ef8c08_256x240.png")}.ui-state-active .ui-icon{background-image:url("images/ui-icons_ef8c08_256x240.png")}.ui-state-highlight .ui-icon{background-image:url("images/ui-icons_228ef1_256x240.png")}.ui-state-error .ui-icon,.ui-state-error-text .ui-icon{background-image:url("images/ui-icons_ffd27a_256x240.png")}.ui-icon-blank{background-position:16px 16px}.ui-icon-carat-1-n{background-position:0 0}.ui-icon-carat-1-ne{background-position:-16px 0}.ui-icon-carat-1-e{background-position:-32px 0}.ui-icon-carat-1-se{background-position:-48px 0}.ui-icon-carat-1-s{background-position:-64px 0}.ui-icon-carat-1-sw{background-position:-80px 0}.ui-icon-carat-1-w{background-position:-96px 0}.ui-icon-carat-1-nw{background-position:-112px 0}.ui-icon-carat-2-n-s{background-position:-128px 0}.ui-icon-carat-2-e-w{background-position:-144px 0}.ui-icon-triangle-1-n{background-position:0 -16px}.ui-icon-triangle-1-ne{background-position:-16px -16px}.ui-icon-triangle-1-e{background-position:-32px -16px}.ui-icon-triangle-1-se{background-position:-48px -16px}.ui-icon-triangle-1-s{background-position:-64px -16px}.ui-icon-triangle-1-sw{background-position:-80px -16px}.ui-icon-triangle-1-w{background-position:-96px -16px}.ui-icon-triangle-1-nw{background-position:-112px -16px}.ui-icon-triangle-2-n-s{background-position:-128px -16px}.ui-icon-triangle-2-e-w{background-position:-144px -16px}.ui-icon-arrow-1-n{background-position:0 -32px}.ui-icon-arrow-1-ne{background-position:-16px -32px}.ui-icon-arrow-1-e{background-position:-32px -32px}.ui-icon-arrow-1-se{background-position:-48px 
-32px}.ui-icon-arrow-1-s{background-position:-64px -32px}.ui-icon-arrow-1-sw{background-position:-80px -32px}.ui-icon-arrow-1-w{background-position:-96px -32px}.ui-icon-arrow-1-nw{background-position:-112px -32px}.ui-icon-arrow-2-n-s{background-position:-128px -32px}.ui-icon-arrow-2-ne-sw{background-position:-144px -32px}.ui-icon-arrow-2-e-w{background-position:-160px -32px}.ui-icon-arrow-2-se-nw{background-position:-176px -32px}.ui-icon-arrowstop-1-n{background-position:-192px -32px}.ui-icon-arrowstop-1-e{background-position:-208px -32px}.ui-icon-arrowstop-1-s{background-position:-224px -32px}.ui-icon-arrowstop-1-w{background-position:-240px -32px}.ui-icon-arrowthick-1-n{background-position:0 -48px}.ui-icon-arrowthick-1-ne{background-position:-16px -48px}.ui-icon-arrowthick-1-e{background-position:-32px -48px}.ui-icon-arrowthick-1-se{background-position:-48px -48px}.ui-icon-arrowthick-1-s{background-position:-64px -48px}.ui-icon-arrowthick-1-sw{background-position:-80px -48px}.ui-icon-arrowthick-1-w{background-position:-96px -48px}.ui-icon-arrowthick-1-nw{background-position:-112px -48px}.ui-icon-arrowthick-2-n-s{background-position:-128px -48px}.ui-icon-arrowthick-2-ne-sw{background-position:-144px -48px}.ui-icon-arrowthick-2-e-w{background-position:-160px -48px}.ui-icon-arrowthick-2-se-nw{background-position:-176px -48px}.ui-icon-arrowthickstop-1-n{background-position:-192px -48px}.ui-icon-arrowthickstop-1-e{background-position:-208px -48px}.ui-icon-arrowthickstop-1-s{background-position:-224px -48px}.ui-icon-arrowthickstop-1-w{background-position:-240px -48px}.ui-icon-arrowreturnthick-1-w{background-position:0 -64px}.ui-icon-arrowreturnthick-1-n{background-position:-16px -64px}.ui-icon-arrowreturnthick-1-e{background-position:-32px -64px}.ui-icon-arrowreturnthick-1-s{background-position:-48px -64px}.ui-icon-arrowreturn-1-w{background-position:-64px -64px}.ui-icon-arrowreturn-1-n{background-position:-80px -64px}.ui-icon-arrowreturn-1-e{background-position:-96px 
-64px}.ui-icon-arrowreturn-1-s{background-position:-112px -64px}.ui-icon-arrowrefresh-1-w{background-position:-128px -64px}.ui-icon-arrowrefresh-1-n{background-position:-144px -64px}.ui-icon-arrowrefresh-1-e{background-position:-160px -64px}.ui-icon-arrowrefresh-1-s{background-position:-176px -64px}.ui-icon-arrow-4{background-position:0 -80px}.ui-icon-arrow-4-diag{background-position:-16px -80px}.ui-icon-extlink{background-position:-32px -80px}.ui-icon-newwin{background-position:-48px -80px}.ui-icon-refresh{background-position:-64px -80px}.ui-icon-shuffle{background-position:-80px -80px}.ui-icon-transfer-e-w{background-position:-96px -80px}.ui-icon-transferthick-e-w{background-position:-112px -80px}.ui-icon-folder-collapsed{background-position:0 -96px}.ui-icon-folder-open{background-position:-16px -96px}.ui-icon-document{background-position:-32px -96px}.ui-icon-document-b{background-position:-48px -96px}.ui-icon-note{background-position:-64px -96px}.ui-icon-mail-closed{background-position:-80px -96px}.ui-icon-mail-open{background-position:-96px -96px}.ui-icon-suitcase{background-position:-112px -96px}.ui-icon-comment{background-position:-128px -96px}.ui-icon-person{background-position:-144px -96px}.ui-icon-print{background-position:-160px -96px}.ui-icon-trash{background-position:-176px -96px}.ui-icon-locked{background-position:-192px -96px}.ui-icon-unlocked{background-position:-208px -96px}.ui-icon-bookmark{background-position:-224px -96px}.ui-icon-tag{background-position:-240px -96px}.ui-icon-home{background-position:0 -112px}.ui-icon-flag{background-position:-16px -112px}.ui-icon-calendar{background-position:-32px -112px}.ui-icon-cart{background-position:-48px -112px}.ui-icon-pencil{background-position:-64px -112px}.ui-icon-clock{background-position:-80px -112px}.ui-icon-disk{background-position:-96px -112px}.ui-icon-calculator{background-position:-112px -112px}.ui-icon-zoomin{background-position:-128px -112px}.ui-icon-zoomout{background-position:-144px 
-112px}.ui-icon-search{background-position:-160px -112px}.ui-icon-wrench{background-position:-176px -112px}.ui-icon-gear{background-position:-192px -112px}.ui-icon-heart{background-position:-208px -112px}.ui-icon-star{background-position:-224px -112px}.ui-icon-link{background-position:-240px -112px}.ui-icon-cancel{background-position:0 -128px}.ui-icon-plus{background-position:-16px -128px}.ui-icon-plusthick{background-position:-32px -128px}.ui-icon-minus{background-position:-48px -128px}.ui-icon-minusthick{background-position:-64px -128px}.ui-icon-close{background-position:-80px -128px}.ui-icon-closethick{background-position:-96px -128px}.ui-icon-key{background-position:-112px -128px}.ui-icon-lightbulb{background-position:-128px -128px}.ui-icon-scissors{background-position:-144px -128px}.ui-icon-clipboard{background-position:-160px -128px}.ui-icon-copy{background-position:-176px -128px}.ui-icon-contact{background-position:-192px -128px}.ui-icon-image{background-position:-208px -128px}.ui-icon-video{background-position:-224px -128px}.ui-icon-script{background-position:-240px -128px}.ui-icon-alert{background-position:0 -144px}.ui-icon-info{background-position:-16px -144px}.ui-icon-notice{background-position:-32px -144px}.ui-icon-help{background-position:-48px -144px}.ui-icon-check{background-position:-64px -144px}.ui-icon-bullet{background-position:-80px -144px}.ui-icon-radio-on{background-position:-96px -144px}.ui-icon-radio-off{background-position:-112px -144px}.ui-icon-pin-w{background-position:-128px -144px}.ui-icon-pin-s{background-position:-144px -144px}.ui-icon-play{background-position:0 -160px}.ui-icon-pause{background-position:-16px -160px}.ui-icon-seek-next{background-position:-32px -160px}.ui-icon-seek-prev{background-position:-48px -160px}.ui-icon-seek-end{background-position:-64px -160px}.ui-icon-seek-start{background-position:-80px -160px}.ui-icon-seek-first{background-position:-80px -160px}.ui-icon-stop{background-position:-96px 
-160px}.ui-icon-eject{background-position:-112px -160px}.ui-icon-volume-off{background-position:-128px -160px}.ui-icon-volume-on{background-position:-144px -160px}.ui-icon-power{background-position:0 -176px}.ui-icon-signal-diag{background-position:-16px -176px}.ui-icon-signal{background-position:-32px -176px}.ui-icon-battery-0{background-position:-48px -176px}.ui-icon-battery-1{background-position:-64px -176px}.ui-icon-battery-2{background-position:-80px -176px}.ui-icon-battery-3{background-position:-96px -176px}.ui-icon-circle-plus{background-position:0 -192px}.ui-icon-circle-minus{background-position:-16px -192px}.ui-icon-circle-close{background-position:-32px -192px}.ui-icon-circle-triangle-e{background-position:-48px -192px}.ui-icon-circle-triangle-s{background-position:-64px -192px}.ui-icon-circle-triangle-w{background-position:-80px -192px}.ui-icon-circle-triangle-n{background-position:-96px -192px}.ui-icon-circle-arrow-e{background-position:-112px -192px}.ui-icon-circle-arrow-s{background-position:-128px -192px}.ui-icon-circle-arrow-w{background-position:-144px -192px}.ui-icon-circle-arrow-n{background-position:-160px -192px}.ui-icon-circle-zoomin{background-position:-176px -192px}.ui-icon-circle-zoomout{background-position:-192px -192px}.ui-icon-circle-check{background-position:-208px -192px}.ui-icon-circlesmall-plus{background-position:0 -208px}.ui-icon-circlesmall-minus{background-position:-16px -208px}.ui-icon-circlesmall-close{background-position:-32px -208px}.ui-icon-squaresmall-plus{background-position:-48px -208px}.ui-icon-squaresmall-minus{background-position:-64px -208px}.ui-icon-squaresmall-close{background-position:-80px -208px}.ui-icon-grip-dotted-vertical{background-position:0 -224px}.ui-icon-grip-dotted-horizontal{background-position:-16px -224px}.ui-icon-grip-solid-vertical{background-position:-32px -224px}.ui-icon-grip-solid-horizontal{background-position:-48px -224px}.ui-icon-gripsmall-diagonal-se{background-position:-64px 
-224px}.ui-icon-grip-diagonal-se{background-position:-80px -224px}.ui-corner-all,.ui-corner-top,.ui-corner-left,.ui-corner-tl{border-top-left-radius:4px}.ui-corner-all,.ui-corner-top,.ui-corner-right,.ui-corner-tr{border-top-right-radius:4px}.ui-corner-all,.ui-corner-bottom,.ui-corner-left,.ui-corner-bl{border-bottom-left-radius:4px}.ui-corner-all,.ui-corner-bottom,.ui-corner-right,.ui-corner-br{border-bottom-right-radius:4px}.ui-widget-overlay{background:#666 url("images/ui-bg_diagonals-thick_20_666666_40x40.png") 50% 50% repeat;opacity:.5;filter:Alpha(Opacity=50)}.ui-widget-shadow{margin:-5px 0 0 -5px;padding:5px;background:#000 url("images/ui-bg_flat_10_000000_40x100.png") 50% 50% repeat-x;opacity:.2;filter:Alpha(Opacity=20);border-radius:5px} -------------------------------------------------------------------------------- /dataFrane_SQL - Databricks_files/print.css: -------------------------------------------------------------------------------- 1 | /* 2 | * Memo: 3 | * Unfortunately, we use "!important" in our non-print CSS + we 4 | * use the occasional inline style; so !important is used in 5 | * print.css to override any styles found in other prior stylesheets 6 | * (amy 7 | * 8 | * TODO: 9 | * - unfocus / unhover mouse upon hitting print button / command 10 | * - fix arbitrary variables (numbers, colors, inherited styles) 11 | * when we upgrade our CSS stylesheets to stylus/sass/etc. 12 | * - stop using !important so much when CSS is more cleaned up 13 | * 14 | * Currently known HTML printing issues: 15 | * - Neither Firefox nor Chrome (+ poss. 
others) print background colors 16 | * - Page break rules are enforced but implemented uniquely 17 | * - Chrome appears to have a hard time reading pseudo selectors on 18 | * stringed-together, dashed classes with wildcard attributes 19 | * (e.g., .ui-resizeable[style*="..."]) 20 | */ 21 | /* PRINT-SPECIFIC HACKS --------------------------------------- */ 22 | /* 23 | * Temporary fix to enable Chrome print preview, 24 | * given that Chrome cannot handle "position: fixed" 25 | */ 26 | #context-bar, 27 | div.top-menu-buttons, 28 | div.globalComments, 29 | div.commentsPane, 30 | .tb-title-icons, 31 | #topbar, 32 | #user-menu { 33 | position: relative !important; 34 | } 35 | #help-menu { 36 | position: relative !important; 37 | } 38 | /* PAGE DESIGN ELEMENTS --------------------------------------- 39 | * - align command prompts with title 40 | * - left-align title in plain font style 41 | * - conform to document-like style guidelines 42 | * (white legal paper style, professional, clean) 43 | */ 44 | div.databricks-notification-wrapper { 45 | display: none; 46 | } 47 | /* display title, move content down from it */ 48 | #topbar { 49 | display: block !important; 50 | height: auto !important; 51 | } 52 | /* align full title and position content underneath */ 53 | .tb-button { 54 | display: none !important; 55 | } 56 | .tb-title-wrapper { 57 | text-align: left !important; 58 | margin: 0; 59 | left: 0; 60 | white-space: initial; 61 | position: relative; 62 | } 63 | .tb-title-lang { 64 | display: none !important; 65 | } 66 | #overallView { 67 | /* align content left despite filebrowser popping up */ 68 | left: 0 !important; 69 | overflow: visible !important; 70 | } 71 | div#sidebar { 72 | display: none; 73 | } 74 | button.feedback-btn { 75 | display: none; 76 | } 77 | div#debugHelper { 78 | display: none; 79 | } 80 | div.dashboard-edit-view div.dashboard-layout-view-wrapper { 81 | width: 850px; 82 | } 83 | div.dashboard-edit-view div.widget { 84 | width: 800px !important; 85 
| } 86 | div.dashboard-edit-view div.dashboard-side-menu { 87 | display: none; 88 | } 89 | #content { 90 | display: block !important; 91 | top: 0 !important; 92 | left: 0 !important; 93 | } 94 | /* show full command result stat 95 | (e.g., "Command took 0.5s -- by admin at ...") */ 96 | .command-provenance { 97 | visibility: visible !important; 98 | opacity: 1 !important; 99 | } 100 | /* hide web-specific UI components --------------- */ 101 | /* large web-specific nav components */ 102 | #context-bar, 103 | div.top-menu-buttons, 104 | div.globalComments, 105 | div.commentsPane, 106 | .tb-title-icons, 107 | #tooltip, 108 | #filebrowser-popup, 109 | #searchpanel-popup, 110 | #user-menu { 111 | display: none !important; 112 | } 113 | /* icons, select menus and buttons */ 114 | .submit-hint, 115 | a[class*="-button"], 116 | i[class*="icon-"], 117 | select { 118 | display: none !important; 119 | } 120 | .results .plot-controls { 121 | display: none !important; 122 | } 123 | /* i[class*="icon-"] exceptions */ 124 | a[class="cancel-query-button"] { 125 | display: block !important; 126 | } 127 | /* spinners, connection status */ 128 | .tb-status, 129 | .tb-status img { 130 | display: none !important; 131 | } 132 | .command-result .spinner .spinner-wrapper { 133 | /* hide spinner icon, but keep "cancel" text */ 134 | display: none !important; 135 | } 136 | /* scrollbars */ 137 | html, 138 | body, 139 | .shellSessionView { 140 | overflow: hidden !important; 141 | } 142 | /* empty content */ 143 | textarea:empty { 144 | display: none !important; 145 | } 146 | div.primaryPrompt { 147 | display: none !important; 148 | } 149 | /* dividers, borders */ 150 | div.divider { 151 | display: none !important; 152 | } 153 | .command { 154 | border: none !important; 155 | } 156 | /* input prompts, placeholder text */ 157 | *::-webkit-input-placeholder { 158 | /* WebKit browsers */ 159 | color: tranparent; 160 | } 161 | *:-moz-placeholder { 162 | /* Mozilla Firefox 4 to 18 */ 163 | color: 
tranparent; 164 | } 165 | *::-moz-placeholder { 166 | /* Mozilla Firefox 19+ */ 167 | color: tranparent; 168 | } 169 | *:-ms-input-placeholder { 170 | /* Internet Explorer 10+ */ 171 | color: tranparent; 172 | } 173 | /* CSS effects (hover, focus, etc) --------------------------- */ 174 | div.divider:hover { 175 | display: none !important; 176 | } 177 | div.divider:hover .btn-circle { 178 | display: none !important; 179 | } 180 | .command:hover, 181 | .command:active { 182 | border: none !important; 183 | box-shadow: none !important; 184 | } 185 | /* 186 | * PAGE PROPERTIES ------------------------------------------- 187 | * Ala MS Word, give page margin, counting existing spaces. 188 | * Fit images to page, horizontally 189 | */ 190 | @page { 191 | /* allow space from possible headers & footers */ 192 | margin: 2cm; 193 | } 194 | body { 195 | max-width: 100% !important; 196 | } 197 | .ui-resizable { 198 | max-height: 100%; 199 | max-width: 100%; 200 | } 201 | /* Chrome fix for autosizing figures. 202 | * If width and height are auto-set, Chrome Print will size them 203 | * too small, whereas Firefox Print will size them appropriately! 
*/ 204 | .figure-results-and-comments .widget[style*="width: auto; height: auto"] { 205 | /* Chrome appears to have a hard time reading stringed-together, dashed 206 | * class names with pseudo selectors (e.g., .ui-resizeable[style*="..."]) */ 207 | width: 100% !important; 208 | min-height: 300px; 209 | /* this also needs to be set for height, or figure still appears small */ 210 | } 211 | /* do not add the min-height: 300px; of white space if just an input widget */ 212 | .widget.widget-input { 213 | min-height: initial !important; 214 | } 215 | /* Following CSS for autosizing figures better only works for firefox 216 | (but not for Chrome) */ 217 | @-moz-document url-prefix() { 218 | .ui-resizable { 219 | width: auto !important; 220 | height: auto !important; 221 | max-width: 100%; 222 | } 223 | } 224 | /* page break rules ------------------------------------------ */ 225 | img, 226 | .figure, 227 | .ui-resizeable { 228 | page-break-inside: avoid; 229 | } 230 | .previousPrompt { 231 | page-break-inside: avoid; 232 | } 233 | .command .command-input .command-box { 234 | page-break-before: avoid; 235 | } 236 | .results-and-comments { 237 | page-break-before: avoid; 238 | } 239 | .results-and-comments.figure-results-and-comments { 240 | page-break-before: auto; 241 | page-break-inside: avoid; 242 | } 243 | .command .ui-resizeable { 244 | page-break-inside: avoid; 245 | } 246 | /* TABLE PROPERTIES ------------------------------------------- */ 247 | .table-bordered { 248 | border: 1px solid #ddd; 249 | } 250 | .results .table thead, 251 | .table thead { 252 | background: #fafafa; 253 | } 254 | /* 255 | * Printing quirk with overflow property 256 | * - prevents table overflows but also truncates chart bottoms 257 | */ 258 | .command-result-wrapper .results { 259 | overflow: hidden; 260 | } 261 | .results-table .inner { 262 | overflow: hidden; 263 | } 264 | #content { 265 | overflow-y: visible; 266 | } 267 | /* 268 | * Last command view in session pane is truncated by a 
consistent margin 269 | * within print media ONLY (despite applying same CSS for screen media 270 | */ 271 | .sessionPane .shell-top > div:last-child .results { 272 | padding-bottom: 50px; 273 | overflow: visible; 274 | } 275 | -------------------------------------------------------------------------------- /dataFrane_SQL - Databricks_files/source_code_pro.css: -------------------------------------------------------------------------------- 1 | /* latin-ext */ 2 | @font-face { 3 | font-family: 'Source Code Pro'; 4 | font-style: normal; 5 | font-weight: 400; 6 | src: local('Source Code Pro'), local('SourceCodePro-Regular'), url('../../fonts/source_code_pro/mrl8jkM18OlOQN8JLgasD4a1YDtoarzwSXxTHggEXMw.woff2') format('woff2'); 7 | unicode-range: U+0100-024F, U+1E00-1EFF, U+20A0-20AB, U+20AD-20CF, U+2C60-2C7F, U+A720-A7FF; 8 | } 9 | /* latin */ 10 | @font-face { 11 | font-family: 'Source Code Pro'; 12 | font-style: normal; 13 | font-weight: 400; 14 | src: local('Source Code Pro'), local('SourceCodePro-Regular'), url('../../fonts/source_code_pro/mrl8jkM18OlOQN8JLgasD5bPFduIYtoLzwST68uhz_Y.woff2') format('woff2'); 15 | unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02C6, U+02DA, U+02DC, U+2000-206F, U+2074, U+20AC, U+2212, U+2215; 16 | } 17 | /* latin-ext */ 18 | @font-face { 19 | font-family: 'Source Code Pro'; 20 | font-style: normal; 21 | font-weight: 700; 22 | src: local('Source Code Pro Bold'), local('SourceCodePro-Bold'), url('../../fonts/source_code_pro/leqv3v-yTsJNC7nFznSMqe4s1Ux4PuImWPk5fSr6HPL3rGVtsTkPsbDajuO5ueQw.woff2') format('woff2'); 23 | unicode-range: U+0100-024F, U+1E00-1EFF, U+20A0-20AB, U+20AD-20CF, U+2C60-2C7F, U+A720-A7FF; 24 | } 25 | /* latin */ 26 | @font-face { 27 | font-family: 'Source Code Pro'; 28 | font-style: normal; 29 | font-weight: 700; 30 | src: local('Source Code Pro Bold'), local('SourceCodePro-Bold'), url('../../fonts/source_code_pro/leqv3v-yTsJNC7nFznSMqZkF8H8ye47wsfpWywda8og.woff2') format('woff2'); 31 | unicode-range: 
U+0000-00FF, U+0131, U+0152-0153, U+02C6, U+02DA, U+02DC, U+2000-206F, U+2074, U+20AC, U+2212, U+2215; 32 | } 33 | -------------------------------------------------------------------------------- /dataFrane_SQL - Databricks_files/spinner.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/dataFrane_SQL - Databricks_files/spinner.gif -------------------------------------------------------------------------------- /dataFrane_SQL - Databricks_files/spinner.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /dataFrane_SQL.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"code","source":["df_ctry = spark.read.option(\"inferSchema\",\"true\").option(\"header\",\"true\").csv(\"/FileStore/tables/ctry_summary.csv\")"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["df_ctry.createOrReplaceTempView(\"ctry_summary\")"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"code","source":["spark.sql(\"select * from ctry_summary\").show()"],"metadata":{},"outputs":[],"execution_count":3},{"cell_type":"code","source":["df_sort = spark.sql(\"select DEST_COUNTRY_NAME,count(1) as cnt from ctry_summary group by DEST_COUNTRY_NAME sort by cnt\")\n#df_sort.show()\ndf_sort.explain()"],"metadata":{},"outputs":[],"execution_count":4},{"cell_type":"code","source":["df_ctry.groupBy(\"DEST_COUNTRY_NAME\").count().explain()\n#execution plan is same as 
SQL"],"metadata":{},"outputs":[],"execution_count":5},{"cell_type":"code","source":["from pyspark.sql.functions import max \ndf_ctry.select(max(\"count\")).explain()\nspark.sql(\"select max(count) from ctry_summary\").explain()"],"metadata":{},"outputs":[],"execution_count":6},{"cell_type":"code","source":["df_people_flow = spark.read.option(\"inferSchema\",\"true\").option(\"header\",\"true\").csv(\"/FileStore/tables/global_flow_of_people.csv\")"],"metadata":{},"outputs":[],"execution_count":7},{"cell_type":"code","source":["df_people_flow.printSchema()\ndf_people_flow.createOrReplaceTempView(\"people_flow\")"],"metadata":{},"outputs":[],"execution_count":8},{"cell_type":"code","source":["df_sql = spark.sql(\"\"\"\n SELECT country_dest, sum(regionflow_1990) as destination_total\nFROM people_flow\nGROUP BY country_dest\nORDER BY sum(regionflow_1990) DESC\nLIMIT 5\n \"\"\")\ndf_sql.explain()"],"metadata":{},"outputs":[],"execution_count":9},{"cell_type":"code","source":["#withColumnRenamed\nfrom pyspark.sql.functions import desc\ndf_new = df_people_flow.groupBy(\"country_dest\")\\\n.sum(\"regionflow_1990\")\\\n.withColumnRenamed(\"sum(regionflow_1990)\", \"destination_total\")\\\n.sort(desc(\"destination_total\"))\\\n.limit(5)\n\ndf_new.explain()"],"metadata":{},"outputs":[],"execution_count":10},{"cell_type":"code","source":["df_sql.collect()"],"metadata":{},"outputs":[],"execution_count":11},{"cell_type":"code","source":["df_new.collect()"],"metadata":{},"outputs":[],"execution_count":12},{"cell_type":"code","source":["#commutative vs associative "],"metadata":{},"outputs":[],"execution_count":13}],"metadata":{"name":"dataFrane_SQL","notebookId":3023601934734134},"nbformat":4,"nbformat_minor":0} 2 | -------------------------------------------------------------------------------- /dataframe_API.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"code","source":["df = 
spark.createDataFrame([[\"A\",10],[\"B\",20]])\ndf.show()"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["#None = null\ndf_2 = spark.createDataFrame([{\"ID\":\"A\",\"NO\":10},{\"ID\":\"B\",\"NO\":20},{\"ID\":None,\"NO\":30}])\ndf_2.show()"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"code","source":["#eqNullSafe\nfrom pyspark.sql.functions import col\ndf_2.where(col('ID').eqNullSafe(\"hello\")).show()"],"metadata":{},"outputs":[],"execution_count":3},{"cell_type":"code","source":["#lit\nfrom pyspark.sql.functions import lit\ndf_2.select(lit(5),lit(\"sda\")).show()"],"metadata":{},"outputs":[],"execution_count":4},{"cell_type":"code","source":["#Converting to Spark Types\n"],"metadata":{},"outputs":[],"execution_count":5},{"cell_type":"code","source":["#Working with Booleans\n"],"metadata":{},"outputs":[],"execution_count":6},{"cell_type":"code","source":["#Working with Numbers\n"],"metadata":{},"outputs":[],"execution_count":7},{"cell_type":"code","source":["#Working with Strings\n\nfrom pyspark.sql.functions import initcap, lower\ndf_2.select(initcap(\"ID\")).show()\ndf_2.select(lower(col(\"ID\"))).show()\ndf_2.withColumn(\"newCol\", lower(col(\"ID\"))).select(\"newCol\").show()"],"metadata":{},"outputs":[],"execution_count":8},{"cell_type":"code","source":["#Regular Expressions\nfrom pyspark.sql.functions import regexp_replace\ndf_2.select(regexp_replace(col(\"ID\"),\"A\",\"AAA\")).show()"],"metadata":{},"outputs":[],"execution_count":9},{"cell_type":"code","source":["#Working with Dates and Timestamps\nfrom pyspark.sql.functions import current_date, current_timestamp\ndf_3 = spark.range(5).withColumn(\"today\",current_date()).withColumn(\"now\", current_timestamp())\ndf_3.show()"],"metadata":{},"outputs":[],"execution_count":10},{"cell_type":"code","source":["from pyspark.sql.functions import date_add , date_sub, datediff, months_between, 
to_date\ndf_3.select(date_sub(col(\"today\"),5)).show()\ndf_3.select(to_date(lit(\"2017-12-01\")).alias(\"start\"), to_date(lit(\"2017-01-01\")).alias(\"end\")).select(months_between(col(\"start\"),col(\"end\"))).show()"],"metadata":{},"outputs":[],"execution_count":11},{"cell_type":"code","source":["#Working with Nulls in Data\n#coalesce\nfrom pyspark.sql.functions import coalesce \ndf_2.select(coalesce(col(\"NO\"),col(\"ID\"))).show()\n\ndf_2.select(coalesce(col(\"ID\"),col(\"NO\"))).show()"],"metadata":{},"outputs":[],"execution_count":12},{"cell_type":"code","source":["#from pyspark.sql.functions import nvl, nvl2 # nullif, ifnull, \ndf_2.createOrReplaceTempView(\"tmpTbl\")\nspark.sql(\"select nvl(ID,'AAA') from tmpTbl\").show()"],"metadata":{},"outputs":[],"execution_count":13},{"cell_type":"code","source":["df_2.na.drop(\"any\").show()"],"metadata":{},"outputs":[],"execution_count":14},{"cell_type":"code","source":["#Working with Complex Types\n"],"metadata":{},"outputs":[],"execution_count":15}],"metadata":{"name":"dataframe_API","notebookId":2162637923774473},"nbformat":4,"nbformat_minor":0} 2 | -------------------------------------------------------------------------------- /pics/Spark2_Structured_Streaming_myLearning_infoGraphics.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/Spark2_Structured_Streaming_myLearning_infoGraphics.jpg -------------------------------------------------------------------------------- /pics/Spark2_myLearning_inforGraphics.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/Spark2_myLearning_inforGraphics.jpg -------------------------------------------------------------------------------- 
/pics/dag_crossJoin.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/dag_crossJoin.JPG -------------------------------------------------------------------------------- /pics/dag_crossJoin_2.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/dag_crossJoin_2.JPG -------------------------------------------------------------------------------- /pics/dag_filter.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/dag_filter.JPG -------------------------------------------------------------------------------- /pics/dag_filter_2.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/dag_filter_2.JPG -------------------------------------------------------------------------------- /pics/dag_groupBy.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/dag_groupBy.JPG -------------------------------------------------------------------------------- /pics/dag_innerJoin.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/dag_innerJoin.JPG 
-------------------------------------------------------------------------------- /pics/dag_innerJoin_2.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/dag_innerJoin_2.JPG -------------------------------------------------------------------------------- /pics/dag_leftJoin.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/dag_leftJoin.JPG -------------------------------------------------------------------------------- /pics/dag_sort.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/dag_sort.JPG -------------------------------------------------------------------------------- /pics/dag_sort_2.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/dag_sort_2.JPG -------------------------------------------------------------------------------- /pics/explain_plan.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/explain_plan.jpg -------------------------------------------------------------------------------- /pics/explain_plan_detail.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/explain_plan_detail.jpg -------------------------------------------------------------------------------- /pics/explain_plan_groupBy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/explain_plan_groupBy.jpg -------------------------------------------------------------------------------- /pics/explain_plan_joinHint.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/explain_plan_joinHint.jpg -------------------------------------------------------------------------------- /pics/explain_plan_joins.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/explain_plan_joins.jpg -------------------------------------------------------------------------------- /pics/explain_plan_physical.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/explain_plan_physical.jpg -------------------------------------------------------------------------------- /pics/explain_plan_sql_vs_dataFrame.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/explain_plan_sql_vs_dataFrame.jpg 
-------------------------------------------------------------------------------- /pics/partition_rePartition.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/partition_rePartition.jpg -------------------------------------------------------------------------------- /pics/queryOptimization_hint.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/queryOptimization_hint.jpg -------------------------------------------------------------------------------- /pics/spark-yarn-client.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/spark-yarn-client.png -------------------------------------------------------------------------------- /pics/spark-yarn-table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/spark-yarn-table.png -------------------------------------------------------------------------------- /pics/spark_cluster.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/spark_cluster.png -------------------------------------------------------------------------------- /pics/spark_local.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/spark_local.png -------------------------------------------------------------------------------- /pics/streaming_1.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/streaming_1.JPG -------------------------------------------------------------------------------- /pics/streaming_2.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/streaming_2.JPG -------------------------------------------------------------------------------- /pics/streaming_3.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/streaming_3.JPG -------------------------------------------------------------------------------- /pics/streaming_steps_to_start_pipeline.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/streaming_steps_to_start_pipeline.JPG -------------------------------------------------------------------------------- /pics/streaming_steps_to_start_pipeline_1.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/streaming_steps_to_start_pipeline_1.JPG 
-------------------------------------------------------------------------------- /pics/streaming_steps_to_start_pipeline_2.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/streaming_steps_to_start_pipeline_2.JPG -------------------------------------------------------------------------------- /pics/streaming_steps_to_start_pipeline_3.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/streaming_steps_to_start_pipeline_3.JPG -------------------------------------------------------------------------------- /pics/streaming_windowing.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/streaming_windowing.JPG -------------------------------------------------------------------------------- /pics/streaming_windowing_lateEvents.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/streaming_windowing_lateEvents.JPG -------------------------------------------------------------------------------- /pics/streaming_windowing_steps.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/streaming_windowing_steps.JPG -------------------------------------------------------------------------------- /pics/streaming_windowing_watermarking_append.JPG: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/streaming_windowing_watermarking_append.JPG -------------------------------------------------------------------------------- /pics/streaming_windowing_watermarking_update.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/streaming_windowing_watermarking_update.JPG -------------------------------------------------------------------------------- /pics/transformations_narrowVSwide.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/transformations_narrowVSwide.jpg -------------------------------------------------------------------------------- /pics/yarn-cluster.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivek-bombatkar/Spark-with-Python---My-learning-notes-/ed7290c5e95066687d5e827624a6fcb8b5e468b6/pics/yarn-cluster.png -------------------------------------------------------------------------------- /sparkStreaming.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"code","source":["spark.sql(\"show schemas\").show()"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["dfStatic = spark.read\\\n.format(\"csv\")\\\n.option(\"header\",\"true\")\\\n.option(\"inferSchema\",\"true\")\\\n.load(\"/FileStore/tables/retailDate.csv\")\n\nstaticSchema = dfStatic.schema\nstaticSchema"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"code","source":["#1. 
create stream DF\ndfStream = spark.readStream\\\n.option(\"masFilesPerTrigger\", 1)\\\n.format(\"csv\")\\\n.option(\"header\",\"true\")\\\n.schema(staticSchema)\\\n.load(\"/FileStore/tables/*.csv\")\\"],"metadata":{},"outputs":[],"execution_count":3},{"cell_type":"code","source":["dfStream.isStreaming"],"metadata":{},"outputs":[],"execution_count":4},{"cell_type":"code","source":["#2. some code to process streaming data\nfrom pyspark.sql.functions import col, window\ndfNew = dfStream.selectExpr(\"CustomerID\"\n , \"(UnitPrice * Quantity) as total_cost\"\n ,\"InvoiceDate\")\\\n .groupBy( col(\"CustomerID\"), window(col(\"InvoiceDate\"),\"1 day\"))\\\n .sum(\"total_cost\")\n "],"metadata":{},"outputs":[],"execution_count":5},{"cell_type":"code","source":["#3. start writing stream to memory \ndfNew.writeStream\\\n.format(\"memory\")\\\n.queryName(\"test\")\\\n.outputMode(\"complete\")\\\n.start()"],"metadata":{},"outputs":[],"execution_count":6},{"cell_type":"code","source":["#4. read from memory\nspark.sql(\"select * from test\").show()"],"metadata":{},"outputs":[],"execution_count":7},{"cell_type":"code","source":["dfNew.writeStream\\\n.format(\"console\")\\\n.queryName(\"test_2\")\\\n.outputMode(\"complete\")\\\n.start()\n\n\n#Neither of these streaming methods should be used in production \n"],"metadata":{},"outputs":[],"execution_count":8}],"metadata":{"name":"sparkStreaming","notebookId":863376660785174},"nbformat":4,"nbformat_minor":0} 2 | -------------------------------------------------------------------------------- /sparkStreaming_basic_1.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"code","source":["socketDF = spark \\\n .readStream \\\n .format(\"socket\") \\\n .option(\"host\", \"localhost\") \\\n .option(\"port\", 9999) \\\n .load()"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["socketDF.isStreaming\ntmpSchema = socketDF.schema\ntmpSchema 
"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"code","source":["csvDF = spark.readStream\\\n.option(\"sep\", \" \")\\\n.schema(tmpSchema )\\\n.csv(\"/FileStore/tables/\")"],"metadata":{},"outputs":[],"execution_count":3},{"cell_type":"code","source":["from pyspark.sql.functions import explode, split\nwords = csvDF.select(explode(split(csvDF.value, \" \")).alias(\"word\"))"],"metadata":{},"outputs":[],"execution_count":4},{"cell_type":"code","source":["wordCounts = words.groupBy(\"word\").count()\nprint wordCounts"],"metadata":{},"outputs":[],"execution_count":5},{"cell_type":"code","source":["#wordCounts.writeStream\\\n#.outputMode(\"complete\")\\\n#.format(\"console\")\\\n#.start()\nrunningStream = wordCounts.writeStream\\\n.format(\"memory\")\\\n.queryName(\"myResult\")\\\n.outputMode(\"complete\")\\\n.start()"],"metadata":{},"outputs":[],"execution_count":6},{"cell_type":"code","source":["dfRes = spark.sql(\"select * from myResult\")"],"metadata":{},"outputs":[],"execution_count":7},{"cell_type":"code","source":["dfRes.write.format(\"com.databricks.spark.csv\").save(\"/FileStore/tables/result.csv\")"],"metadata":{},"outputs":[],"execution_count":8}],"metadata":{"name":"sparkStreaming_basic_1","notebookId":863376660785188},"nbformat":4,"nbformat_minor":0} 2 | -------------------------------------------------------------------------------- /sparkStreaming_basic_2.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"code","source":["#https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html\n#https://databricks.com/blog/2017/02/23/working-complex-data-formats-structured-streaming-apache-spark-2-1.html\n#https://databricks.com/blog/2017/01/19/real-time-streaming-etl-structured-streaming-apache-spark-2-1.html\n\n#The key idea in Structured Streaming is to treat a live data stream as a table that is being continuously 
appended"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["#3. start writing stream to memory \n#dfNew.writeStream\\\n#.format(\"memory\")\\\n#.queryName(\"test\")\\\n#.outputMode(\"complete\")\\\n#.start()"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"code","source":["#The “Output” is defined as what gets written out to the external storage.\n#Complete Mode - The entire updated Result Table will be written to the external storage. \n#Append Mode - Only the new rows appended in the Result Table since the last trigger will be written to the external storage.\n#Update Mode - Only the rows that were updated in the Result Table since the last trigger will be written to the external storage "],"metadata":{},"outputs":[],"execution_count":3},{"cell_type":"code","source":["#Handling Event-time and Late Data\n"],"metadata":{},"outputs":[],"execution_count":4},{"cell_type":"code","source":["#Input Sources\n# - File source, - Kafka source, - Socket source , - Rate source\n"],"metadata":{},"outputs":[],"execution_count":5},{"cell_type":"code","source":["#Schema inference and partition of streaming DataFrames/Datasets\n"],"metadata":{},"outputs":[],"execution_count":6},{"cell_type":"code","source":["socketDF = spark \\\n .readStream \\\n .format(\"socket\") \\\n .option(\"host\", \"localhost\") \\\n .option(\"port\", 9999) \\\n .load()\n\nsocketDF.isStreaming # Returns True for DataFrames that have streaming sources\n\nuserSchema = socketDF.schema\n\n# Read all the csv files written atomically in a directory\n#userSchema = StructType().add(\"name\", \"string\").add(\"age\", \"integer\")\ncsvDF = spark \\\n .readStream \\\n .option(\"sep\", \" \") \\\n .schema(userSchema) \\\n .csv(\"/FileStore/tables/\")\n # Equivalent to format(\"csv\").load(\"/path/to/directory\")"],"metadata":{},"outputs":[],"execution_count":7},{"cell_type":"code","source":["runningStream = 
csvDF.writeStream\\\n.format(\"memory\")\\\n.queryName(\"myResult\")\\\n.outputMode(\"update\")\\\n.start()\n#ERROR\n#u'Complete output mode not supported when there are no streaming aggregations on streaming DataFrames/Datasets;;\\nFileSource[/FileStore/tables/]'.start()"],"metadata":{},"outputs":[],"execution_count":8},{"cell_type":"code","source":["runningStream.stop()"],"metadata":{},"outputs":[],"execution_count":9},{"cell_type":"code","source":["#Schema inference and partition of streaming DataFrames/Datasets\n"],"metadata":{},"outputs":[],"execution_count":10},{"cell_type":"code","source":["#Window Operations on Event Time\nfrom pyspark.sql.functions import window\n#userSchema = StructType().add(\"name\", \"string\").add(\"age\", \"integer\")\n\n# groupBy aggregation in every 2-minute window, sliding every 1 minute\n\nwindowedCount = csvDF.groupBy(window(csvDF[0], \"2 minutes\", \"1 minutes\"), csvDF[0] ).count()\n"],"metadata":{},"outputs":[],"execution_count":11},{"cell_type":"code","source":["runningStreamWindow = windowedCount.writeStream\\\n.format(\"memory\")\\\n.queryName(\"myResult\")\\\n.outputMode(\"complete\")\\\n.start()"],"metadata":{},"outputs":[],"execution_count":12},{"cell_type":"code","source":["runningStreamWindow.stop()"],"metadata":{},"outputs":[],"execution_count":13},{"cell_type":"code","source":["#Handling Late Data and Watermarking\n#Now consider what happens if one of the events arrives late to the application. For example, say, a word generated at 12:04 (i.e. event time) could be received by the application at 12:11. The application should use the time 12:04 instead of 12:11 to update the older counts for the window 12:00 - 12:10. 
\n#To enable this, in Spark 2.1, we have introduced watermarking, which lets the engine automatically track the current event time in the data and attempt to clean up old state accordingly.\n\nrunningStreamWindowWatermark = csvDF.withWatermark(\"timestamp\", \"5 minutes\")\\\n.groupBy(window(csvDF[0], \"2 minutes\",\"1 minutes\")).count()"],"metadata":{},"outputs":[],"execution_count":14}],"metadata":{"name":"sparkStreaming_basic_2","notebookId":3388231702265442},"nbformat":4,"nbformat_minor":0} 2 | -------------------------------------------------------------------------------- /spark_explain_plan.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"code","source":["from pyspark.sql import Row\ntmp_data = [(100,\"aaa\"),(120,\"bbb\"),(150,\"ccc\"),(200,\"aaa\"),(220,\"aaa\")]\ntmp_row = Row(\"id\",\"name\")\nrdd_row = sc.parallelize(tmp_data)\nrdd_schema = rdd_row.map(lambda x: tmp_row(*x))\n\ndf_employee = spark.createDataFrame(rdd_schema)\ndf_employee.show()\n#df_emloyee = spark.createDataFrame([{\"ID\":100, \"name\":\"abc\"}])"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["from pyspark.sql.functions import col\ndf_employee.filter(col(\"name\") == \"aaa\").show()\ndf_employee.filter(col(\"name\") == \"aaa\").explain()"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"code","source":["df_employee.filter(col(\"name\") == \"aaa\").explain(True)"],"metadata":{},"outputs":[],"execution_count":3},{"cell_type":"code","source":["df_employee.createOrReplaceTempView(\"vw_employee\")"],"metadata":{},"outputs":[],"execution_count":4},{"cell_type":"code","source":["spark.sql(\"select * from vw_employee where name = 'aaa'\").show()"],"metadata":{},"outputs":[],"execution_count":5},{"cell_type":"code","source":["spark.sql(\"select * from vw_employee where name = 
'aaa'\").explain(True)"],"metadata":{},"outputs":[],"execution_count":6},{"cell_type":"code","source":["tmp_list_dept = [(100,\"finance\"),(120,\"marketing\")]\ntop_row_dept = Row(\"id\",\"dept_name\")\ntmp_rdd_dept = sc.parallelize(tmp_list_dept)\ntmp_rdd_dept_schema = tmp_rdd_dept.map(lambda x: top_row_dept(*x)) \ndf_dept = spark.createDataFrame(tmp_rdd_dept_schema)\ndf_dept.show()"],"metadata":{},"outputs":[],"execution_count":7},{"cell_type":"code","source":["df_employee.join(df_dept, df_dept.id == df_employee.id,\"inner\").show()"],"metadata":{},"outputs":[],"execution_count":8},{"cell_type":"code","source":["df_employee.join(df_dept, df_dept.id == df_employee.id,\"inner\").explain(True)"],"metadata":{},"outputs":[],"execution_count":9},{"cell_type":"code","source":["df_dept.createOrReplaceTempView(\"vw_dept\")"],"metadata":{},"outputs":[],"execution_count":10},{"cell_type":"code","source":["spark.sql(\"select * from vw_employee join vw_dept on vw_employee.id = vw_dept.id \").show()"],"metadata":{},"outputs":[],"execution_count":11},{"cell_type":"code","source":["spark.sql(\"select * from vw_employee join vw_dept on vw_employee.id = vw_dept.id \").explain(True)"],"metadata":{},"outputs":[],"execution_count":12},{"cell_type":"code","source":["df_employee.join(df_dept.hint(\"broadcast\"),\"ID\",\"left\").show()"],"metadata":{},"outputs":[],"execution_count":13},{"cell_type":"code","source":["df_employee.join(df_dept,\"id\",\"left\").explain()"],"metadata":{},"outputs":[],"execution_count":14},{"cell_type":"code","source":["df_employee.join(df_dept.hint(\"broadcast\"),\"ID\",\"left\").explain()"],"metadata":{},"outputs":[],"execution_count":15},{"cell_type":"code","source":["from pyspark.sql import Row\ntmp_data = [(100,\"aaa\"),(120,\"bbb\"),(150,\"ccc\"),(200,\"aaa\"),(220,\"aaa\")]\ntmp_row = Row(\"id\",\"name\")\nrdd_row = sc.parallelize(tmp_data)\nrdd_schema = rdd_row.map(lambda x: tmp_row(*x))\n\ndf_employee = 
spark.createDataFrame(rdd_schema)\ndf_employee.show()"],"metadata":{},"outputs":[],"execution_count":16},{"cell_type":"code","source":["#group by\ndf_employee.groupBy(\"name\").count().sort(\"count\").show()\ndf_employee.groupBy(\"name\").count().sort(\"count\").explain()"],"metadata":{},"outputs":[],"execution_count":17},{"cell_type":"code","source":["from pyspark.sql import Row\ntmp_data = [(100,\"aaa\",5000),(120,\"bbb\",2000),(150,\"ccc\",5000),(200,\"aaa\",1000),(220,\"aaa\",3000)]\ntmp_row = Row(\"id\",\"name\",\"salary\")\nrdd_row = sc.parallelize(tmp_data)\nrdd_schema = rdd_row.map(lambda x: tmp_row(*x))\n\ndf_employee = spark.createDataFrame(rdd_schema)\ndf_employee.show()"],"metadata":{},"outputs":[],"execution_count":18},{"cell_type":"code","source":["df_employee.groupBy(\"name\").sum(\"salary\").show()\ndf_employee.groupBy(\"name\").sum(\"salary\").explain()"],"metadata":{},"outputs":[],"execution_count":19},{"cell_type":"code","source":["#Catalyst supports both rule-based and cost-based optimization.\n#1. Cost-based optimizer (CBO): if a SQL query can be executed in 2 different ways (e.g. path 1 and path 2 for the same query), the CBO calculates the cost of each path, determines which path has the lower cost of execution, and then executes that path so that it can optimize the query execution.\n\n#2. Rule-based optimizer (RBO): this basically follows the rules which are needed for executing a query. 
So depending on the number of rules which are to be applied, the optimizer runs the query."],"metadata":{},"outputs":[],"execution_count":20},{"cell_type":"code","source":["df_employee.select(\"name\",\"id\").explain()\ndf_employee.select(\"name\",\"id\").limit(2).explain()"],"metadata":{},"outputs":[],"execution_count":21}],"metadata":{"name":"spark_explain_plan","notebookId":441836281736275},"nbformat":4,"nbformat_minor":0} 2 | -------------------------------------------------------------------------------- /streaming_basics.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"code","source":["df = spark.read.format('csv').option('header','true').option('inferSchema','true').load('/FileStore/tables/retail_data.csv')\ndf.show()"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["from pyspark.sql.functions import window, column, desc, col\ndf.selectExpr(\"customerId\" ,\"(UnitPrice * Quantity) as total_cost\",\"InvoiceDate\").groupBy(\"customerId\", window(col(\"InvoiceDate\"),\"1 day\") ).sum(\"total_cost\").show()"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"code","source":["reatilData_schema = df.schema\nreatilData_schema"],"metadata":{},"outputs":[],"execution_count":3},{"cell_type":"code","source":["stream_df = spark.readStream.schema(reatilData_schema)\\\n.option(\"maxFilesPerTrigger\",1)\\\n.format(\"csv\")\\\n.option(\"header\",\"true\")\\\n.load('/FileStore/tables/retail_data.csv')\n\nstream_df.isStreaming\n#stream_df.show()"],"metadata":{},"outputs":[],"execution_count":4},{"cell_type":"code","source":["# still a lazy operation\nfrom pyspark.sql.functions import window, column, desc, col\ndf_perHour = stream_df.selectExpr(\"customerId\" ,\"(UnitPrice * Quantity) as total_cost\",\"InvoiceDate\").groupBy(\"customerId\", window(col(\"InvoiceDate\"),\"1 day\") 
).sum(\"total_cost\")"],"metadata":{},"outputs":[],"execution_count":5},{"cell_type":"code","source":["spark.conf.set(\"spark.sql.shuffle.partitions\",\"5\")"],"metadata":{},"outputs":[],"execution_count":6},{"cell_type":"code","source":["df_perHour.writeStream.format(\"memory\")\\\n.queryName(\"abc\")\\\n.outputMode(\"complete\")\\\n.start()\n#http://spark.apache.org/docs/latest/streaming-programming-guide.html#checkpointing"],"metadata":{},"outputs":[],"execution_count":7},{"cell_type":"code","source":["spark.sql(\"select * from abc\").show()"],"metadata":{},"outputs":[],"execution_count":8},{"cell_type":"code","source":["df_perHour.writeStream.format(\"console\")\\\n.queryName(\"abc\")\\\n.outputMode(\"complete\")\\\n.start()"],"metadata":{},"outputs":[],"execution_count":9}],"metadata":{"name":"streaming_basics","notebookId":749342988544251},"nbformat":4,"nbformat_minor":0} 2 | -------------------------------------------------------------------------------- /test_1.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"code","source":["spark.sql(\"show databases\").show()"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["print \"hello spark\""],"metadata":{},"outputs":[],"execution_count":2}],"metadata":{"name":"test_1","notebookId":3566324934842311},"nbformat":4,"nbformat_minor":0} 2 | -------------------------------------------------------------------------------- /test_data/ctry_summary.csv: -------------------------------------------------------------------------------- 1 | DEST_COUNTRY_NAME,ORIGIN_COUNTRY_NAME,count 2 | United States,Romania,15 3 | United States,Croatia,1 4 | United States,Ireland,344 5 | Germany,India,240 6 | GB,India,300 7 | Canada,India,500 -------------------------------------------------------------------------------- /test_data/retail_data.csv: -------------------------------------------------------------------------------- 1 | 
InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country 2 | 536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom 3 | 536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom 4 | 536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom -------------------------------------------------------------------------------- /test_data/testFile_4.csv: -------------------------------------------------------------------------------- 1 | aaa bbb ccc --------------------------------------------------------------------------------