├── .DS_Store ├── .blogideas.swp ├── .gitignore ├── .gitignore.swp ├── .index.html.swp ├── .nojekyll ├── .swp ├── 5-golden-rules-of-cloud-computing.html ├── 5-golden-rules-of-cloud-computing └── index.html ├── CNAME ├── _drafts ├── .DS_Store ├── .rust-scala-part-5.md.swp ├── .spark-3-datasource-v2-part-4.md.swp ├── .spark-3-introduction-part-10.md.swp ├── .tensorflow-on-spark-3.md.swp ├── .test.txt.swp ├── discovering-directors.md ├── node-as-build-tool.md ├── sbt-on-ubuntu.md ├── spark-from-scratch-part-1.md └── understanding-spark-connect-6.md ├── _plugins └── .jekyll_lunr_js_search.rb.swp ├── _posts ├── .2015-05-26-analysing-csv-data-in-spark.md.swp ├── .2016-07-03-introduction-to-spark-two-part-7.md.swp ├── .2016-10-21-statistical-data-exploration-spark-part-1.md.swp ├── .2016-11-22-statistical-data-exploration-spark-part-3.md.swp ├── .2017-02-17-scaling-spark-with-kubernetes-part-3.md.swp ├── .2017-02-23-scaling-spark-with-kubernetes-part-4.md.swp ├── .2017-02-26-scaling-spark-with-kubernetes-part-5.md.swp ├── .2017-02-27-scaling-spark-with-kubernetes-part-6.md.swp ├── .2017-03-06-scaling-spark-with-kubernetes-part-7.md.swp ├── .2017-04-13-migrating-to-spark-two-part-1.md.swp ├── .2017-04-15-migrating-to-spark-two-part-2.md.swp ├── .2017-05-02-scaling-spark-with-kubernetes-part-8.md.swp ├── .2017-05-08-migrating-to-spark-two-part-3.md.swp ├── .2017-06-07-migrating-to-spark-two-part-10.md.swp ├── .2017-06-09-migrating-to-spark-two-part-6.md.swp ├── .2017-08-06-introduction-to-spark-structured-streaming-part-3.md.swp ├── .2017-08-11-introduction-to-spark-structured-streaming-part-5.md.swp ├── .2018-03-13-spark-vector-to-numpy.md.swp ├── .2018-04-24-spark-datasource-v2-part-4.md.swp ├── .DS_Store └── 2017-12-19-class-imbalance-part-1.md ├── a-tech-event-list-for-2010.html ├── a-tech-event-list-for-2010 └── index.html ├── about └── index.html ├── addtopages.sh ├── akka-http-helloworld.html ├── akka-http-helloworld └── index.html ├── akka-http-testing.html ├── 
akka-http-testing └── index.html ├── analysing-csv-data-in-spark.html ├── analysing-csv-data-in-spark └── index.html ├── analysing-kaggle-titanic-data.html ├── analysing-kaggle-titanic-data └── index.html ├── anatomy-of-rdd.html ├── anatomy-of-rdd └── index.html ├── anatomy-of-spark-catalyst-part-2.html ├── anatomy-of-spark-catalyst-part-2 └── index.html ├── anatomy-of-spark-catalyst.html ├── anatomy-of-spark-catalyst └── index.html ├── anatomy-of-spark-dataframe-api.html ├── anatomy-of-spark-dataframe-api └── index.html ├── anatomy-of-spark-datasource-api.html ├── anatomy-of-spark-datasource-api └── index.html ├── antlr-as-an-external-tool-in-eclipse-on-ubuntu.html ├── antlr-as-an-external-tool-in-eclipse-on-ubuntu └── index.html ├── apache-beam-next-step-in-big-data-unification.html ├── apache-beam-next-step-in-big-data-unification └── index.html ├── apache-spark-not-a-one-trick-pony.html ├── apache-spark-not-a-one-trick-pony └── index.html ├── apache-tuscany-part-1-installing-tuscany-plug-in-in-eclipse.html ├── apache-tuscany-part-1-installing-tuscany-plug-in-in-eclipse └── index.html ├── apache-tuscany-part-2-hello-world-using-eclipse-and-Maven.html ├── apache-tuscany-part-2-hello-world-using-eclipse-and-Maven └── index.html ├── array-copy-hack-in-c.html ├── array-copy-hack-in-c └── index.html ├── art-what-role-do-they-play-in-our-lives.html ├── art-what-role-do-they-play-in-our-lives └── index.html ├── barrier-execution-mode-part-1.html ├── barrier-execution-mode-part-1 └── index.html ├── barrier-execution-mode-part-2.html ├── barrier-execution-mode-part-2 └── index.html ├── bert-email-spam-1.html ├── bert-email-spam-2.html ├── book-review-javaScript-patterns.html ├── book-review-javaScript-patterns └── index.html ├── boot-custom-recovery- └── index.html ├── boot-custom-recovery.html ├── building-distributed-systems-from-scratch-part1.html ├── building-distributed-systems-from-scratch-part1 └── index.html ├── 
building-distributed-systems-from-scratch-part2.html ├── building-distributed-systems-from-scratch-part2 └── index.html ├── categories ├── akka-http │ └── index.html ├── akka │ └── index.html ├── android │ └── index.html ├── androidone │ └── index.html ├── antlr │ └── index.html ├── apache │ └── index.html ├── apple │ └── index.html ├── art │ └── index.html ├── azkaban │ └── index.html ├── barrier-execution │ └── index.html ├── beam │ └── index.html ├── bert-email-spam │ └── index.html ├── book │ └── index.html ├── browser │ └── index.html ├── c │ └── index.html ├── chrome │ └── index.html ├── class-imbalance │ └── index.html ├── clickhouse │ └── index.html ├── cloudcomputing │ └── index.html ├── cplusplus │ └── index.html ├── data-modeling │ └── index.html ├── datascience │ └── index.html ├── datasource-series │ └── index.html ├── datasource-v2-series │ └── index.html ├── datasource-v2-spark-three │ └── index.html ├── design │ └── index.html ├── devday │ └── index.html ├── developer │ └── index.html ├── distributed-systems │ └── index.html ├── eclipse │ └── index.html ├── exploring-langgraph │ └── index.html ├── firefox │ └── index.html ├── flink-series │ └── index.html ├── flink-streaming │ └── index.html ├── flink │ └── index.html ├── functional-programming │ └── index.html ├── google │ └── index.html ├── gplus │ └── index.html ├── graal-vm │ └── index.html ├── hacking │ └── index.html ├── hadoop │ └── index.html ├── html5 │ └── index.html ├── introduction-structured-streaming │ └── index.html ├── java │ └── index.html ├── javascript │ └── index.html ├── k8-api │ └── index.html ├── k8s-horizontal-scaling │ └── index.html ├── k9-api │ └── index.html ├── kaggle │ └── index.html ├── kubernetes-series │ └── index.html ├── kubernetes │ └── index.html ├── langgraph │ └── index.html ├── latest-java │ └── index.html ├── llm │ └── index.html ├── machine-learning │ └── index.html ├── maven │ └── index.html ├── mean-series │ └── index.html ├── mesos │ └── index.html ├── 
nectar │ └── index.html ├── python │ └── index.html ├── rediscover-implicits-scala3 │ └── index.html ├── rust-functional │ └── index.html ├── rust-scala │ └── index.html ├── rust │ └── index.html ├── scala │ └── index.html ├── scala3 │ └── index.html ├── spark-aqe │ └── index.html ├── spark-connect │ └── index.html ├── spark-pandas │ └── index.html ├── spark-plugin │ └── index.html ├── spark-summit-east-2016 │ └── index.html ├── spark-three │ └── index.html ├── spark-two-migration-series │ └── index.html ├── spark-two │ └── index.html ├── spark │ └── index.html ├── statistical-data-exploration │ └── index.html ├── talks │ └── index.html ├── technology │ └── index.html ├── testing │ └── index.html ├── transformer-models │ └── index.html ├── tuscany │ └── index.html ├── ubuntu │ └── index.html ├── workshop │ └── index.html └── yarn │ └── index.html ├── class-imbalance-part-1.html ├── class-imbalance-part-1 └── index.html ├── class-imbalance-part-2.html ├── class-imbalance-part-2 └── index.html ├── class-imbalance-part-3.html ├── class-imbalance-part-3 └── index.html ├── clickouse-clustering-spark-developer.html ├── clickouse-clustering-spark-developer └── index.html ├── cloud-computing-as-i-see.html ├── cloud-computing-as-i-see └── index.html ├── cloud-services- └── index.html ├── cloud-services.html ├── converting-java-collections-to-scala.html ├── converting-java-collections-to-scala └── index.html ├── css ├── app.css ├── long_heading.css └── main.css ├── custom-mesos-executor-scala.html ├── custom-mesos-executor-scala └── index.html ├── data-modeling-spark-part-1.html ├── data-modeling-spark-part-1 └── index.html ├── data-modeling-spark-part-2.html ├── data-modeling-spark-part-2 └── index.html ├── distributing-third-party-libraries-in-mesos.html ├── distributing-third-party-libraries-in-mesos └── index.html ├── dogfight-book-review.html ├── dogfight-book-review └── index.html ├── dynamic-spark-shuffle-partitions.html ├── dynamic-spark-shuffle-partitions └── 
index.html ├── evaluating-spark-rdd-for-side-effects.html ├── evaluating-spark-rdd-for-side-effects └── index.html ├── evolution-of-apache-spark.html ├── evolution-of-apache-spark └── index.html ├── exploratory-data-analysis-in-spark-with-jupyter.html ├── exploratory-data-analysis-in-spark-with-jupyter └── index.html ├── exploring-art-with-chrome-store.html ├── exploring-art-with-chrome-store └── index.html ├── exploring-langgraph-part-1.html ├── extending-spark-api.html ├── extending-spark-api └── index.html ├── favicon.ico ├── feed.mesos.xml ├── feed.xml ├── functional-programming-in-c++.html ├── functional-programming-in-c++ └── index.html ├── functional-programming-in-rust-part-1.html ├── functional-programming-in-rust-part-1 └── index.html ├── functional-programming-in-rust-part-2.html ├── functional-programming-in-rust-part-2 └── index.html ├── getting-started-with-antlr-basics.html ├── getting-started-with-antlr-basics └── index.html ├── glom-in-spark.html ├── glom-in-spark └── index.html ├── gmail-offline-use-with-care.html ├── gmail-offline-use-with-care └── index.html ├── google-chrome-os-a-different-ball-game.html ├── google-chrome-os-a-different-ball-game └── index.html ├── google-plus-after-one-week.html ├── google-plus-after-one-week └── index.html ├── google-top-100-searches-in-2013-chennai-express,-jiah-khan-from-India,-makes-into-the-list.html ├── google-top-100-searches-in-2013-chennai-express,-jiah-khan-from-India,-makes-into-the-list └── index.html ├── graal-vm-part-1.html ├── graal-vm-part-1 └── index.html ├── graal-vm-part-2.html ├── graal-vm-part-2 └── index.html ├── graal-vm-part-3.html ├── graal-vm-part-3 └── index.html ├── graal-vm-part-4.html ├── graal-vm-part-4 └── index.html ├── graal-vm-part-5.html ├── graal-vm-part-5 └── index.html ├── gradient-descent-for-logistic-regression-in-octave.html ├── gradient-descent-for-logistic-regression-in-octave └── index.html ├── granular-resource-management-mesos.html ├── 
granular-resource-management-mesos └── index.html ├── hadoop-workshop-first-success-story.html ├── hadoop-workshop-first-success-story └── index.html ├── handling-empty-rdd-in-spark-streaming.html ├── handling-empty-rdd-in-spark-streaming └── index.html ├── hello.html ├── hello └── index.html ├── history-of-spark.html ├── history-of-spark └── index.html ├── horizontal-scaling-k8s-part-1.html ├── horizontal-scaling-k8s-part-1 └── index.html ├── horizontal-scaling-k8s-part-2.html ├── horizontal-scaling-k8s-part-2 └── index.html ├── horizontal-scaling-k8s-part-3.html ├── horizontal-scaling-k8s-part-3 └── index.html ├── if-apple-holds-the-future-of-computing-I-dontwant-to-be-a-developer.html ├── if-apple-holds-the-future-of-computing-I-dontwant-to-be-a-developer └── index.html ├── images ├── .DS_Store ├── aqe │ ├── nonoptimisedgroupbydag.png │ ├── nonoptimisegroupbyjobs.png │ ├── optimisedgroupbydag.png │ └── optimisedgroupbyjobs.png ├── combinedlogisticregression.png ├── credit_card_class_plot.png ├── firstlogisticregression.png ├── histogram_in_r.png ├── histogram_lifexp.png ├── hpa │ ├── spark-master-auto-scale.png │ └── spark-master-single-slave.png ├── inmemoryscan │ ├── withname.png │ └── withoutname.png ├── inmemoryscanname.png ├── joinhint │ ├── broadcasthint.png │ ├── cartesianhint.png │ ├── shufflehint.png │ └── sortmergehint.png ├── langgraph │ └── hello_world_graph.png ├── mesos_output.png ├── mesos_task_run.png ├── nestedfolder.png ├── pandasonspark │ └── pandasplan ├── pursuit_of_unknown_book_cover.jpg ├── readitlater_architecture.png ├── rest_lamp.png ├── rest_mean.png ├── rss.png ├── sandbox_screen.png ├── secondlogisticregression.png ├── spark-ui-kube.png ├── spark_connect │ ├── spark_connect_spark_ui.png │ └── spark_ui_after_helloworld.png └── under_sample_class_plot.png ├── implementing-shuffle-in-mesos.html ├── implementing-shuffle-in-mesos └── index.html ├── improving-mobile-payments-with-real-time-spark.html ├── 
improving-mobile-payments-with-real-time-spark └── index.html ├── in-persuit-of-the-unknown-book-review.html ├── in-persuit-of-the-unknown-book-review └── index.html ├── index.html ├── interactive-scheduling-using-azkaban-challenges-in-scheduling-interactive-workloads.html ├── interactive-scheduling-using-azkaban-challenges-in-scheduling-interactive-workloads └── index.html ├── interactive-scheduling-using-azkaban-setting-up-solo-server.html ├── interactive-scheduling-using-azkaban-setting-up-solo-server └── index.html ├── interactive-workflow-management-using-azkaban.html ├── interactive-workflow-management-using-azkaban └── index.html ├── introduction-to-cloud-computing.html ├── introduction-to-cloud-computing └── index.html ├── introduction-to-flink-for-spark-developers-flink-vs-spark.html ├── introduction-to-flink-for-spark-developers-flink-vs-spark └── index.html ├── introduction-to-flink-streaming-part-1.html ├── introduction-to-flink-streaming-part-1 └── index.html ├── introduction-to-flink-streaming-part-10.html ├── introduction-to-flink-streaming-part-10 └── index.html ├── introduction-to-flink-streaming-part-2.html ├── introduction-to-flink-streaming-part-2 └── index.html ├── introduction-to-flink-streaming-part-3.html ├── introduction-to-flink-streaming-part-3 └── index.html ├── introduction-to-flink-streaming-part-4.html ├── introduction-to-flink-streaming-part-4 └── index.html ├── introduction-to-flink-streaming-part-5.html ├── introduction-to-flink-streaming-part-5 └── index.html ├── introduction-to-flink-streaming-part-6.html ├── introduction-to-flink-streaming-part-6 └── index.html ├── introduction-to-flink-streaming-part-7.html ├── introduction-to-flink-streaming-part-7 └── index.html ├── introduction-to-flink-streaming-part-8.html ├── introduction-to-flink-streaming-part-8 └── index.html ├── introduction-to-flink-streaming-part-9.html ├── introduction-to-flink-streaming-part-9 └── index.html ├── introduction-to-flink-talk.html ├── 
introduction-to-flink-talk └── index.html ├── introduction-to-hadoop-for-spark-developers.html ├── introduction-to-hadoop-for-spark-developers └── index.html ├── introduction-to-spark-2.0.html ├── introduction-to-spark-2.0 └── index.html ├── introduction-to-spark-data-source-api-part-1.html ├── introduction-to-spark-data-source-api-part-1 └── index.html ├── introduction-to-spark-data-source-part-1 └── index.html ├── introduction-to-spark-streaming-meetup-talk.html ├── introduction-to-spark-streaming-meetup-talk └── index.html ├── introduction-to-spark-structured-streaming-part-1.html ├── introduction-to-spark-structured-streaming-part-1 └── index.html ├── introduction-to-spark-structured-streaming-part-10.html ├── introduction-to-spark-structured-streaming-part-10 └── index.html ├── introduction-to-spark-structured-streaming-part-11.html ├── introduction-to-spark-structured-streaming-part-11 └── index.html ├── introduction-to-spark-structured-streaming-part-12.html ├── introduction-to-spark-structured-streaming-part-12 └── index.html ├── introduction-to-spark-structured-streaming-part-13.html ├── introduction-to-spark-structured-streaming-part-13 └── index.html ├── introduction-to-spark-structured-streaming-part-14.html ├── introduction-to-spark-structured-streaming-part-14 └── index.html ├── introduction-to-spark-structured-streaming-part-15.html ├── introduction-to-spark-structured-streaming-part-15 └── index.html ├── introduction-to-spark-structured-streaming-part-2.html ├── introduction-to-spark-structured-streaming-part-2 └── index.html ├── introduction-to-spark-structured-streaming-part-3.html ├── introduction-to-spark-structured-streaming-part-3 └── index.html ├── introduction-to-spark-structured-streaming-part-4.html ├── introduction-to-spark-structured-streaming-part-4 └── index.html ├── introduction-to-spark-structured-streaming-part-5.html ├── introduction-to-spark-structured-streaming-part-5 └── index.html ├── 
introduction-to-spark-structured-streaming-part-6.html ├── introduction-to-spark-structured-streaming-part-6 └── index.html ├── introduction-to-spark-structured-streaming-part-7.html ├── introduction-to-spark-structured-streaming-part-7 └── index.html ├── introduction-to-spark-structured-streaming-part-8.html ├── introduction-to-spark-structured-streaming-part-8 └── index.html ├── introduction-to-spark-structured-streaming-part-9.html ├── introduction-to-spark-structured-streaming-part-9 └── index.html ├── introduction-to-spark-two-part-1.html ├── introduction-to-spark-two-part-1 └── index.html ├── introduction-to-spark-two-part-2.html ├── introduction-to-spark-two-part-2 └── index.html ├── introduction-to-spark-two-part-3.html ├── introduction-to-spark-two-part-3 └── index.html ├── introduction-to-spark-two-part-4.html ├── introduction-to-spark-two-part-4 └── index.html ├── introduction-to-spark-two-part-5.html ├── introduction-to-spark-two-part-5 └── index.html ├── introduction-to-spark-two-part-6.html ├── introduction-to-spark-two-part-6 └── index.html ├── introduction-to-spark-two-part-7.html ├── introduction-to-spark-two-part-7 └── index.html ├── introduction-to-spark.html ├── introduction-to-spark └── index.html ├── java-the-language-of-the-future.html ├── java-the-language-of-the-future └── index.html ├── json-in-akka-http.html ├── json-in-akka-http └── index.html ├── kryo-disk-serialization-in-spark.html ├── kryo-disk-serialization-in-spark └── index.html ├── kryo-disk-serialization-spark └── index.html ├── latest-java-1.html ├── latest-java-2.html ├── latest-java-3.html ├── latest-java-4.html ├── latest-java-5.html ├── library-jar-mesos └── index.html ├── machine-learning-with-spark.html ├── machine-learning-with-spark └── index.html ├── matfile-to-rdd.html ├── matfile-to-rdd └── index.html ├── mesos-helloworld-scala.html ├── mesos-helloworld-scala └── index.html ├── mesos-single-node-setup-ubuntu.html ├── mesos-single-node-setup-ubuntu └── index.html ├── 
migrate-spark-datasource-2.4.html ├── migrate-spark-datasource-2.4 └── index.html ├── migrating-to-spark-two-part-1.html ├── migrating-to-spark-two-part-1 └── index.html ├── migrating-to-spark-two-part-10.html ├── migrating-to-spark-two-part-10 └── index.html ├── migrating-to-spark-two-part-2.html ├── migrating-to-spark-two-part-2 └── index.html ├── migrating-to-spark-two-part-3.html ├── migrating-to-spark-two-part-3 └── index.html ├── migrating-to-spark-two-part-4.html ├── migrating-to-spark-two-part-4 └── index.html ├── migrating-to-spark-two-part-5.html ├── migrating-to-spark-two-part-5 └── index.html ├── migrating-to-spark-two-part-6.html ├── migrating-to-spark-two-part-6 └── index.html ├── migrating-to-spark-two-part-7.html ├── migrating-to-spark-two-part-7 └── index.html ├── migrating-to-spark-two-part-8.html ├── migrating-to-spark-two-part-8 └── index.html ├── migrating-to-spark-two-part-9.html ├── migrating-to-spark-two-part-9 └── index.html ├── most influential-men -in-computer-field └── index.html ├── most-influential-men-in-computer-field.html ├── most-misread-stories-of-2009.html ├── most-misread-stories-of-2009 └── index.html ├── mozilla-firefox--browser-for-human-beings └── index.html ├── mozilla-firefox-browser-for-human-beings.html ├── multi-column-feature-transformation-spark-ml.html ├── multi-column-feature-transformation-spark-ml └── index.html ├── multi-source-spark-tellius.html ├── multi-source-spark-tellius └── index.html ├── nectar-developing-an-open-source-predictive-modeling-framework-on-hadoop.html ├── nectar-developing-an-open-source-predictive-modeling-framework-on-hadoop └── index.html ├── one-day-hadoop-workshop-in-bangalore.html ├── one-day-hadoop-workshop-in-bangalore └── index.html ├── page10 └── index.html ├── page11 └── index.html ├── page12 └── index.html ├── page13 └── index.html ├── page14 └── index.html ├── page15 └── index.html ├── page16 └── index.html ├── page17 └── index.html ├── page18 └── index.html ├── page19 └── 
index.html ├── page2 └── index.html ├── page20 └── index.html ├── page21 └── index.html ├── page22 └── index.html ├── page23 └── index.html ├── page24 └── index.html ├── page3 └── index.html ├── page4 └── index.html ├── page5 └── index.html ├── page6 └── index.html ├── page7 └── index.html ├── page8 └── index.html ├── page9 └── index.html ├── parallel-cross-validation.html ├── parallel-cross-validation └── index.html ├── persuit-of-unknown-book-review └── index.html ├── pipe-in-spark.html ├── pipe-in-spark └── index.html ├── promising-technologies-of-2010.html ├── promising-technologies-of-2010 └── index.html ├── read-it-later-in-mean-part-1.html ├── read-it-later-in-mean-part-1 └── index.html ├── read-it-later-in-mean-part-2.html ├── read-it-later-in-mean-part-2 └── index.html ├── rediscovering-implicits-scala-3-part-1.html ├── rediscovering-implicits-scala-3-part-2.html ├── rediscovering-implicits-scala-3-part-3.html ├── running-scala-programs-on-yarn.html ├── running-scala-programs-on-yarn └── index.html ├── rust-scala-part-1.html ├── rust-scala-part-1 └── index.html ├── rust-scala-part-2.html ├── rust-scala-part-2 └── index.html ├── rust-scala-part-3.html ├── rust-scala-part-3 └── index.html ├── rust-scala-part-4.html ├── rust-scala-part-4 └── index.html ├── rust-scala-part-5.html ├── rust-scala-part-5 └── index.html ├── rust-scala-part-6.html ├── rust-scala-part-6 └── index.html ├── rust-scala-part-7.html ├── rust-scala-part-7 └── index.html ├── say-hello-to-android-part-I-introduction-to-android.html ├── say-hello-to-android-part-I-introduction-to-android └── index.html ├── sbt-on-ubuntu.html ├── sbt-on-ubuntu └── index.html ├── scala-for-android.html ├── scala-for-android └── index.html ├── scala-magnet-pattern.html ├── scala-magnet-pattern └── index.html ├── scaling-spark-with-kubernetes-part-1.html ├── scaling-spark-with-kubernetes-part-1 └── index.html ├── scaling-spark-with-kubernetes-part-2.html ├── scaling-spark-with-kubernetes-part-2 └── index.html 
├── scaling-spark-with-kubernetes-part-3.html ├── scaling-spark-with-kubernetes-part-3 └── index.html ├── scaling-spark-with-kubernetes-part-4.html ├── scaling-spark-with-kubernetes-part-4 └── index.html ├── scaling-spark-with-kubernetes-part-5.html ├── scaling-spark-with-kubernetes-part-5 └── index.html ├── scaling-spark-with-kubernetes-part-6.html ├── scaling-spark-with-kubernetes-part-6 └── index.html ├── scaling-spark-with-kubernetes-part-7.html ├── scaling-spark-with-kubernetes-part-7 └── index.html ├── scaling-spark-with-kubernetes-part-8.html ├── scaling-spark-with-kubernetes-part-8 └── index.html ├── scaling-spark-with-kubernetes-part-9.html ├── scaling-spark-with-kubernetes-part-9 └── index.html ├── secondary-namenode---what-it-really-do └── index.html ├── secondary-namenode-what-it-really-do.html ├── simple-akka-remote-example.html ├── simple-akka-remote-example └── index.html ├── sizeof-operator-java-scala.html ├── sizeof-operator-java-scala └── index.html ├── spark-3-datasource-v2-part-1.html ├── spark-3-datasource-v2-part-1 └── index.html ├── spark-3-datasource-v2-part-2.html ├── spark-3-datasource-v2-part-2 └── index.html ├── spark-3-datasource-v2-part-3.html ├── spark-3-datasource-v2-part-3 └── index.html ├── spark-3-datasource-v2-part-4.html ├── spark-3-datasource-v2-part-4 └── index.html ├── spark-3-datasource-v2-part-5.html ├── spark-3-datasource-v2-part-5 └── index.html ├── spark-3-datasource-v2-part-6.html ├── spark-3-datasource-v2-part-6 └── index.html ├── spark-3-introduction-part-1.html ├── spark-3-introduction-part-1 └── index.html ├── spark-3-introduction-part-10.html ├── spark-3-introduction-part-10 └── index.html ├── spark-3-introduction-part-2.html ├── spark-3-introduction-part-2 └── index.html ├── spark-3-introduction-part-3.html ├── spark-3-introduction-part-3 └── index.html ├── spark-3-introduction-part-4.html ├── spark-3-introduction-part-4 └── index.html ├── spark-3-introduction-part-5.html ├── spark-3-introduction-part-5 └── 
index.html ├── spark-3-introduction-part-6.html ├── spark-3-introduction-part-6 └── index.html ├── spark-3-introduction-part-7.html ├── spark-3-introduction-part-7 └── index.html ├── spark-3-introduction-part-8.html ├── spark-3-introduction-part-8 └── index.html ├── spark-3-introduction-part-9.html ├── spark-3-introduction-part-9 └── index.html ├── spark-aqe-part-1.html ├── spark-aqe-part-1 └── index.html ├── spark-aqe-part-2.html ├── spark-aqe-part-2 └── index.html ├── spark-datasource-v2-part-1.html ├── spark-datasource-v2-part-1 └── index.html ├── spark-datasource-v2-part-2.html ├── spark-datasource-v2-part-2 └── index.html ├── spark-datasource-v2-part-3.html ├── spark-datasource-v2-part-3 └── index.html ├── spark-datasource-v2-part-4.html ├── spark-datasource-v2-part-4 └── index.html ├── spark-datasource-v2-part-5.html ├── spark-datasource-v2-part-5 └── index.html ├── spark-datasource-v2-part-6.html ├── spark-datasource-v2-part-6 └── index.html ├── spark-datasource-v2-part-7.html ├── spark-datasource-v2-part-7 └── index.html ├── spark-datasource-v2-part-8.html ├── spark-datasource-v2-part-8 └── index.html ├── spark-in-javascript.html ├── spark-in-javascript └── index.html ├── spark-on-kubernetes.html ├── spark-on-kubernetes └── index.html ├── spark-pandas-part-1.html ├── spark-pandas-part-1 └── index.html ├── spark-pandas-part-2.html ├── spark-pandas-part-2 └── index.html ├── spark-plugin-part-1.html ├── spark-plugin-part-1 └── index.html ├── spark-plugin-part-2.html ├── spark-plugin-part-2 └── index.html ├── spark-plugin-part-3.html ├── spark-plugin-part-3 └── index.html ├── spark-plugin-part-4.html ├── spark-plugin-part-4 └── index.html ├── spark-plugin-part-5.html ├── spark-plugin-part-5 └── index.html ├── spark-rdd-fold.html ├── spark-rdd-fold └── index.html ├── spark-vector-to-numpy.html ├── spark-vector-to-numpy └── index.html ├── statistical-data-exploration-spark-part-1.html ├── statistical-data-exploration-spark-part-1 └── index.html ├── 
statistical-data-exploration-spark-part-2.html ├── statistical-data-exploration-spark-part-2 └── index.html ├── statistical-data-exploration-spark-part-3.html ├── statistical-data-exploration-spark-part-3 └── index.html ├── structured-data-processing-with-spark-sql-meetup-talk.html ├── structured-data-processing-with-spark-sql-meetup-talk └── index.html ├── tensorflow-on-spark-3.0.html ├── tensorflow-on-spark-3.0 └── index.html ├── test-containers-scala.html ├── test-containers-scala └── index.html ├── third-party-library-mesos └── index.html ├── ubuntu-at-work-part-2-Where-are-the-plugins.html ├── ubuntu-at-work-part-2-Where-are-the-plugins └── index.html ├── ubuntu-at-work-part1.html ├── ubuntu-at-work-part1 └── index.html ├── ubuntu-at-work-part3--Maven-ghost └── index.html ├── ubuntu-at-work-part3-Maven-ghost.html ├── understanding-k8s-api-part-1.html ├── understanding-k8s-api-part-1 └── index.html ├── understanding-k8s-api-part-2.html ├── understanding-k8s-api-part-2 └── index.html ├── understanding-k8s-api-part-3.html ├── understanding-k8s-api-part-3 └── index.html ├── understanding-spark-connect-1.html ├── understanding-spark-connect-2.html ├── understanding-spark-connect-3.html ├── understanding-spark-connect-4.html ├── understanding-spark-connect-5.html ├── upgrade-to-jekyll-4.txt ├── using-antlr-with-maven.html ├── using-antlr-with-maven └── index.html ├── web-forms-do-it-right-a-dzone-story.html ├── web-forms-do-it-right-a-dzone-story └── index.html ├── whats-new-in-spark-framework-improvements.html └── whats-new-in-spark-framework-improvements └── index.html /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/.DS_Store -------------------------------------------------------------------------------- /.blogideas.swp: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/.blogideas.swp -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.jekyll-cache 2 | -------------------------------------------------------------------------------- /.gitignore.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/.gitignore.swp -------------------------------------------------------------------------------- /.index.html.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/.index.html.swp -------------------------------------------------------------------------------- /.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/.nojekyll -------------------------------------------------------------------------------- /.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/.swp -------------------------------------------------------------------------------- /CNAME: -------------------------------------------------------------------------------- 1 | blog.madhukaraphatak.com -------------------------------------------------------------------------------- /_drafts/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_drafts/.DS_Store -------------------------------------------------------------------------------- 
/_drafts/.rust-scala-part-5.md.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_drafts/.rust-scala-part-5.md.swp -------------------------------------------------------------------------------- /_drafts/.spark-3-datasource-v2-part-4.md.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_drafts/.spark-3-datasource-v2-part-4.md.swp -------------------------------------------------------------------------------- /_drafts/.spark-3-introduction-part-10.md.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_drafts/.spark-3-introduction-part-10.md.swp -------------------------------------------------------------------------------- /_drafts/.tensorflow-on-spark-3.md.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_drafts/.tensorflow-on-spark-3.md.swp -------------------------------------------------------------------------------- /_drafts/.test.txt.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_drafts/.test.txt.swp -------------------------------------------------------------------------------- /_drafts/discovering-directors.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: post 3 | title: "Discovering Directors : Quentin Torantino" 4 | date : 2015-01-10 5 | categories: discovering_directores 6 | --- 7 | 8 | Over the years, I have watched thousands of movies in various languages from 
various directors. I am the kind of person who goes to movies to see a director's work rather than an actor or actress. So I have always been fascinated by how people discover different directors at different points in their lives and how they can enjoy movies which are 20-30 years old. 9 | 10 | This is a new series of posts, where I will be sharing my experience of how I discovered different directors, their movie making styles and how I discovered different movies. 11 | 12 | Maybe this will help people discover something new to watch or catch some of the finer details which they may have missed. 13 | 14 | The first director I am going to talk about is Quentin Tarantino. 15 | 16 | ## Quentin Tarantino 17 | 18 | Quentin Tarantino is my number one favorite at the current time. He is one of the rock star directors out there. His movies are made for very specific pop culture enthusiasts, yet they still appeal to a broader audience. The reasons I think I like Quentin are 19 | 20 | * Novel-like screenplay 21 | * Incredible music 22 | * Great dialog 23 | * Superb attention to individual scenes 24 | 25 | 26 | ## How I discovered him? 27 | 28 | One of my friends suggested one day that I watch *Pulp Fiction*. Now by the name of it, it sounded strange. So I gave it a shot. I started to watch without subs, and I gave up. It was so hard for me to follow the dialogs and make sense of it. 29 | 30 | Then I watched with subs. One of the striking things about the whole film was how chilled out each specific scene is. Gangsters talk about irrelevant stuff all the time. Most of the dialogs were not really relevant to the story but they made the characters look more realistic. I also liked the reverse screenplay, which I had liked before in Kannada (an Indian language) movies like Ssh and A by a director called Upendra. 31 | 32 | So after Pulp Fiction, the main takeaway was that the fun is not in the overall story. The fun is in enjoying each scene and following the characters. 
This was something different from the usual English films I had seen till that point of time. Most movies are in too much of a hurry to move the plot along rather than hanging around for a while. This trend is there in other directors' work too. 33 | 34 | Once you discover a new director, the obvious next step is to see his other movies. The obvious next movie to watch was Reservoir Dogs. Again it has the same characteristics as Pulp Fiction but with stronger dialogs and single-mindedness. I like Reservoir Dogs for the simplicity of its overall making. It's guys in suits in a warehouse. That's it. 35 | 36 | The next film of Quentin's that I watched is not one he directed. As I said before, I am a big fan of his script writing abilities, not only his directorial ones. So I watched True Romance. I loved it. It's something very romantic and violent at the same time. It has the kind of dialogues I had never heard in any other movies, even in his own movies. It's been one of my favorite movies of all time. 37 | 38 | After these first three movies, I watched Jackie Brown. I think it's one of the lowest rated movies of Quentin. But I liked it, again because of its hanging low aspect. It again shows why Quentin is all about scenes, not just about twisting stories. 39 | 40 | Then afterwards I have seen all of his other movies and all of his interviews too. 41 | 42 | One of the striking things I realized over time is that by watching Quentin's movies and interviews, I got introduced to 43 | 44 | * Independent film making 45 | * Scene oriented filming 46 | * Numerous other independent directors like Robert Rodriguez, Kevin Smith, Rich Hickey etc 47 | * Western classics like the Dollars trilogy 48 | 49 | So Quentin's movie knowledge has been really influential on my movie watching in the last few years. This is how I discovered Quentin. We will meet again with a new director in the next post in the series. 
50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /_drafts/node-as-build-tool.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: post 3 | title: "Node.js- New crossplatform toolkit" 4 | categories: javascript 5 | --- 6 | Node.js started as a low latency web server written in javascript. It was a great hit among the 7 | javascript developers who wanted to use their javascript skills to write server side code. But over 8 | the years node.js has grown into something completely different. It has become a cross 9 | platform application development tool for creating desktop applications and tools. It's kind 10 | of replacing Java for writing nice desktop applications which exploit all the nice things about 11 | web applications. 12 | 13 | The following are a few reasons node.js is a good desktop development platform 14 | 15 | Available on all platforms 16 | Great UI support with all greatness of web 17 | Access to filesystem 18 | Standard package system using npm 19 | 20 | Here are some examples of how people are building cool apps using node.js that have 21 | nothing to do with server programming. 22 | 23 | ##Atom - Github text editor written in javascript 24 | Text editors are one of the critical pieces of any programming setup. Sublime and Textmate, along with 25 | the classic vi and emacs, rule the game. But having a modern text editor written in javascript is always 26 | nice. Atom is one of the first realizations of that dream. Before node.js, if you wanted 27 | to create a complete js application for the desktop you needed to run a server process so that 28 | you could talk to the filesystem. But with node.js you don't need to run the server anymore. 29 | If you want to try atom you can download it [here](https://github.com/atom/atom) 30 | 31 | ##Bower - Maven of javascript 32 | No more randomly picking up javascript from the internet and putting it in each page. 
As we 33 | have great dependency management support for languages like Java, we now have the same for 34 | javascript. Using bower, it is a joy to bootstrap different projects which use libraries 35 | like angular.js, polymer. Before bower, we needed to know what css,js to download 36 | and what to wire where. But with bower it's all super easy and straightforward. 37 | Check out bower [here](http://bower.io/) 38 | 39 | ##Grunt - Javascript task runner 40 | We need so many things to be automated in development like minification, running builds etc. 41 | Wouldn't it be cool if we could do all this using javascript? Grunt was created for that purpose. 42 | You can script a lot of things in Grunt and automate them all so that you don't have to worry 43 | anymore. 44 | 45 | Check out [Grunt](http://gruntjs.com/) 46 | 47 | These are 3 applications I use every day. The future is interesting 48 | 49 | 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /_drafts/sbt-on-ubuntu.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: post 3 | title: "Sbt on ubuntu" 4 | categories: scala 5 | --- 6 | Sbt is the official build tool used by Scala ecosystem projects. So just as you have maven on your system 7 | whenever you deal with java projects, it's handy to have sbt on the path to do things with 8 | scala projects. Some days back I needed to do some scala development and I chose a different 9 | path of using Sbt rather than maven as usual. I needed sbt to compile the project 10 | and getting sbt on ubuntu was not that straightforward. 
So after going through a few blog 11 | posts and Stack Overflow, I thought I would document it in this post 12 | 13 | Steps to install Sbt on ubuntu 14 | 15 | 16 | {%highlight sh %} 17 | 18 | #Add typesafe repository 19 | wget http://apt.typesafe.com/repo-deb-build-0002.deb 20 | 21 | #Install the repository 22 | sudo dpkg -i repo-deb-build-0002.deb 23 | 24 | #Refresh 25 | sudo apt-get update 26 | 27 | #Install sbt 28 | sudo apt-get install sbt 29 | 30 | {% endhighlight %} 31 | 32 | Add specific version of sbt 33 | #Some projects need a specific version of sbt like 0.13. If you get an 34 | error saying " Detected sbt version 0.12.2 Cannot find sbt launcher 0.12.2 Please download: From typesafe.artifactoryonline.com/typesafe/ivy-releases/… 35 | To /home//.sbt/.lib/0.12.2/sbt-launch.jar" 36 | 37 | Run the following commands, inserting your username after /home/ in each path. 38 | 39 | {%highlight sh %} 40 | mkdir -p /home//.sbt/.lib/0.12.2/ 41 | wget -O /home//.sbt/.lib/0.12.2/sbt-launch.jar 42 | http://typesafe.artifactoryonline.com/typesafe/ivy-releases/ 43 | org.scala-sbt/sbt-launch/0.12.2/sbt-launch.jar 44 | {% endhighlight %} 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /_drafts/spark-from-scratch-part-1.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: post 3 | title: "Spark from Scratch - Part 1" 4 | categories: spark spark-from-scratch 5 | --- 6 | This is a series of blog posts, where I am sharing my experience of learning Apache 7 | spark from its source code. 8 | 9 | ##Why? 10 | 11 | I have used spark for over a year now and really like its simplicity. But I want to understand 12 | it from the low level. Understanding big data software is not easy as they have lakhs of lines of 13 | source code. But spark is different. It has an elegant small core on which most of the functionality 14 | is built. 
In one of the talks, Matei Zaharia, core developer of spark, mentioned that the initial 15 | release, 0.1-alpha, had only 1600 lines of scala code, which is highly encouraging. 16 | 17 | So the idea will be to start exploring spark from the 0.1-alpha release and build my knowledge 18 | with subsequent releases. 19 | 20 | ##Setup 21 | 22 | This is the first post in the series. In this post I am going to discuss how to set up the 23 | environment for exploring spark code. 24 | 25 | Follow the steps below to set up 26 | 27 | * ####Clone the source code 28 | git clone https://github.com/apache/spark.git 29 | * ####Build the master 30 | export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m" 31 | mvn -DskipTests clean package 32 | * ####Checkout 0.1-alpha 33 | git checkout -b firstrelease alpha-0.1 34 | 35 | 36 | Now you will be in the first release branch. 37 | The source code will be in the src folder and the classes will be in the build folder 38 | 39 | * ####Install Scala 2.8 40 | 41 | The spark alpha-0.1 version uses Scala 2.8. So download it from [here](http://www.scala-lang.org/download/2.8.0.final.html). 42 | Set the SCALA_HOME environment variable to point to Scala 2.8 43 | 44 | * ####Build 45 | 46 | Run 47 | make 48 | to build. Also run 49 | make jar 50 | to generate two jar files in the build folder. One is Spark.jar which contains all the compiled 51 | spark code and the other is spark-dep.jar which contains the dependencies. 52 | 53 | If you are able to run all the above steps then you are ready to explore the source code. 
54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /_drafts/understanding-spark-connect-6.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: post 3 | title: "Understanding Spark Connect API - Part 6: Dataframe Sharing Across Scala and Python" 4 | date : 2023-10-11 5 | categories: scala python spark spark-connect 6 | --- 7 | 8 | In the 3.4 version, Apache Spark has released a new client/server-based API called Spark Connect. This API will help in improving how we develop and deploy Spark applications. 9 | 10 | In this series of blogs, we are going to explore various functionalities exposed by the spark-connect API. This is the sixth post in the series, where we will discuss sharing a dataframe between different spark sessions across Scala and Python. You can read all the posts in the series [here](/categories/spark-connect). 11 | 12 | 13 | ## Reading Serialized Plan in Python 14 | 15 | In the last post, we serialised a proto buffer plan to a file from Scala. The same plan can be read from python and loaded into a protocol buffer. 16 | 17 | {% highlight python %} 18 | f = open("filepath", mode="rb") 19 | plan = f.read() 20 | {% endhighlight %} 21 | 22 | In the above code, we open the serialised file as a binary file in python. Here we read the content of the file into the plan variable as bytes. 23 | 24 | Then we will parse the binary content into a proto buffer plan using the below code. 25 | 26 | {% highlight python %} 27 | 28 | import pyspark.sql.connect.proto as proto 29 | protoPlan = proto.Plan() 30 | protoPlan.ParseFromString(plan) 31 | 32 | {% endhighlight %} 33 | 34 | In the above code, we create an empty proto object and then, using the *ParseFromString* method of protobuffers, we read the binary content into that object. 35 | 36 | Below is the content of protoPlan when we print it. 
37 | 38 | {% highlight javascript %} 39 | 40 | root { 41 | common { 42 | plan_id: 2 43 | } 44 | filter { 45 | input { 46 | common { 47 | plan_id: 1 48 | } 49 | local_relation { 50 | data: "\377\377\377\377 # truncated for output here" 51 | schema: "{\"type\":\"struct\",\"fields\": 52 | [{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}, 53 | {\"name\":\"age\",\"type\":\"integer\",\"nullable\":false,\"metadata\":{}}, 54 | {\"name\":\"salary\",\"type\":\"double\",\"nullable\":false,\"metadata\":{}}]}" 55 | } 56 | } 57 | condition { 58 | expression_string { 59 | expression: "salary > 60000" 60 | } 61 | } 62 | } 63 | } 64 | 65 | {% endhighlight %} 66 | 67 | As you can observe, it's the same plan we had in earlier example. So we have successfully read the protobuffer plan to python proto object. 68 | 69 | 70 | ## Converting Protobuffer Plan to DataFrame 71 | 72 | Once we have the plan, we can use spark-connect client to execute the plan and get the result as pandas dataframe. 73 | 74 | {% highlight python %} 75 | 76 | client = spark._client 77 | 78 | {% endhighlight %} 79 | 80 | In the above code, we get handle to spark-connect client from the spark session. 81 | 82 | Then we can use below code to convert the plan to a pandas dataframe. 
Here the spark-connect client will send the derserliaised dataframe plan to server and fetches the result 83 | 84 | -------------------------------------------------------------------------------- /_plugins/.jekyll_lunr_js_search.rb.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_plugins/.jekyll_lunr_js_search.rb.swp -------------------------------------------------------------------------------- /_posts/.2015-05-26-analysing-csv-data-in-spark.md.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_posts/.2015-05-26-analysing-csv-data-in-spark.md.swp -------------------------------------------------------------------------------- /_posts/.2016-07-03-introduction-to-spark-two-part-7.md.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_posts/.2016-07-03-introduction-to-spark-two-part-7.md.swp -------------------------------------------------------------------------------- /_posts/.2016-10-21-statistical-data-exploration-spark-part-1.md.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_posts/.2016-10-21-statistical-data-exploration-spark-part-1.md.swp -------------------------------------------------------------------------------- /_posts/.2016-11-22-statistical-data-exploration-spark-part-3.md.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_posts/.2016-11-22-statistical-data-exploration-spark-part-3.md.swp 
-------------------------------------------------------------------------------- /_posts/.2017-02-17-scaling-spark-with-kubernetes-part-3.md.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_posts/.2017-02-17-scaling-spark-with-kubernetes-part-3.md.swp -------------------------------------------------------------------------------- /_posts/.2017-02-23-scaling-spark-with-kubernetes-part-4.md.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_posts/.2017-02-23-scaling-spark-with-kubernetes-part-4.md.swp -------------------------------------------------------------------------------- /_posts/.2017-02-26-scaling-spark-with-kubernetes-part-5.md.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_posts/.2017-02-26-scaling-spark-with-kubernetes-part-5.md.swp -------------------------------------------------------------------------------- /_posts/.2017-02-27-scaling-spark-with-kubernetes-part-6.md.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_posts/.2017-02-27-scaling-spark-with-kubernetes-part-6.md.swp -------------------------------------------------------------------------------- /_posts/.2017-03-06-scaling-spark-with-kubernetes-part-7.md.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_posts/.2017-03-06-scaling-spark-with-kubernetes-part-7.md.swp -------------------------------------------------------------------------------- 
/_posts/.2017-04-13-migrating-to-spark-two-part-1.md.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_posts/.2017-04-13-migrating-to-spark-two-part-1.md.swp -------------------------------------------------------------------------------- /_posts/.2017-04-15-migrating-to-spark-two-part-2.md.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_posts/.2017-04-15-migrating-to-spark-two-part-2.md.swp -------------------------------------------------------------------------------- /_posts/.2017-05-02-scaling-spark-with-kubernetes-part-8.md.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_posts/.2017-05-02-scaling-spark-with-kubernetes-part-8.md.swp -------------------------------------------------------------------------------- /_posts/.2017-05-08-migrating-to-spark-two-part-3.md.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_posts/.2017-05-08-migrating-to-spark-two-part-3.md.swp -------------------------------------------------------------------------------- /_posts/.2017-06-07-migrating-to-spark-two-part-10.md.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_posts/.2017-06-07-migrating-to-spark-two-part-10.md.swp -------------------------------------------------------------------------------- /_posts/.2017-06-09-migrating-to-spark-two-part-6.md.swp: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_posts/.2017-06-09-migrating-to-spark-two-part-6.md.swp -------------------------------------------------------------------------------- /_posts/.2017-08-06-introduction-to-spark-structured-streaming-part-3.md.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_posts/.2017-08-06-introduction-to-spark-structured-streaming-part-3.md.swp -------------------------------------------------------------------------------- /_posts/.2017-08-11-introduction-to-spark-structured-streaming-part-5.md.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_posts/.2017-08-11-introduction-to-spark-structured-streaming-part-5.md.swp -------------------------------------------------------------------------------- /_posts/.2018-03-13-spark-vector-to-numpy.md.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_posts/.2018-03-13-spark-vector-to-numpy.md.swp -------------------------------------------------------------------------------- /_posts/.2018-04-24-spark-datasource-v2-part-4.md.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_posts/.2018-04-24-spark-datasource-v2-part-4.md.swp -------------------------------------------------------------------------------- /_posts/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/_posts/.DS_Store 
-------------------------------------------------------------------------------- /_posts/2017-12-19-class-imbalance-part-1.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: post 3 | title: "Class Imbalance in Credit Card Fraud Detection - Part 1 : Understanding Effect on Model Accuracy" 4 | date : 2017-12-19 5 | categories: scala spark datascience class-imbalance python 6 | --- 7 | Whenever we do classification in ML, we often assume that the target label is evenly distributed in our dataset. This helps the training algorithm to learn the features as we have enough examples for all the different cases. For example, in learning a spam filter, we should have a good amount of data corresponding to both spam and non-spam emails. 8 | 9 | This even distribution is not always possible. Let's take the example of fraud detection. Fraud detection is a use case where, by looking at a transaction, we need to decide whether the transaction is fraudulent or not. In the majority of cases, the transaction will be normal. So the amount of data for fraudulent transactions is very small compared to normal ones. In these cases, there will be an imbalance in the target labels. This will affect the quality of the models we can build. So in this series of posts we will discuss what class imbalance is and how to handle it in python and spark. 10 | 11 | This is the first post in the series, where we discuss class imbalance and its effect on classification model accuracy. You can read all the blogs in the series [here](/categories/class-imbalance). 12 | 13 | ## Data 14 | 15 | For our example, we will use credit card fraud data. This data has more than 30 variables about each transaction and a target column **Class** which signifies whether the given transaction is fraud or not. You can learn more about the data on [kaggle](https://www.kaggle.com/dalpozz/creditcardfraud). 16 | 17 | ## Class Imbalance 18 | 19 | Let's plot the distribution of the target label using seaborn. 
20 | 21 | {% highlight python %} 22 | sns.countplot(x='Class', data=df) 23 | {% endhighlight %} 24 | 25 | The distribution looks as below 26 | 27 | ![Distribution image](/images/credit_card_class_plot.png) 28 | 29 | As you can observe from the plot, we have far more 0s (non-fraud) than 1s (fraud). This kind of imbalance in the target variable is known as class imbalance. 30 | 31 | 32 | ## Classification on Imbalanced Data 33 | 34 | Let's run logistic regression to do the classification on this imbalanced data. 35 | 36 | 37 | ### Data Preparation 38 | 39 | Before running the algorithm, we need to normalise the *Amount* column and then drop the *Time* column. 40 | 41 | {% highlight python %} 42 | 43 | from sklearn.preprocessing import StandardScaler 44 | from sklearn.cross_validation import train_test_split 45 | 46 | df['normal_amount'] = StandardScaler().fit_transform(df['Amount'].values.reshape(-1,1)) 47 | df = df.drop(['Amount','Time'], axis=1) 48 | X = df.loc[:,df.columns != 'Class'] 49 | y = df.loc[:,df.columns == 'Class'] 50 | {% endhighlight %} 51 | 52 | 53 | ### Logistic Regression 54 | 55 | Now we will run logistic regression 56 | 57 | {% highlight python %} 58 | 59 | X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.3, random_state = 0) 60 | 61 | # Calculate the recall score for logistic Regression on Skewed data 62 | from sklearn.linear_model import LogisticRegression 63 | from sklearn.metrics import recall_score,accuracy_score 64 | lr = LogisticRegression() 65 | lr.fit(X_train,y_train) 66 | y_pred = lr.predict(X_test) 67 | print(accuracy_score(y_test,y_pred)) 68 | {% endhighlight %} 69 | 70 | Below is the output 71 | 72 | {% highlight text %} 73 | 74 | 0.999204147794 75 | 76 | {% endhighlight %} 77 | 78 | From the above result, you may be happy that the algorithm is performing extremely well. But that's not true. As most of the labels are 0, even always guessing 0 79 | gives you 99% accuracy. 
So we need a better measure to understand the performance of the model. 80 | 81 | 82 | ### Recall 83 | 84 | Recall measures the ability of the model to predict correctly for a given label. In our case, we want to test how accurately the model can recall fraud cases, as that is what we are interested in. We can calculate it using *recall_score*. 85 | 86 | {% highlight python %} 87 | 88 | print(recall_score(y_test,y_pred,average=None)) 89 | 90 | {% endhighlight %} 91 | 92 | In the above code, we are calculating recall. Setting the *average* parameter to None makes sure that recall is returned individually for each label. The output will be 93 | 94 | {% highlight text %} 95 | 96 | [ 0.99985931 0.61904762] 97 | 98 | {% endhighlight %} 99 | 100 | As you can observe from the results, the recall for 1.0 is only 0.61904762 compared to 99% for 0. So our model is not doing a good job of recognising frauds. This shows how imbalanced data is affecting the accuracy of the model. 101 | 102 | You can access the complete code as a python notebook on [github](https://github.com/phatak-dev/spark-ml-kaggle/blob/master/python/credit_card_class_imbalance.ipynb) or as a live notebook on [kaggle](https://www.kaggle.com/madhukaraphatak/under-sampling-to-achieve-better-recall). 103 | 104 | 105 | ## Conclusion 106 | 107 | In this post we understood what class imbalance is and how it affects the accuracy of the model. 108 | 109 | ## What's Next? 110 | 111 | In our next post, we will discuss how to handle class imbalance in python. 112 | 113 | -------------------------------------------------------------------------------- /addtopages.sh: -------------------------------------------------------------------------------- 1 | cp -r _site/* . && rm -rf _site/ && touch .nojekyll 2 | git status 3 | git add . 
4 | git commit 5 | -------------------------------------------------------------------------------- /categories/akka/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: akka 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 |
23 | Madhukar's Blog 24 | 40 |
41 |
42 | 43 | 44 |
45 |
46 |

Category: akka

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/androidone/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: androidone 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: androidone

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/apache/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: apache 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: apache

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/apple/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: apple 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: apple

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/art/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: art 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: art

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/beam/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: beam 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: beam

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/browser/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: browser 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: browser

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/c/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: c 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: c

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/chrome/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: chrome 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: chrome

47 | 54 |
55 |
56 | 57 | 114 | 115 | 116 | 117 | 124 | 125 | 126 | 127 | -------------------------------------------------------------------------------- /categories/clickhouse/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: clickhouse 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: clickhouse

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/cplusplus/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: cplusplus 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: cplusplus

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/design/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: design 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: design

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/devday/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: devday 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: devday

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/developer/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: developer 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: developer

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/exploring-langgraph/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: exploring-langgraph 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: exploring-langgraph

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/firefox/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: firefox 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: firefox

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/flink-series/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: flink-series 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: flink-series

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/functional-programming/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: functional-programming 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: functional-programming

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/gplus/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: gplus 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: gplus

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/hacking/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: hacking 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: hacking

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/html5/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: html5 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: html5

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/kaggle/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: kaggle 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: kaggle

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/langgraph/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: langgraph 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: langgraph

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/llm/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: llm 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: llm

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/machine-learning/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: machine-learning 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: machine-learning

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/nectar/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: nectar 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: nectar

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/spark-summit-east-2016/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: spark-summit-east-2016 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: spark-summit-east-2016

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/testing/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: testing 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: testing

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/workshop/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: workshop 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: workshop

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /categories/yarn/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Category: yarn 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 41 |
42 | 43 | 44 |
45 |
46 |

Category: yarn

47 | 52 |
53 |
54 | 55 | 112 | 113 | 114 | 115 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /css/app.css: -------------------------------------------------------------------------------- 1 | .category{ 2 | color: #aaaaaa; 3 | } 4 | iframe.embed { 5 | width: 81%; 6 | height: 25em; 7 | } 8 | /*.site-nav { 9 | line-height: 10px; 10 | }*/ 11 | 12 | a .category:hover 13 | { 14 | color:#2DE6CF; 15 | } 16 | 17 | .excerpt { 18 | font-style: italic; 19 | font-size: medium; 20 | font-family: serif; 21 | margin-top: 1%; 22 | } 23 | body { 24 | font-family: "Open Sans", Arial, sans-serif; 25 | } 26 | 27 | pre { 28 | white-space: pre-wrap; 29 | } 30 | 31 | pre, pre > code { 32 | /* Make more code fit on small screens */ 33 | font-size: 12px !important; 34 | } 35 | 36 | img{ 37 | display: block; 38 | max-width: 100%; 39 | height: auto; 40 | } 41 | 42 | @media only screen and (min-width: 481px) { 43 | pre, pre > code { 44 | /* Bigger font on bigger screens */ 45 | font-size: 16px !important; 46 | } 47 | } 48 | 49 | .related { 50 | margin-top : 5%; 51 | } 52 | 53 | .related h2{ 54 | font-weight: bold; 55 | display: block; 56 | font-size: 100%; 57 | margin-bottom: 2%; 58 | } 59 | .related ul { 60 | list-style-type: none; 61 | } 62 | .related ul li { 63 | line-height: 1.75em; 64 | } 65 | .related span { 66 | color: #aaa; 67 | font-family: Monaco, "Courier New", monospace; 68 | font-size: 80%; 69 | text-align : left; 70 | } 71 | 72 | .post-content table { 73 | border-collapse: collapse; 74 | width: 100%; 75 | } 76 | 77 | .post-content table th,td { 78 | padding: 6px 13px; 79 | border: 1px solid #ddd; 80 | } 81 | 82 | .post-content { 83 | margin-bottom : 5%; 84 | } 85 | /* Youtube responsive */ 86 | .video-container { 87 | position: relative; 88 | padding-bottom: 56.25%; 89 | padding-top: 30px; 90 | height: 0; 91 | overflow: hidden; } 92 | .video-container iframe, 93 | .video-container object, 94 | .video-container embed { 95 | position: 
absolute; 96 | top: 0; 97 | left: 0; 98 | width: 100%; 99 | height: 100%; } 100 | -------------------------------------------------------------------------------- /css/long_heading.css: -------------------------------------------------------------------------------- 1 | .post-content h3{ 2 | line-height: 1.2; 3 | } 4 | -------------------------------------------------------------------------------- /favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/favicon.ico -------------------------------------------------------------------------------- /images/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/.DS_Store -------------------------------------------------------------------------------- /images/aqe/nonoptimisedgroupbydag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/aqe/nonoptimisedgroupbydag.png -------------------------------------------------------------------------------- /images/aqe/nonoptimisegroupbyjobs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/aqe/nonoptimisegroupbyjobs.png -------------------------------------------------------------------------------- /images/aqe/optimisedgroupbydag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/aqe/optimisedgroupbydag.png -------------------------------------------------------------------------------- /images/aqe/optimisedgroupbyjobs.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/aqe/optimisedgroupbyjobs.png -------------------------------------------------------------------------------- /images/combinedlogisticregression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/combinedlogisticregression.png -------------------------------------------------------------------------------- /images/credit_card_class_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/credit_card_class_plot.png -------------------------------------------------------------------------------- /images/firstlogisticregression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/firstlogisticregression.png -------------------------------------------------------------------------------- /images/histogram_in_r.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/histogram_in_r.png -------------------------------------------------------------------------------- /images/histogram_lifexp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/histogram_lifexp.png -------------------------------------------------------------------------------- /images/hpa/spark-master-auto-scale.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/hpa/spark-master-auto-scale.png -------------------------------------------------------------------------------- /images/hpa/spark-master-single-slave.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/hpa/spark-master-single-slave.png -------------------------------------------------------------------------------- /images/inmemoryscan/withname.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/inmemoryscan/withname.png -------------------------------------------------------------------------------- /images/inmemoryscan/withoutname.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/inmemoryscan/withoutname.png -------------------------------------------------------------------------------- /images/inmemoryscanname.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/inmemoryscanname.png -------------------------------------------------------------------------------- /images/joinhint/broadcasthint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/joinhint/broadcasthint.png -------------------------------------------------------------------------------- /images/joinhint/cartesianhint.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/joinhint/cartesianhint.png -------------------------------------------------------------------------------- /images/joinhint/shufflehint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/joinhint/shufflehint.png -------------------------------------------------------------------------------- /images/joinhint/sortmergehint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/joinhint/sortmergehint.png -------------------------------------------------------------------------------- /images/langgraph/hello_world_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/langgraph/hello_world_graph.png -------------------------------------------------------------------------------- /images/mesos_output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/mesos_output.png -------------------------------------------------------------------------------- /images/mesos_task_run.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/mesos_task_run.png -------------------------------------------------------------------------------- /images/nestedfolder.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/nestedfolder.png -------------------------------------------------------------------------------- /images/pandasonspark/pandasplan: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/pandasonspark/pandasplan -------------------------------------------------------------------------------- /images/pursuit_of_unknown_book_cover.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/pursuit_of_unknown_book_cover.jpg -------------------------------------------------------------------------------- /images/readitlater_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/readitlater_architecture.png -------------------------------------------------------------------------------- /images/rest_lamp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/rest_lamp.png -------------------------------------------------------------------------------- /images/rest_mean.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/rest_mean.png -------------------------------------------------------------------------------- /images/rss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/rss.png 
-------------------------------------------------------------------------------- /images/sandbox_screen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/sandbox_screen.png -------------------------------------------------------------------------------- /images/secondlogisticregression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/secondlogisticregression.png -------------------------------------------------------------------------------- /images/spark-ui-kube.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/spark-ui-kube.png -------------------------------------------------------------------------------- /images/spark_connect/spark_connect_spark_ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/spark_connect/spark_connect_spark_ui.png -------------------------------------------------------------------------------- /images/spark_connect/spark_ui_after_helloworld.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/spark_connect/spark_ui_after_helloworld.png -------------------------------------------------------------------------------- /images/under_sample_class_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phatak-dev/blog/7025515776abe8e1686d5148bc4551d662fa02b6/images/under_sample_class_plot.png 
-------------------------------------------------------------------------------- /upgrade-to-jekyll-4.txt: -------------------------------------------------------------------------------- 1 | 1. Install latest ruby and Jekyll 2 | 2. Install "gem install jekyll-paginate" 3 | 3. Install "gem install webrick" 4 | --------------------------------------------------------------------------------