├── 2015 ├── ideas-list-julia.md ├── ideas-list-swc.md ├── ideas-list.md ├── ideas │ └── skeleton.md ├── proposals │ ├── Amit.md │ ├── agarwal-darshan.md │ ├── agarwal-rajat.md │ ├── akshay.md │ ├── banaszkiewicz-piotr-amy.md │ ├── banaszkiewicz-piotr-server.md │ ├── baweja-prabh.md │ ├── cum_enrolment_workshops.png │ ├── cum_workshops.png │ ├── girdhar-yashasvi.md │ ├── ian-henriksen.md │ ├── li-yanghao.md │ ├── pankaj-kumar.md │ ├── schamnad-sanjeed.md │ └── skeleton.md └── wrap-up-blog-post.md ├── 2016 ├── ideas-list-biocore.md ├── ideas-list-dynd.md ├── ideas-list-ecodata-retriever.md ├── ideas-list-juliaopt.md ├── ideas-list-juliaquantum.md ├── ideas-list-matplotlib.md ├── ideas-list-swc.md ├── ideas-list.md ├── ideas-skeleton.md └── proposals │ ├── .gitkeep │ ├── Ayush_Pandey.ipynb │ ├── Ayush_Pandey.md │ ├── Ayush_Pandey.pdf │ ├── Ramchandran.pdf │ ├── Ramchandran.tex │ ├── Roger-luo-proposal-for-JuliaQuantum.md │ ├── aizenman-hannah.md │ ├── bhargav-srinivasa.md │ ├── blessing-chick.md │ ├── chris-medrela.md │ ├── deng-pan.md │ ├── dsquareindia-gensim-proposal.md │ ├── filip-ter.md │ ├── iso-hayate.md │ ├── maurya-kumar-raj.md │ ├── missfont.log │ ├── narayan-aditya-visualizer.md │ ├── narayan-aditya-visualizer.png │ ├── patrick-kofod-mogensen.md │ ├── proposal.md │ ├── rai-vivek.md │ ├── result-aggregation-server-garg-prerit.md │ ├── singh-shubham.md │ ├── skeleton.md │ ├── stolliker-ryan.md │ ├── tucek-vit.md │ ├── verma-nikhil.md │ └── vora-meet.md ├── 2017 ├── accepted_student_blogs.md ├── gensim_proposal.pdf ├── ideas-list-fenics.md ├── ideas-list-matplotlib.md ├── ideas-list-skeleton.md ├── ideas-list-stan.md ├── ideas-list.md └── proposals │ ├── .gitkeep │ ├── Chinmaya_Pancholi.md │ ├── Chinmaya_Pancholi.pdf │ ├── Data_Retriever_Shivam_Negi.docx │ ├── Data_Retriever_Shivam_Negi.pdf │ ├── Parul-Sethi.md │ ├── ahmed_shahnawaz.md │ ├── amritanshu-jain_data-retriever.mdown │ ├── bansal-utkarsh.md │ ├── bhargav-srinivasa.md │ ├── bill-engels-pymc3-proposal.md │ ├── chait-kaitlyn.md │ ├── dushatskiy-arkadiy.md │ ├── ferres_pymc3_vi.md │ ├── ferres_pymc3_vi.pdf │ ├── karkada-ashok-alekh.md │ ├── kumar-kapil.md │ ├── kvnamipara_data_retriever.mdown │ ├── le-hoang.md │ ├── markus-beuckelmann.pdf │ ├── michal-habera.md │ ├── patni-harshit.md │ ├── prakhar_gsoc_17.md │ ├── prakhar_gsoc_17.pdf │ ├── proposal.pdf │ ├── proposal_gensim.md │ ├── pymc3-proposal-float32-gp.md │ ├── ranasinghe-kanchana.md │ ├── saparina-irina.md │ ├── shannon.md │ ├── vachher-shubh.md │ └── yashchuk-ivan.md ├── 2018 ├── ideas-list-fenics.md ├── ideas-list-shogun.md ├── ideas-list-stan.md ├── ideas-list.md └── proposals │ └── .gitkeep ├── 2019 └── ideas-list.md ├── 2020 └── ideas-list.md ├── 2021 └── ideas-list.md ├── 2022 └── ideas-list.md ├── 2023 └── ideas-list.md ├── 2024 └── ideas-list.md ├── 2025 └── ideas-list.md ├── .gitignore ├── CONTRIBUTING-mentors.md ├── CONTRIBUTING-students.md ├── CONTRIBUTING.md ├── PULL_REQUEST_TEMPLATE ├── README.md ├── img ├── CVXPY-logo.png ├── NumFocus_LRG.png ├── PyBaMM.png ├── aeon.png ├── aiida.png ├── arviz.png ├── bambi.png ├── biocommons.png ├── blosc.png ├── cantera-logo.png ├── chainer-logo.png ├── clawpack.png ├── colour.png ├── conda_forge.png ├── cupy-logo.png ├── dash.png ├── dask.png ├── discover-cookbook.png ├── ecodata-retriever.png ├── econark.png ├── equadratures.png ├── fenics.png ├── flux.png ├── geopandas.png ├── gridap.png ├── igraph.png ├── jump.png ├── jupyter.png ├── lfortran.png ├── logo-gensim_large.png ├── matplotlib.png ├── mdanalysis.png ├── 
mesa.png ├── mlpack.png ├── mpm.png ├── networkx.png ├── nteract.png ├── openfhe_logo.png ├── optimagic_logo.png ├── optuna.png ├── pvlib.png ├── pymc_logo.png ├── pysal_logo.png ├── pytorchignite-logo.png ├── qutip.png ├── sciml.png ├── shogun-logo.png ├── signac.png ├── spyder.png ├── stan-logo.png ├── yellowbrick.png ├── yt-logo.png └── zarr.png ├── organization ├── README.md ├── message-to-accepted-students.md ├── message-to-rejected-students.md ├── operations.md ├── profile.md ├── stipend.md ├── summit.md └── team.md └── templates ├── ideas-page.md └── proposal.md /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | .DS_Store 3 | Thumbs.db 4 | -------------------------------------------------------------------------------- /2015/ideas-list-julia.md: -------------------------------------------------------------------------------- 1 | # Julia 2 | 3 | ## JuliaWeb: The Networking / Web framework for the [Julia Programming Language](http://www.julialang.org). 4 | 5 | **Please ask questions [here](https://github.com/swcarpentry/gsoc2015/issues/9)** 6 | or [here](https://github.com/JuliaWeb/Roadmap/issues). 7 | 8 | Julia is a high-level, high-performance dynamic programming language for technical computing and has seen significant adoption in various fields/areas of practice. One of the current deficiencies in the language is the lack of a robust networking / data exchange framework for obtaining and exporting data using standard network protocols. Existing efforts have relied on unstable/minimally-supported external packages; as a result, there is an underlying fragility to the network and web stacks. 9 | 10 | We would like to improve the robustness of the Julia network/web framework by initially focusing on the sound implementation of a TLS package that would underpin secure communications between Julia programs and the general internet community. It is our hope that the TLS package would be the official way to develop secure higher level network interfaces such as LDAPS, HTTPS, FTPS, etc. The non-secure versions of these interfaces/protocols will also need some work. 11 | 12 | ### Technical Details 13 | 14 | Experience with networking and network protocols from OSI layers 3 (Network) through 7 (Application) will be required, as will an ability to read and understand relevant internet standards (RFCs). Familiarity with both Julia and C / C++ will also be required. Formal software development and support experience would be beneficial but is not a firm requirement. 
15 | 16 | ### Mentors 17 | 18 | * @sbromberger 19 | * (a few more pending) 20 | 21 | ### Acknowledgements 22 | 23 | * The entire Julia core team 24 | * The members of [JuliaWeb](https://github.com/JuliaWeb) 25 | -------------------------------------------------------------------------------- /2015/ideas-list.md: -------------------------------------------------------------------------------- 1 | # Ideas Pages 2 | 3 | - Julia: https://github.com/numfocus/gsoc/blob/master/2015/ideas-list-julia.md 4 | - Software Carpentry: https://github.com/numfocus/gsoc/blob/master/2015/ideas-list-swc.md 5 | - SymPy: https://github.com/sympy/sympy/wiki/GSoC-2015-Ideas 6 | -------------------------------------------------------------------------------- /2015/ideas/skeleton.md: -------------------------------------------------------------------------------- 1 | # Title 2 | 3 | ## Abstract 4 | 5 | ## Technical Details 6 | 7 | ## Open Source Development Experience 8 | -------------------------------------------------------------------------------- /2015/proposals/agarwal-rajat.md: -------------------------------------------------------------------------------- 1 | # Add taxonomic name resolution to the EcoData Retriever to facilitate data-intensive ecology 2 | 3 | ## Abstract 4 | 5 | The EcoData Retriever is a Python based tool for automatically downloading, cleaning up, and restructuring ecological data. It does the data munging so that the users can focus on using the data. 6 | In ecological and evolutionary data, the names of species are constantly being redefined - This makes it difficult to combine datasets. By automating reconciliation of different species names as part of the process of accessing the data in the first place it will be easier to combine diverse datasets and in new and interesting ways. 7 | 8 | ## Technical Details 9 | 10 | The project would consists of the following components: 11 | 12 | 1. Querying the taxonomy name resolution service: 13 | APIs of the name resolution services have to be used to fetch the possible names using Python. The name of the species would be the input to the APIs and their possible names would be returned. This will be a separate module for the purpose of reusability. As planned, as of now pytaxize would be used to fetch the possible names. 14 | 15 | 2. Deciding on the best name 16 | From the possible names returned from different services, an algorithm to determine the best name has to be developed. This would include giving different weights to parameters like reliability of source, consistency with other services and other parameters which need to be thought of. 17 | 18 | 3. Updating the database 19 | Once the decision about the best name is made, this component will update the database with the best name and its location. 20 | 21 | 4. Building control flow 22 | This component would allow the user to choose whether to run name resolution or not. Also, name resolution for the species should not run if the species name is not present in the database. This will act as a bridge between the UI-Database module and the Name Resolution module. 23 | 24 | 25 | ## Schedule of Deliverables 26 | 27 | Before the coding period begins, I intend to do the following: 28 | 1. Get more familiar with the existing code base. 29 | 2. Finding the best name resolution services which are to be used. 30 | 3. Making the design decisions of various modules with the mentors. 
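As a concrete illustration of the "Querying the taxonomy name resolution service" component described under Technical Details, a minimal sketch is shown below. It assumes the 2015-era module-level `pytaxize.gnr_resolve()` call against the Global Names Resolver; the exact keys of the returned records (for example `canonical_form` and `score`) vary between pytaxize versions, so treat them as placeholders.

```python
# Hedged sketch of the querying module, not production Retriever code.
# Assumes pytaxize.gnr_resolve() accepts a species name and returns a
# list of candidate match records for it.
import pytaxize

def fetch_candidates(species_names):
    """Return {submitted name: list of candidate match records}."""
    candidates = {}
    for name in species_names:
        try:
            results = pytaxize.gnr_resolve(name)
        except Exception:              # service hiccup: record an empty result
            results = []
        candidates[name] = results or []
    return candidates

if __name__ == "__main__":
    print(fetch_candidates(["Helianthus annus", "Poa annua"]))
```

Keeping this lookup in its own module, as proposed, means the same function can later be pointed at additional services without touching the rest of the Retriever.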
31 | 32 | ### May 25th - June 7th 33 | 34 | Getting the list of names, by interfacing with the different APIs, that are to be given as input to the best name deciding algorithm. 35 | 36 | ### June 8th - June 21st 37 | 38 | Developing the best name deciding algorithm. 39 | 40 | ### June 22nd - July 5th 41 | 42 | Updating the database with the best name. 43 | 44 | ### July 6th - July 19th 45 | 46 | Building the control flow option. 47 | 48 | ### July 20th - August 2rd 49 | 50 | Complete the control flow and start fixing bugs. 51 | 52 | ### August 3rd - August 16th 53 | 54 | Make the code more efficient and robust with documentation. Fix more bugs. 55 | 56 | ### August 17th - August 21st 19:00 UTC 57 | 58 | Week to scrub code, write tests, improve documentation, etc. 59 | 60 | ## Future works 61 | 62 | 1. Adding more reliable name resolution APIs. 63 | 2. Improving the accuracy of the best name deciding algorithm. 64 | 3. Bug fixing the existing and adding more engines for different formats of data. For example - JSON, XML etc. 65 | 4. Adding more features to the user interface like giving the option to the user to update the names from a specific source. 66 | 5. Support for the existing code base. 67 | 68 | ## Open Source Development Experience 69 | 70 | I'm afraid that I have no experience contributing to open source projects (this would be my first!) but have used many open source tools and modified their code to accomplish different tasks. This has also involved reading and getting familiar with moderate sized code bases and development practices. 71 | 72 | I am currently working with Prof. Sharad Goel from Stanford University doing research in Data Science and collaborating with other researchers using GitHub. 73 | 74 | I have also worked with a start-up from MIT Media Lab which involved modifying and tweaking open source tools and collaborating with other developers in the team using GitHub (The repository is private). 75 | 76 | I also like to use Git for version control and put up any projects that I do on my GitHub account. 77 | 78 | 79 | ## Academic Experience 80 | 81 | Institute: BITS Pilani - Goa Campus, India 82 | Majors: B.E.(Hons.) Computer Science, M.Sc.(Hons.) Chemistry 83 | Year of Study: Sophomore 84 | 85 | Coursework and projects are listed in my resume at - http://rajatagarwal.me/Resume.pdf 86 | 87 | 88 | ## Why this project? 89 | 90 | Pursuing majors in both science and engineering has often motivated me to bring the two fields together. Given my development experience, I'm pretty excited about contributing to this project as it helps me use my engineering skills (Computer Science) benefit scientific research. -------------------------------------------------------------------------------- /2015/proposals/akshay.md: -------------------------------------------------------------------------------- 1 | # Add taxonomic name resolution to the EcoData Retriever to facilitate data intensive approaches to ecology. 2 | 3 | ## Abstract 4 | 5 | The EcoData Retriever is an engine which automates the tasks of finding, 6 | downloading, and cleaning up ecological data files, and then stores them in a 7 | local database of your choice. The program cleans and munges the datasets and 8 | formats them before inserting it into your database. 9 | 10 | This project will tackle the problem of resolving scientific names stored 11 | inconsistently across different datasets. 
This would allow the users to more 12 | easily combine different datasets and answer scientific questions which was not 13 | possible with the use of single datasets alone. 14 | 15 | ## Technical Details 16 | There are three main components in the project: 17 | 18 | * Accessing and incorporating the resolution service into the retriever 19 | 20 | * Building the control flow logic for querying 21 | 22 | * Updating the user interfaces 23 | 24 | **Accessing and incorporating the resolution service into the retriever**: 25 | 26 | The main component of this project would be to build a system to resolve a 27 | species name by sending it to one of many taxonomic name resolution services like 28 | [Global Names Resolver](http://resolver.globalnames.org/) which is already supported by pytaxize. 29 | and processing the result to determine the best name. This can be done by 30 | incorporating the feature in the retriever directly or using the already 31 | existing library [pytaxize](https://github.com/sckott/pytaxize) in the retriever. 32 | I would propose to use pytaxize and use that in the retriever as this will 33 | ensure modularity and help develop pytaxize in the process so that it becomes a 34 | complete port of its R counterpart [taxize](https://github.com/ropensci/taxize). 35 | This will involve a fair amount of work as different name resolution services 36 | have to be tested by running variety of data and have to compared for the 37 | results. And a fair amount of thought has to be given for cases wherein two 38 | different services give two equally good results. 39 | 40 | **Building the control flow logic for querying**: 41 | 42 | The second component of this project would be to restrict the running of 43 | resolution depending on the type of data. This can be done by adding a separate 44 | column in the scripts so that it contains the species name. We will make sure 45 | that the resolution will not run in the absence of the species name information in the table. 46 | 47 | **Updating the user interfaces**: 48 | 49 | The third component involves updating the user interface of the retriever to 50 | felicitate users to perform Taxonomic name resolution. 51 | 52 | **Additional Components**: 53 | * Developing pytaxize 54 | * Adding more datasets(There are a lot of dataset requests in the issues section) 55 | 56 | ## Schedule of Deliverables 57 | 58 | This timeline has been made keeping in mind the 40 hour requirement.I am not 59 | planning on taking any vacations during the summer and would dedicatedly work 60 | on the project. 61 | 62 | ### May 25th - June 7th 63 | 64 | Start working on pytaxize and test different Taxonomic name resolution services 65 | 66 | ### June 8th - June 21th 67 | 68 | Implement these services in pytaxise and add unit tests. 69 | 70 | ### June 22th - July 5th 71 | 72 | Build the control flow for running or not running taxonomic name resolution 73 | depending on the type of data and the users desires. 74 | 75 | ### July 6th - July 19th 76 | 77 | Update the data model and the user interfaces to work with information about 78 | species and taxonomic name resolution. 79 | 80 | ### July 20th - August 2rd 81 | 82 | Finish up UI. Make sure the entire thing works properly. Make sure that the 83 | code is well tested. 84 | 85 | ### August 3rd - August 16th 86 | 87 | * Work on open issues in the retriever like adding new datasets and fixing bugs. 88 | * Work on pytaxise to make it more robust. 
89 | 90 | ### August 17th - August 21th 19:00 UTC 91 | 92 | Week to scrub code, write tests, improve documentation, etc. 93 | 94 | ## Open Source Development Experience 95 | 96 | I participated in last year's Google Summer of Code under SymPy. I made 97 | improvements to their existing Geometry module. The project was entirely object 98 | oriented in nature. [Here](https://github.com/sympy/sympy/wiki/GSoC-2014-Application--Akshay--Geometry-Module) 99 | is the link to my application. I have blogged about my work [Here](https://www.geekpy.blogsopt.in). 100 | Apart from that I have also contributed to Scikit-Learn. 101 | 102 | I have been using Python for over two years now and I am quite comfortable working 103 | with it. Apart from that I have a decent knowledge of Java and C++.I use Ubuntu 104 | 14.04 as my work machine and have been using GIT for over a year now. 105 | 106 | ## Academic Experience 107 | 108 | I am a third year undergraduate student at Bits-Pilani, India pursuing a 109 | dual degree in B.E(Hons) Electronics and Instrumentation and M.Sc (Hons) 110 | Economics. I have taken wide range of courses from Control Systems to Probability 111 | and also courses such as Applied Econometrics which requires analysing large amounts of data. 112 | 113 | ## Why this project? 114 | 115 | I was going through the list of project ideas and found this really interesting. 116 | This project will improve the functionality of the retriever and benefit the users of the system. 117 | 118 | ## Contributions to the project 119 | 120 | ### Retriever 121 | 122 | * [281](https://github.com/weecology/retriever/pull/281) Started working on adding 123 | an XML engine to the retriever. This has helped me in gaining insights to the core 124 | codebase. 125 | 126 | * [283](https://github.com/weecology/retriever/pull/283) Removed trailing white 127 | spaces in the entire codebase. 128 | 129 | * [286](https://github.com/weecology/retriever/pull/286) Added eBird observation dataset. 130 | 131 | * [288](https://github.com/weecology/retriever/pull/288) Added a few docstrings 132 | and refactored a small code 133 | 134 | * [289](https://github.com/weecology/retriever/pull/289) Refactored the entire Json 135 | format_insert_value() 136 | 137 | ### pytaxize 138 | 139 | * [15](https://github.com/sckott/pytaxize/pull/15) Changed raise_for_status to 140 | raise_for_status() 141 | 142 | * [17](https://github.com/sckott/pytaxize/pull/17) Input validation for ubio_search 143 | 144 | * [21](https://github.com/sckott/pytaxize/pull/21) Input validation to col_children 145 | 146 | * [22](https://github.com/sckott/pytaxize/pull/22) Added tests to gni module 147 | 148 | * [23](https://github.com/sckott/pytaxize/pull/23) Tests for Ubio module 149 | 150 | * [24](https://github.com/sckott/pytaxize/pull/24) Added tests for tnrs module 151 | 152 | * [25](https://github.com/sckott/pytaxize/pull/25) Input validation for gbif_parse 153 | 154 | * [26](https://github.com/sckott/pytaxize/pull/26) Input validation for itis functions 155 | 156 | * [29](https://github.com/sckott/pytaxize/pull/29) Refactored requests code throughout 157 | the codebase. 
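As a rough, illustrative sketch of the "determine the best name" step discussed under Technical Details, the helper below picks the top-scoring candidate returned by a resolver and keeps the original name when nothing clears a threshold. The record fields (`score`, `canonical_form`) are assumptions about the resolver output rather than a documented pytaxize contract, and tie-breaking between equally good results would still need the extra rules mentioned above.

```python
# Illustrative only: choose a best resolved name from candidate records.
def best_name(original_name, candidates, min_score=0.9):
    """Return the top-scoring canonical name, or the original if none qualify."""
    usable = [c for c in candidates if c.get("score", 0) >= min_score]
    if not usable:
        return original_name
    # Ties between services or data sources would need further criteria
    # (source reliability, agreement across services, etc.).
    top = max(usable, key=lambda c: c["score"])
    return top.get("canonical_form", original_name)

if __name__ == "__main__":
    print(best_name("Helianthus annus",
                    [{"canonical_form": "Helianthus annuus", "score": 0.98}]))
```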
158 | -------------------------------------------------------------------------------- /2015/proposals/banaszkiewicz-piotr-server.md: -------------------------------------------------------------------------------- 1 | # Installation-test scripts: aggregation server, script enhancements 2 | 3 | ## Abstract 4 | 5 | While Software Carpentry workshops gather a lot of feedback regarding 6 | instructors' teaching, not much attention was drawn to students' computers and 7 | their issues with installation of open source software used during workshops. 8 | 9 | This project will focus on bringing a working server for aggregating of 10 | Software Carpentry installation testing scripts with additional goal of 11 | providing easier and better experience for students using the script itself. 12 | 13 | ## Features 14 | 15 | The server should be able to aggregate incoming (via REST API) data from 16 | installation testing script. 17 | 18 | The server should allow Software Carpentry admins to view statistics from 19 | uploaded data. 20 | 21 | Installation script should upload diagnostic data consisting of: 22 | 23 | * operating system information 24 | * installed packages and their versions 25 | * failed packages checks and error messanges 26 | 27 | Additionally, the data may be matched against workshop 28 | [reference package list](https://github.com/wking/swc-setup-installation-test/issues/2). 29 | 30 | ## Technical Details 31 | 32 | The server will be implemented using [Django](https://www.djangoproject.com/) 33 | and [SQLite](https://docs.djangoproject.com/en/1.7/ref/databases/) database 34 | for lower footprint and easier maintainance (and because I feel comfortable 35 | using them). 36 | 37 | API, in a RESTful fashion, will be implemented using 38 | [django-tastypie](http://tastypieapi.org/). 39 | 40 | Rough database structure: 41 | 42 | * system information table 43 | * operating system family (Windows / MacOSX / Linux / Other) 44 | * OS version (8.1 / 10.9 / Ubuntu 14.04 / ?) 45 | * Hardware architecture (x86_64 / x86 / Other) 46 | * package checks table 47 | * package name 48 | * requested package version 49 | * found package version 50 | * failed check? 51 | * fail reason 52 | 53 | Every entry in both tables will be additionally assigned a 54 | [universally unique identifier](http://en.wikipedia.org/wiki/Universally_unique_identifier). 55 | Every single UUID will correspond to one diagnostic data upload. 56 | 57 | Additionally we might want to store workshop-specific data, ie.: 58 | 59 | * track submissions from specific workshops 60 | * track packages requested for specific workshop. 61 | 62 | Enhancements to the installation testing script will use standard tools 63 | available in Python Standard Library and on students' systems (like, for 64 | example, `uname`). 65 | 66 | I want to use Mozilla's 67 | [Metrics-Graphics](https://github.com/mozilla/metrics-graphics) for charts and 68 | graphs, because this JavaScript graphing seems actively developed. 69 | 70 | ## Schedule of Deliverables 71 | 72 | A word on my availability: 73 | 74 | In the month of June I have to prepare for exams. I'll make sure to be 75 | spending at least 20hrs per week on this project. 76 | 77 | My exams are not scheduled yet, but the timeslot for them is 78 | **June 23rd - July 7th**. In this time I won't be able to work on the project. 79 | However, most likely I'll be over with the exams by the end of June - in that 80 | case I'll resume working on the project as soon as I pass everything. 
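For reference, the rough database structure listed under Technical Details could map onto Django models roughly as sketched below. Field types, lengths and the foreign-key layout are assumptions made for illustration, not part of the proposal itself.

```python
# Hedged sketch of the two tables plus the per-upload UUID described above.
from django.db import models


class Submission(models.Model):
    """One diagnostic upload; the UUID ties the two tables together."""
    uuid = models.CharField(max_length=36, unique=True)  # str(uuid.uuid4()), set by the API


class SystemInformation(models.Model):
    submission = models.ForeignKey(Submission, on_delete=models.CASCADE)
    os_family = models.CharField(max_length=32)      # Windows / MacOSX / Linux / Other
    os_version = models.CharField(max_length=64)     # e.g. "Ubuntu 14.04"
    architecture = models.CharField(max_length=16)   # x86_64 / x86 / Other


class PackageCheck(models.Model):
    submission = models.ForeignKey(Submission, on_delete=models.CASCADE)
    name = models.CharField(max_length=128)
    requested_version = models.CharField(max_length=32, blank=True)
    found_version = models.CharField(max_length=32, blank=True)
    failed = models.BooleanField(default=False)
    fail_reason = models.TextField(blank=True)
```

Exposing these through django-tastypie resources would then give the REST endpoints that the installation script posts to.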
81 | 82 | First I want to start by editing the installation testing script. I want to 83 | enhance its capabilities in collecting diagnostic data from the system. 84 | 85 | Currently, diagnostic output from the script on my system looks like this: 86 | 87 | ``` 88 | ================== 89 | System information 90 | ================== 91 | os.name : posix 92 | os.uname : ('Linux', 'zenbook', '3.13.0-46-generic', '#79-Ubuntu SMP Tue Mar 10 20:06:50 UTC 2015', 'x86_64', 'x86_64') 93 | platform : linux2 94 | platform+ : Linux-3.13.0-46-generic-x86_64-with-debian-jessie-sid 95 | linux_distribution : ('debian', 'jessie/sid', '') 96 | prefix : /home/piotr/workspace/anaconda 97 | exec_prefix : /home/piotr/workspace/anaconda 98 | executable : /home/piotr/workspace/anaconda/bin/python 99 | version_info : sys.version_info(major=2, minor=7, micro=9, releaselevel='final', serial=0) 100 | version : 2.7.9 |Anaconda 2.1.0 (64-bit)| (default, Dec 15 2014, 10:33:51) 101 | ``` 102 | 103 | It's very close to what I suggest in the database layout, but it's not entirely 104 | the same. To cover differences: 105 | 106 | * for operating system family I'd use `platform.system()` instead of `os.name` 107 | or `platform.name()` (to avoid matching, for example, "posix" to "Linux") 108 | * to discover exact system version I'd use `platform.linux_distribution()` or 109 | `platform.release()` unless a better way exists) 110 | * to discover CPU architecture: `platform.processor()` instead of 111 | `platform.uname()`. 112 | 113 | ### May 25th - June 7th 114 | 115 | Implement gathering of operating system diagnostic data. Start testing that 116 | script on at least one MacOSX machine, couple Windows boxes, and as many Linux 117 | boxes as possible. 118 | 119 | ### June 8th - June 21st 120 | 121 | Continue testing. Implement sending diagnostic data in the installation testing 122 | script. Change the database schema if required. 123 | 124 | ### June 22nd - July 5th 125 | 126 | I'm unavailable due to exams. 127 | 128 | ### July 6th - July 19th 129 | 130 | Implement the REST API. Provide good (100%) test coverage. Start working on 131 | a front end for Software Carpentry admins. Most likely graphs, charts, and so 132 | on will take the biggest amount of work at this point. 133 | 134 | ### July 20th - August 2nd 135 | 136 | Finish up UI, probably have a round of UX testing with Software Carpentry admins. 137 | 138 | ### August 3rd - August 16th 139 | 140 | Finish up automated testing and UX-testing. Write documentation. 141 | 142 | ### August 17th - August 21st 19:00 UTC 143 | 144 | In case the project finishes up earlier, I want to spend my time working on 145 | installation script (see 146 | https://github.com/wking/swc-setup-installation-test/issues/2). 147 | 148 | ## Future works 149 | 150 | I've been involved with Software Carpentry for almost a year now. I'm 151 | a Software Carpentry instructor, Software Carpentry Foundation member and 152 | I don't plan to leave. 153 | 154 | ## Open Source Development Experience 155 | 156 | 2010-2012: cooperation with 157 | [Oregon State University Open Source Lab](http://osuosl.org/): 158 | [Ganeti Web Manager](http://ganeti-webmgr.readthedocs.org/en/latest/) project 159 | (during two GSoCs and one Google Code-In). 160 | 161 | GSoC 2014: [Peer instruction](https://github.com/pbanaszkiewicz/pitt) project 162 | for Mozilla Science Lab (and Software Carpentry). 163 | 164 | Since January 2015: [Amy](https://github.com/swcarpentry/amy) for Software 165 | Carpentry. 
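Returning to the installation-script enhancements planned above: the system-information collection can be done entirely with the standard-library `platform` module, roughly as sketched below. The dictionary keys mirror the proposed database columns and are placeholders; note that `platform.linux_distribution()` existed in the Python versions current at the time of the proposal but was later removed in Python 3.8.

```python
# Minimal sketch of the proposed diagnostic payload; keys are placeholders.
import json
import platform

def system_info():
    info = {
        "os_family": platform.system(),    # "Windows" / "Darwin" / "Linux"
        # The proposal suggests platform.processor(); machine() is a fallback
        # because processor() can be empty on some Linux systems.
        "architecture": platform.processor() or platform.machine(),
        "python_version": platform.python_version(),
    }
    if info["os_family"] == "Linux":
        info["os_version"] = " ".join(platform.linux_distribution()[:2]).strip()
    else:
        info["os_version"] = platform.release()
    return info

if __name__ == "__main__":
    print(json.dumps(system_info(), indent=2))   # body of the eventual REST upload
```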
166 | 167 | ## Academic Experience 168 | 169 | I'm studying Automatics Control and Robotics at 170 | [AGH-UST](http://www.agh.edu.pl/en) in Krakow, Poland. I know understand quite 171 | a bit of Mathematics, including optimization theory, control theory, 172 | probability, and others. Additionally I've got to know many industrial 173 | automatics systems (PLCs, robots, etc.), I'm also good at Matlab. I posses a 174 | LabView certificate (CLAD). 175 | 176 | ## Why this project? 177 | 178 | Because I liked it. :) And I know the resulting server will be quite useful 179 | for Software Carpentry. 180 | -------------------------------------------------------------------------------- /2015/proposals/cum_enrolment_workshops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/2015/proposals/cum_enrolment_workshops.png -------------------------------------------------------------------------------- /2015/proposals/cum_workshops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/2015/proposals/cum_workshops.png -------------------------------------------------------------------------------- /2015/proposals/ian-henriksen.md: -------------------------------------------------------------------------------- 1 | # Title 2 | Cython API For DyND 3 | 4 | ## Abstract 5 | DyND is an array math library written in C++ and designed to improve on the functionality of NumPy. 6 | Its implementation as a C++ library allows it to be used for vector math at the C++ level without any interaction with Python objects. 7 | It is proposed that the Python wrapper for DyND be modified to provide an API for Cython extension modules so that array arithmetic operations can be used in Cython without the overhead of interacting with Python objects. 8 | 9 | ## Technical Details 10 | Cython is language used to automatically generate C and C++ extension modules for Python. 11 | It augments Python's syntax with optional explicit type declarations that can be used to make code more efficient on a localized basis. 12 | Cython is commonly used for both accelerating computations that are difficult to perform efficiently in Python and for interfacing with existing libraries written in other languages via their C and C++ APIs. 13 | Currently, Cython has good support for NumPy arrays and other array-like objects supporting Python's buffer protocol. 14 | In Cython these kinds of objects can be declared as memory views. 15 | They are required to have a fixed data type and a fixed number of dimensions. 16 | They can be sliced and passed between functions in Cython without any calls to the Python C API, but any array arithmetic must be performed by explicitly looping through these arrays element-by-element. 17 | NumPy arrays are usable within Cython, but the NumPy array object is inexorably tied to its Python API and operations on NumPy arrays cannot be performed easily without the Python API. 18 | 19 | DyND, on the other hand, allows these sorts of arithmetic operations within C++ and without any dependency on operations involving python objects. 20 | In addition, its design allows for better expression analysis and optimization at compile time. 
21 | Adding a Cython API for DyND will address the limitations that are currently a part of array arithmetic in Cython and make things like static expression analysis for arrays much easier to access when writing Python C extension modules. 22 | Currently there are limited Cython wrappers in DyND, but they are not a part of the public API and only cover a portion of the features present. 23 | 24 | ## Schedule of Deliverables 25 | 26 | ### May 25 - June 7 27 | Modify the existing wrappers in DyND's python wrappers to make the C++ objects in DyND accessible to external Cython modules. 28 | Make overloaded arithmetic and indexing operators properly handle exceptions in Cython. 29 | Use the overloaded arithmetic operators at the Cython level. 30 | 31 | ### June 8 - June 21 32 | Add support for the overloaded assignment operator to the Cython API. 33 | Overloading the assignment operator isn't currently supported in Cython, so this can be added by either adding the feature to Cython or by using Cython's support for user-specified C-names for functions. 34 | 35 | ### June 22 - July 5 36 | Provide externally available wrappers for types, arrfunc manipulation, math functions, and array iterators. 37 | 38 | ### July 6 - July 19 39 | Make Python wrapper classes publicly available for Cython modules. 40 | Make conversion routines to and from PEP 3118 compliant objects publically available as well. 41 | I will be at the SciPy conference for part of the week of July 6th, but I'll start working a few days before May 25 to make up for it. 42 | 43 | ### July 20 - August 2 44 | Make and test wrappers for take and groupby operations. 45 | 46 | ### August 3 - August 16 47 | Make conversions to and from Python functions and numpy gufuncs work properly within Cython. 48 | Provide a public API for them. 49 | 50 | ### August 17 - August 21 19:00 UTC 51 | Simplify API as much as possible. 52 | Improve documentation. 53 | Clean up code further. 54 | 55 | ## Future work 56 | Future work will be focused on expanding the number of matrix and linear algebra operations that are available in DyND. 57 | Once finished, the Cython API should also be kept up-to-date with the features that are finished in C++. 58 | 59 | ## Open Source Development Experience 60 | I worked for over two years as a primary contributor to BYU's lab manuals for their new applied math emphasis. 61 | These labs are available on GitHub at byuimpact/numerical_computing. 62 | I have submitted relatively small patches to several projects (including NumPy, SciPy, and PyFFTW) and have recently added a Cython API for BLAS and LAPACK to scipy. 63 | The Cython API for BLAS and LAPACK makes it so that extension modules generated using Cython can now use the BLAS and LAPACK routines included in SciPy without having to link against the original libraries. 64 | 65 | ## Academic Experience 66 | I graduated from BYU in 2013 with an undergraduate degree in mathematics after attending for the 2009 and 2012 academic years. 67 | I have been in the masters program here since August of 2013 and expect to be defending my thesis shortly before the GSOC program begins. 68 | I am planning to begin the PhD program at BYU in the fall. 69 | My research involves finite element analysis on spline curves. 70 | My contribution there involves creating better refinement and evaluation techniques for certain classes of generalized spline curves. 71 | 72 | ## Why this project? 73 | I'm interested in DyND primarily because of the problems it solves. 
74 | When I was first learning to interface with other programming languages from Python, I was amazed by Fortran's ability to perform static expression analysis and optimization of array operations. 75 | Though it is cumbersome as a language, its support for array operations is incredible. 76 | I was troubled by the fact that, as good as it is, NumPy can't do all that Fortran does. 77 | NumPy's array object is inseparably connected to the Python C API. 78 | This makes it hard to perform fast vector operations on small arrays inside loops and it prevents improvements in vector libraries from providing any improvement to Cython extension modules. 79 | I searched around to see if C++ had any sort of numpy-like options and found that most of the main vector math libraries in C++ (Eigen, Armadillo, Blaze-lib) only support operations on arrays with, at most, 3 dimensions. 80 | What would be ideal is a library that supports a variety of memory layouts and can still statically optimize code evaluated using those arrays. 81 | Other C++ vector libraries that handle high dimensional arrays (like blitz++) are no longer maintained. 82 | 83 | It was after reading about many of these other array libraries that I found DyND. 84 | Its design is ideal since it allows for compile-time optimization for n-dimensional arrays at compile time and fast operations with small arrays. 85 | NumPy's gufunc machinery is a remarkable work in its own right and I'm impressed by the fact that DyND is developing its own ArrFuncs. 86 | I have little experience with writing a library that does static analysis like this since most of my experience has been in numerical methods, but I want to contribute because I see the massive kinds of benefits it will have. 87 | As I learn more about the mechanics of how to use DyND, it makes me all the more anxious to help. 88 | 89 | Adding a Cython API for DyND will make it possible to write extension modules for Python without having to go through the trouble of manually looping through each portion of an array. 90 | Using DyND for this also makes it so that operations on arrays in Cython can be optimized at compile time at the library level rather than being confined to one particular way of looping through an array. 91 | Much of the Python scientific stack is written in Cython, and providing a Cython API for DyND will make it much easier for developers to use the features of DyND in extension modules for Python. 92 | With a Cython API in place, developers will no longer have to implement array operations in C++ and then wrap them separately. 93 | This will make it so that little or no new C++ code is required to perform array operations within Python extension modules. 
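For orientation, the snippet below shows the flavor of the existing Python-level DyND bindings whose functionality the proposed Cython API would expose without going through Python objects. It assumes the 2015-era `dynd-python` package (`from dynd import nd`); constructor details and the printed representation may differ between releases.

```python
# Hedged sketch using the Python bindings only; the Cython API proposed
# here would let typed extension code do the same whole-array arithmetic
# without Python-object overhead.
from dynd import nd

a = nd.array([[1, 2], [3, 4]])
b = nd.array([[10, 20], [30, 40]])

c = a + b        # element-wise arithmetic evaluated by libdynd, not a Python loop
print(c)
```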
94 | 95 | ## Appendix 96 | Cython: https://github.com/cython/cython 97 | DyND: https://github.com/libdynd/libdynd 98 | DyND Python Wrappers: https://github.com/libdynd/dynd-python 99 | Cython API for BLAS and LAPACK (Previous work providing related functionality): https://github.com/scipy/scipy/pull/4021 100 | BYU Applied Math Lab Manuals: https://github.com/byuimpact/numerical_computing 101 | -------------------------------------------------------------------------------- /2015/proposals/li-yanghao.md: -------------------------------------------------------------------------------- 1 | # Add taxonomic name resolution to the EcoData Retriever to facilitate data-intensive approaches to ecology 2 | 3 | ## Abstract 4 | 5 | The EcoData Retriever is a Python based tool for automatically downloading, cleaning up, restructuring ecological data files into standard formats, and then storing them in your choice of database management systems. It does the hard work of data munging so that scientists can focus on doing science. 6 | 7 | This project focuses on the problem that the names of species in ecological data are constantly being redefined. By automating reconciliation of different species names as part of the process of accessing the data in the first place, it will become much easier for users to combine diverse datasets for specific scientific research. 8 | 9 | ## Technical Details 10 | 11 | This project would build a system for sending a species name to one or more of the taxonomic name resolution services and determine the best name to appear in the generated databases. The project is composed of five main components: accessing the resolution services, determining the election strategy, updating the data model, the user interface and the control flow. 12 | 13 | 1. **Accessing the resolution services.** This could be built by using [pytaxize](https://github.com/sckott/pytaxize) to resolve species names with specific parameters. Current `pytaxize` supports resolving names by some services, such as [Global Names Resolver](http://resolver.globalnames.org/). We could also build out `pytaxize` to support more resolution services and have the Retriever use it. 14 | 15 | 2. **Determining the election strategy.** Since each resolution service may return many candidate names, we need to choose a best name according to the returned scores of services and some qualities of the candidate names. We also need to determine a few default standard data sources since it would be much faster than searching all data sources. For example, there are [181 sources](http://resolver.globalnames.org/data_sources) in `GNR`. My current idea is simple, which is to choose the name with highest average score from all data sources and the score also must be higher than a threshold. I will continue to talk with mentors to determine the final election strategy. 16 | 17 | 3. **Updating the data model.** When using this name resolution, the data model is also needed to update with returned best names. I think we could add two columns, which contain "recommended species name" and "recommended score". "Recommended score" is determined by the election strategy with a value between 0 and 1. In this way, users could determine whether to adopt our recommended names. 18 | 19 | 4. **Updating the user interface.** This project also needs to update the user interface to allow the user to control whether or not to use this taxonomic name resolution for a particular dataset. 
My current idea is to add an option button beside the "download button". When users click it, an interface will come out. Users could choose whether to use taxonomic name resolution and change the default selected data sources by checking different checkboxes. There is also an input box with a text indicating which column contains the species names. This text could be obtained either from the dataset script or filled in the box by users. 20 | 21 | 5. **Updating the control flow.** Before downloading, we should check if users choose to use taxonomic name resolution and whether the corresponding column exists. If so, we need to generate the final data with extra two columns. 22 | 23 | ## Schedule of Deliverables 24 | 25 | ###Before May 25th 26 | 27 | Get more familiar with the Retriever codes. 28 | Determine the election strategy. 29 | 30 | ### May 25th - June 7th 31 | 32 | Add functions to access more name resolution services in pytaxize. 33 | 34 | ### June 8th - June 21th 35 | 36 | Finish adding functions in pytaxize and test them. 37 | Write the module in Retriever to query names through pytaxize ports. 38 | 39 | ### June 22th - July 5th 40 | 41 | Do mid-term evaluations. 42 | Finish up the module to query names and process responses to choose the best name. 43 | 44 | ### July 6th - July 19th 45 | 46 | Test on some datasets to evaluate the replacement results and make some updates of the election strategy. 47 | 48 | ### July 20th - August 2rd 49 | 50 | Update the user interfaces for users to choose some options about the name resolution. 51 | 52 | ### August 3rd - August 16th 53 | 54 | Finish the user interfaces and make sure it works well with back-end codes. 55 | Test the entire code. 56 | 57 | ### August 17th - August 21th 19:00 UTC 58 | 59 | Week to scrub code, write tests, improve documentation, etc. 60 | 61 | ## Future works 62 | 63 | * Continue to work on the Retriever Project and finish some feature requests or fix some bugs in Retriever [Issues Page](https://github.com/weecology/retriever/issues). After I finish this gsoc project, I think I will have more ideas about how to improve the Retriever much better. 64 | * Improve `pytaxize` by adding more functions. Since `pytaxize` is a incomplete python port of the R package [taxize](https://github.com/ropensci/taxize), it is very meaningful to extend it by adding more APIs of different data sources. 65 | 66 | ## Open Source Development Experience 67 | 68 | Although I don't have much open source development experience before, I'm always looking forward to have a chance to contribute more. 69 | 70 | * I have implemented some image processing projects in my research work and put them on the github, such as [NRSR](https://github.com/lyttonhao/NRSR) and [FH-Eigen](https://github.com/lyttonhao/FH-Eigen). 71 | * I also begin to contribute to the Retriever and pytaxize by some pull requests. 72 | 73 | ## Academic Experience 74 | 75 | I'm a fourth year undergraduate student at Peking University, Beijing, China. My major is computer science. 76 | 77 | * I have implemented many course projects individually or in teams using C++, Python, JavaScript and other languages. 78 | * I am an intern student in the Institute of Computer Science & Technology of Peking University and doing some research about computer vision and image processing with Python and Matlab. 79 | * I have been an intern Software Development Engineer to finish some projects using Python in the Face Match Team of Hulu, Beijing. 80 | 81 | ## Why this project? 
82 | 83 | I have been using Python for over one year and I think I have the capability to implement this project. This project is also very interesting to me since it could help scientists to do their research. I will be glad if I could help others through open source projects. So I think it's very meaningful and useful to improve and extend features of Retriever. 84 | 85 | ## Contribution to the project 86 | * [Retriever #274](https://github.com/weecology/retriever/pull/274) Fix [#206](https://github.com/weecology/retriever/issues/206) to change ownership of .retriever directories to the user who does the install. 87 | 88 | * [pytaxize #13](https://github.com/sckott/pytaxize/pull/13) Return [] for each query with no result returned instead of Raise NoResultException. 89 | 90 | * [pytaxize #27](https://github.com/sckott/pytaxize/pull/27) Delete temporary name list file in _gnr_resolve(). 91 | 92 | * [pytaxize #33](https://github.com/sckott/pytaxize/pull/33) No matching results warning should be checked in gnr_resolve instead of _gnr_resolve. 93 | -------------------------------------------------------------------------------- /2015/proposals/pankaj-kumar.md: -------------------------------------------------------------------------------- 1 | Name: Pankaj Kumar 2 | 3 | Email: me@panks.me 4 | 5 | Blog: [http://panks.me](http://panks.me) 6 | 7 | Github: [http://gihub.com/panks](http://gihub.com/panks) 8 | 9 | IRC Nick: panks 10 | 11 | Country/Region: Chennai, India 12 | 13 | # Title 14 | Add taxonomic name resolution to the EcoData Retriever to facilitate data science approach to ecology 15 | 16 | ## Abstract 17 | This project deals with implementing automating reconciliation of different species names as part of the process of accessing the data in EcoData Retriever. 18 | 19 | ## Technical Details 20 | **EcoData Retriever** is python based tool for downloading, cleaning up, and restructuring ecological data. The information about the repositories are stored in .script files which contain URLs to the files containing the datasets. 21 | 22 | The GUI lists the repositories and the user can download them by clicking on the download icon. Downloads are handled by the **DownloadManager** class. For every download requested information from the corresponding script file is added to the queue in **DownloadManager** class, which utilizes **DownloadThread** class, which is implemented on threading library, to fetch the dataset. 23 | 24 | The actual download operation happen is Engine class, which uses **urllib** library to fetch the dataset file, and store it locally. This operation happenes inside **download_file()** function in **./lib/engine.py** file. 25 | 26 | The problem we are facing right now is that the name of species are frequently redefined which makes it difficult to maintain consistency across the datasets and so it become difficult to combine multiple datasets together. 27 | 28 | ~~How we can handle it is by implementing a function in Engine class, which updates the species names, before storing it local file.~~ 29 | 30 | We can implement this functionality in **pytaxize**, and use it in retriever. 
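A hypothetical sketch of that idea follows: collect the distinct species names from the downloaded rows and resolve them with a single batched pytaxize call. The helper name and the row layout are illustrative assumptions, not existing Retriever code; only `gnr_resolve()` is taken from pytaxize, and how its flat result list maps back onto the input names depends on the pytaxize version.

```python
# Illustrative only -- not the actual Retriever download path.
import pytaxize

def unique_species(rows, species_column):
    """Collect the distinct species names found in the downloaded rows."""
    return sorted({row[species_column] for row in rows})

if __name__ == "__main__":
    rows = [{"species": "Helianthus annus"}, {"species": "Poa annua"}]
    names = unique_species(rows, "species")
    records = pytaxize.gnr_resolve(names)   # one batched lookup for all names
    print(records)
```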
31 | 32 | In function **download_file()** in Engine after we fetch the file using urllib and check for clean line endings ~~we can pass the file to another function which updates the species names before we save it locally.~~ we can make a call to pytaxize, which return the list of canonical names and we can pick among the returned list, based on score or a combination of score and other parameters as suitable. 33 | 34 | For implementing such a function we would require a source for updated/accepted species name. As given in description of the project we can make a call to one or more of the existing web services to get that information. For example, as mentioned in the description one such service is **Taxonomic Name Resolution Service** (also **GNR**). One way we could handle it is by making a list of all the species names once we are done downloading the dataset in download_file() function, and make a call to TNRS/GNR API 35 | passing the list and retrieving the accepted names for the given species. Once we get the updated list of species name we can make those changes in the dataset and then store it locally. 36 | 37 | The service used for each dataset (.script file) can be different, so one way to go about this would be to include this information in the corresponding .script files itself, which can then be passed to the Engine class to be utilized for getting updated names. 38 | 39 | Also, I feel this process to update the names would add a significant amount of time to the total download time of a dataset. So I feel it would a good idea to maybe include a checkbox along with each dataset in the GUI to enable user to skip this operation in case the user doesn’t want updated name set. 40 | 41 | ## Schedule of Deliverables 42 | 43 | ### May 25th - June 7th 44 | Setup the system, get completely familiar with the Retriever and pytaxize codebase. Finalize the implementation details with the mentor. 45 | 46 | ### June 8th - June 21st 47 | Get familiar with any dependencies required for adding name resolution to pytaxize. Decide with the mentor on what sources to use for each of the dataset. Start implementing/updating the function in pytaxize and Engine class of Retriever. 48 | 49 | ### June 22nd - July 5th 50 | Finish implementation of the function and test it on a small datasets to see if the files are stored locally successfully with updated names. Take feedback from the mentor make any changes required. Submit Mid-term report. 51 | 52 | ### July 6th - July 19th 53 | Gather information on all the sources needed to fetch the updated species names for each of the dataset present with retriever currently, and the parameters to use for picking the names, and add that information to .script files. 54 | 55 | ### July 20th - August 2nd 56 | Check if the current implementations works for all the dataset presents, introduce more parameters if required 57 | 58 | ### August 3rd - August 16th 59 | Make any changes needed the GUI to incorporate this utility. Finalize the code and take feedback from the mentor. 60 | 61 | ### August 17th - August 21th 19:00 UTC 62 | Code polish and merge back to the master. Submit final evaluation. 63 | 64 | ## Future works 65 | Addition of dataset to Retriever 66 | 67 | ## Open Source Development Experience 68 | 69 | **digiKam**: 70 | 71 | Made batch processing of images parallel on a multicore system. Implemented image filter based of image ratio and few more factors. 
Mentored a GSoC project which involved writing a plugin to upload images to cloud services like Google Drive and Dropbox. 72 | 73 | **Calligra**: 74 | 75 | Implemented a PDF filter so that editing of PDF documents can be done inside Calligra Words. 76 | 77 | **MOOL Project** ([http://dos.iitm.ac.in/projects/MOOL/](http://dos.iitm.ac.in/projects/MOOL/)): 78 | This project deals with porting the Linux kernel to C++; my part was to port the ext4 file system to C++. 79 | 80 | ## Academic Experience 81 | I am currently a **5th year Dual-Degree** (B. Tech + M. Tech) student in the Computer Science and Engineering Department at the **Indian Institute of Technology Madras**. I am a FOSS enthusiast and I contribute to open-source projects in my free time. 82 | 83 | This project deals with automating the reconciliation of different species names as part of the process of accessing data in the EcoData Retriever. It requires knowledge of Python, which I'm familiar with, and I have also used web services via APIs in a few of my past projects. For example, the ‘Yahoo! Just-Dial’ service, which I built as part of Yahoo! HackU and which ended up as one of the winning projects. [https://github.com/panks/Yahoo-HackU--Yahoo-Justdial](https://github.com/panks/Yahoo-HackU--Yahoo-Justdial) 84 | 85 | Also, I have participated in GSoC (2013) and similar programs before, so this will not be my first participation. During **GSoC (2013) I mentored two students** under the KDE organization, and the projects were successfully completed. 86 | 87 | Apart from these, I have also interned at major software companies, including Yahoo! and Microsoft, during my summer vacations. 88 | 89 | ## Why this project? 90 | I liked this project while going through the list of organizations and their idea pages. Also, my master's project deals with processing data, learning from a given set of reactions, and predicting new pathways for synthesizing other novel organic compounds, for which I'm using the KEGG database to get the set of reactions and mol files of the compounds involved.
91 | 92 | ## Appendix 93 | 94 | I submitted patches for following issues 95 | 96 | ### pytaxize: 97 | 98 | [#8](https://github.com/sckott/pytaxize/issues/8) Name resolution ([pull request](https://github.com/sckott/pytaxize/pull/19) and [pull request](https://github.com/sckott/pytaxize/pull/28)) 99 | 100 | [#10](https://github.com/sckott/pytaxize/issues/10) Fix names_list() fxn that reads local csv files ([pull request](https://github.com/sckott/pytaxize/pull/18)) 101 | 102 | [#12](https://github.com/sckott/pytaxize/issues/12) Support multiple names for gnr_resolve() ([pull request](https://github.com/sckott/pytaxize/pull/19) and [pull request](https://github.com/sckott/pytaxize/pull/20)) 103 | 104 | 105 | ### retriever 106 | 107 | [#275](https://github.com/weecology/retriever/issues/275) Add Global Wood Density Database ([pull request](https://github.com/weecology/retriever/pull/284)) 108 | 109 | [#205](https://github.com/weecology/retriever/issues/205) download command should probably fail when specified path does not exist ([pull request](https://github.com/weecology/retriever/pull/287)) 110 | 111 | 112 | -------------------------------------------------------------------------------- /2015/proposals/schamnad-sanjeed.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/2015/proposals/schamnad-sanjeed.md -------------------------------------------------------------------------------- /2015/proposals/skeleton.md: -------------------------------------------------------------------------------- 1 | # Title 2 | 3 | ## Abstract 4 | 5 | ## Technical Details 6 | 7 | ## Schedule of Deliverables 8 | 9 | ### May 25th - June 7th 10 | 11 | **You need to accomplish this to mid-term.** 12 | 13 | ### June 8th - June 21th 14 | 15 | **You need to accomplish this to mid-term.** 16 | 17 | ### June 22th - July 5th 18 | 19 | ### July 6th - July 19th 20 | 21 | ### July 20th - August 2rd 22 | 23 | ### August 3rd - August 16th 24 | 25 | ### August 17th - August 21th 19:00 UTC 26 | 27 | **Week to scrub code, write tests, improve documentation, etc.** 28 | 29 | ## Future works 30 | 31 | ## Open Source Development Experience 32 | 33 | ## Academic Experience 34 | 35 | ## Why this project? 36 | 37 | ## Appendix 38 | -------------------------------------------------------------------------------- /2015/wrap-up-blog-post.md: -------------------------------------------------------------------------------- 1 | [NumFOCUS](http://numfocus.org/) 2 | is a public charity in the United States 3 | that supports and promotes world-class, innovative, open source scientific software 4 | (specially the scientific Python stack, [Julia](http://julialang.org/) and [rOpenSci](http://ropensci.org/)). 5 | 6 | This is the first time that NumFOCUS participated in Google Summer of Code (GSoC) 7 | although many of the projects that NumFOCUS support participated in many 8 | previous editions as mentoring organizations or under the umbrella of one 9 | mentoring organization (for example the [Python Software 10 | Foundation](https://www.python.org/psf/)). 11 | 12 | The students working with us completed three projects 13 | and we are grateful for their incredible work, 14 | the many hours that their mentors spent during the summer 15 | and the amazing people that helped us with the GSoC application. 
16 | 17 | # Cython API for DyND 18 | 19 | [DyND](https://github.com/libdynd/libdynd) is a C++ library for dynamic, multidimensional arrays. 20 | The motivation for this project was to make DyND available 21 | for the Python scientific stack, 22 | that uses Cython extensively, 23 | without having to go through the trouble of manually looping through each portion of an array. 24 | 25 | If you have ever use Cython you know that this was not a easy 26 | project to develop. We are happy that Ian completed the project 27 | even though he spent many hours hunting for bugs and reorganizing code 28 | (something that he did not expect). 29 | 30 | Read more about it at [Ian's post](https://insertinterestingnamehere.github.io/posts/gsoc-concluding-thoughts.html). 31 | 32 | # Enhance AMY, a workshop-management platform for Software Carpentry 33 | 34 | The [Software Carpentry Foundation](http://software-carpentry.org/scf/index.html) (SCF) 35 | is a non-profit volunteer organization 36 | whose members teach researchers basic software skills. 37 | SCF started to run over a hundred workshops worldwide a year 38 | and managing the workshops became a problem. 39 | 40 | During the summer, Piotr worked to enhance [AMY](https://github.com/swcarpentry/amy/), 41 | a Django application that manages SCF's workshops, 42 | adding many new features, fixing bugs 43 | and helping SCF's program coordinators to keep updated 44 | with all the changes in AMY. 45 | 46 | The option to create yet another tool from the scratch 47 | instead of use a CRM solution like [CiviCRM](https://civicrm.org/) 48 | was based on the fact that (1) most of the SCF members have some knowledge of 49 | Python so they could help maintain AMY in the long run 50 | and (2) an small Django application could fit better for 51 | others organizations running software training like 52 | [PyLadies](http://www.pyladies.com/) and [Django Girls](http://www.pyladies.com/). 53 | 54 | Read more about it at [Piotr's post](http://piotr.banaszkiewicz.org/blog/2015/09/05/amy-update-7/). 55 | 56 | # JuliaQuantum: Framework for solvers 57 | 58 | [Julia](http://julialang.org/) is a high-level, 59 | high-performance dynamic programming language for technical computing, 60 | with syntax that is familiar to users of other technical computing environments. 61 | In the last few years Julia got many third party libraries, 62 | which integrate external libraries or use native implementations in Julia. 63 | One of those efforts is [JuliaQuantum](https://juliaquantum.github.io/), 64 | which aims to provide tools and frameworks for dealing with problems 65 | from quantum mechanics and quantum information science. 66 | 67 | During the summer Amit worked 68 | on a framework for solving dynamical equations for JuliaQuantum. 69 | He integrated several solvers like the Quantum Monte-Carlo Wave Function Method. 70 | The [new interface](https://github.com/JuliaQuantum/QuDynamics.jl) makes it easy to add new solvers 71 | and to test different methods for a given problem. 72 | 73 | Read more about it at [Amit's post](https://juliaquantum.github.io/news/2015/08/GSoC2015-Wrap-up-and-Outlook). 74 | -------------------------------------------------------------------------------- /2016/ideas-list-biocore.md: -------------------------------------------------------------------------------- 1 | # biocore's Idea Page 2 | 3 | To be clear, your own ideas are most welcome! 
4 | 5 | ## A general purpose next-generation sequence pre-processor for QIIME 2 6 | 7 | Various quality control and other sequence "pre-processing" steps are required of microbiome "next-generation" sequencing data before it can used in a QIIME analysis. We propose the development of a general purpose sequence pre-processor that would be developed as a [QIIME 2 plugin](https://github.com/biocore/qiime2). This is described in detail in an issue [here](https://github.com/biocore/qiime/issues/1954). 8 | 9 | | **Intensity** | Trivial - Easy - Moderate - Hard | 10 | | **Involves** | | 11 | | **Mentors** | [@foo][], [@bar][] | 12 | 13 | ### Technical Details 14 | 15 | ### Open Source Development Experience 16 | 17 | ### First steps 18 | 19 | ## A scikit-bio-based bioinformatics file format converter 20 | 21 | In bioinformatics, there are many defined file formats that represent very similar data. [scikit-bio] has a powerful I/O framework that enables users to load diverse formats into their relevant in-memory representations, regardless on input file format. It would be useful for many bioinformatics end-users (who are often not comfortable working with APIs) to have a file format converter for formats supported in scikit-bio. We'd want to develop a scikit-bio Python API for file format conversions and then develop a simple CLI and/or GUI app to wrap this (this app would likely be separate from scikit-bio). This project would also likely involve the development of additional file format readers and writers for scikit-bio, to increase the power of this application and of scikit-bio. 22 | 23 | | **Intensity** | Trivial - Easy - Moderate - Hard | 24 | | **Involves** | | 25 | | **Mentors** | [@foo][], [@bar][] | 26 | 27 | ### Technical Details 28 | 29 | ### Open Source Development Experience 30 | 31 | ### First steps 32 | 33 | [scikit-bio]: http://scikit-bio.org/ 34 | -------------------------------------------------------------------------------- /2016/ideas-list-juliaopt.md: -------------------------------------------------------------------------------- 1 | # JuliaOpt 2 | 3 | To be clear, your own ideas are most welcome! 4 | 5 | ## Develop a Julia interface to the [SCIP](http://scip.zib.de/) solver 6 | 7 | ### Abstract 8 | 9 | SCIP is a powerful noncommerical solver for [mixed-integer programming](https://en.wikipedia.org/wiki/Integer_programming) and 10 | [constraint programming](https://en.wikipedia.org/wiki/Constraint_programming). 11 | It provides advanced features like [callbacks](http://jump.readthedocs.org/en/latest/callbacks.html) which are used to attack challenging combinatorial problems 12 | like the [TSP](https://en.wikipedia.org/wiki/Travelling_salesman_problem). 13 | The goal of this project is to develop a fully featured and documented 14 | interface to SCIP through its C API, which will enable 15 | users to access SCIP and all of its advanced features from Julia, 16 | and in particular from the modeling interfaces 17 | [JuMP](https://github.com/JuliaOpt/JuMP.jl) 18 | and [Convex.jl](https://github.com/JuliaOpt/Convex.jl). 19 | 20 | | **Intensity** | **Involves** | **Mentors** | 21 | | ------------- | --------------|------------ | 22 | | Moderate | Julia, C, SCIP, Linear and Integer Programming, Combinatorial Optimization | [@mlubin][], [@joehuchette][], [@rgaiacs][] | 23 | 24 | ### Motivation 25 | 26 | Julia already has wrappers for many optimization solvers (listed [here](http://www.juliaopt.org/)). 
Compared with the open-source mixed-integer programming solvers currently supported (GLPK, Cbc), SCIP is faster and still provides access to its source code for research purposes (although unfortunately not under an open-source license). Compared with the commercial solvers, SCIP has a much lower barrier to entry in terms of licensing costs. Google itself has chosen to use SCIP internally and supports it through its [or-tools](https://github.com/google/or-tools) package. 27 | 28 | Providing first-class access to SCIP from Julia will enable new applications of 29 | integer programming, made easier by Julia and JuMP's high-level syntax and abstractions 30 | over solvers. SCIP will be suitable for use in courses, research, and industrial applications. 31 | 32 | ### Technical details 33 | 34 | SCIP has quite a large API, so it is a good idea to generate 35 | the Julia wrappers automatically. There was a 36 | [previous attempt](https://github.com/ryanjoneil/SCIP.jl) at wrapping 37 | SCIP which may serve as a useful starting point. Since then, 38 | the ``Ref{}`` syntax in Julia 0.4 may make some of the wrapping 39 | easier than before. 40 | 41 | ## Open Source Development Experience 42 | 43 | This project requires knowledge of basic linear programming, experience with C, and ideally experience with JuMP or another algebraic modeling language. 44 | 45 | ### Contact 46 | 47 | [JuliaOpt list](https://groups.google.com/forum/#!forum/julia-opt) 48 | 49 | [NumFocus GSOC list](https://groups.google.com/a/numfocus.org/forum/#!forum/gsoc) 50 | 51 | 52 | ## Solve complex SDPs with the [Convex.jl](https://github.com/JuliaOpt/Convex.jl/i) modeling language 53 | 54 | ### Abstract 55 | 56 | **Convex.jl** is a [Julia](http://julialang.org) package for [Disciplined Convex Programming](http://dcp.stanford.edu/). Convex.jl makes it easy to describe optimization problems in a natural, mathematical syntax, and to solve those problems using a variety of different (commercial and open-source) solvers, through the [MathProgBase](http://mathprogbasejl.readthedocs.org/en/latest/) interface. 57 | This project would add support for solving complex semidefinite programs (SDP) to Convex.jl. 58 | 59 | | **Intensity** | **Involves** | **Mentors** | 60 | | ------------- | --------------|------------ | 61 | | Moderate | Julia, Convex Programming | [@madeleineudell][], [@mlubin][], [@dvij][] | 62 | 63 | ### Motivation 64 | 65 | Convex.jl is widely used in industry and research to solve structured 66 | convex optimization problems, including LP, SOCP, and SDP with real variables and data. 67 | This project extends the problem types that can be solved using Convex.jl. 68 | 69 | Many problems in applied mathematics, engineering, and physics are most 70 | naturally posed as convex optimization problems over complex valued 71 | variables and with complex valued data. These include 72 | 73 | a) Phase retrieval from sparse measurements. 74 | b) Optimization problems in AC power systems 75 | c) Frequency domain analysis in signal processing and control theory 76 | 77 | While optimization over complex numbers can always be encoded as 78 | optimization over real variables through transformations, this often 79 | results in significant overhead (both in user effort and computation 80 | time) in many applications. Support for complex convex 81 | optimization in Convex.jl would boost the usage of Julia 82 | as a language of choice for users working on these and other 83 | applications. 
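To make the preceding point concrete, here is one standard embedding of a complex (Hermitian) positive-semidefiniteness constraint into a real one; this is an illustrative textbook construction, not necessarily the exact transformation Convex.jl would adopt. Writing a Hermitian matrix as `Z = X + iY` with `X` real symmetric and `Y` real skew-symmetric:

```latex
Z = X + iY \succeq 0
\quad\Longleftrightarrow\quad
\begin{bmatrix} X & -Y \\ Y & X \end{bmatrix} \succeq 0,
\qquad X = X^{\top},\; Y = -Y^{\top}.
```

The real reformulation doubles the dimension of the semidefinite block, which is exactly the kind of overhead in user effort and computation time mentioned above.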
84 | 85 | ### Technical details 86 | 87 | This project would add support for complex variables and data to Convex.jl. 88 | This work entails writing functions to transform complex SDPs into equivalent 89 | real valued SDPs, and to transform the solutions back from real to complex 90 | variables. 91 | 92 | Students with further background and motivation could continue to improve 93 | the SDP solver itself. In particular, the transformations used by Convex.jl 94 | to write a problem as an SDP often introduce many extra variables and constraints 95 | than are necessary, and may result in poor conditioning. A presolve routine, 96 | eliminating redundant variables and constraints and improving conditioning before 97 | passing the problem to a solver, would be a welcome addition to the Convex.jl library. 98 | While many tricks for presolving LPs are well known, there is significant room for 99 | imagination in writing a presolve for SDP; the project might well lead to a publication 100 | were the GSoC student so inclined. 101 | 102 | ## Open Source Development Experience 103 | 104 | This project requires knowledge of basic linear algebra, convex optimization, 105 | and Julia programming. 106 | 107 | ### Contact 108 | 109 | [JuliaOpt list](https://groups.google.com/forum/#!forum/julia-opt) 110 | 111 | [NumFocus GSOC list](https://groups.google.com/a/numfocus.org/forum/#!forum/gsoc) 112 | 113 | [@dvij]: https://github.com/dvij 114 | [@joehuchette]: https://github.com/joehuchette 115 | [@madeleineudell]: https://github.com/madeleineudell 116 | [@mlubin]: https://github.com/mlubin 117 | [@rgaiacs]: https://github.com/rgaiacs 118 | -------------------------------------------------------------------------------- /2016/ideas-list-juliaquantum.md: -------------------------------------------------------------------------------- 1 | # Ideas Pages - JuliaQuantum 2 | 3 | ## Overview 4 | 5 | [JuliaQuantum](http://juliaquantum.github.io) is an open-source organization to build libraries in Julia for Quantum science and technology. 6 | Through the fast development of last year--especially by successfully completing a GSoC project, the organization has implemented a framework of libraries to represent basic concepts in quantum mechanics and to solve foudamental quantum dynamics equations. 7 | However, there are still just a few breaks of chains to fully promote the existing projects to be as useful as other packages written in other programming languages. Implementing some binding packages with applications to existing packages are the focus of the ideal J/GSoC projects this year in JuliaQuantum: 8 | 9 | - Generalizing and enhancing the existing quantum state type system. For example, developing a "proper Array-with-basis type/package"--basically replacing our `QuArray` stuff and making it available for others. This may also be useful outside JuliaQuantum. In the mean time, we are still missing the tensor network representation for some quantum many-body simulations. 10 | - Optimizing and enhancing the current quantum dynamics solvers and including more well-developed equation solvers for users to choose. Also building a plugin to let the solvers easily compatible with various parallel computing strategies. ***@obiajulu*** proposed to implement symplectic splitting methods for time-dependent equations. In the mean time, implementing new solvers for stochastic quantum dynamics and the like would be helpful. 
11 | - Building a new package at the application level that can tie together most of our existing repos for a fairly large group of potential users. Some possible application directions are many-body physics simulations and quantum information applications. 12 | - Visualizing the abstracts: It would be cool and useful to have a package to visualize quantum states on the Bloch sphere, quantum circuits, fidelity matrices, etc., using a framework that allows switching the underlying plotting platform. 13 | - Fully building a symbolic calculus package for quantum mechanics. Although the [SymPy.jl](https://github.com/jverzani/SymPy.jl) package is enough for calling the symbolic package in Python, our community users have found some important features not implemented in the original SymPy Python package. It would be nice to implement a symbolic representation package that interfaces with our existing types and ecosystem for quantum science and is extensible to other fields. The native type system of Julia could be a very essential feature for implementing this idea efficiently. Our current [QuDirac.jl](https://github.com/JuliaQuantum/QuDirac.jl) project could be extended in this direction. 14 | 15 | ### Discussion threads 16 | 17 | - Discussions can be found in the [JuliaQuantum.github.io repo](https://github.com/JuliaQuantum/JuliaQuantum.github.io/issues/32). 18 | 19 | ## Current proposal: Julia Quantum: Framework for Quantum Computation Simulators 20 | 21 | ### Motivation 22 | 23 | As the experimental implementation of quantum computation grows rapidly, a toolkit for bridging the gap between experimentalists and theorists could be necessary. To be specific, translating quantum algorithms to quantum circuits, or to Hamiltonians in other models, with a lower resource cost is important for both experimentalists and theorists. 24 | 25 | This project will help scientists design better architectures for real-life quantum computers. It will link the existing base libraries under JuliaQuantum to the widely known quantum information application level, and outline a framework of the type system and necessary solvers in Julia. 26 | 27 | ### Proposer 28 | 29 | Name: Xiujie (Roger) Luo 30 | 31 | Email: rogerluo@mail.ustc.edu.cn 32 | 33 | GitHub profile: [@Roger-luo](https://github.com/Roger-luo) 34 | 35 | Location/Timezone: China, CST 36 | 37 | University: University of Science and Technology of China 38 | 39 | ### Mentors and contact information 40 | 41 | 1. Yongjian Han: 42 | 43 | Professor, Key Lab of Quantum Information, University of Science and Technology of China. 44 | 45 | Contact: smhan@ustc.edu.cn 46 | 47 | 2. Alexander Croy: 48 | 49 | Postdoctoral fellow, Max Planck Institute for the Physics of Complex Systems. 50 | 51 | Contact: [@acroy](https://github.com/acroy) 52 | 53 | ### Detailed Proposal 54 | 55 | [Julia Quantum: Framework for Quantum Computation Simulators](https://github.com/numfocus/gsoc/blob/master/2016/proposals/Roger-luo-proposal-for-JuliaQuantum.md). -------------------------------------------------------------------------------- /2016/ideas-list.md: -------------------------------------------------------------------------------- 1 | # Ideas Pages 2 | 3 | This is the home page of project ideas for NumFOCUS for Google Summer of Code 2016. 4 | Since NumFOCUS is an umbrella organization, you will only find links to the ideas 5 | page of each organization under the NumFOCUS umbrella on this page.
6 | 7 | - biocore: https://github.com/numfocus/gsoc/blob/master/2016/ideas-list-biocore.md 8 | - bokeh: https://github.com/bokeh/bokeh/wiki/GSOC-2016-Ideas 9 | - DyND: https://github.com/numfocus/gsoc/blob/master/2016/ideas-list-dynd.md 10 | - EcoData Retriever: https://github.com/numfocus/gsoc/blob/master/2016/ideas-list-ecodata-retriever.md 11 | - Gensim: https://github.com/piskvorky/gensim/wiki/Student-Projects 12 | - JuliaOpt: https://github.com/numfocus/gsoc/blob/master/2016/ideas-list-juliaopt.md 13 | - JuliaQuantum: https://github.com/numfocus/gsoc/blob/master/2016/ideas-list-juliaquantum.md 14 | - matplotlib: https://github.com/numfocus/gsoc/blob/master/2016/ideas-list-matplotlib.md 15 | - Pandas: https://github.com/pydata/pandas/wiki/Google-Summer-of-Code 16 | - Software Carpentry: https://github.com/numfocus/gsoc/blob/master/2016/ideas-list-swc.md 17 | -------------------------------------------------------------------------------- /2016/ideas-skeleton.md: -------------------------------------------------------------------------------- 1 | # Title 2 | 3 | ## Abstract 4 | 5 | Very short description of the project. 6 | 7 | | **Intensity** | **Involves** | **Mentors** | 8 | | ------------- | --------------|------------ | 9 | | Trivial - Easy - Moderate - Hard | | [@foo][], [@bar][] | 10 | 11 | ## Technical Details 12 | 13 | Long description of the project. 14 | **Must** include all technical details of the projects like libraries involved. 15 | 16 | ## Open Source Development Experience 17 | 18 | List of background experience that we expected from the student. 19 | 20 | ### First steps 21 | 22 | Students doesn't need to do this before Google Summer of Code code period starts 23 | but will be good if they do just because they will be sure if this is how they 24 | want to spend the summer. 25 | -------------------------------------------------------------------------------- /2016/proposals/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/2016/proposals/.gitkeep -------------------------------------------------------------------------------- /2016/proposals/Ayush_Pandey.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/2016/proposals/Ayush_Pandey.pdf -------------------------------------------------------------------------------- /2016/proposals/Ramchandran.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/2016/proposals/Ramchandran.pdf -------------------------------------------------------------------------------- /2016/proposals/aizenman-hannah.md: -------------------------------------------------------------------------------- 1 | # Title 2 | Categorical axes 3 | 4 | ## Abstract 5 | matplotlib 1.5 added direct support for plotting data frames. 6 | However, there are still a few related tasks yet to be done. An 7 | important one is detecting when plotting categorical data 8 | (i.e. enumerations) and updating the tick labels accordingly. 
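As a rough illustration of the boilerplate this would remove (the data and names below are made up), users currently tend to map categories to integer positions by hand and then restore the category names as tick labels; the aim is for a call like `ax.bar(labels, counts)` to handle this automatically:

```python
import matplotlib.pyplot as plt

# Hypothetical categorical data.
labels = ["cat", "dog", "fish"]
counts = [12, 25, 4]

# Manual workaround today: plot against integer positions...
positions = list(range(len(labels)))
fig, ax = plt.subplots()
ax.bar(positions, counts, align="center")

# ...then put the category names back as tick labels.
ax.set_xticks(positions)
ax.set_xticklabels(labels)
plt.show()
```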
9 | 10 | | **Intensity** | **Involves** | **Mentors** | 11 | | ------------- | --------------|------------ | 12 | | Intermediate | Python, Pandas, Databases, Data science | [@tacaswell][] | 13 | 14 | ## Technical Details 15 | More broadly, this task will involve designing new user-friendly APIs 16 | to deal with certain types of data more automatically. 17 | 18 | This work may include: 19 | 20 | - implementing a categorical axis on top of the mpl units framework (this may or 21 | may not be possible / make sense) 22 | - implement an improved / combined API for bar plots 23 | - this API should cover all of the styles currently exposed by hist 24 | - implement a proper 2D heat map API 25 | - based on imshow or pcolormesh? 26 | - move the hinton demo into the main API? 27 | - ensuring that the interactive features are categorically aware 28 | - sort out how / if multiple categorical artists should interact with 29 | each other. This may interact with the Compound Artists project. 30 | - implement an API for categorical color mapping 31 | - ensure everything works with: pandas, altair, seaborn 32 | 33 | ## Schedule of Deliverables 34 | 35 | ### May 25th - June 7th 36 | Understand the mpl units framework and prototype an implementation of the categorical axis. Work out the requirements for supporting common categorical data types: pandas dataframes, numpy structured arrays, dictionaries, etc. 37 | 38 | ### June 8th - June 21st 39 | Fully implement the categorical axis, with tests, and incorporate it into APIs: 40 | * barplot 41 | * lineplot & scatterplot (maybe being able to pick up xlabel and ylabel natively?) 42 | 43 | ### June 22nd - July 5th 44 | Implement a proper 2D heat map API: 45 | - based on imshow or pcolormesh? 46 | - move the hinton demo into the main API? 47 | - make the heatmap categorically aware 48 | 49 | ### July 6th - July 19th 50 | Implement the API for categorical color mapping (scheduled earlier because it seems easier than the interactivity work) 51 | 52 | ### July 20th - August 2nd 53 | Work on interactive features: 54 | * ensure they are categorically aware 55 | * work out how multiple artists should interact with each other 56 | 57 | ### August 3rd - August 16th 58 | Ensure interactions with any pandas, altair, and seaborn use cases that haven't been tested earlier.
59 | 60 | ### August 17th - August 21st 19:00 UTC 61 | **Week to scrub code, write tests, improve documentation, etc.** 62 | 63 | ## Future works 64 | Improve the table artist by changing the API as follows: 65 | * axtable: behaves like a legend in that it's specifically attached to a plot 66 | * table: standalone plot/axis object 67 | 68 | ## Open Source Development Experience 69 | * Tentative matplotlib documentation fixes: [open pull requests](https://github.com/matplotlib/matplotlib/pulls/story645) 70 | * Tentative NLTK: [open pull request](https://github.com/nltk/nltk/pull/1333) 71 | * Tutorials on Open Source Tools: 72 | - AMS 2016 Python Symposium Pandas/Matplotlib Tutorials: https://github.com/story645/ams2016_tutorials 73 | - PyCon 2014 Matplotlib Tutorial: https://github.com/story645/matplotlib-tutorial 74 | * GSOC 2011 (Climate Code Foundation): 75 | - open-source embeddable climate data visualization tool 76 | - Server-side numpy/scipy/matplotlib processing 77 | - https://github.com/story645/ccp-viz-toolkit 78 | - https://ams.confex.com/ams/92Annual/webprogram/Paper204786.html 79 | 80 | ## Academic Experience 81 | * Research Projects: 82 | - Apply exploratory data analysis techniques to climate data 83 | - Long Term Forecast Evaluation: https://bitbucket.org/story645/libltf 84 | - River delta flood risk factor analysis 85 | - Machine Learning: https://bitbucket.org/story645/lmclus 86 | * SIParCS Intern 2012 (National Center for Atmospheric Research) 87 | - Ported the C-shell driver scripts of the POP diagnostics to Python 88 | - Created a web interface for the diagnostics using the Pyramid web framework 89 | - https://ams.confex.com/ams/93Annual/webprogram/Paper223980.html 90 | 91 | ## Why this project? 92 | I have taught matplotlib so often that I'm kind of ashamed that I haven't contributed, and this seems like a good entry point. 93 | I work with a lot of categorical data, especially climate data, and so I've written loads of boilerplate to plot it nicely in matplotlib; therefore I think it would be awesome to have this all incorporated into the project. 94 | 95 | 96 | -------------------------------------------------------------------------------- /2016/proposals/blessing-chick.md: -------------------------------------------------------------------------------- 1 | # A scikit-bio-based bioinformatics file format converter 2 | 3 | ## Abstract 4 | In bioinformatics, there are many defined file formats that represent very similar data. scikit-bio has a powerful I/O framework that enables users to load diverse formats into their relevant in-memory representations, regardless of input file format. It would be useful for many bioinformatics end-users (who are often not comfortable working with APIs) to have a file format converter for formats supported in scikit-bio. The goal of this project is to develop a scikit-bio Python API for file format conversions and then develop a simple web-based conversion tool for easy file upload and conversion. This project would also likely involve the development of additional file format readers and writers for scikit-bio, to increase the power of this application and of scikit-bio. 5 | 6 | ## Technical Details 7 | scikit-bio is an open-source Python package providing data structures, algorithms, and educational resources for bioinformatics. It provides a powerful I/O framework called the I/O registry which enables users to load diverse formats into their relevant in-memory representations, regardless of input file format.
The scikit-bio I/O registry provides a single entry point to all I/O using a simple procedural interface. Additionally, the registry dynamically generates an equivalent object-oriented API for any scikit-bio object that can be serialized or deserialized (i.e., written to or read from a file). Finally, the registry supports automatic file format detection via file sniffers. In this project, we will develop an API for file conversion based on this framework and, in the second part of the project, we will develop a file conversion tool using our API. The file conversion tool will also allow us to upload files in unknown formats and use the built-in sniffers from scikit-bio to auto-detect the file format before conversion. 8 | We will use the Django REST framework in this project to develop the API. 9 | 10 | ## Schedule of Deliverables 11 | 12 | ### May 25th - June 7th 13 | 14 | **Read the documentation of the Django REST framework.** 15 | **Study the scikit-bio I/O registry.** 16 | **Set up the working environment, create the Django project, and add scikit-bio to it.** 17 | 18 | ### June 8th - June 21st 19 | 20 | **Extract the I/O handling routines from the I/O registry into a new class to handle conversion.** 21 | **Set up the HTML forms of the GUI conversion tool.** 22 | 23 | ### June 22nd - July 5th 24 | 25 | **Add support for a class to handle unknown file formats using the built-in sniffers.** 26 | 27 | ### July 6th - July 19th 28 | 29 | **Add utility controllers to our new I/O class and handle exceptions.** 30 | 31 | ### July 20th - August 2nd 32 | 33 | **Implement API request methods and verbs.** 34 | 35 | ### August 3rd - August 16th 36 | 37 | **Continue implementing API request methods and verbs and handle exceptions.** 38 | 39 | ### August 17th - August 21st 19:00 UTC 40 | 41 | **Write tests for our API.** 42 | 43 | **Perform some format conversions.** 44 | 45 | **Write documentation.** 46 | 47 | ## Future works 48 | 49 | This project will serve as groundwork for a more robust API and conversion tool that will be enhanced to support more formats, including third-party data formats not included in the scikit-bio package. 50 | 51 | 52 | ## Open Source Development Experience 53 | 54 | I have contributed to open source projects in the past, where I developed a data import tool for OpenMRS. I have also fixed bugs in other projects. 55 | 56 | ## Academic Experience 57 | I am currently pursuing a B.Eng. in Computer Software Engineering at the University of Buea. I also hold a B.Sc. in Microbiology. 58 | 59 | ## Why this project? 60 | 61 | I have been coding in Python for 3 years now and have built a couple of applications using the Django and Play frameworks. I am very interested in everything Python, and I founded a Python club at my university, Pygineers (meaning Python engineers). 62 | 63 | ## Appendix 64 | 65 | -------------------------------------------------------------------------------- /2016/proposals/iso-hayate.md: -------------------------------------------------------------------------------- 1 | # Title 2 | 3 | Online Word2Vec development 4 | 5 | ## Abstract 6 | 7 | * Word embedding (also known as Word2Vec) is one of the major topics in natural language processing research, and we can build such models easily using Gensim. 8 | 9 | * However, we cannot update a Word2Vec model after the first training, so we have to retrain the model every time we get new data. 10 | 11 | * Therefore, I want to try to add an online learning feature to the Gensim Word2Vec model; a rough sketch of the intended usage is shown below.
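As a rough sketch of the intended workflow (using the API of recent Gensim releases, where incremental vocabulary updates eventually became available; older versions use `size` instead of `vector_size` and may lack `build_vocab(..., update=True)`):

```python
from gensim.models import Word2Vec

# Toy corpus: each document is a list of tokens.
initial_corpus = [
    ["human", "interface", "computer"],
    ["survey", "user", "computer", "system", "response", "time"],
]
model = Word2Vec(initial_corpus, vector_size=50, min_count=1)

# Later, a new batch of documents arrives: extend the vocabulary and
# continue training instead of retraining from scratch.
new_batch = [["graph", "trees"], ["graph", "minors", "survey"]]
model.build_vocab(new_batch, update=True)
model.train(new_batch, total_examples=len(new_batch), epochs=model.epochs)
```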
12 | 13 | ## Technical Details 14 | 15 | * Online machine learning is a method used for streaming data, in which the model is updated every time new data arrives. With vanilla Word2Vec we have to retrain the model from scratch every time we get new data. Word usage continues to change, so we need to update the Word2Vec model constantly. 16 | 17 | ## Schedule of Deliverables 18 | 19 | ### now - May 24th (not GSoC period) 20 | 21 | * Understand Mikolov's papers [1][2][3] and the Gensim Word2Vec architecture. 22 | 23 | ### May 25th - June 7th 24 | 25 | * Read [online Word2Vec for Gensim](http://rutumulkar.com/blog/2015/word2vec/)[4] and Gensim's issues [#435](https://github.com/piskvorky/gensim/pull/435) and [#615](https://github.com/piskvorky/gensim/pull/615) to prepare for developing online Word2Vec. 26 | 27 | ### June 8th - June 21st 28 | 29 | * Start developing online Word2Vec, referring to Rutu's code. 30 | 31 | * I want to reproduce online Word2Vec at least in a local environment. 32 | 33 | ### June 22nd - July 5th 34 | 35 | * Continue developing online Word2Vec. 36 | 37 | * Evaluate online Word2Vec using Rutu's data. 38 | 39 | * Start surveying methods for evaluating Word2Vec. 40 | 41 | ### July 6th - July 19th 42 | 43 | * Reread the Gensim Word2Vec architecture to understand the online Word2Vec code from a general perspective. 44 | 45 | * Find and resolve the AppVeyor error. 46 | 47 | ### July 20th - August 2nd 48 | 49 | * Evaluate online Word2Vec using some data sets, such as the [Lee corpus](http://www.socsci.uci.edu/~mdlee/lee_pincombe_welsh_document.PDF)[5]. 50 | 51 | * Test online Word2Vec using several parameters. 52 | 53 | * Write documentation so users can easily use the online method and choose parameters. 54 | 55 | ### August 3rd - August 16th 56 | 57 | * Start writing a blog post about online Word2Vec usage. 58 | 59 | ### August 17th - August 21st 19:00 UTC 60 | 61 | * Write a blog post about online Word2Vec performance. 62 | 63 | ## Future works 64 | 65 | * I'm interested in word embedding methods, not only Word2Vec but also Doc2Vec and similar models. I think the online method is applicable to other word embedding methods. 66 | 67 | * Therefore, I want to keep contributing to Gensim to add online features. 68 | 69 | ## Open Source Development Experience 70 | 71 | * I haven't had open source development experience so far. Google Summer of Code is a good opportunity for me to start developing open source software. 72 | 73 | ## Academic Experience 74 | 75 | * I got a B.S. in statistics at Osaka University and I'm now in the Computational Linguistics Laboratory at the Nara Institute of Science and Technology. 76 | 77 | * I studied statistics from a theoretical point of view. In my earlier lab, I studied sparse estimation for high-dimensional data. Many people use Lasso for high-dimensional data, but I used Boosting. Boosting also has a good feature for sparse estimation.[6] 78 | 79 | * I also studied machine learning as a hobby. I finished the [machine learning course](https://www.coursera.org/learn/machine-learning)[6] by Andrew Ng and read [Foundations of Machine Learning](http://www.cs.nyu.edu/~mohri/mlbook/)[7], which presents machine learning from a statistical perspective. 80 | 81 | * My current research interest is natural language processing, especially word embedding. 82 | 83 | * I also have experience working as an intern developing a recommender system using Gensim. I used job applicants' resume data and job offer data for job matching using the Gensim implementation of Latent Dirichlet Allocation. 84 | 85 | ## Why this project?
86 | 87 | * The reason for choosing the Gensim project is that I use Gensim a lot! 88 | I belong to the Computational Linguistics Laboratory and I study word embedding. 89 | Gensim lets us use several kinds of word embedding implementations easily. 90 | I want to make a contribution to the Gensim community and at the same time understand the Gensim architecture deeply. 91 | 92 | * Therefore, I chose the Gensim project for Google Summer of Code 2016. 93 | 94 | ## Appendix 95 | [1] Tomas Mikolov, Wen-tau Yih, Geoffrey Zweig, "Linguistic Regularities in Continuous Space Word Representations." 2013, NAACL 96 | 97 | [2] Tomas Mikolov, Kai Chen, Greg Corrado, Jeffrey Dean, "Efficient Estimation of Word Representations in Vector Space." 2013, ICLR 98 | 99 | [3] Tomas Mikolov, Ilya Sutskever, Kai Chen, Greg S. Corrado, Jeff Dean, "Distributed Representations of Words and Phrases and their Compositionality." 2013, NIPS 100 | 101 | [4] Online Word2Vec for Gensim: http://rutumulkar.com/blog/2015/word2vec/ 102 | 103 | [5] Lee, M., Pincombe, B., & Welsh, M. "An Empirical Evaluation of Models of Text Document Similarity." 2005, Proceedings of the 27th Annual Conference of the Cognitive Science Society 104 | 105 | [6] Machine learning course at Coursera: https://www.coursera.org/learn/machine-learning 106 | 107 | [7] Mehryar Mohri, Afshin Rostamizadeh, and Ameet Talwalkar, "Foundations of Machine Learning." 2012, MIT Press. 108 | -------------------------------------------------------------------------------- /2016/proposals/missfont.log: -------------------------------------------------------------------------------- 1 | mktextfm ecrm1000 2 | mktextfm ecrm1000 3 | mktextfm ecrm1000 4 | mktextfm ecrm1000 5 | -------------------------------------------------------------------------------- /2016/proposals/narayan-aditya-visualizer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/2016/proposals/narayan-aditya-visualizer.png -------------------------------------------------------------------------------- /2016/proposals/skeleton.md: -------------------------------------------------------------------------------- 1 | # Title 2 | 3 | ## Abstract 4 | 5 | ## Technical Details 6 | 7 | ## Schedule of Deliverables 8 | 9 | ### May 25th - June 7th 10 | 11 | **You need to accomplish this to mid-term.** 12 | 13 | ### June 8th - June 21st 14 | 15 | **You need to accomplish this to mid-term.** 16 | 17 | ### June 22nd - July 5th 18 | 19 | ### July 6th - July 19th 20 | 21 | ### July 20th - August 2nd 22 | 23 | ### August 3rd - August 16th 24 | 25 | ### August 17th - August 21st 19:00 UTC 26 | 27 | **Week to scrub code, write tests, improve documentation, etc.** 28 | 29 | ## Future works 30 | 31 | ## Open Source Development Experience 32 | 33 | ## Academic Experience 34 | 35 | ## Why this project? 36 | 37 | ## Appendix 38 | -------------------------------------------------------------------------------- /2016/proposals/stolliker-ryan.md: -------------------------------------------------------------------------------- 1 | # Ryan Stolliker: Result Aggregation Server for Software Carpentry 2 | 3 | ## Abstract 4 | 5 | During Software Carpentry's workshops, which teach computing skills to the greater scientific community, learners use Python scripts to test that they have installed the programming environment and other dependencies correctly.
Currently, this information is known only to the participants and cannot be used or tracked by the organizers. Thus, the goal of this project is to enable the collection of this installation data and to store it for future use and analysis. 6 | 7 | ## Technical Details 8 | 9 | ### Server 10 | 11 | All server management will be written in Python. The server itself will be implemented using [bottle](http://bottlepy.org/docs/dev/index.html), a simple web framework with functionality to parse forms and JSON requests and responses. 12 | 13 | ####Interactions 14 | 15 | * `/first/` 16 | * The first installation script will send platform, system, release, implementation, version information here in JSON or forms format. The server will send back a 200 response. 17 | * `/second/` 18 | * The second installation script will send platform, system, release, implementation, version and dependency data here in JSON or forms format. The server will send back a 200 response. 19 | * `/data/` 20 | * A request to this URL will recieve JSON from the server which contains all information from the database for analysis in some future application. 21 | 22 | ###Database 23 | 24 | SQLite will be used as the database to store the results of the installation scripts. SQLAlchemy will be used to manage the database and all Python interaction with the database. If database migration is needed in the future, Alembic will be used. 25 | 26 | ####Proposed Schema 27 | 28 | ``` 29 | CREATE TABLE System ( 30 | SID INTEGER PRIMARY KEY AUTOINCREMENT, 31 | plat VARCHAR(30) NOT NULL, 32 | sys VARCHAR(10) NOT NULL, 33 | rel VARCHAR(10) NOT NULL, 34 | implementation VARCHAR(10) NOT NULL, 35 | version VARCHAR(6) NOT NULL 36 | ); 37 | ``` 38 | `System` is the table that holds data about the machine the script is running on. `SID` is an atuomatically-generated identifier assigned each time data is entered into the server. `Plat` is the system information provided by calling `platform.platform`. `Sys` is the operating system provided by `platform.system`. `Rel` is the version of the operating system provided by `platform.release`. `Implementation` is the Python distribution (CPython, PyPy, etc.) provided by `platform.python_implementation`. `Version` is the Python version provided by `platform.python_version`. The size of each `varchar` may be different in the actual database depending on the maximum length of the string returned by the platform functions. 39 | 40 | ``` 41 | CREATE TABLE Dependency ( 42 | SID INTEGER PRIMARY KEY REFERENCES System (SID), 43 | success BOOLEAN NOT NULL 44 | ); 45 | ``` 46 | Each test in the [second test script](https://github.com/wking/swc-setup-installation-test/blob/master/swc-installation-test-2.py) will have its own table in the database, so `Dependency` is meant to be a placeholder for the name of the dependency that will also be the name of the table. This data is associated with `System` information by the `SID`. `Success` is a boolean which represents whether the specific test was passed. If a workshop does not test for a specific dependency, then there will not an entry for that dependency associated with the results from those machines. If more dependencies are added in the future, then a new table would have to be created, but no existing tables would have to be altered. 47 | 48 | ### Updates to Test Scripts 49 | 50 | The existing installation testing scripts, already written in Python, will be updated to send data to the server. 
Because the point of the scripts is to determine whether Python is configured correctly, the changes should not rely on any third party libraries and be compatible with both Python 2 and 3 so that even if a system fails a test, it will still be able to send the results. Data will be sent using the http.client and urllib.parse libraries. Before sending, the user will be prompted to decide whether they actually want to send the information. System data will be collected using the platform library, which is included in Python's standard library, specifically: 51 | 52 | * general platform information 53 | * `platform.platform()` 54 | * Python implementation 55 | * `platform.python_implementation()` 56 | * Python version 57 | * `platform.python_version()` 58 | * Operating system information 59 | * `platform.uname()` 60 | * Dependency test results 61 | * The [second test script](https://github.com/wking/swc-setup-installation-test/blob/master/swc-installation-test-2.py) tests that various libraries and programs are properly installed on the system, and this information will be sent to the server. 62 | 63 | ## Schedule of Deliverables 64 | 65 | ### May 25th - June 7th 66 | 67 | Create SQLite database schema using SQLAlchemy for high level manipulation of the database. 68 | 69 | ### June 8th - June 21th 70 | 71 | Begin work on server, with focus on handling POST requests, interpreting them, and correctly using database functions to transfer received information into SQLite database. 72 | 73 | ### June 22nd - July 5th 74 | 75 | Continue creating server, now focusing on aspects of how to retrieve information from database and serve it for analysis. 76 | 77 | ### July 6th - July 19th 78 | 79 | Modify the [first test script](https://github.com/wking/swc-setup-installation-test/blob/master/swc-installation-test-1.py) to submit simple pass/fail data about the Python version to the server. 80 | 81 | ### July 20th - August 2nd 82 | 83 | Modify the [second test script](https://github.com/wking/swc-setup-installation-test/blob/master/swc-installation-test-2.py) to submit data to the server on the platform and the pass/fail status of dependencies and tools. 84 | 85 | ### August 3rd - August 16th 86 | 87 | Make a simple front end for easy viewing of data, as either a desktop or web application, which gets its information from the created API. 88 | 89 | ### August 17th - August 21th 19:00 UTC 90 | 91 | Week to account for complications and delays, test on more machines and operating systems. 92 | 93 | ## Future works 94 | 95 | If additional software dependencies are added to the installation scripts, then the server's handling of the request would need to change with it. Alembic would be used to implement any database migrations. 96 | 97 | ## Open Source Development Experience 98 | 99 | This project with Software Carpentry will be my first experience with open source development. Some of my personal projects that I have done are [solving programming challenges](https://github.com/rstolliker/challenges) as well as [an iOS app](https://github.com/rstolliker/FirstApp). 100 | 101 | ## Academic Experience 102 | 103 | I am a Computer Science student with a specialization in information at the University of California, Irvine. Some of my relevant coursework includes classes in database management and many levels of Python, including networking and object oriented design. Outside of university I have also taught myself HTML, CSS, and JavaScript. 104 | 105 | ## Why this project? 
106 | 107 | When I first came to college I was originally a Mechanical Engineering major. One of the first classes I took was a "programming for engineers" course that taught MATLAB for scientific analysis and graphing. My high school didn't offer any Computer Science courses so this was my first exposure to programming, but I was already hooked and soon I changed my major. Software Carpentry caught my attention because its goal of teaching computing skills to other scientific fields reminded me of my own journey into Computer Science. 108 | 109 | ## Appendix 110 | 111 | [This is an example project that I did](https://github.com/rstolliker/APIexample) which includes an SQLite database, a server running bottle, and a client that collects some system information and sends it to the server. 112 | 113 | [Here is the issue thread I opened.](https://github.com/numfocus/gsoc/issues/85) 114 | ###Contact Information 115 | 116 | Email: ryan.stolliker@gmail.com 117 | 118 | IRC: ryanCS on freenode 119 | 120 | Pacific Time Zone (California) -------------------------------------------------------------------------------- /2016/proposals/tucek-vit.md: -------------------------------------------------------------------------------- 1 | # Word Mover's Distance for Gensim 2 | 3 | ## Abstract 4 | 5 | The Word Mover's Distance (WMD) measures the dissimilarity between two text documents as the minimum amount of distance that the embedded words of one document need to “travel” to reach the embedded words of another document. This distance can be viewed as an instance of Earth Mover's Distance, a well studied transportation problem for which several highly efficient solvers have been developed. The WMD metric leads to unprecedented low k-nearest neighbor document classification error rates and has no hyperparameter. While there is an academic implementation in C, there is no implementation of WMD available in Python. I will contribute a scalable implementation of WMD to the data science world in Python. A quality implementation will be widely used in the industry. 6 | 7 | ## Technical Details 8 | 9 | Word2Vec [1, 2] is a continous word representation technique for creating word vectors to capture the syntax and semantics of words. The vectors used to represent the words have many interesting features, for example `king-man+woman=queen`. 10 | 11 | Many methods are proposed on how to measure distance between sentences in this new vector space. "Word Mover's Distance" (WMD) [3] is a novel distance-between-text-documents measure. It outperforms simple combinations like sum or mean. Visually, the distance between the two documents is the 12 | minimum cumulative distance that all words in document A need to travel to exactly match document B. 13 | 14 | For example, these two sentences are close with respect to WMD even though they only have one word in common: "The restaurant is loud, we couldn't speak across the tabel" and "The restaurant has a lot to offer but easy conversation is not there". [4] 15 | 16 | **Goals** 17 | 18 | 1. Demonstrate understanding theory and practice of document distances by describing, implementing and evaluating WMD. 19 | 20 | 2. Implement the WMD. Processing must be done in constant memory independent on the full training set size. The implementation must rely on Python's NumPy and SciPy libraries for high performance computing. Optionally implement a version that can use multiple cores on the same machine. 21 | 22 | 3. 
Practise modern, practical distributed project collaboration and engineering tools (git, mailing lists, continuous build, automated testing). 23 | 24 | 25 | **Deliverables** 26 | 27 | 1. Code: a pull request against gensim [6] on github [7]. Gensim is an open-source Python library for Natural Language Processing. The pull request is expected to contain robust, well-tested and well-documented industry-strength implementation, not flimsy academic code. I will check corner cases, summarize insights into documentation tips and examples. 28 | 29 | 2. Report: timings, memory use and accuracy of your WMD using the freely available datasets in [3], for example the "20 newsgroups" corpus [8]. A summary of insights into parameter selection and tuning of document distances. 30 | 31 | **Resources**: 32 | 33 | [1] [Mikolov, Tomas, et al. "Efficient estimation of word representations in vector space." arXiv preprint arXiv:1301.3781 (2013)](http://arxiv.org/pdf/1301.3781v3.pdf) 34 | 35 | [2] [Gensim word2vec tutorial at Kaggle](https://www.kaggle.com/c/word2vec-nlp-tutorial/details/part-2-word-vectors) 36 | 37 | [3] ["From Word Embeddings to Document Distances" Kusner et al 2015](http://jmlr.org/proceedings/papers/v37/kusnerb15.pdf) 38 | 39 | [4] [Sudeep Das "Navigating themes in restaurant reviews with Word Mover’s Distance", 2015] (http://tech.opentable.com/2015/08/11/navigating-themes-in-restaurant-reviews-with-word-movers-distance/) 40 | 41 | [5] [Matthew J Kusner's WMD in C on github](https://github.com/mkusner/wmd) 42 | 43 | [6] [Radim Řehůřek and Petr Sojka (2010). Software framework for topic modelling with large corpora. Proc. LREC Workshop on New Challenges for NLP Frameworks](http://www.fi.muni.cz/usr/sojka/papers/lrec2010-rehurek-sojka.pdf) 44 | 45 | [7] [Gensim on github](https://github.com/piskvorky/gensim) 46 | 47 | [8] [The 20 newsgroups dataset](http://qwone.com/˜jason/20Newsgroups/) 48 | 49 | [9] [Gensim github issue #482](https://github.com/piskvorky/gensim/issues/482) 50 | 51 | 52 | ## Schedule of Deliverables 53 | 54 | ### May 25th - June 7th 55 | 56 | Get better acquainted with Gensim, study the WMD paper [4]. 57 | 58 | ### June 8th - June 21th 59 | 60 | First prototype implementation of the WMD distance. 61 | 62 | ### June 22nd - July 5th 63 | 64 | Tests for correctness, finding and creating tests for corner cases. Possibly trying out another word embeddings apart from word2vec. 65 | 66 | ### July 6th - July 19th 67 | 68 | Cleaning up / enhancing of the implementation. 69 | 70 | ### July 20th - August 2nd 71 | 72 | Tests, benchmarking, bug hunting. 73 | 74 | ### August 3rd - August 16th 75 | 76 | Writing up documentation & report. 77 | 78 | ### August 17th - August 21th 19:00 UTC 79 | 80 | Week to scrub code, improve documentation, etc. 81 | 82 | ## Future works 83 | 84 | Implement and develop ideas sketched in the original WMD paper, implement other ideas from the Gensim student project list (such as implementation of the AKSW topic coherence measure or on-line algorithm for non-negative matrix factorization). 85 | 86 | ## Open Source Development Experience 87 | 88 | I've contributed to the SageMath project. 89 | 90 | ## Academic Experience 91 | 92 | Three peer-reviewed research articles in differential geometry / global analysis. 93 | 94 | ## Why this project? 95 | 96 | I am slowly transitioning from pure mathematics to the field of data analysis. Gensim seems like a perfect place to get my feet wet. 
Also, I've talked with Radim Rehurek after his talk on word2vec and he seemwed like a nice guy. 97 | 98 | ## Appendix 99 | 100 | You can find more details about me at `https://www.linkedin.com/in/vittucek` -------------------------------------------------------------------------------- /2017/accepted_student_blogs.md: -------------------------------------------------------------------------------- 1 | # Google Summer of Code 2017 2 | 3 | ## Data Retriever 4 | 5 | | Student | Blog link | 6 | | ------- | --------- | 7 | | Shivam Negi | https://medium.com/data-retriever-gsoc17 | 8 | 9 | ## Matplotlib 10 | 11 | | Student | Blog link | 12 | | ------- | --------- | 13 | | Harshit Patni | https://patniharshit.github.io/ | 14 | | Kaitlyn Chait | https://katierose1029.github.io/ | 15 | 16 | ## MDAnalysis 17 | 18 | | Student | Blog link | 19 | | ------- | --------- | 20 | | Utkarsh Bansal | http://utkarshbansal.me | 21 | 22 | ## FEniCS 23 | 24 | | Student | Blog link | 25 | | ------- | --------- | 26 | | Ivan Yashchuk | https://ivanyashchuk.github.io/ | 27 | | Michal Habera | http://karlin.mff.cuni.cz/%7Ehabera/?p=gsoc17 | 28 | 29 | ## PyMC3 30 | 31 | | Student | Blog link | 32 | | ------- | --------- | 33 | | Maxim Kochurov | https://ferrine.github.io | 34 | | Bhargav Srinivasa | https://summerofcode2017.wordpress.com/ | 35 | | Bill William Engels | http://bwengals.github.io/ | 36 | 37 | ## Gensim 38 | 39 | | Student | Blog link | 40 | | ------- | --------- | 41 | | Chinmaya Pancholi | https://chinmayapancholi13.github.io/ | 42 | | Prakhar Pratyush | https://rare-technologies.com/google-summer-of-code-2017-live-blog-performance-improvement-in-gensim-and-fasttext/ | 43 | | Parul Sethi | https://rare-technologies.com/gsoc17-training-and-topic-visualizations/ | 44 | 45 | -------------------------------------------------------------------------------- /2017/gensim_proposal.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/2017/gensim_proposal.pdf -------------------------------------------------------------------------------- /2017/ideas-list-fenics.md: -------------------------------------------------------------------------------- 1 | # FEniCS Project 2 | 3 | ## Develop [XDMF](http://www.xdmf.org) format for visualisation and checkpointing 4 | 5 | ### Abstract 6 | 7 | XDMF is a file format which is designed for very large simulation datasets. The main file 8 | is XML, but there is provision for the "heavy data" to be stored in HDF5 using MPI-IO in parallel. 9 | Datasets may be hundreds of GB in size. In FEniCS we have used XDMF to produce visualisations, but 10 | it is also highly desirable to use for checkpointing, i.e. saving the current state of a simulation 11 | and reading it back in later. The data structures in FEniCS do not map directly onto the data 12 | structures expected by the current implementation of XDMF. We would like to be able to save FEniCS 13 | data structures directly in XDMF, and still be able to visualise the data. 14 | 15 | | **Intensity** | **Priority | **Involves** | **Mentors** | 16 | | ------------- | -----------| ------------- | ----------- | 17 | | Moderate | Medium | Python, C++, XML | [David DeMarle](mailto:dave.demarle@kitware.com), [Chris Richardson](mailto:chris@bpi.cam.ac.uk) | 18 | 19 | ### Technical Details 20 | 21 | FEniCS has a C++ interface, called DOLFIN, and wrappers in SWIG for a Python interface. 
22 | There is already an I/O module which produces XDMF output, and this would need to be extended and 23 | adapted to produce the suitable output for an enhanced XDMF specification. Additionally, a new 24 | C++ method would nede to be implemented to read in the XDMF from file in parallel, and distribute it 25 | correctly to restore from file after checkpointing. 26 | 27 | From the visualisation side, a set of Python filters will need to be developed to read the 28 | new data format in ParaView and display it using VTK. Some work has already been done in this direction: 29 | https://github.com/chrisrichardson/xdmf-fe-filter 30 | Ultimately, the filters will need to be translated to C++ and incorporated as part of the Xdmf3 library. 31 | 32 | ### Open Source Development Experience 33 | 34 | This project requires knowledge of C++ and Python, and will require working with multiple git repositories 35 | and different teams with FEniCS and Kitware developers. 36 | 37 | ### First steps 38 | 39 | Install FEniCS from https://bitbucket.org/fenics-project/dolfin and try out the demos. Install [ParaView](http://www.paraview.org) 40 | and view the output from the FEniCS demos. The FEniCS tutorial at https://fenicsproject.org/tutorial/ 41 | has an up-to-date description of using FEniCS to solve partial differential equations. 42 | 43 | ## Develop assembly of finite element forms on quadtrilateral and hexahedral meshes 44 | 45 | ### Abstract 46 | 47 | One of the first steps in the [finite element method](https://en.wikipedia.org/wiki/Finite_element_method) 48 | is splitting the domain on which the partial differential equation is solved into small parts, called cells, 49 | which in sum make a mesh. FEniCS has always supported meshes consisting of simplex cells (e.g. triangles and tetrahedrons), 50 | but has limited support for meshes consisting of of quadrilateral (quad) and hexahedral (hex) cells. 51 | Finite element problems solved on quad/hex meshes often have better approximation properties and better robustness 52 | to cell distortion than those solved on simplex meshes. We would like to be able to assemble and solve the simplest 53 | PDE, a Poisson problem on a quad/hex mesh in FEniCS. 54 | 55 | | **Intensity** | **Priority | **Involves** | **Mentors** | 56 | | ------------- | -----------| ------------- | ----------- | 57 | | High     | Medium     | Python, C++ | [Jack S. Hale](mailto:jack.hale@uni.lu), [Chris Richardson](mailto:chris@bpi.cam.ac.uk), [Martin Alnaes](mailto:martinal@simula.no) | 58 | 59 | ### Technical Details 60 | 61 | Many constituent parts to assemble and solve on quad/hex meshes are already in FEniCS, but 62 | there are missing links to get the pieces working as a whole. You will lead a project 63 | get these missing pieces into place. 64 | 65 | A key technological innovation in FEniCS is the development of a domain specific language 66 | for specifying finite element variational forms (UFL) and a form compiler (FFC) that can 67 | translates UFL into low-level C++ code that is used to generate cell tensors (local matrices) on 68 | every cell in the mesh. 69 | 70 | Currently this toolchain cannot produce the C++ code for quad/hex cell geometries. You will 71 | need to interface FFC with an existing class in FIAT to evaluate the tensor product finite 72 | element basis functions on the quad/hex cell geometry. You will then need to add appropriate code 73 | into FFC to compute geometric quantities on the quad/hex cell geometry, using the existing 74 | simplex code as an example. 
Finally, you will need to add hooks in our DOLFIN problem solving 75 | environment to solve a complete problem. 76 | 77 | ### Open Source Development Experience 78 | 79 | This project requires knowledge of C++ and Python, and will require working with multiple git repositories 80 | and different teams with FEniCS developers. Some knowledge of finite element methods would also be desirable, 81 | but not necessary. Those who are interested compiler technology might also find this project suitable. 82 | 83 | ### First steps 84 | 85 | Install FEniCS from https://bitbucket.org/fenics-project/dolfin and try out the demos. The FEniCS book 86 | https://fenicsproject.org/pub/book/book/fenics-book-2011-06-14.pdf (GNU Free Doc License) contains a 87 | description of the form compiler technology behind FEniCS. The FEniCS tutorial at 88 | https://fenicsproject.org/tutorial/ has an up-to-date description of using FEniCS to solve partial 89 | differential equations. 90 | -------------------------------------------------------------------------------- /2017/ideas-list-skeleton.md: -------------------------------------------------------------------------------- 1 | # {{ Sub Organization Name }} 2 | 3 | {{ Table of Content with link to ideas }} 4 | 5 | ## {{ Idea Title }} 6 | 7 | ### Abstract 8 | 9 | {{ Very short description of the project. }} 10 | 11 | | **Intensity** | **Priority | **Involves** | **Mentors** | 12 | | ------------- | -----------| ------------- | ----------- | 13 | | {{ Trivial - Easy - Moderate - Hard }} | {{ Low - Medium - High }} | {{ }} | {{ [@foo][], [@bar][] }} | 14 | 15 | ### Technical Details 16 | 17 | {{ 18 | Long description of the project. 19 | **Must** include all technical details of the projects like libraries involved. 20 | }} 21 | 22 | ### Open Source Development Experience 23 | 24 | {{ 25 | List of background experience that we expected from the student. 26 | }} 27 | 28 | ### First steps 29 | 30 | Students doesn't need to do this before Google Summer of Code code period starts 31 | but will be good if they do just because they will be sure if this is how they 32 | want to spend the summer. 33 | -------------------------------------------------------------------------------- /2017/ideas-list-stan.md: -------------------------------------------------------------------------------- 1 | # Stan 2 | 3 | ## Protocol Buffer Data Transport Layer 4 | 5 | ### Abstract 6 | 7 | Add protocol buffer support in C++ for data input and sampling for the Stan probabilistic programming language. 8 | 9 | 10 | | **Intensity** | **Priority | **Involves** | **Mentors** | 11 | | ------------- | -----------| ------------- | ----------- | 12 | | Moderate | High | C++, statistics | [@sakrejda](https://github.com/sakrejda) | 13 | 14 | 15 | ### Technical Details 16 | 17 | [Stan](http://mc-stan.org/) is an open-source (BSD) probabilistic 18 | programming language for fitting statistical models, making 19 | predictions, and estimating event probabilities. Its current 20 | file-based input is based on an ASCII representation based on the R 21 | language's dump format. The format is ad hoc and it lacks library 22 | support in languages other than R. The current file-based output is 23 | based on CSV files with metadata encoded as comments. 24 | 25 | These both need to be replaced with a protocol buffer interface that 26 | can be used across Stan's interfaces (command line, R, and Python). 
27 | 28 | The summer of code project is to provide an implementation of Stan's 29 | I/O using [protocol 30 | buffers](https://developers.google.com/protocol-buffers/), Google's 31 | "language-neutral, platform-neutral extensible mechanism for 32 | serializing structured data." There is a [protocol buffer C++ 33 | tutorial](https://developers.google.com/protocol-buffers/docs/cpptutorial). 34 | 35 | The implementation will be done in [Stan's C++ 36 | library](https://github.com/stan-dev/stan) so that it may be exposed 37 | to the command line (C++), R, and Python interfaces. Only the command 38 | line version in C++ is within scope, though a student with appropriate 39 | experience could add support for one or both of R and Python. 40 | 41 | ### Open Source Development Experience 42 | 43 | We're going to be using the following tools: 44 | 45 | * GitHub branching, pull requests, and continuous integration 46 | 47 | * clang, gcc compilers for cross-platform C++ 48 | 49 | * make for builds 50 | 51 | * google test for unit testing 52 | 53 | * doxygen and GitHub wiki for documentation 54 | 55 | * Google hangouts for meetings 56 | 57 | * Python, R, and statistical modeling experience useful but not necessary 58 | 59 | 60 | ### First steps 61 | 62 | * install the protocol buffer package and get hello world (I/O round 63 | trip) working 64 | 65 | * get a Stan program working using CmdStan and the R dump format 66 | 67 | * first real step will be implementing a `stan::io::var_context` 68 | object based on protocol buffers. 69 | -------------------------------------------------------------------------------- /2017/ideas-list.md: -------------------------------------------------------------------------------- 1 | # Ideas Pages 2 | 3 | This is the home page of projects ideas of NumFocus for Google Summer of Code 2017. 4 | Since NumFOCUS is a umbrella organization you will only find links to the ideas 5 | page of each organization under NumFocus umbrella at this page. 6 | 7 | - conda-forge https://docs.google.com/document/d/1KSQvcP3Hxr60IhV-_dcGIb4IkmAEeAXNIqdX_2sqYoM 8 | - Data Retriever https://github.com/weecology/retriever/wiki/GSoC-2017-Project-Ideas 9 | - FEniCS https://github.com/numfocus/gsoc/blob/master/2017/ideas-list-fenics.md 10 | - gensim https://github.com/RaRe-Technologies/gensim/wiki/GSOC-2017-project-ideas 11 | - nteract https://github.com/nteract/nteract/wiki/GSoC-2017-Ideas 12 | - matplotlib https://github.com/numfocus/gsoc/blob/master/2017/ideas-list-matplotlib.md 13 | - MDAnalysis https://github.com/MDAnalysis/mdanalysis/wiki/GSoC-2017-Project-Ideas 14 | - PyMC3 https://github.com/pymc-devs/pymc3/wiki/GSoC-2017-projects 15 | - Stan https://github.com/numfocus/gsoc/blob/master/2017/ideas-list-stan.md 16 | 17 | 18 | See the [README](https://github.com/numfocus/gsoc/blob/master/READMD.md) for contact information of each org. 
19 | -------------------------------------------------------------------------------- /2017/proposals/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/2017/proposals/.gitkeep -------------------------------------------------------------------------------- /2017/proposals/Chinmaya_Pancholi.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/2017/proposals/Chinmaya_Pancholi.pdf -------------------------------------------------------------------------------- /2017/proposals/Data_Retriever_Shivam_Negi.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/2017/proposals/Data_Retriever_Shivam_Negi.docx -------------------------------------------------------------------------------- /2017/proposals/Data_Retriever_Shivam_Negi.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/2017/proposals/Data_Retriever_Shivam_Negi.pdf -------------------------------------------------------------------------------- /2017/proposals/dushatskiy-arkadiy.md: -------------------------------------------------------------------------------- 1 | # Online NNMF​ 2 | 3 | ## Abstract 4 | 5 | The goal of project is to make an efficient parallel version of online NNMF algorithm. This algorithm is widely used in recommender systems. The implementation is based on fast Cython code, with BLAS code snippets and also efficient parallelization techniques similar to ones used in LibFM implementation of this algorithm. 6 | 7 | ## Technical Details 8 | 9 | 1. Read related articles, understand problem statement, main problems with parallel implementation and possible solutions. The basic article is LibFM parallel implementation of NNMF: http://www.csie.ntu.edu.tw/~cjlin/papers/libmf/libmf_journal.pdf 10 | 11 | 2. Implement initial sequential version of algorithm using Python + Cython, 12 | measure its performance. Then make the implementation parallel. 13 | 14 | 3. Test algorithm with thinking of corner cases, also corner cases in performance. 15 | Make sure that sequential and parallel versions output similar results. 16 | 17 | 4. Test on datasets​ . Proposed benchmark datasets are Netflix and MovieLens 18 | datasets. Measure how performance depends on number of cores used. 19 | 20 | 5. Write tutorial including examples of usage, necessary code snippets, 21 | performance measurements. 
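To make step 2 above concrete, here is a toy, purely illustrative sketch of the kind of sequential stochastic-gradient factor update that would later be rewritten in Cython and parallelized. It is plain NumPy, omits the non-negativity constraints and all gensim specifics, and every name in it is hypothetical:

```python
# Toy sequential matrix-factorization update (LibMF-style SGD), for
# illustration only; the real implementation would live in Cython.
import numpy as np

def sgd_epoch(ratings, P, Q, lr=0.01, reg=0.05):
    """One pass over observed (user, item, value) triples."""
    for u, i, r in ratings:
        err = r - P[u] @ Q[i]                    # prediction error
        P[u] += lr * (err * Q[i] - reg * P[u])   # update user factors
        Q[i] += lr * (err * P[u] - reg * Q[i])   # update item factors
    return P, Q

rng = np.random.default_rng(0)
P = rng.random((5, 3))    # 5 users, rank-3 factors
Q = rng.random((7, 3))    # 7 items, rank-3 factors
ratings = [(0, 1, 4.0), (2, 3, 5.0), (4, 6, 1.0)]
P, Q = sgd_epoch(ratings, P, Q)
```

The parallel version would then partition the (user, item) pairs into blocks that touch disjoint rows of `P` and `Q`, roughly as in the LibMF paper linked above.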
22 | 23 | ## Schedule of Deliverables 24 | 25 | ### May 1th - May 28th, **Community Bonding Period** 26 | 27 | Read related articles and check LibFM implementation; find and understand how to work with benchmark datasets 28 | 29 | ### May 29th - June 3rd 30 | 31 | Implement sequential version of NNMF using Python + Cython 32 | 33 | ### June 5th - June 9th 34 | 35 | Implement sequential version of NNMF using Python + Cython 36 | 37 | ### June 12th - June 16th 38 | 39 | Test the basic version of NNMF on datasets, check correctness and 40 | measure speed performance 41 | 42 | ### June 19th - June 23th, **End of Phase 1** 43 | 44 | Code parallel version of algorithm 45 | 46 | ### June 26 - June 30th, **Begin of Phase 2** 47 | 48 | Code parallel version of algorithm 49 | 50 | ### July 3rd - July 7th 51 | 52 | Test correctness of parallel implementation 53 | 54 | ### July 10th - July 14th 55 | 56 | Add BLAS snippets to algorithm 57 | 58 | ### July 17th - July 21th, **End of Phase 2** 59 | 60 | Add BLAS snippets to algorithm 61 | 62 | ### July 24th - July 28th, **Begin of Phase 3** 63 | 64 | Test correctness of implementation with BLAS 65 | 66 | ### July 31st - August 4th 67 | 68 | Test advanced algorithm version on datasets 69 | 70 | ### August 7th - August 11th 71 | 72 | Measure performance on benchmark datasets, compare 73 | performance with sequential version 74 | 75 | ### August 14th - August 18th 76 | 77 | Write documentation, include code snippets, test results and all 78 | necessary plots 79 | 80 | ### August 21st - August 25th, **Final Week** 81 | 82 | Write documentation, include code snippets, test results and all 83 | necessary plots 84 | 85 | ### August 28th - August 29th, **Submit final work** 86 | 87 | ## Future works 88 | 89 | Finding bottlenecks in algorithm implementation and future optimizations 90 | 91 | ## Development Experience 92 | 93 | In detail, I have experience in C++ including STL, MPI, OpenMP, CUDA. Also I have experience with Python including such extensions as Cython, Scipy, Numpy, deep learning framework Keras, machine learning library Scikit-Learn and also Gensim. Have experience in accelerating industry code initially written in Python + Numpy by rewriting it to Cython. I know typical parallel algorithms terms (acceleration, efficiency etc.) and methods (types of parallelization). 94 | 95 | ## Other Experiences 96 | 97 | I have experience of using Gensim library for research project, in detail, I used doc2vec to creating vectors for messages in social networks and then classify them into positive and negative opininons about particular places in the city. 98 | Link to contribution to Gensim: https://github.com/RaRe-Technologies/gensim/pull/1239 99 | 100 | ## Why this project? 101 | 102 | I am fond of solving non-trivial programming tasks and implementing state-of-the art algorithms from articles, especially connected with machine learning. This project is focused on modern algorithm, widely used in recommender systems and other applications. The coding part of project seems very interesting to me because it requires parallel implementation which always gives a lot of points to think of and opportunity to solve interesting sub-tasks in the algorithm. 
103 |
104 | ## Appendix
105 |
106 | LibFM implementation of the Online NNMF algorithm: http://www.csie.ntu.edu.tw/~cjlin/papers/libmf/libmf_journal.pdf
107 |
-------------------------------------------------------------------------------- /2017/proposals/ferres_pymc3_vi.md: --------------------------------------------------------------------------------
1 | # Extend Variational Inference Methods in PyMC3
2 |
3 | ## Abstract
4 | Variational inference is a powerful approach for doing complex, often intractable Bayesian inference in approximate form. Common methods (e.g. ADVI) lack expressiveness, so the approximate posterior may not reveal the true nature of the underlying problem; in some applications this can yield unreliable decisions.
5 |
6 | Recently, at NIPS 2016, the [OPVI](https://arxiv.org/abs/1610.09033) framework was presented. It generalizes variational inference so that the problem is built from blocks. The first and essential block is the Model itself. The second is the Approximation; in some cases $\log Q(D)$ is not actually needed, and whether it is depends on the third and fourth parts of the framework, the Operator and the Test Function respectively.
7 |
8 | The Operator is essentially the approach we take: it constructs the loss from a given Model, Approximation and Test Function. The Test Function is not needed if we minimize the KL divergence from Q to the posterior, but then, as a drawback, we need to compute $\log Q(D)$. Sometimes the approximation family is intractable and $\log Q(D)$ is not available; this is where the LS (Langevin-Stein) Operator with a set of test functions comes in.
9 |
10 | The Test Function has a less intuitive meaning. It is usually used with the LS operator and encodes everything we want from our approximate distribution. For any given vector-valued function of $z$, the LS operator yields a zero-mean function under the posterior, so $\log Q(D)$ is no longer needed. That opens the door to rich approximation families such as neural networks.
11 |
12 | ADVI and Langevin-Stein operator VI are not the only methods that fit the OPVI framework; Normalizing Flows and Householder Flows fit it well too.
13 |
14 | ## Motivation
15 |
16 | My recent contributions ([Implementing OPVI](https://github.com/pymc-devs/pymc3/pull/1694)) to PyMC3 created a good basis for extending variational inference in PyMC3 even further. I tried to transfer the theoretical framework to Python code and it succeeded. The main logic now lives in base classes, and all routines are abstracted behind a public interface provided to the developer. Implementing state-of-the-art methods is no longer a challenge: you just break the problem into the four blocks described above and implement the abstract methods.
17 |
18 | I also have a side project, [Gelato](https://github.com/ferrine/gelato), for using PyMC3 in neural networks. So my future plans are the following:
19 |
20 | 1. Implement Normalizing Flows
21 | 2. Implement Householder Flows
22 | 3. Implement the Langevin-Stein Operator
23 | 4. Integrate OPVI into Gelato
24 |
25 | ## Technical Details
26 |
27 | I'm going to use the following libraries:
28 |
29 | * **Theano**
30 | * **PyMC3**
31 | * **Gelato**
32 | * **Lasagne**
33 | * **NumPy**
34 |
35 | As supporting material I'll use papers from arXiv:
36 |
37 | - Danilo Jimenez Rezende, Shakir Mohamed ["Variational Inference with Normalizing Flows"](https://arxiv.org/abs/1505.05770) (2015)
38 | - Jakub M. Tomczak, Max Welling ["Improving Variational Auto-Encoders using Householder Flow"](https://arxiv.org/abs/1611.09630) (2016)
39 | - Rajesh Ranganath, Jaan Altosaar, Dustin Tran, David M.
Blei ["Operator Variational Inference"](https://arxiv.org/abs/1610.09033) (2016) 40 | 41 | ## Schedule of Deliverables 42 | 43 | ### May 1th - May 28th, **Community Bonding Period** 44 | 45 | *Integrate OPVI to Gelato* 46 | 47 | Work on documentation for OPVI and Histogram. Add a noteboook with comprehensive example using Gelato. 48 | 49 | ### May 29th - June 3rd 50 | 51 | *Implement Normalizing and Householder Flows* 52 | 53 | They have similar interface so implementing them both at once is the best decision 54 | 55 | ### June 5th - June 9th 56 | 57 | Debug and documentation period for Flows 58 | 59 | ### June 12th - June 16th 60 | 61 | Make sure all works fine. This period I leave for unexpected problems caused by my exams 62 | 63 | ### June 19th - June 23th, **End of Phase 1** 64 | 65 | Prepare PR to PyMC3 with implemented Flows 66 | 67 | ### June 26 - June 30th, **Begin of Phase 2** 68 | 69 | *Implement Langevin Stein Operator* 70 | This week I devote to LS Operator only 71 | 72 | ### July 3rd - July 14th 73 | 74 | Debug period. It can be really hard. LS Op uses Neural Network Test Functions, here I need much more time for making things work. 75 | 76 | ### July 17th - July 21th, **End of Phase 2** 77 | 78 | Extra time for unexpected problems with convergence, discussions 79 | 80 | ### July 24th - July 28th, **Begin of Phase 3** 81 | 82 | Finishing work with LS Operator, make sure convergence is fine on prepared toy examples 83 | 84 | ### July 31st - August 4th 85 | 86 | *Documentation period* 87 | 88 | Prepare a notebook comparing all variational inference methods 89 | 90 | Begin with ADVI, FullRankADVI 91 | 92 | ### August 7th - August 11th 93 | 94 | Continue with Normalizing and Householder Flows 95 | 96 | ### August 14th - August 18th 97 | 98 | Finish with Langevin Stein Operator 99 | 100 | ### August 21st - August 25th, **Final Week** 101 | 102 | Some unexpected stuff with examples / Bayesian Summer School 103 | 104 | ### August 28th - August 29th, **Submit final work** 105 | 106 | Submit 107 | 108 | ## Future works 109 | 110 | Read arXiv, collect ideas 111 | 112 | ## Development Experience 113 | 114 | Yandex Analyst-Developer Intern (summer 2016), PyMC3 developer 115 | 116 | ## Other Experiences 117 | 118 | Yandex Data Factory Analyst Intern (now) 119 | 120 | ## Why this project? 121 | 122 | I'm a great fan of Bayesian statistics and see it is usefull for many practical applications. I also love development and good codestyle. This project is interesting for me from both point of views. I'm also planning to use my results for my research projects and work. 123 | 124 | ## Literature 125 | - Danilo Jimenez Rezende, Shakir Mohamed ["Variational Inference with Normalizing Flows"](https://arxiv.org/abs/1505.05770) (2015) 126 | - Jakub M. Tomczak, Max Welling ["Improving Variational Auto-Encoders using Householder Flow"](https://arxiv.org/abs/1611.09630) (2016) 127 | - Rajesh Ranganath, Jaan Altosaar, Dustin Tran, David M. 
Blei ["Operator Variational Inference"](https://arxiv.org/abs/1610.09033) (2016) 128 | 129 | ## Links 130 | GitHub GSoC [PR#178](https://github.com/numfocus/gsoc/pull/178), [Issue#152](https://github.com/numfocus/gsoc/issues/152) -------------------------------------------------------------------------------- /2017/proposals/ferres_pymc3_vi.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/2017/proposals/ferres_pymc3_vi.pdf -------------------------------------------------------------------------------- /2017/proposals/karkada-ashok-alekh.md: -------------------------------------------------------------------------------- 1 | # Title 2 | 3 | Convert sampling methods to pure Theano 4 | 5 | ## Abstract 6 | 7 | pymc3 is a well known probabilistic programming framework known for its Hamiltonian Monte Carlo sampling method. 8 | 9 | Hamiltonian Monte Carlo [HMC] (https://arxiv.org/pdf/1206.1901) works by reducing the correlation between successive sampled states by using a Hamiltonian evolution between states and additionally by targeting states with a higher acceptance criteria than the observed probability distribution. This causes it to converge more quickly to the absolute probability distribution. 10 | 11 | Theano is a numerical computation library for Python. In Theano, computations are expressed using a NumPy-like syntax and compiled to run efficiently on either CPU or GPU architectures 12 | 13 | In the current implementation, the sampling methods are a mixture of calls to Python functions and data structures, use Theano partially. By completely porting the methods to Theano, significant improvement in performance is expected. 14 | 15 | 16 | ## Technical Details 17 | 18 | HMC is already implemented in pymc3 but is currently inefficient as it doesn't leverage Theano's efficient machinery very well yet. First thing to do would be to be to identify the parts of the code not yet currently using Theano's machinery. Once identified, we should identify how this can be ported to Theano. 19 | 20 | Anand Patil's implementation [here] (https://github.com/apatil/pymc-theano) can be useful in identifying how to leverage Theano correctly. 21 | 22 | Theano uses Basic Linear Algebra Subprograms [BLAS] (www.netlib.org/blas/) which prescribes a set of low-level routines for performing common linear algebra operations such as vector addition, scalar multiplication, dot products, linear combinations, and matrix multiplication. This makes Theano extremely fast and efficient which cannot be achieved by vanilla python calls. By using Theano, this can be used for our benefit. 23 | 24 | 25 | 26 | ## Schedule of Deliverables 27 | 28 | ### May 1th - May 28th, **Community Bonding Period** 29 | 30 | - A big aim of part of Google Summer of Code is to introduce students to open-source and encourage them to become active members of open-source. This gives a chance to bond with the community and share ideas and work. I have a blog [here](http://alekhka.wordpress.com/) where I plan to give regular reports of the summer work I will be doing, and communicate the changes I will be doing to everyone. 31 | 32 | - Help with existing PRs and help around with issues and bugs. 33 | 34 | - Communication with mentors to clarify doubts, read the papers and implementations. 35 | 36 | ### May 29th - June 3rd 37 | 38 | - Gain more insight into sampling methods and its implementation in pymc3. 
39 | 40 | 41 | ### June 5th - June 9th 42 | 43 | - Look at current implementation and identify code not using theano yet. 44 | 45 | 46 | ### June 12th - June 16th 47 | 48 | - Look at the current implementation and look for bottlenecks in the code and devise plans to mitigate them. 49 | 50 | 51 | ### June 19th - June 23th, **End of Phase 1** 52 | 53 | - Getting a proper road map in place and document it. 54 | 55 | - We will have to consult the mentors regarding this and get suggestions. 56 | 57 | 58 | ### June 26 - July 7th, **Begin of Phase 2** 59 | 60 | - Phase 2 would is the start of the bulk of the coding work. 61 | 62 | - A blog post about the changes to be made and expected improvements. 63 | 64 | - Identifying the Theano machinery to be used and start implementing. 65 | 66 | ### July 10th - July 14th 67 | 68 | - By now, lot of improvement would have happened and it would be taking a good shape. 69 | 70 | ### July 17th - July 21th, **End of Phase 2** 71 | 72 | - This would be enough time for most changes. Additional improvements if needed will also be addressed. 73 | 74 | - The rest of the time would be devoted to testing, benchmarks, further documentation. 75 | 76 | ### July 24th - August 4th, **Begin of Phase 3** 77 | 78 | - The performance of the sampling methods is to be properly documented. 79 | 80 | - The bulk of this period will involve finish testing, bugs and improve documentation. 81 | 82 | ### August 7th - August 11th 83 | 84 | - Another blog post detailing the work done so far. 85 | 86 | - Wrap up tests. Perform new performance benchmarks. 87 | 88 | ### August 14th - August 18th 89 | 90 | - Finishing the documentation with proper details about changes made so that bug fixing becomes easier in the future. 91 | 92 | ### August 21st - August 25th, **Final Week** 93 | 94 | - The last week will involve any remaining code cleaning and make changes if advised by the mentors. 95 | 96 | ### August 28th - August 29th, **Submit final work** 97 | 98 | - Have the PR merged. 99 | 100 | 101 | 102 | ## Future works 103 | 104 | I intend to keep contributing to pymc3 with issues and PRs, and be an active part of the community. I would like to further improve the sampling methods if there is a chance for it. 105 | 106 | ## Open Source Development Experience 107 | 108 | I am huge fan of Open-source software and have contributions in them. I have served as the Lead Programmer for a autonomous vehicle and UAV project in my Sophomore year. I have extensively contributed to its private repository. 109 | 110 | I have interned at 1byzerolabs, a startup in Bangalore area, where I have worked for almost an year (remotely for 6 months). I have contributed to the company's code base. 111 | 112 | Finally, I have served as lead programmer for my college fest (8th mile) and have contributed significantly there too. 113 | 114 | Even though I don't have experience with scientific computing libraries, I have been on the mailing lists of many for long and thus have good knwoledge about expectations and development pipelines. 115 | 116 | I intend to kick start my contribtuions to large open-source projects with GSoC, and to keep contributing to pymc3! 117 | 118 | ## Academic Experience 119 | 120 | I am a student at RV College of Engineering, Bangalore. I have worked with student projects specialising in autonomous vehicles and UAVs. I have also completed a research internship. My [resume](https://drive.google.com/open?id=0B4lOWUYt1pLFWWhkOWIzQzJTUGs) details my previous internships and research experiences. 
121 | 122 | ## Why this project? 123 | 124 | I have always been motivated towards open-source and programming. I have heard a lot of good things about pymc3 from various quarters and was always wanted to be part of it. I was pleasantly surprised when I saw pymc3 in GSoC. I have interacted with the mentor (Dr Colin Carroll) and he has been very helpful. All these made me feel that I would be a right fit for the project. 125 | 126 | ## Appendix 127 | 128 | MCMC using Hamiltonian dynamics - Appears as Chapter 5 of the Handbook of Markov Chain Monte Carlo - Radford M. Neal, University of Toronto. 129 | 130 | Anand Patil's theano implementation: https://github.com/apatil/pymc-theano 131 | 132 | 133 | 134 | 135 | 136 | -------------------------------------------------------------------------------- /2017/proposals/kvnamipara_data_retriever.mdown: -------------------------------------------------------------------------------- 1 | # Data Retriever 2 | 3 | # Proposal for Google Summer of Code 2017 4 | 5 | ## Title 6 | 7 | Improve Data Retriever efficiency for out-of-memory scale datasets. 8 | 9 | ## Abstract 10 | 11 | This project aims to increase the efficiency of the out-of-memory scale dataset. I aim to deterministically profile the code and analyze time/memory consumption for out-of-memory scale datasets in my project. I also plan to increase query efficiency by indexing datasets. 12 | 13 | ## Technical Details 14 | 15 | Data Retriever is an open source project under numFocus foundation. Data Retriever downloads, cleans and stores publicly available data, so that analysts spend less time cleaning and managing data and more time on analyzing it. 16 | 17 | The project involves both making the Data Retriever more efficient on large datasets and making querying from the resulting databases more efficient. 18 | 19 | The project includes deterministic profiling of the code for the large scale data using the profile and memory-profiler library in python. Apart from this, the following libraries can be used for more efficient profiling: 20 | 21 | - cprofile 22 | - Memory-profiler (along with psutil) 23 | - line_profiler 24 | - objgraph 25 | 26 | Scanning the table is useful when the table size is small, but in the case of large scale dataset, indexing is used for better query optimization. Similarly for the dataset which completely resides in memory, the difference between use of indexing and table scan is less noticeable, but for the out-of-memory scale database, indexing makes a difference. 27 | 28 | The project also includes the addition of indexing for the out-of-memory scale dataset by using different database-python implementation library like given below: 29 | - pyMySql 30 | - pypostgresql 31 | 32 | ## Schedule of Deliverables 33 | 34 | I plan to allocate at least 40 hours per week on this project and share weekly progress updates with the community through blog. 35 | 36 | **May 1st - May 28th, community Bonding Period :** Apart from discussing the implementation with the mentors, I will be brushing up on my knowledge of python profiling. I will look into the testing methodologies employed by the popular frameworks and libraries. If possible, I will try to help in minor bugs and issue fixing and try to improve documentation of the Data Retriever. I will also help Data Retriever by adding new dataset script files. 37 | 38 | **May 29 – June 3:** Deterministic profiling along with memory profiling to gain statistical data. 
39 | 40 | **June 5 – June 9:** Memory profiling to increasing efficiency for out-of-memory scale dataset 41 | 42 | **June 12 – June 16:** will do suitable changes in code based on the statistical data obtained through profiling 43 | 44 | **June 19 – June 23, End of phase 1:** working on indexing of out-of-memory scale dataset, Mid-term evaluation 45 | 46 | **June 26 – July 30, Begin of phase 2:** Implementation and understanding of addition of indexing and how addition affects the efficiency and querying 47 | 48 | **July 3 – July 7:** implementation and understanding of addition of indexing and how addition affect the efficiency and querying 49 | 50 | **July 10 – July 14:** implementation of addition of indexing in mysql using pymysql 51 | 52 | **July 17 – July 21, End of phase 2:** implementation of addition of indexing in postgresql 53 | 54 | **July 24 – July 28, Begin of phase 3:** implementation of indexing in all datasets if, the efficiency increases after introducing indexing to small datasets 55 | 56 | **July 31 - August 4:** changes and modification in code based on the result obtained and further review by mentors 57 | 58 | **August 7 – August 11:** search for other alternatives for increasing efficiency for out-of-memory scale dataset / any pending work to be completed / any further work under guidance of mentors 59 | 60 | **August 14 – August 18:** search for other alternatives for increasing efficiency for out-of-memory scale dataset / any pending work to be completed / any further work under guidance of mentors 61 | 62 | **August 21 – August 25, Final Week:** Adding the final documentation and tests, and cleaning the code. Look into the community feedback ,and make sure everything is in place and working. Preparing for End-term evaluation. 63 | 64 | **August 28 – August 29, Submit Final work:** Adding the final documentation and tests, and cleaning 65 | 66 | 67 | ## Future Works 68 | 69 | I will actively take participation in further contribution of Data retriever after GSOC. I will continue to work on some other projects of Data Retriever and will be active in community. 70 | 71 | ## Development Experience 72 | 73 | This project of Data Retriever requires the knowledge of python and database-python client libraries(such as pymysql, py-postgresql etc.), and deep understanding of how DBMS works. 74 | 75 | I am proficient in python and intermediate in C language. I am currently enrolled in university where i studied as DBMS as one of my subject. My interest lies in Data Science and Machine Learning. Apart from above i have knowledge of concepts of A.I., R language and Matlab/octave And I have basic knowledge of web stacks. The details of projects i have worked on can be found on my Github account. 76 | 77 | I am a novice open source contributor. I have knowledge of git version control and have some working experience in Travis and Grunt testing bots. I have contributed to Data Retriever. My contribution are as below: 78 | - Addition of nyc-tree-count dataset(db25817) 79 | - Documentation update ( ed7ddcb ) 80 | - Update portal dataset ( WIP - Awaiting final changes) 81 | 82 | ## Other Experience 83 | 84 | I am machine learning and AI enthusiast. I have trained several models in matlab/octave. I have completed several projects in web technologies. I am an active member of communities in college like Google Developers Group NIT Surat, ACM NIT Surat and Webdev labs, NIT Surat. I was the organiser/volunteer of Inout-India’s largest student run hackathon. 
85 | 86 | ## Why I choose Data Retriever? 87 | 88 | I am a novice open source contributor. When I was looking for the organisation for GSOC-2017, I found that Data Retriever is perfect for me. Data Retriever has active community of mentors and contributors which inspire a lot to work on it. Apart from this code is well written, maintained and easy to understand for a beginner. 89 | 90 | As a machine learning enthusiast, I know handling of data is an important thing, and Data Retriever is making it easy for everyone. I found it fascinating and interesting to work on something like this. Data Retriever is doing good job for data handling but it will be more useful when we increase the performance and efficiency, This makes me to apply for Data Retriever. 91 | 92 | ## Why you should choose me? 93 | 94 | I will prove to be a suitable candidate as I possess the required expertise with the technologies involved, and have listed my implementation idea clearly. I have been working on the Data Retriever in understanding the code and working of it for the last few days. 95 | 96 | I have taught myself most of fundamentals of Computer Science, Programming fundamentals and Machine Learning and AI (MOOCs, Tutorials, etc.) The experience has made me a strong autodidact and that will prove to be useful during my work in SOC. I am a quick learner. 97 | 98 | Also, I have often been called driven, creative and hardworking. And I personally value having a strong work-ethic more than most things. 99 | 100 | ## Personal Information 101 | 102 | - **Kevinkumar Amipara ( kvnamipara )** 103 | - Website: kvnamipara.github.io 104 | - Email: kvnamipara@gmail.com | kevin.dakshana2015@gmail.com 105 | - Contact Number : +91 9512853801 106 | - Username (GitHub and Gitter) : kvnamipara 107 | - Time Zone : Indian Standard Time (GMT +5:30) 108 | -------------------------------------------------------------------------------- /2017/proposals/markus-beuckelmann.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/2017/proposals/markus-beuckelmann.pdf -------------------------------------------------------------------------------- /2017/proposals/patni-harshit.md: -------------------------------------------------------------------------------- 1 | # Title 2 | 2D color maps 3 | 4 | ## Abstract 5 | 6 | All of the color mapping in Matplotlib is currently derived from 7 | `ScalerMappable` which as the name suggests maps scalers from `R^1 -> 8 | R^4` RGBA color space. It is common to want to map a vector to 9 | colors, for example to control the alpha based on a second value in a 10 | scatter plot or to show the orientation of a field. 11 | 12 | | **Intensity** | **Involves** | **Mentors** | 13 | | ------------- | --------------|------------ | 14 | | Intermediate | Python | tacaswell,story645 | 15 | 16 | ## Technical Details 17 | 18 | - All the work will be done in Python 19 | - It will be in accordance with current implementation of 1D colormaps. 
20 | - Extending the existing 1D normalization for 2D data : This can be done by 21 | a family of normalizers which go from data -> unit-disk or square in the 22 | complex plane 23 | - Creating color maps that go from the unit disk or square -> RGBA 24 | - Exposing these classes to user as either new API or extending the existing 25 | `ScalerMappable` API 26 | - Implementing a 2D color bar 27 | - Developing 2D color maps : They can be created by doing bi-linear 28 | interpolation between four colours. Also major focus would be on developing 29 | perceptually friendly colormaps so that color blind people have no difficulty in 30 | distinguishing colors 31 | 32 | ## Schedule of Deliverables 33 | 34 | ### May 1th - May 28th, **Community Bonding Period** 35 | 36 | - Before the official time period begins I will do some tasks listed under 37 | [MEP21](http://matplotlib.org/devel/MEP/MEP21.html): color and cm refactor. 38 | This will greatly help in understanding the current implementation of 39 | normalization and color mapping tools in Matplotlib. Some tasks that can be 40 | done as part of this are: 41 | - Tidying up namespace 42 | - Defining a "Color" tuple 43 | - Improving construction of colormap by changing current dictionary approach 44 | - Analyzing the feasibility of renaming `cm` module to something more 45 | descriptive. It will be a major API change so this will be done only if 46 | its pros outweighs cons 47 | - Set up a blog 48 | - Along with this I will continue to solve issues on github 49 | 50 | ### May 29th - June 3rd 51 | 52 | - Decide on how API will be exposed to users as new API or as extension of 53 | ScalerMappable 54 | - Start working on normalizers 55 | 56 | ### June 5th - June 9th 57 | 58 | - Finish up normalizers 59 | - Write tests 60 | - Starting working on color map that maps unit circle or square to rgba 61 | - Write blog 62 | 63 | ### June 12th - June 16th 64 | 65 | - Finish up color maps 66 | - Debug and test 67 | - Write tests 68 | 69 | ### June 19th - June 23th, **End of Phase 1** 70 | 71 | - Complete any unfinished work in Phase 1 72 | - Write documentation for code written so far 73 | - Write blog for Phase 1 74 | 75 | ### June 26 - June 30th, **Begin of Phase 2** 76 | 77 | - Make API for exposing normalizers and color maps to user 78 | - Test new API 79 | - Document the API so that it is exposed to users 80 | 81 | ### July 3rd - July 7th 82 | 83 | - Start implementing 2D color bar 84 | - Write blog 85 | 86 | ### July 10th - July 14th 87 | 88 | - Continue implementing 2D color bar 89 | 90 | ### July 17th - July 21th, **End of Phase 2** 91 | 92 | - Complete any unfinished work of Phase 2 93 | - Write blog for Phase 2 94 | 95 | ### July 24th - July 28th, **Begin of Phase 3** 96 | 97 | - Research on perceptually friendly colormaps 98 | 99 | ### July 31st - August 4th 100 | 101 | - Develop 2D colormaps 102 | 103 | ### August 7th - August 11th 104 | 105 | - Test and document colormaps 106 | - Write blog 107 | 108 | ### August 14th - August 18th 109 | 110 | - Write examples for Matplotlib gallery to demonstrate 2D color maps 111 | 112 | ### August 21st - August 25th, **Final Week** 113 | 114 | - Buffer period for any unfinished work 115 | - Write blog for Phase 3 116 | - Clean up code 117 | 118 | ### August 28th - August 29th, **Submit final work** 119 | 120 | ## Future works 121 | 122 | - In future the project can be extended to higher dimensions by mapping to 123 | quaternions as well. 
124 | - Different types of colormaps can be added 125 | 126 | ## Open Source Development Experience 127 | 128 | - (Merged) [#8094](https://github.com/matplotlib/matplotlib/pull/8094) Cleaned up documentation by removing an example 129 | - (Merged) [#8097](https://github.com/matplotlib/matplotlib/pull/8097) Improved the code to use plt.gca instead of plt.axes 130 | - (Merged) [#8154](https://github.com/matplotlib/matplotlib/pull/8154) Merged fill_demo and fill_demo_features examples 131 | - (Merged) [#8190](https://github.com/matplotlib/matplotlib/pull/8190) Added link to Gitter channel in readme 132 | - (Merged) [#8234](https://github.com/matplotlib/matplotlib/pull/8234) Fixed broken Gitter badge 133 | - (Merged) [#8343](https://github.com/matplotlib/matplotlib/pull/8343) Made ArrowStyle docstrings numpydoc compatible 134 | - (Open) [#8336](https://github.com/matplotlib/matplotlib/pull/8336) Merged three streamplot examples into one plot with subplots 135 | - (Open) [#8157](https://github.com/matplotlib/matplotlib/pull/8157) Added 'which' kwarg to autofmtxdate and wrote tests 136 | 137 | ## Other Experiences 138 | 139 | - [AI-Bot](https://github.com/patniharshit/Ultimate-Tic-Tac-Toe) in python 140 | for 4X4 Ultimate-Tic-Tac-Toe 141 | - [Brick-Breaker](https://github.com/patniharshit/Brick-Breaker), a 2d shooter 142 | game in OpenGL 143 | - [Bloxorz](https://github.com/patniharshit/Bloxorz), a 3d puzzle game in OpenGL 144 | 145 | ## Why this project? 146 | 147 | Currently there are no multidimensional colormaps in Matplotlib. This is a 148 | big nuisance if we want to modulate the color and opacity based on data in 149 | different dimensions independently. This project has been requested for a long 150 | time by people in neuroscience, astronomy etc. 151 | Here are some of those requests : 152 | 153 | - [#4369](https://github.com/matplotlib/matplotlib/issues/4369) 154 | - [Bivariate Colormaps](http://stackoverflow.com/questions/15207255/is-there-any-way-to-use-bivariate-colormaps-in-matplotlib) 155 | 156 | Having used Matplotlib for displaying graphical information several times, I 157 | wanted to give something back to the community. I am the right person to do 158 | this project because not only I want to contribute to Open Source but I have 159 | also worked closely with the community for last month so I have good 160 | understanding of workflow. 161 | 162 | ## Appendix 163 | 164 | ### About Me 165 | 166 | I am a sophomore at International Institute of Information Technology, 167 | Hyderabad majoring in Computer Science. I have intermediate proficiency in 168 | Python and have worked on several projects with it. I am also an active 169 | contributor of Matplotlib for some time. 170 | 171 | ### Contact 172 | | | | 173 | |----------|--------------------------------------------------------| 174 | | Name | Harshit Patni | 175 | | Email | patniharshit@gmail.com | 176 | | | harshit.patni@students.iiit.ac.in | 177 | | Github | [patniharshit](https://github.com/patniharshit) | 178 | | Gitter | patniharshit | 179 | 180 | ### Availability 181 | 182 | I don't have any commitments in summer and GSOC will be my full time job. 183 | My summer vacations starts on 27 April and college reopens in last week of 184 | July. 
185 | 186 | * **Time Zone :** Indian Standard Time (IST) UTC +5:30 187 | * **Hours per week :** 35-40 hours(during vacations), this may go down to 188 | 30-35 hours in August 189 | -------------------------------------------------------------------------------- /2017/proposals/prakhar_gsoc_17.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/2017/proposals/prakhar_gsoc_17.pdf -------------------------------------------------------------------------------- /2017/proposals/proposal.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/2017/proposals/proposal.pdf -------------------------------------------------------------------------------- /2017/proposals/pymc3-proposal-float32-gp.md: -------------------------------------------------------------------------------- 1 | # Single Precision Support, Gaussian Processes 2 | 3 | ## Abstract 4 | 5 | PyMC3 contains a rich suite of building blocks for probabilistic modelling and 6 | inference. This proposal contains two parts, the first part is finishing 7 | support for single precision, and the second involves finishing Gaussian 8 | Process functionality. I have already submitted a proposal about exploring 9 | alternative computation engines, and am open to working with the mentors on 10 | mixing and matching projects from each, for example, evaluating PyTorch in 11 | addition to the projects in this proposal. 12 | 13 | ## Technical Details 14 | 15 | Inference on some classes of probabilistic models would be greatly accelerated 16 | by the parallelism offered by modern GPUs. However, most GPUs only support 17 | 32-bit precision for floating point computations. Theano supports 32-bit 18 | precision computation, and includes many Ops with GPU implementations. The 19 | computation graph Theano produces is constructed according to a given PyMC3 20 | model. In order for PyMC3 to take advantage of these Ops, it must cast numeric 21 | values that become inputs to the computation graph to 32-bit precision. The 22 | goal of this work is to finish single precision support and testing in PyMC3. 23 | 24 | When `theano.config.floatX` is set to `'float32'`, all PyMC3 inputs should be 25 | automatically cast to that type. Routines in PyMC3's internals need to be 26 | checked for places where values that start as `float32` are accidentally 27 | converted to `float64`. This will involve writing an extensive test suite, or 28 | configuring the current test suite to run with 32-bit precision. 
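As a minimal illustration of the kind of cast this implies, the sketch below keeps array data at `theano.config.floatX` so that `float32` survives from user input into the graph. The `as_floatX` helper is hypothetical, shown only to illustrate the idea, and is not existing PyMC3 API:

```python
# Keep numeric inputs at the configured Theano precision so that a
# float32 graph is not silently upcast to float64.
import numpy as np
import theano

def as_floatX(values):
    """Cast array-like data to theano.config.floatX (hypothetical helper)."""
    return np.asarray(values, dtype=theano.config.floatX)

data = as_floatX([1.0, 2.0, 3.0])
assert data.dtype == np.dtype(theano.config.floatX)
```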
29 | 30 | There are several open and closed issues that relate to this issue, 31 | some of which include: 32 | - [Using PyMC3 on the GPU](https://github.com/pymc-devs/pymc3/issues/1246) 33 | - [Cast input data and testvalues to theano.config.floatX](https://github.com/pymc-devs/pymc3/issues/1640) 34 | - [Model raises exception with floatX=float32](https://github.com/pymc-devs/pymc3/issues/1146) 35 | - [GPU Failure in minimal model](https://github.com/pymc-devs/pymc3/issues/1939) 36 | - [Sampling failure using initial values with Metropolis](https://github.com/pymc-devs/pymc3/issues/1681) 37 | - [ValueError: can't copy from un-initialized CudaNdarray during Metropolis using GPU for complex model](https://github.com/pymc-devs/pymc3/issues/1649) 38 | 39 | Researching and working on closing these issues will provide a good starting 40 | point on this effort. When this issue is resolved, this work can transition to 41 | testing, benchmarking, and documenting models that see significant speedups 42 | from GPU computation. 43 | 44 | A secondary project will be to round out Gaussian Process functionality and 45 | documentation. A large chunk of GP functionality has already been included. I 46 | propose to add the following additional features: 47 | 48 | - Extending the `GP` class to accommodate non-Gaussian likelihoods by 49 | allowing the `GP` class to be specified in a fashion more similar to 50 | mathematical notation (in pseudocode): 51 | 52 | ```python 53 | f = GP("latent", m(X), K(X, X)) 54 | y = Poisson("likelihood", mu=exp(f)) 55 | ``` 56 | 57 | - Allowing the `GP` class to be additive, allowing predictions to be made from one of the GPs: 58 | 59 | ```python 60 | f1 = GP("latent1", 0, K1(X, X)) 61 | f2 = GP("latent2", 0, K2(X, X)) 62 | y = MvNormal("likelihood", mu=f1 + f2, cov=noise) 63 | 64 | gp_samples = sample_gp(trace, samples=50, gp=[f1], evaluate_at=Z) 65 | ``` 66 | 67 | - Add more documentation and notebook examples 68 | 69 | - I would also like to add spline functionality to PyMC3. Splines are useful 70 | in their own right for fitting non-linear functions, but would enhance 71 | Gaussian Process functionality since they would be useful for lengthscale 72 | functions for the `Gibbs` covariance function, and for warping inputs in the 73 | `WarpedInput` covariance function. 74 | 75 | ## Why this project? 76 | 77 | I am interested in working on `float32` support in order to gain a deeper 78 | understanding of the internals of PyMC3, frameworks like Theano, and GPU 79 | computing. Single precision support looks like a long-standing and high-impact 80 | issue, and addressing it looks like a great way to gain experience in these 81 | subjects. 82 | 83 | PyMC3 provides a great foundation for experimenting with Gaussian Processes and 84 | non-linear regression, due to the symbolic differentiation provided by Theano, 85 | its handling of variables and variable transformations via Distribution 86 | objects, and its suite of advanced inference methods, such as NUTS and ADVI. 87 | 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /2017/proposals/saparina-irina.md: -------------------------------------------------------------------------------- 1 | # Distributed Word2vec on CPUs on multiple machines 2 | 3 | ## Abstract 4 | 5 | Distributed computing allows to work with large corpuses, which are difficult to process on one machine, using a cluster. 
Gensim contains distributed implementations of several algorithms (distributed LSA, distributed LDA). The implementations use [Pyro4](http://pythonhosted.org/Pyro4/) for network communication and are fairly low-level. 6 | 7 | Word2vec model can use many worker threads for fast training on multicore machines, but it doesn’t have distributed version for fast training on the cluster. 8 | 9 | We can re-implement gensim word2vec algorithm in [Tensorflow](https://www.tensorflow.org/) in order to enable distributed computation. We can try [Spark](http://spark.apache.org/) for this task and compare the results. For visualization the model we can use Tenserflow special tool - Tensorboard UI. 10 | 11 | In my university project I used gensim and word2vec for oneclass classification task, so I can make tutorial from it. 12 | 13 | ## Technical Details 14 | 15 | Word2vec consists of two model architectures: CBOW (predicts target words from context words) and Skip-gram (predicts context words from the target words). As these models are similar, I will be implementing both simultaneously. 16 | 17 | As we will use In TensorFlow, the following steps are required for successfully completed project: 18 | 19 | ### *Build the graphs for both models* 20 | 21 | ... and re-implement function `train()` in class Word2Vec using [TensorFlow word2vec implementation](https://www.tensorflow.org/tutorials/word2vec) and [Gensim word2vec implementation](https://github.com/RaRe-Technologies/gensim/blob/develop/gensim/models/word2vec.py) 22 | 23 | ### *Distribute the graphs across the cluster* 24 | 25 | TensorFlow cluster is defined using a `tf.train.ClusterSpec()` object. Cluster consists of “jobs”, each divided into lists of one or more “tasks”. Each “task” will be run on a different machine and associated with a TensorFlow “server” (by creating `tf.train.Server`), which contains a “master” (for creating sessions), and a “worker” (for executing operations in the graph). There are two types of “jobs”: `ps` (parameter server), which hosts nodes that store and update variables and `worker`, which is responsible for compute-intensive tasks (for this project each `worker` will be train the same model on its own mini-batches of data). 26 | 27 | TensorFlow has tools for different approaches to this structure of replicated training: in-graph/between-graph replication, asynchronous/synchronous training. I’m going to find out, which is the best for this project. 28 | 29 | **Resourses:** [Distributed TensorFlow guide](https://www.tensorflow.org/deploy/distributed) 30 | 31 | ### *Visualize with Tensorboard* 32 | 33 | **Resourses:** [TensorBoard: Visualizing learning](https://www.tensorflow.org/get_started/summaries_and_tensorboard) 34 | 35 | ## Schedule of Deliverables 36 | 37 | ### May 1th - May 28th, **Community Bonding Period** 38 | 39 | - Help with issues and bugs and anwser the questions on the [gensim mailing list](https://groups.google.com/forum/#!forum/gensim) 40 | 41 | - Research what tool would be better for our project (TensorFlow or Spark) 42 | 43 | - Re-read the [paper](http://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf) by Mikolov et al. and gensim word2vec code to make sure everything is clear 44 | 45 | - Communication with mentor and community 46 | Make sure that I have everything for testing and I can create a cluster for it. 47 | 48 | ### May 29th - June 3rd 49 | 50 | - Choose the tool for distribution 51 | 52 | - Benchmarking existing code. 
Check the Skip-gram model implementation and improve it if it will be necessary. Start coding CBOW model (in Tenserflow it will be building the graph). 53 | 54 | ### June 5th - June 9th 55 | 56 | - Continue creating the CBOW model (in Tenserflow it will be building the graph). 57 | 58 | ### June 12th - June 16th 59 | 60 | - In Gensim word2vec algorithm was extended with additional functionality, that must be available in distributed version. This week I will integrate my code of both models with existing methods. 61 | 62 | ### June 19th - June 23th, **End of Phase 1** 63 | 64 | - Finish basic (non-distributed) realization of CBOW and Skip-gram models, that can work with all existing word2vec methods. End of re-implementing word2vec algorithms. 65 | 66 | ### June 26 - June 30th, **Begin of Phase 2** 67 | 68 | - Test CBOW and Skip-gram models. 69 | - Start working on distributed version: first of all, it would be necessary to determine the structure of parallel computing (what each node will do). 70 | 71 | ### July 3rd - July 7th 72 | 73 | - Add functionality for creating the cluster. 74 | 75 | ### July 10th - July 14th 76 | 77 | - Make Skip-gram model distributed, start making CBOW model distributed. 78 | 79 | ### July 17th - July 21th, **End of Phase 2** 80 | 81 | - Finish coding distributed models. End of making word2vec distributed. 82 | 83 | ### July 24th - July 28th, **Begin of Phase 3** 84 | 85 | - Test distributed word2vec with different numbers of machines in cluster. 86 | 87 | ### July 31st - August 4th 88 | 89 | - Add visualization with TensorBoard. 90 | 91 | ### August 7th - August 11th 92 | 93 | - Make Jupyter Notebook with my university project as tutorial, start working on documentation for distributed word2vec. 94 | 95 | ### August 14th - August 18th 96 | 97 | - Make documentation for distributed word2vec. 98 | 99 | ### August 21st - August 25th, **Final Week** 100 | 101 | - Test all again, check documentation and tutorials. 102 | 103 | ### August 28th - August 29th, **Submit final work** 104 | 105 | - Have PR merged. 106 | 107 | ## Future works 108 | 109 | Distributed computing is actively developed now so I will be follow the news in it and add new features whenever possible. For example, in TensorFlow they are working now for easier way to set cluster specification than existing. 110 | 111 | I will keep contributing to gensim with issues and PR and be active part of community. 112 | 113 | ## Development and Academic Experience 114 | 115 | I’m a 3rd year student of Moscow State University, [Faculty of Computation Mathematics and Cybernetics](https://cs.msu.ru/en). 116 | 117 | I’m specialized in Machine Learning and Data Mining. For my last research project in University (“Research models of vector representations of texts based on word2vec algorithms”) I learned and used different NLP algorithms. The main part of it is about word2vec and doc2vec (and we used gensim implementation), so I well know how exactly it work. 118 | 119 | In this year in University we study distributed systems and parallel processing of data and I worked on cluster. 120 | 121 | I’m new at open-source community but I really want to be part of it. 122 | 123 | ## Why this project? 124 | 125 | I want to improve my knowledge in deep learning and distributed computing, because nowadays it’s very important skills. I choose this project because it’s about word2vec that I used and it’s will be useful for gensim. 
When it will be finished, many researches (and me too) will have the opportunity to work with word2vec and big data. Gensim helped me in my academic project, so I want to do gensim better. 126 | 127 | -------------------------------------------------------------------------------- /2017/proposals/shannon.md: -------------------------------------------------------------------------------- 1 | # PyMC3: Implement non-parametric Bayesian Methods # 2 | 3 | ## Abstract ## 4 | 5 | Bayesian nonparametrics (BNP) is a major part of the cutting edge in machine learning today. Since Tom Ferguson's initial work on the Dirichlet process in the 1970s, there have been tremendous extensions and forays into the field. Bayesian nonparametric models promise flexibility and scalability while maintaining strong theoretical guarantees. In particular, the Bayesian paradigm offers decision-theoretic guarantees on consistency, while the nonparametric approach liberates the modeler from assumptions that are rarely satisfied and difficult to test for, but near impossible to do inference without. 6 | 7 | However, BNP models are not as popular as other supervised or unsupervised learning techniques for several reasons. Firstly, BNP models begin by placing probability distributions on spaces of probability distributions. The mathematical maturity required to read papers on Bayesian nonparametrics makes them inaccessible to many practitioners of data analysis, who would otherwise benefit from these techniques. Secondly, posterior inference, in many classes of BNP models is intractable, necessitating approximations, though this is less of a problem with frameworks like PyMC3 and Stan. Finally, there exist very few if any practical implementations of Bayesian nonparametric techniques. 8 | 9 | In this GSoC project, I'd like to implement some of the more commonly used Bayesian nonparametric models in PyMC3. I'd also like to write extensive documentation and tutorials to help make PyMC3 and Bayesian nonparametric modelling more accessible to end users who may not have any experience with advanced machine learning. 10 | 11 | ## Technical Details ## 12 | 13 | Over the course of the summer, I'd like to implement combinatorial BNP models in PyMC3, as well as improve on inference techniques for these models. I plan on implementing the Dirichlet process \[[Ferguson 1973](https://projecteuclid.org/euclid.aos/1176342360), [Sethuraman 1994](http://www3.stat.sinica.edu.tw/statistica/oldpdf/A4n216.pdf)\], the Dirichlet process mixture model \[[Antoniak 1974](https://projecteuclid.org/euclid.aos/1176342871)\], Polya trees \[[Müller 2013](https://projecteuclid.org/download/pdfview_1/euclid.cbms/1362163749)\] and the hierarchical Dirichlet process \[[Teh et al. 2006](https://www.stats.ox.ac.uk/~teh/research/npbayes/jasa2006.pdf)\]. 14 | 15 | [Edward](https://github.com/blei-lab/edward) \[[Tran el al. 2016](https://arxiv.org/abs/1610.09787)\] has recently added Dirichlet processes and I hope to use their implementation as a reference. 16 | 17 | ## Schedule of Deliverables 18 | 19 | ### May 1th - May 28th, **Community Bonding Period** 20 | 21 | Spend time familiarizing myself with the PyMC3 codebase, and start with API design for the underlying combinatorial stochastic processes. Also, review existing literature to familiarize myself with any identified issues with implementing Bayesian nonparametric models. 22 | 23 | ### May 29th - June 2nd 24 | Implement the Dirichlet process. 
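For concreteness, the truncated stick-breaking construction (Sethuraman 1994) behind this first milestone can be sketched in a few lines of NumPy. This is an illustration only, not the planned PyMC3 implementation; a PyMC3 version would use Beta random variables inside a model rather than NumPy draws:

```python
# Toy truncated stick-breaking draw from a Dirichlet process DP(alpha, H),
# with base measure H = N(0, 1); illustration only.
import numpy as np

def stick_breaking_weights(alpha, K, rng):
    """K truncated stick-breaking weights: w_k = b_k * prod_{j<k} (1 - b_j)."""
    betas = rng.beta(1.0, alpha, size=K)
    remaining = np.concatenate(([1.0], np.cumprod(1.0 - betas)[:-1]))
    return betas * remaining

rng = np.random.default_rng(0)
w = stick_breaking_weights(alpha=2.0, K=20, rng=rng)
atoms = rng.normal(0.0, 1.0, size=w.shape[0])   # draws from the base measure H
# (w, atoms) define a discrete random measure approximating G ~ DP(alpha, H)
```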
25 | 26 | ### June 5th - June 9th 27 | Debug the Dirichlet process implementation, along with adding tests and documentation. 28 | 29 | ### June 12th - June 16th 30 | Implement Dirichlet process mixture models for clustering. 31 | 32 | ### June 19th - June 23th, **End of Phase 1** 33 | Buffer time for anything that's overrun, along with debugging and documenting Dirichlet process mixture models. Commit everything thus far and submit a pull request. 34 | 35 | ### June 26 - June 30th, **Begin of Phase 2** 36 | Begin implementing Polya trees for density estimation. 37 | 38 | ### July 3rd - July 7th 39 | Polya trees are difficult to implement, so this week is meant for debugging. 40 | 41 | ### July 10th - July 14th 42 | Write up documentation and tests for Polya trees. Commit everything thus far and submit a pull request. 43 | 44 | ### July 17th - July 21th, **End of Phase 2** 45 | I'm leaving this week as a buffer for any overruns, as well as clearing up any issues with the PR. 46 | 47 | ### July 24th - July 28th, **Begin of Phase 3** 48 | Begin implementing hierarchical Dirichlet processes for clustering and grouping data. 49 | 50 | ### July 31st - August 4th 51 | Debug the hierarchical Dirichlet process implementation. Add tests and documentation. 52 | 53 | ### August 7th - August 11th 54 | At this point, I'd like to start working on a tutorial on BNP models in PyMC3 accessible to someone with just a first course in machine learning or statistics. 55 | 56 | ### August 14th - August 18th 57 | Complete writing the tutorial. Submit a pull request for hierarchical Dirichlet processes. 58 | 59 | ### August 21st - August 25th, **Final Week** 60 | Fix any issues with the pull request. 61 | 62 | ### August 28th - August 29th, **Submit final work** 63 | Celebrate! 64 | 65 | ## Future work and extensions 66 | Bayesian nonparametrics is advancing at an extraordinary pace. A straightforward extension to the work I've proposed would be to add other prior processes: Pitman-Yor, hierarchical beta, and other stickbreaking prior processes. I plan to write the code in a modular way so that the prior distributions are decoupled from the clustering and density estimation models. This would make implementing other priors for the same models, and other models for the same priors as straightforward as possible. I plan on continuing working on BNP models in PyMC3 after GSoC as a regular contributor. 67 | 68 | ## Development Experience 69 | While I'm comfortable coding in Python and Haskell, this is my first formal programming project. I've used both Haskell and Python for data analysis, either for college projects or at think tanks and NGOs during internships. 70 | 71 | ## Other experience 72 | I graduated with a bachelor's in economics and mathematics from St. Stephen's College, Delhi in 2016. I am currently working for a year with a group of academics aiming to improve policy implementation in India through a data driven approach, before I begin my graduate studies in statistics this fall at the University of Minnesota, where I aim to study the posterior consistency of variational inference in Bayesian nonparametric models. I have always believed that statistical literacy is an important part of general literacy, and in the larger scope of my work I hope to make Bayesian statistics more accessible to the general public through teaching as well as through FOSS like PyMC3. 73 | 74 | ## Why this project? 
75 | As a statistics graduate student, I plan on working on theoretical guarantees for Bayesian nonparametric models. Implementing these models for PyMC3 is a great way to get open source software development experience, while deepening my grasp of the literature. Equally importantly, I'm very interested in helping people learn and understand statistics, so writing tutorials for lay audiences will help improve my communication skills before grad school. 76 | -------------------------------------------------------------------------------- /2018/ideas-list-stan.md: -------------------------------------------------------------------------------- 1 | # Stan 2 | 3 | [Stan](http://mc-stan.org/) is an open-source (BSD) probabilistic 4 | programming language for fitting statistical models, making 5 | predictions, and estimating event probabilities; it is used by scientists 6 | across the world in many fields. 7 | 8 | ## Open Source Development Experience 9 | 10 | We're going to be using the following tools: 11 | 12 | * GitHub branching, pull requests, and continuous integration 13 | 14 | * clang and gcc compilers for cross-platform C++ 15 | 16 | * make for builds 17 | 18 | * Google Test for unit testing 19 | 20 | * doxygen and the GitHub wiki for documentation 21 | 22 | * Google Hangouts for meetings 23 | 24 | * Python, R, and statistical modeling experience useful but not necessary 25 | 26 | # Projects 27 | 28 | ## Protocol Buffer Data Transport Layer 29 | 30 | ### Abstract 31 | 32 | Add protocol buffer support in C++ for data input and sampling output for the Stan probabilistic programming language. 33 | 34 | 35 | | **Intensity** | **Priority** | **Involves** | **Mentors** | 36 | | ------------- | -----------| ------------- | ----------- | 37 | | Moderate | High | C++, statistics | [@sakrejda](https://github.com/sakrejda), [@seantalts](https://github.com/seantalts) | 38 | 39 | 40 | ### Technical Details 41 | 42 | Stan's current 43 | file-based input uses an ASCII representation based on the R 44 | language's dump format. The format is ad hoc and it lacks library 45 | support in languages other than R. The current file-based output is 46 | based on CSV files with metadata encoded as comments. 47 | 48 | These both need to be replaced with a protocol buffer interface that 49 | can be used across Stan's interfaces (command line, R, and Python). 50 | 51 | The Summer of Code project is to provide an implementation of Stan's 52 | I/O using [protocol 53 | buffers](https://developers.google.com/protocol-buffers/), Google's 54 | "language-neutral, platform-neutral extensible mechanism for 55 | serializing structured data." There is a [protocol buffer C++ 56 | tutorial](https://developers.google.com/protocol-buffers/docs/cpptutorial). 57 | 58 | The implementation will be done in [Stan's C++ 59 | library](https://github.com/stan-dev/stan) so that it may be exposed 60 | to the command line (C++), R, and Python interfaces. Only the command 61 | line version in C++ is within scope, though a student with appropriate 62 | experience could add support for one or both of R and Python. 63 | 64 | ### First steps 65 | 66 | * install the protocol buffer package and get hello world (an I/O round 67 | trip, sketched below) working 68 | 69 | * get a Stan program working using CmdStan and the R dump format 70 | 71 | * first real step will be implementing a `stan::io::var_context` 72 | object based on protocol buffers.
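For orientation, here is a minimal sketch in Python of the kind of I/O round trip this project is about. It uses the generic `Struct` well-known type from the `protobuf` package purely as a stand-in; the real project would define a dedicated `.proto` schema and implement the reader in C++ behind `stan::io::var_context`, so the field names and values below are illustrative assumptions only.

``` python
from google.protobuf import struct_pb2

# Stand-in for a Stan data block such as `data { int N; vector[N] y; }`.
data = struct_pb2.Struct()
data.update({"N": 3, "y": [1.2, -0.7, 3.4]})

# Serialize to a compact, language-neutral binary payload ...
payload = data.SerializeToString()

# ... and parse it back, as a C++, R, or Python consumer would.
restored = struct_pb2.Struct()
restored.ParseFromString(payload)
print(restored["N"], restored["y"][0])  # 3.0 1.2
```

The same payload can be read from C++ or R without any knowledge of R's dump format, which is the point of moving Stan's I/O to protocol buffers.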
73 | 74 | ## Revise BUGS and ARM models to best practices and benchmark them 75 | 76 | We have a collection of reference models based on models published in the BUGS 77 | literature and the ARM textbook by Gelman and Hill. We'd like to revise these to 78 | keep them up to date with current statistical and programming techniques. We'd 79 | also like to benchmark them and create a model benchmarking tool to facilitate 80 | this and future benchmarks. The models can be found in the [example-models 81 | repo](https://github.com/stan-dev/example-models/wiki). 82 | 83 | We'd like to develop some tooling to make both of these tasks easier. We can 84 | draw some inspiration from [go fix](https://golang.org/cmd/fix/) for updating 85 | models automatically, and from a variety of places, including [go testing](https://dave.cheney.net/2013/06/30/how-to-write-benchmarks-in-go)'s benchmarking facilities. 86 | 87 | | **Intensity** | **Priority** | **Involves** | **Mentors** | 88 | | ------------- | -----------| ------------- | ----------- | 89 | | Moderate | High | statistics, Stan language, any language of your choice | [@seantalts](https://github.com/seantalts) | 90 | 91 | ### First steps 92 | * Get Stan installed and running on the models in question 93 | * Work with @seantalts, @bob-carpenter, @syclik, and others to understand what 94 | has changed and what sorts of new techniques and patterns we advocate. 95 | * Update one model and test it manually 96 | 97 | After that, we'll want you to work with mentors to design a system for automated 98 | updates and automated benchmarks. 99 | -------------------------------------------------------------------------------- /2018/ideas-list.md: -------------------------------------------------------------------------------- 1 | 2 | # Ideas Pages 3 | 4 | This is the home page of project ideas of NumFOCUS for Google Summer of Code 2018. 5 | Since NumFOCUS is an umbrella organization you will only find links to the ideas 6 | page of each organization under the NumFOCUS umbrella on this page. 7 | 8 | - Cantera https://github.com/Cantera/cantera/wiki/GSoC-2018-Ideas 9 | - conda-forge https://docs.google.com/document/d/1KSQvcP3Hxr60IhV-_dcGIb4IkmAEeAXNIqdX_2sqYoM 10 | - Data Retriever https://github.com/weecology/retriever/wiki/GSoC-2018-Project-Ideas 11 | - FEniCS Project https://github.com/numfocus/gsoc/blob/master/2018/ideas-list-fenics.md 12 | - MDAnalysis https://github.com/MDAnalysis/mdanalysis/wiki/GSoC-2018-Project-Ideas 13 | - yt https://github.com/yt-project/gsoc-2018 14 | - PyMC3 https://github.com/pymc-devs/pymc3/wiki/GSoC-2018-projects 15 | - gensim https://github.com/RaRe-Technologies/gensim/wiki/GSoC-2018-project-ideas 16 | - Julia https://julialang.org/soc/ideas-page.html 17 | - Shogun https://github.com/shogun-toolbox/shogun/wiki/Google-Summer-of-Code-2018-Projects 18 | 19 | See the [README](https://github.com/numfocus/gsoc/blob/master/README.md) for contact information of each org.
20 | -------------------------------------------------------------------------------- /2018/proposals/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/2018/proposals/.gitkeep -------------------------------------------------------------------------------- /2019/ideas-list.md: -------------------------------------------------------------------------------- 1 | 2 | # Ideas Pages 3 | 4 | This is the home page of projects ideas of NumFocus for Google Summer of Code 2019. 5 | Since NumFOCUS is an umbrella organization you will only find links to the ideas 6 | page of each organization under the NumFocus umbrella at this page. 7 | 8 | 9 | - ArviZ https://github.com/arviz-devs/arviz/wiki/GSoC-2019-projects 10 | - Cantera https://github.com/Cantera/cantera/wiki/GSoC-2019-Ideas 11 | - Chainer https://github.com/chainer/chainer/wiki/GSoC-2019-Project-Ideas 12 | - CuPy https://github.com/cupy/cupy/wiki/GSoC-2019-Ideas--CuPy 13 | - Dask https://github.com/dask/dask/wiki/GSOC-2019-Project-Ideas 14 | - Data Retriever https://github.com/weecology/retriever/wiki/GSoC-2019-Project-Ideas 15 | - FEniCS https://github.com/FEniCS/gsoc/blob/fenics/ideas-2019/2019/ideas-list-fenics.md 16 | - JuMP https://github.com/JuliaOpt/GSOC2019/blob/master/ideas-list.md 17 | - matplotlib https://github.com/matplotlib/matplotlib/wiki/GSOC-2019-Ideas-Page 18 | - MDAnalysis https://github.com/MDAnalysis/mdanalysis/wiki/GSoC-2019-Project-Ideas 19 | - nteract https://github.com/nteract/nteract/wiki/GSoC-2019-Ideas 20 | - PyMC3 https://github.com/pymc-devs/pymc3/wiki/GSoC-2019-projects 21 | - QuTiP https://github.com/qutip/qutip/wiki/Google-Summer-of-Code-2019 22 | - Yellowbrick https://github.com/wagner2010/gsoc/blob/wagner2010-patch-1/templates/ideas-page-prema.md 23 | - Blosc https://github.com/Blosc/c-blosc2/wiki/GSOC-2019-Project-Ideas 24 | 25 | 26 | See the [README](https://github.com/numfocus/gsoc/blob/master/README.md) for contact information of each org. 27 | -------------------------------------------------------------------------------- /2020/ideas-list.md: -------------------------------------------------------------------------------- 1 | 2 | # Ideas Pages 3 | 4 | This is the home page of projects ideas of NumFOCUS for Google Summer of Code 2020. 5 | Since NumFOCUS is an umbrella organization you will only find links to the ideas 6 | page of each organization under the NumFOCUS umbrella at this page. 
7 | 8 | - AiiDA https://github.com/aiidateam/aiida-core/wiki/GSoC-2020-Projects 9 | - ArviZ https://github.com/arviz-devs/arviz/wiki/GSoC-2020-projects 10 | - Bokeh https://github.com/bokeh/bokeh/wiki/GSOC-2020-Ideas-Page 11 | - Cantera https://github.com/Cantera/cantera/wiki/GSoC-2020-Ideas 12 | - Clawpack https://github.com/clawpack/clawpack/wiki/Google-Summer-of-Code-2020 13 | - Colour https://github.com/colour-science/GSoC/blob/master/2020/GSoC-2020-Project-Ideas.md 14 | - conda-forge https://github.com/conda-forge/blog/blob/gh-pages/_posts/2020-02-04-GSoC.md 15 | - CuPy https://github.com/cupy/cupy/wiki/GSoC-2020-Project-Ideas 16 | - Data Retriever https://github.com/weecology/retriever/wiki/GSoC-2020-Project-Ideas 17 | - Econ-Ark https://github.com/econ-ark/OverARK/wiki/GSoC-2020-Project-Ideas 18 | - Effective Quadratures https://github.com/Effective-Quadratures/Effective-Quadratures/wiki/GSoC-2020-Projects 19 | - JuMP https://github.com/jump-dev/GSOC2020/blob/master/ideas-list.md 20 | - Matplotlib https://github.com/matplotlib/matplotlib/projects/12 21 | - nteract https://github.com/nteract/nteract/wiki/GSoC-2020-Ideas 22 | - Optuna https://github.com/optuna/optuna/wiki/Optuna-GSoC-2020 23 | - pvlib https://github.com/pvlib/pvlib-python/wiki/GSoC-2020-Project 24 | - PyMC3 https://github.com/pymc-devs/pymc3/wiki/GSoC-2020-projects 25 | - PySAL https://github.com/pysal/pysal/wiki/Google-Summer-of-Code-2020 26 | - QuTiP https://github.com/qutip/qutip/wiki/Google-Summer-of-Code-2020 27 | - signac https://github.com/glotzerlab/signac/wiki/GSoC-2020-Projects 28 | 29 | See the [README](https://github.com/numfocus/gsoc/blob/master/README.md#organizations-confirmed-under-numfocus-umbrella) for contact information of each org. 30 | -------------------------------------------------------------------------------- /2021/ideas-list.md: -------------------------------------------------------------------------------- 1 | # Ideas Pages 2 | 3 | This is the home page of projects ideas of NumFOCUS for Google Summer of Code 2021. 4 | Since NumFOCUS is an umbrella organization you will only find links to the ideas 5 | page of each organization under the NumFOCUS umbrella at this page. 
6 | 7 | - AiiDA https://github.com/aiidateam/aiida-core/wiki/GSoC-2021-Projects 8 | - ArviZ https://github.com/arviz-devs/arviz/wiki/GSoC-2021-projects 9 | - Colour https://github.com/colour-science/GSoC/blob/master/2021/GSoC-2021-Project-Ideas.md 10 | - CB-Geo MPM https://github.com/cb-geo/mpm/issues/704#issue-796189505 11 | - CuPy https://github.com/cupy/cupy/wiki/GSoC-2021-Project-Ideas 12 | - Dask https://github.com/dask/dask/wiki/GSOC-2021-Project-Ideas 13 | - Data Retriever https://github.com/weecology/retriever/wiki/GSoC-2021-Project-Ideas 14 | - Effective Quadratures https://github.com/Effective-Quadratures/equadratures/wiki/GSoC-2021-Projects 15 | - GeoPandas https://github.com/geopandas/geopandas/wiki/Google-Summer-of-Code-2021 16 | - Gridap https://github.com/gridap/GSoC/blob/main/2021/ideas-list.md 17 | - LFortran https://gitlab.com/lfortran/lfortran/-/wikis/GSoC-2021-Ideas 18 | - JuMP https://github.com/jump-dev/GSOC2021 19 | - matplotlib https://github.com/matplotlib/matplotlib/wiki/GSOC-2021-ideas 20 | - NetworkX https://networkx.org/documentation/latest/developer/projects.html#mentored-projects 21 | - Optuna https://github.com/optuna/optuna/wiki/Optuna-GSoC-2021 22 | - pvlib https://github.com/pvlib/pvlib-python/wiki/GSoC-2021-Projects 23 | - PyBaMM https://github.com/pybamm-team/PyBaMM/wiki/GSoC-2021-Projects 24 | - PyMC3 https://github.com/pymc-devs/pymc3/wiki/GSoC-2021-projects 25 | - PySAL https://github.com/pysal/pysal/wiki/Google-Summer-of-Code-2021 26 | - PyTorch-Ignite https://github.com/pytorch/ignite/wiki/GSoC-2021-project 27 | - QuTiP https://github.com/qutip/qutip/wiki/Google-Summer-of-Code-2021 28 | - SciML 29 | - Numerical Differential Equations https://sciml.ai/gsoc/gsoc_diffeq/ 30 | - Scientific Machine Learning https://sciml.ai/gsoc/gsoc_sciml/ 31 | - Symbolic-Numeric Computing https://sciml.ai/gsoc/gsoc_symbolic/ 32 | - Stan https://github.com/stan-dev/design-docs/blob/master/gsoc_proposals/2021/proposal_main.md 33 | 34 | 35 | See the [README](https://github.com/numfocus/gsoc/blob/master/README.md#organizations-confirmed-under-numfocus-umbrella) for contact information of each org. 36 | -------------------------------------------------------------------------------- /2022/ideas-list.md: -------------------------------------------------------------------------------- 1 | # Ideas Pages 2 | 3 | This is the home page of projects ideas of NumFOCUS for Google Summer of Code 2022. 4 | Since NumFOCUS is an umbrella organization you will only find links to the ideas 5 | page of each organization under the NumFOCUS umbrella at this page. 
6 | 7 | - ArviZ https://github.com/arviz-devs/arviz/wiki/GSoC-2022-projects 8 | - CB-Geo MPM https://github.com/cb-geo/mpm/discussions/724 9 | - CuPy https://github.com/cupy/cupy/wiki/GSoC-2022-Project-Ideas 10 | - CVXPY https://github.com/cvxpy/GSOC2022 11 | - Data Retriever https://github.com/weecology/retriever/wiki/GSoC-2022-Project-Ideas 12 | - Econ-ARK https://github.com/econ-ark/OverARK/wiki/GSoC-2022-Project-Ideas 13 | - FEniCS https://github.com/FEniCS/gsoc/blob/fenics/ideas-2022/2022/ideas-list-fenics.md 14 | - FluxML https://julialang.org/jsoc/gsoc/flux/ 15 | - GeoPandas https://github.com/geopandas/geopandas/wiki/Google-Summer-of-Code-2022 16 | - Gridap https://github.com/gridap/GSoC/blob/main/2022/ideas-list.md 17 | - JuMP https://github.com/jump-dev/GSOC2022 18 | - LFortran https://gitlab.com/lfortran/lfortran/-/wikis/GSoC%202022%20Ideas 19 | - NetworkX https://networkx.org/documentation/latest/developer/projects.html 20 | - Optuna https://github.com/optuna/optuna/wiki/Optuna-GSoC-2022 21 | - pvlib https://github.com/pvlib/pvlib-python/wiki/GSoC-2022-Projects 22 | - PyBaMM https://github.com/pybamm-team/PyBaMM/wiki/GSoC-2022-Projects 23 | - PyMC https://github.com/pymc-devs/pymc/wiki/GSoC-2022-projects 24 | - PySAL https://github.com/pysal/pysal/wiki/Google-Summer-of-Code-2022 25 | - PyTorch-Ignite https://github.com/pytorch/ignite/wiki/GSoC-2022-project 26 | - QuTiP https://github.com/qutip/qutip/wiki/Google-Summer-of-Code-2022 27 | - SciML https://sciml.ai/dev/#google_summer_of_code 28 | - signac https://github.com/glotzerlab/signac/wiki/GSoC-2022-Projects 29 | - Taskflow https://github.com/taskflow/GSoC2022 30 | - Zarr https://github.com/zarr-developers/gsoc/blob/main/2022/ideas-list.md 31 | 32 | 33 | See the [README](https://github.com/numfocus/gsoc/blob/master/README.md#organizations-confirmed-under-numfocus-umbrella) for contact information of each org. 34 | -------------------------------------------------------------------------------- /2023/ideas-list.md: -------------------------------------------------------------------------------- 1 | # Ideas Pages 2 | 3 | This is the home page of projects ideas of NumFOCUS for Google Summer of Code 2023. 4 | Since NumFOCUS is an umbrella organization you will only find links to the ideas 5 | page of each organization under the NumFOCUS umbrella at this page. 
6 | 7 | - [AiiDA](https://github.com/aiidateam/aiida-core/wiki/GSoC-2023-Projects) 8 | - [ArviZ](https://github.com/arviz-devs/arviz/wiki/GSoC-2023-projects) 9 | - [CB-Geo MPM](https://github.com/cb-geo/mpm/discussions/740) 10 | - [Colour Science](https://github.com/colour-science/GSoC/blob/master/2023/GSoC-2023-Project-Ideas.md) 11 | - [conda-forge](https://hackmd.io/@conda-forge/ryxecoj2j) 12 | - [CuPy](https://github.com/cupy/cupy/wiki/GSoC-2023-Project-Ideas) 13 | - [Data Retriever](https://github.com/weecology/retriever/wiki/GSoC-2023-Project-Ideas) 14 | - [FEniCS](https://github.com/FEniCS/gsoc/blob/fenics/gsoc-2023/2023/ideas-list-fenics.md) 15 | - [FluxML](https://fluxml.ai/gsoc) 16 | - [Gridap](https://github.com/gridap/GSoC/blob/main/2023/ideas-list.md) 17 | - [JuMP](https://github.com/jump-dev/GSOC2023) 18 | - [matplotlib](https://github.com/matplotlib/matplotlib/wiki/GSoC-2023-Ideas) 19 | - [Mesa](https://github.com/projectmesa/mesa/wiki/Projects-for-Google-Summer-of-Code-2023) 20 | - [NetworkX](https://networkx.org/documentation/latest/developer/projects.html#mentored-projects) 21 | - [OpenFHE](https://github.com/openfheorg/openfhe-development/wiki/GSOC-2023) 22 | - [Open Science Labs](https://hackmd.io/@GvoPVECJQAmNl6JmM2UPMQ/BJEITVBAj) 23 | - [pvlib](https://github.com/pvlib/pvlib-python/wiki/GSoC-2023-Projects) 24 | - [PyBaMM](https://github.com/pybamm-team/PyBaMM/wiki/GSoC-2023-Projects) 25 | - [PyLops](https://github.com/PyLops/pylops/wiki/GSoC-2023-Project-Ideas) 26 | - [PyMC](https://github.com/pymc-devs/pymc/wiki/GSoC-2023-projects) 27 | - [PySAL](https://github.com/pysal/pysal/wiki/Google-Summer-of-Code-2023) 28 | - [PyTorch-Ignite](https://github.com/pytorch/ignite/wiki/GSoC-2023-project-ideas) 29 | - [QuTiP](https://github.com/qutip/qutip/wiki/Google-Summer-of-Code-2023) 30 | - [SciML](https://sciml.ai/dev/#google_summer_of_code) 31 | - [Taskflow](https://github.com/taskflow/GSoC2023) 32 | - [TNL](https://gitlab.com/tnl-project/tnl/-/wikis/GSoC-2023) 33 | 34 | See the [README](https://github.com/numfocus/gsoc/blob/master/README.md#organizations-confirmed-under-numfocus-umbrella) for contact information of each org. 35 | -------------------------------------------------------------------------------- /2024/ideas-list.md: -------------------------------------------------------------------------------- 1 | # Ideas Pages 2 | 3 | This is the home page of projects ideas of NumFOCUS for Google Summer of Code 2024. 4 | Since NumFOCUS is an umbrella organization you will only find links to the ideas 5 | page of each organization under the NumFOCUS umbrella at this page. 
6 | 7 | - [aeon](https://github.com/aeon-toolkit/aeon-admin/blob/main/gsoc/gsoc-2024-projects.md) 8 | - [AiiDA](https://github.com/aiidateam/aiida-core/wiki/GSoC-2024-Projects) 9 | - [ArviZ](https://github.com/arviz-devs/arviz/wiki/GSoC-2024-projects) 10 | - [Bambi](https://github.com/bambinos/bambi/wiki/GSoC-2024-projects) 11 | - [biocommons](https://github.com/orgs/biocommons/projects/8/views/1) 12 | - [CB-Geo MPM](https://github.com/cb-geo/mpm/issues/744) 13 | - [conda](https://hackmd.io/@conda-community/conda-gsoc-ideas-2024) 14 | - [CuPy](https://github.com/cupy/cupy/wiki/GSoC-2024-Project-Ideas) 15 | - [Data Retriever](https://github.com/weecology/retriever/wiki/GSoC-2024-Project-Ideas) 16 | - [FEniCS](https://github.com/fenics/gsoc/blob/gsoc-2024/2024/fenics-ideas-list.md) 17 | - [FluxML](https://fluxml.ai/gsoc) 18 | - [Gridap](https://github.com/gridap/GSoC/blob/main/2024/ideas-list.md) 19 | - [GOSST](https://github.com/numfocus/gsoc/wiki/GSoC-2024-GOSST) by NumFOCUS and Google Open Source 20 | - [JupyterLab](https://github.com/orgs/jupyterlab/projects/8/views/1?pane=info) by Jupyter 21 | - [matplotlib](https://github.com/matplotlib/matplotlib/wiki/Matplotlib-GSoC-2024-Ideas) 22 | - [NetworkX](https://networkx.org/documentation/latest/developer/projects.html) 23 | - [Open Science Labs](https://github.com/OpenScienceLabs/gsoc/blob/main/project-ideas/gsoc2024.md) 24 | - [Optuna](https://github.com/optuna/optuna/wiki/Optuna-GSoC-2024) 25 | - [pvlib](https://github.com/pvlib/pvlib-python/wiki/GSoC-2024-Projects) 26 | - [PyBaMM](https://pybamm.org/gsoc/2024/) 27 | - [PyMC](https://github.com/pymc-devs/pymc/wiki/GSoC-2024-projects) 28 | - [PySAL](https://github.com/pysal/pysal/wiki/Google-Summer-of-Code-2024) 29 | - [QuTiP](https://github.com/qutip/qutip/wiki//Google-Summer-of-Code-2024) 30 | - [SciML](https://sciml.ai/dev/#google_summer_of_code) 31 | - [Taskflow](https://github.com/taskflow/GSoC2024) 32 | - [Zarr](https://github.com/zarr-developers/gsoc/blob/main/2024/ideas-list.md) 33 | 34 | See the [README](https://github.com/numfocus/gsoc/blob/master/README.md#organizations-confirmed-under-numfocus-umbrella) for contact information of each org. 35 | -------------------------------------------------------------------------------- /2025/ideas-list.md: -------------------------------------------------------------------------------- 1 | # Ideas Pages 2 | 3 | This is the home page of projects ideas of NumFOCUS for Google Summer of Code 2025. 4 | Since NumFOCUS is an umbrella organization you will only find links to the ideas 5 | page of each organization under the NumFOCUS umbrella at this page. 
6 | 7 | - [aeon](https://github.com/aeon-toolkit/aeon-admin/blob/main/gsoc/gsoc-2025-projects.md) 8 | - [AiiDA](https://github.com/aiidateam/aiida-core/wiki/GSoC-2025-Projects) 9 | - [ArviZ](https://github.com/arviz-devs/arviz/wiki/GsoC-2025-projects) 10 | - [conda / rattler](https://github.com/conda/rattler/issues/1058) 11 | - [Data Retriever](https://github.com/weecology/retriever/wiki/GSoC-2025-Project-Ideas) 12 | - [DISCOVER Cookbook](https://github.com/numfocus/DISCOVER-Cookbook/discussions/208) 13 | - [igraph](https://github.com/igraph/igraph/wiki/Mentored-Projects) 14 | - [mlpack](https://github.com/mlpack/mlpack/wiki/SummerOfCodeIdeas) 15 | - [NetworkX](https://networkx.org/documentation/latest/developer/projects.html) 16 | - [Open2C](https://github.com/open2c/open2c.github.io/wiki/GSoC-2025) 17 | - [optimagic](https://github.com/optimagic-dev/optimagic/discussions/559) 18 | - [pvlib](https://github.com/pvlib/pvlib-python/wiki/GSoC-2025-Projects) 19 | - [PyBaMM](https://pybamm.org/gsoc/2025/) 20 | - [PyMC](https://github.com/pymc-devs/pymc/wiki/GSoC-2025-projects) 21 | - [PySAL](https://github.com/pysal/pysal/wiki/Google-Summer-of-Code-2025) 22 | - [Qutip](https://github.com/qutip/qutip/wiki//Google-Summer-of-Code-2025) 23 | - [sbi](https://github.com/sbi-dev/sbi/wiki/GSoC_2025_Projects) 24 | - [SciML](https://sciml.ai/dev/#google_summer_of_code) 25 | - [Stan](https://github.com/stan-dev/stan/wiki/GSOC-2025-Proposed-Projects) 26 | - [TNL](https://gitlab.com/tnl-project/tnl/-/wikis/GSoC-2025) 27 | - [toqito](https://github.com/vprusso/toqito/wiki/GSoC-2025-Projects) 28 | - [Zarr](https://github.com/zarr-developers/gsoc/blob/main/2025/ideas-list.md) 29 | 30 | See the [README](https://github.com/numfocus/gsoc/blob/master/README.md#organizations-confirmed-under-numfocus-umbrella) for contact information of each org. 31 | -------------------------------------------------------------------------------- /CONTRIBUTING-mentors.md: -------------------------------------------------------------------------------- 1 | # Contributing Guide for Mentors and Sub Orgs 2 | ## Organizations Eligible Under NumFOCUS Umbrella 3 | 4 | To be eligible to participate in GSoC under the NumFOCUS umbrella, an 5 | organization must have: 6 | 7 | 1. One sub-org admin 8 | and at least two mentors (**the sub-org admin can be one of the mentors**) 9 | who are willing to commit for the full GSoC period. 10 | 2. NumFOCUS [Fiscally Sponsored][sponsored] or [Affiliated][affiliated] status 11 | 3. Have a good ideas page (**NumFOCUS Admins will help with that**). 12 | 13 | The sub-org administrator is responsible for communicating with 14 | NumFOCUS and advertising GSoC to the community of their organization. 15 | 16 | You can sign up as a participating org with us until the application deadline for 17 | the current year or up to 3 days after Google announces that NumFOCUS is 18 | participating as an umbrella org. 19 | 20 | When you sign up with us, you should also tell us your preferred way for students 21 | to contact you and a link to your new contributor page if you have one. 22 | 23 | ## Sub Org Administrators 24 | 25 | Each project/organization under the NumFOCUS umbrella for GSoC should have one 26 | administrator.
27 | 28 | The **sub-org admins** are responsible for the following: 29 | 30 | - provide a list of project ideas to the **organization administrator** 31 | - advertise the application to the project/organization they represent 32 | - sign up mentors with NumFOCUS 33 | - connect users with NumFOCUS profile 34 | - request a minimum and a maximum number of slots from the **organization 35 | administrator** 36 | - assign mentors and students to available slots 37 | - ensure evaluations for accepted projects are submitted on time 38 | - try to have the whole community engage with the students in their project 39 | 40 | ## Mentors 41 | 42 | To be a mentor, please tell your sub-org administrator so they can give you all the 43 | information you need to sign up. 44 | 45 | ## Projects 46 | 47 | Please make sure that the project can be at least partly completed during the summer so 48 | that students can pass the final evaluation. Please also read 49 | the [student guide][CS], especially our requirements for students. 50 | 51 | To ensure a project can be completed, we recommend as a guideline that the 52 | mentor should be able to complete the project in about one week of full-time 53 | work. This doesn't sound like a lot of time, but students generally take 54 | much longer than a long-time developer. They have to get familiar with the code 55 | and have to acquire a huge amount of domain knowledge. 56 | 57 | There has to be at least one primary mentor and one backup mentor per project. 58 | This means you should have at least 2 mentors available. A mentor can only be 59 | 'primary' for one student but a backup for as many as they like. The sub-org 60 | admin can also be a mentor. 61 | 62 | If you have a proposal, you can use our [proposal templates][template] and 63 | publish the ideas list here, or you can link to an ideas list on one of your own 64 | websites/repositories. 65 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Please check 4 | 5 | - [Contributing Guide for Students][CS] 6 | - [Contributing Guide for Mentors][CM] 7 | 8 | [CM]: CONTRIBUTING-mentors.md 9 | [CS]: CONTRIBUTING-students.md 10 | -------------------------------------------------------------------------------- /PULL_REQUEST_TEMPLATE: -------------------------------------------------------------------------------- 1 | Hi 2 | 3 | Thank you for choosing to apply for a NumFOCUS project. You can use pull 4 | requests on this repository to work on a draft of your proposal before the 5 | student application period begins to get early feedback. 6 | 7 | Before you open a pull request with a draft of your proposal, please make sure 8 | that you have contacted your organization beforehand. Some organizations prefer 9 | that you communicate on their preferred channel (email/IRC) and are more likely to 10 | give you feedback then. On the [README][readme] we list the preferred contact 11 | information of every organization and project idea.
12 | 13 | best NumFOCUS GSoC Admins 14 | 15 | [readme]: https://github.com/numfocus/gsoc/blob/master/README.md -------------------------------------------------------------------------------- /img/CVXPY-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/CVXPY-logo.png -------------------------------------------------------------------------------- /img/NumFocus_LRG.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/NumFocus_LRG.png -------------------------------------------------------------------------------- /img/PyBaMM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/PyBaMM.png -------------------------------------------------------------------------------- /img/aeon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/aeon.png -------------------------------------------------------------------------------- /img/aiida.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/aiida.png -------------------------------------------------------------------------------- /img/arviz.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/arviz.png -------------------------------------------------------------------------------- /img/bambi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/bambi.png -------------------------------------------------------------------------------- /img/biocommons.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/biocommons.png -------------------------------------------------------------------------------- /img/blosc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/blosc.png -------------------------------------------------------------------------------- /img/cantera-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/cantera-logo.png -------------------------------------------------------------------------------- /img/chainer-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/chainer-logo.png -------------------------------------------------------------------------------- /img/clawpack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/clawpack.png 
-------------------------------------------------------------------------------- /img/colour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/colour.png -------------------------------------------------------------------------------- /img/conda_forge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/conda_forge.png -------------------------------------------------------------------------------- /img/cupy-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/cupy-logo.png -------------------------------------------------------------------------------- /img/dash.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/dash.png -------------------------------------------------------------------------------- /img/dask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/dask.png -------------------------------------------------------------------------------- /img/discover-cookbook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/discover-cookbook.png -------------------------------------------------------------------------------- /img/ecodata-retriever.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/ecodata-retriever.png -------------------------------------------------------------------------------- /img/econark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/econark.png -------------------------------------------------------------------------------- /img/equadratures.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/equadratures.png -------------------------------------------------------------------------------- /img/fenics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/fenics.png -------------------------------------------------------------------------------- /img/flux.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/flux.png -------------------------------------------------------------------------------- /img/geopandas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/geopandas.png -------------------------------------------------------------------------------- 
/img/gridap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/gridap.png -------------------------------------------------------------------------------- /img/igraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/igraph.png -------------------------------------------------------------------------------- /img/jump.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/jump.png -------------------------------------------------------------------------------- /img/jupyter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/jupyter.png -------------------------------------------------------------------------------- /img/lfortran.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/lfortran.png -------------------------------------------------------------------------------- /img/logo-gensim_large.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/logo-gensim_large.png -------------------------------------------------------------------------------- /img/matplotlib.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/matplotlib.png -------------------------------------------------------------------------------- /img/mdanalysis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/mdanalysis.png -------------------------------------------------------------------------------- /img/mesa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/mesa.png -------------------------------------------------------------------------------- /img/mlpack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/mlpack.png -------------------------------------------------------------------------------- /img/mpm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/mpm.png -------------------------------------------------------------------------------- /img/networkx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/networkx.png -------------------------------------------------------------------------------- /img/nteract.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/nteract.png -------------------------------------------------------------------------------- /img/openfhe_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/openfhe_logo.png -------------------------------------------------------------------------------- /img/optimagic_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/optimagic_logo.png -------------------------------------------------------------------------------- /img/optuna.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/optuna.png -------------------------------------------------------------------------------- /img/pvlib.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/pvlib.png -------------------------------------------------------------------------------- /img/pymc_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/pymc_logo.png -------------------------------------------------------------------------------- /img/pysal_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/pysal_logo.png -------------------------------------------------------------------------------- /img/pytorchignite-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/pytorchignite-logo.png -------------------------------------------------------------------------------- /img/qutip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/qutip.png -------------------------------------------------------------------------------- /img/sciml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/sciml.png -------------------------------------------------------------------------------- /img/shogun-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/shogun-logo.png -------------------------------------------------------------------------------- /img/signac.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/signac.png -------------------------------------------------------------------------------- /img/spyder.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/spyder.png -------------------------------------------------------------------------------- /img/stan-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/stan-logo.png -------------------------------------------------------------------------------- /img/yellowbrick.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/yellowbrick.png -------------------------------------------------------------------------------- /img/yt-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/yt-logo.png -------------------------------------------------------------------------------- /img/zarr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numfocus/gsoc/2a785420b9157ec8108e0e81b743aa3b0052fadc/img/zarr.png -------------------------------------------------------------------------------- /organization/README.md: -------------------------------------------------------------------------------- 1 | ## NumFOCUS Application Materials 2 | 3 | - [NumFOCUS Operations][OP] 4 | - [NumFOCUS Profile][OA] 5 | - [NumFOCUS Questionnaire][OQ] 6 | - [NumFOCUS Team][OT] 7 | - [Organization Stipend][stipend] 8 | - [Mentor Summit][summit] 9 | 10 | For more information, check 11 | http://en.flossmanuals.net/melange/org-application-period/. 12 | 13 | 14 | [OA]: profile.md 15 | [OP]: operations.md 16 | [OQ]: questionnaire.md 17 | [OT]: team.md 18 | [stipend]: stipend.md 19 | [summit]: summit.md 20 | -------------------------------------------------------------------------------- /organization/message-to-accepted-students.md: -------------------------------------------------------------------------------- 1 | Congratulations! You must already have received an email from Google: your proposal has been accepted as one of the projects under the NumFOCUS umbrella for the Google Summer of Code this year. You should be pleased; you were selected from among many very high quality applicants. 2 | 3 | Mridul and Henry are the GSoC Administrators for NumFOCUS. If you have any questions your mentor can't answer, or problems with your mentor, please contact us first before you write to Google. 4 | 5 | In this email you will find general information on being a NumFOCUS GSoC 6 | student. You will also receive an email from your mentor with 7 | information about working on your project soon. 8 | 9 | 1. Good communication between student and mentor is important for 10 | your success at GSoC. 11 | 12 | If you have any problem with your relationship with your mentor, 13 | please contact one of the [administrators][admin]. We are here to 14 | help you. 15 | 16 | 2. The coding period only starts on 7 June. From now until then we 17 | are in the "Community Bonding Period". During this period you 18 | should: 19 | 20 | 1. Create a blog if you don't have one. There is a [guide][guide] available to 21 | get you started with [GitHub Pages][GHP] and Jekyll. (You can use any other 22 | blogging platform: WordPress, a personal blog, Medium, etc.) If you have trouble, 23 | we can help you.
24 | After you create your blog, you should [send us the link to it][blog form]. 25 | 26 | 2. Get to know your mentors better. We recommend scheduling an 27 | online meeting (IRC/Google Hangouts/Zoom ...). 28 | 29 | 3. When the coding period starts, you must: 30 | 31 | 1. follow the "Schedule of Deliverables" of your proposal. It's 32 | OK if you need to change it a little as long as you talk with 33 | your mentor about it. 34 | 35 | 2. write a blog post status report on your project at least every 36 | two weeks. This way the rest of the project and the NumFOCUS community can see 37 | the exciting new things you are coding. It's best if you 38 | go ahead and start writing a blog post during the community 39 | bonding period, to get in the habit. 40 | 41 | 42 | Good luck, and don't hesitate to let us know if you need anything. 43 | 44 | NumFOCUS Administrators for GSoC 45 | 46 | [admin]: https://github.com/numfocus/gsoc/blob/master/organization/team.md#admin 47 | [GHP]: https://pages.github.com/ 48 | [guide]: https://docs.github.com/en/pages/setting-up-a-github-pages-site-with-jekyll 49 | [blog form]: https://goo.gl/forms 50 | -------------------------------------------------------------------------------- /organization/message-to-rejected-students.md: -------------------------------------------------------------------------------- 1 | As you can see above, your proposal to the NumFOCUS umbrella was 2 | not accepted for the Google Summer of Code this year. There 3 | were a lot of excellent applications, but resources (the 4 | number of slots Google gave us) are limited, and many good 5 | applications had to be rejected. 6 | 7 | You helped us a lot just by submitting such a good proposal. 8 | That helped to raise the bar. Thanks for your time and 9 | effort. 10 | 11 | We hope that you learned something new. We'd like to have 12 | you around, and any contribution you make to any of the 13 | projects under the NumFOCUS umbrella is very much appreciated 14 | (bug reports, participating on the list, patches, code). But 15 | of course, we understand you may not have time if you find 16 | some other job over the summer. 17 | 18 | We liked that you discussed your proposal with us. If you 19 | decide to work on your project anyway, we are sure you'll 20 | find many people willing to help out. 21 | 22 | We encourage you to try again next year. Experience has 23 | shown that students who are involved in the project early 24 | are more likely to be accepted, so if you do plan to do 25 | this, we recommend that you continue your contributions to 26 | the project. 27 | 28 | Thanks and good luck! 29 | 30 | NumFOCUS Administrators for GSoC 31 | -------------------------------------------------------------------------------- /organization/operations.md: -------------------------------------------------------------------------------- 1 | # NumFOCUS GSOC Operations 2 | 3 | [NumFOCUS](http://numfocus.org/) 4 | will apply to Google Summer of Code (GSoC) 5 | as an umbrella organization. 6 | [All projects/organizations supported by NumFOCUS](http://numfocus.org/projects/) 7 | can participate in GSoC under the NumFOCUS umbrella. 8 | Projects/organizations not supported by NumFOCUS 9 | can participate in GSoC under the NumFOCUS umbrella 10 | if approved by the NumFOCUS board. 11 | 12 | ## NumFOCUS Administrator for GSoC 13 | 14 | The NumFOCUS board will designate two people, 15 | referred to in this document as **organization administrators**, 16 | to be in charge of the NumFOCUS application **for each edition** of GSoC.
17 | 18 | The **organization administrators** are responsible for: 19 | 20 | - create the initial application for NumFOCUS to apply for GSoC 21 | - advertise the application to **all** projects/organizations supported 22 | by NumFOCUS 23 | - manage the NumFOCUS profile at GSoC 24 | - request a minimum and a maximum number of slots 25 | that can accommodate the slots requested by each project/organization 26 | under the NumFOCUS umbrella. 27 | - stay in contact with sub-org admins 28 | 29 | ### How to sign up mentors 30 | 31 | You can use Google Forms to sign up mentors who want to work with NumFOCUS. This 32 | will have the advantage that we automatically have a list of all mentors and 33 | their contact information. Mentors should be given access to the NumFOCUS mentoring 34 | mailing list. 35 | 36 | ## Timeline for Organization Administrators 37 | 38 | *January*: 39 | - Ask NumFOCUS if they want to participate, preferably with the help of NumFOCUS staff 40 | 41 | *February*: 42 | - Check if application docs are up to date 43 | - Apply as umbrella organization to GSoC 44 | - Check the project ideas page 45 | 46 | *March*: 47 | - If accepted, advertise to prospective students. 48 | - Accept sub-orgs until 5 days after Google announces participating orgs 49 | - Check that the project ideas page is ready for students again 50 | 51 | *April*: 52 | - Request slot numbers from sub-orgs and report them to Google (Google Forms are good here) 53 | - Select students for sub-orgs 54 | - Publish a blog post about accepted projects 55 | 56 | *First Evaluation*: 57 | - Ensure that every mentor fulfills the evaluation 58 | 59 | *Second Evaluation*: 60 | - Ensure that every mentor fulfills the evaluation 61 | 62 | *Final Evaluation*: 63 | - Ensure that every mentor fulfills the evaluation 64 | 65 | *September/October*: 66 | - Select administrators for next year and pass the information to the NumFOCUS staff. 67 | - Visit the Mentor Summit 68 | 69 | ## Guidelines to Select and Accept Student Proposals 70 | 71 | **Note**: Slots are requested after the student submission phase ends 72 | 73 | **Sub Orgs** will discuss and accept/reject student proposals based on: 74 | 75 | 1. the number of slots received; 76 | 2. if the proposal already has at least one mentor; 77 | 3. student background and proposal. 78 | 79 | **If** NumFOCUS receives fewer slots than requested, the **organization 80 | administrators** solve conflicts in slot allocation for each sub org. We will 81 | try to assign at least one slot to each sub org. 82 | 83 | **Changes for 2022** 84 | 85 | GSoC changed the rules for slot requests in 2022 and now we need to provide them 86 | with a strict ranking of our proposals with the number of slot requests. 87 | 88 | Each sub organisation will submit a tiered, ranked list of student proposals they 89 | want to accept. The tiers are divided into 3 categories: 90 | - 1: We absolutely want to mentor this contributor; this is the whole reason we want to participate in GSoC! 91 | - 2: We would love to have them contribute and provide mentorship. 92 | - 3: If there are enough slots we would love to take them on too! 93 | 94 | Please do remember you still need to have enough mentors for all the contributors; you don't need 95 | to rank every submitted application, just the ones you have the bandwidth to mentor over the summer. 96 | This system adds a new level to the minimum and maximum slot numbers we used until 2021. 97 | 98 | Please do keep in mind this system doesn't work if all the suborgs put all of the contributors in tier 1.
99 | So if you are requesting more than 3 slots, please try to put them in different tiers. 100 | 101 | Once all the suborgs have contributed their tiered rankings, the NumFOCUS org admins will run a script 102 | to randomise all the applicants within their respective tiers and then create the final ranked list. The 103 | final ranked list will be shared with everyone. 104 | 105 | 106 | The script will look something like this: 107 | ``` python 108 | >>> sub_org_1 = {1: ["Contributor_1"], 2: ["Contributor_2", "Contributor_3"], 3: []} 109 | >>> sub_org_2 = {1: ["Contributor_4"], 2: [], 3: []} 110 | >>> sub_org_3 = {1: ["Contributor_5"], 2: [], 3: []} 111 | ... 112 | ... 113 | ... 114 | >>> all_sub_orgs = [sub_org_1, sub_org_2, ... ] 115 | 116 | >>> from collections import defaultdict 117 | >>> import random 118 | >>> contributor_tiers = defaultdict(list) 119 | >>> for sub in all_sub_orgs: 120 | >>> for tier in sub: 121 | >>> contributor_tiers[tier].extend(sub[tier]) 122 | 123 | >>> rank = 1 124 | >>> for n in contributor_tiers: 125 | >>> random.shuffle(contributor_tiers[n]) 126 | >>> for i in contributor_tiers[n]: 127 | >>> print(f'Rank {rank}: {i}') 128 | >>> rank += 1 129 | Rank 1: Contributor_5 130 | Rank 2: Contributor_4 131 | Rank 3: Contributor_1 132 | Rank 4: Contributor_2 133 | Rank 5: Contributor_3 134 | 135 | ``` 136 | -------------------------------------------------------------------------------- /organization/profile.md: -------------------------------------------------------------------------------- 1 | # Organization Profile 2 | 3 | ## Your details 4 | 5 | - Why does your org want to participate in Google Summer of Code? 6 | 7 | NumFOCUS promotes and supports research and development of open-source 8 | computing tools. It is the fiscal sponsor for cutting-edge, 9 | high-profile 10 | data science and numeric computing tools such as numpy, pandas, 11 | Jupyter, Julia, and rOpenSci. 12 | 13 | NumFOCUS would like to participate in Google Summer of Code 2017 as an 14 | umbrella organization for these tools in continuance of its mission. 15 | By participating, we hope to provide our community with another 16 | opportunity 17 | to help develop critical pieces of the open-source data science and 18 | numeric computing ecosystem. 19 | 20 | - What would your org consider to be a successful summer? 21 | 22 | As an umbrella organisation for open source scientific tools, it's 23 | important for the projects to not just get new features implemented 24 | but to develop a sustainable, welcoming community for new contributors. 25 | A successful summer for us means more active new contributors to our projects. 26 | Previous GSoC students with NumFOCUS have ended up becoming 27 | core developers and maintainers of the projects. 28 | 29 | - How many potential mentors have agreed to mentor this year? 30 | 31 | 11-15 32 | 33 | - How will you keep mentors engaged with their students? 34 | 35 | We plan to engage our project leaders and students on a regular basis to 36 | ensure that measurable, achievable goals are set for their Summer of 37 | Code projects, that progress is being made toward their completion, 38 | and that there is clear communication of expectations and adequate 39 | knowledge-sharing between mentors and students.
40 | 41 | Our individual projects have different organizing strategies, and we 42 | intend to respect these, relying on our board of directors and project 43 | management staff to also guarantee that Summer of Code participants 44 | will have a positive and rewarding experience and ensure that their 45 | participation in these projects will have clear, tangible results. 46 | 47 | - How will you help your students stay on schedule to complete their projects? 48 | 49 | We believe that measurable, achievable goals and clear, transparent 50 | expectations are necessary in order to ensure that both students and the 51 | projects have a positive experience with the Summer of Code program. To 52 | facilitate this, we intend to meet with project leaders and students on a 53 | regular basis to allow them to report any issues they have encountered. Of 54 | course, for day-to-day progress on issues, we intend to use tools like 55 | GitHub and Bugtrack. 56 | 57 | Furthermore, we offer students some unique opportunities to share their work 58 | and their progress with the broader open source scientific and numeric 59 | computing community. We believe that these will also serve as incentives to 60 | keep their projects on track with tangible, presentable results. We intend 61 | to offer students the opportunity to present their progress to the entire 62 | NumFOCUS community via blog posts hosted and distributed on NumFOCUS.org, 63 | via sections in our regular newsletter, and via speaking opportunities at 64 | conferences. 65 | 66 | - How will you get your students involved in your community during GSoC? 67 | 68 | The NumFOCUS community is passionate, engaged, and growing. 69 | We have many conferences and chapters (meetups) planned for 2017 70 | around the world. 71 | We will offer students the 72 | opportunity to present their work at one of those conferences 73 | or chapter meetups 74 | and facilitate connections to allow them to 75 | present their work at other non-affiliated events. 76 | 77 | Furthermore, we intend to ask students to present their progress to 78 | our 79 | community on a regular basis. We will promote these progress reports 80 | via 81 | the NumFOCUS newsletter, Twitter feed, and our public blog. 82 | 83 | - How will you keep students involved in your community after GSoC? 84 | 85 | We intend to invite GSoC participants to present their work at future 86 | conferences and chapter meetups that we support. Additionally, individual 87 | projects under NumFOCUS have their own respective communities, and 88 | have 89 | historically had great experiences involving and engaging volunteer 90 | contributors. 91 | 92 | - Has your org been accepted as a mentoring org in Google Summer of Code before? 93 | 94 | Yes 95 | 96 | - Which years did your org participate in GSoC? 97 | 98 | 2015, 2016, 2017, 2018, 2019, 2020 99 | 100 | - What is your success/fail rate per year? 101 | 102 | 2015: 3/3, 2016: 7/7, 2017: 10/12 , 2018: 40/45, 2019: 19/23, 2020: 28/30 103 | 104 | - Are you part of a foundation/umbrella organization? 105 | 106 | No. 107 | 108 | - What year was your project started? 109 | 110 | 2012 111 | 112 | - Anything else we should know? 113 | 114 | ## Public Profile 115 | 116 | - Website URL 117 | 118 | http://numfocus.org/ 119 | 120 | - Tagline 121 | 122 | NumFOCUS promotes open source scientific software. 
123 | 124 | - Logo 125 | 126 | See logo file in repository 127 | 128 | - Primary Open Source License 129 | 130 | MIT 131 | 132 | - Organization Category 133 | 134 | Science and Medicine 135 | 136 | - Technology Tags 137 | 138 | python, javascript, r, c/c++ 139 | 140 | - Topic Tags 141 | 142 | scientific computing, numerical computation, graphics, data science 143 | 144 | - Ideas List 145 | 146 | https://github.com/numfocus/gsoc/blob/master/2017/ideas-list.md 147 | 148 | ## Descriptions 149 | 150 | - Short Description 151 | 152 | NumFOCUS supports and promotes world-class, innovative, open source 153 | scientific software. 154 | 155 | - Description 156 | 157 | NumFOCUS supports and promotes world-class, innovative, open source 158 | scientific software. Most individual projects, even the wildly successful 159 | ones, find the overhead of a non-profit to be too large for their community 160 | to bear. NumFOCUS provides a critical service as an umbrella organization 161 | for these projects. 162 | 163 | ## Proposals 164 | 165 | - Application Instructions 166 | 167 | [CONTRIBUTING-students.md](../CONTRIBUTING-students.md) 168 | 169 | - Proposal Tags 170 | 171 | - don't provide them 172 | 173 | ## Contact Methods 174 | 175 | - IRC Channel 176 | 177 | - Mailing list 178 | 179 | https://groups.google.com/a/numfocus.org/forum/#!forum/gsoc 180 | 181 | - General Email 182 | 183 | info@numfocus.org 184 | 185 | ## Links 186 | 187 | 188 | - Twitter URL 189 | 190 | https://twitter.com/numfocus 191 | 192 | - Blog URL 193 | 194 | http://www.numfocus.org/blog 195 | -------------------------------------------------------------------------------- /organization/stipend.md: -------------------------------------------------------------------------------- 1 | # Organization Stipend 2 | 3 | At the end of the Program, 4 | mentoring organizations, 5 | **in this case NumFOCUS**, 6 | may receive a $500 USD stipend per student mentored. 7 | 8 | NumFOCUS agrees to pass along the stipend for each student mentored 9 | to the project that mentored the student. 10 | 11 | ## Fiscally Sponsored Projects 12 | 13 | Fiscally sponsored projects will have their stipend 14 | transferred to their bank account 15 | as soon as NumFOCUS receives Google's payment, 16 | which normally happens in December. 17 | 18 | ## Affiliated Projects 19 | 20 | Affiliated projects will be contacted by NumFOCUS 21 | after NumFOCUS receives Google's payment 22 | with a bank account enquiry. 23 | 24 | ## Non-Affiliated Projects 25 | 26 | Non-affiliated projects will be contacted by NumFOCUS 27 | after NumFOCUS receives Google's payment 28 | with a bank account enquiry. 29 | -------------------------------------------------------------------------------- /organization/summit.md: -------------------------------------------------------------------------------- 1 | # Mentor Summit 2 | 3 | Every year Google hosts a Mentor Summit 4 | and each [**mentoring**] organization, 5 | **in this case only NumFOCUS**, 6 | may send two (2) members to the Summit. 7 | 8 | NumFOCUS will nominate as Summit ambassadors 9 | the lead administrator of the current year 10 | **and** the lead administrator of the following year. 11 | If the current lead administrator 12 | is going to continue in that role in the following year, 13 | then one of the other administrators can be invited 14 | by the lead administrator.
15 | 16 | Any mentor interested in attending the Summit, 17 | **in case any of the administrators** 18 | aren't able to attend the event, 19 | can email the lead administrator at any time 20 | to express their interest. 21 | **The lead administrator will select, 22 | at his/her own discretion, who will attend 23 | the Summit in his/her place.** 24 | -------------------------------------------------------------------------------- /templates/ideas-page.md: -------------------------------------------------------------------------------- 1 | # Sub Organization Name 2 | 3 | ## Mentors 4 | 5 | Please list the names of the available mentors. 6 | 7 | ## Information for Students 8 | 9 | If you have special information or instructions for applicants, you should 10 | list or link to it here. If not, you can delete this section. Please keep in mind that we 11 | do have some general instructions for students. 12 | 13 | ## Project Ideas 14 | 15 | ### Idea Title 16 | 17 | #### Abstract 18 | 19 | One-line description of the project. 20 | 21 | | **Intensity** | **Priority** | **Involves** | **Mentors** | 22 | | ------------- | ------------ | ------------- | ----------- | 23 | | {{ Trivial - Easy - Moderate - Hard }} | {{ Low - Medium - High }} | {{ }} | {{ [@foo][], [@bar][] }} | 24 | 25 | #### Technical Details 26 | 27 | Long description of the project. **Should** include all technical details of the 28 | project, such as the libraries involved. Please also link to relevant docs / issues / theory / 29 | papers for your project if available. 30 | 31 | #### Helpful Experience 32 | 33 | List of background experience that we would like / expect from the student. 34 | 35 | #### First steps 36 | 37 | Students don't need to do this before the Google Summer of Code coding period starts, 38 | but it is good if they do, because it will help them be sure that this is how they 39 | want to spend the summer. 40 | -------------------------------------------------------------------------------- /templates/proposal.md: -------------------------------------------------------------------------------- 1 | # Title 2 | 3 | ## Abstract 4 | 5 | Short description of your project. Max 10 sentences. This **SHOULD NOT** be a 6 | copy of the project idea text. 7 | 8 | ## Technical Details 9 | 10 | Long description of the project. **Must** include all technical details of the 11 | project, such as the libraries involved. 12 | 13 | Here it is important to show whether you have had previous conversations with your 14 | mentors. You can show relevant pieces of code that you want to change. You can 15 | link to literature you used during your research. 16 | 17 | ## Schedule of Deliverables 18 | 19 | This section should contain a list of your milestones. The list below is a starting point based on the 20 | different phases of GSoC. You can/should add more detail 21 | for each phase by breaking it down into weeks or setting specific targets for each 22 | phase. Each target should be split into sub-tasks with a time estimate; [work 23 | breakdown structures][wbs] are helpful here. 24 | 25 | ### **Community Bonding Period** 26 | 27 | This phase is for getting to know the community better. Check that your build 28 | environment is set up. This time should also be used to discuss your project in 29 | more detail with the community and, in general, to introduce it. 30 | 31 | *Note:* We require you to write regular blog posts. Now is a good time to make 32 | sure your blog works and to send us the link.
33 | 34 | ### **Phase 1** 35 | 36 | Deliverables 37 | 38 | ### **Phase 2** 39 | 40 | Deliverables 41 | 42 | ### **Final Week** 43 | 44 | At this stage you should finish up your project and make 45 | sure that you have submitted your code to your organization. Our criterion for marking 46 | your project as a success is that you submit code before the end of GSoC. 47 | 48 | ## Development Experience 49 | 50 | Do you have code on GitHub? Can you show previous contributions to other projects? 51 | Have you done other code-related projects or university courses? 52 | 53 | ## Other Experiences 54 | 55 | 56 | ## Why this project? 57 | 58 | Why do you want to do this project? 59 | 60 | ## Appendix 61 | 62 | Extra content 63 | 64 | [wbs]: https://en.wikipedia.org/wiki/Work_breakdown_structure 65 | --------------------------------------------------------------------------------