├── .gitattributes
├── Dataset
└── CSV files
│ ├── RawData.7z.001
│ ├── RawData.7z.002
│ ├── RawData.7z.003
│ ├── RawData.7z.004
│ ├── RawData.7z.005
│ ├── RawData.7z.006
│ ├── RawData.7z.007
│ ├── RawData.7z.008
│ ├── RawData.7z.009
│ ├── RawData.7z.010
│ ├── RawData.7z.011
│ ├── RawData.7z.012
│ ├── RawData.7z.013
│ ├── RawData.7z.014
│ ├── RawData.7z.015
│ ├── RawData.7z.016
│ ├── RawData.7z.017
│ ├── RawData.7z.018
│ ├── RawData.7z.019
│ ├── RawData.7z.020
│ ├── RawData.7z.021
│ └── RawData.7z.022
├── README.md
├── Scraper
├── App.config
├── JSONReader.cs
├── KragleCore.csproj
├── Program.cs
├── Properties
│ └── AssemblyInfo.cs
├── PropertiesReader.cs
├── bin
│ └── Release
│ │ ├── Kragle.exe
│ │ ├── Kragle.exe.config
│ │ ├── Kragle.pdb
│ │ ├── Scraper.exe
│ │ ├── Scraper.pdb
│ │ ├── System.Json.dll
│ │ └── System.Json.xml
├── obj
│ ├── Debug
│ │ ├── DesignTimeResolveAssemblyReferencesInput.cache
│ │ ├── TemporaryGeneratedFile_036C0B5B-1481-4323-8D20-8F5ADCB23D92.cs
│ │ ├── TemporaryGeneratedFile_5937a670-0e60-4077-877b-f7221da3dda1.cs
│ │ └── TemporaryGeneratedFile_E7A71F73-0F8D-4B9B-B56E-8E70B10BC5D3.cs
│ └── Release
│ │ ├── DesignTimeResolveAssemblyReferencesInput.cache
│ │ ├── Kragle.exe
│ │ ├── Kragle.pdb
│ │ ├── KragleCore.csproj.FileListAbsolute.txt
│ │ ├── KragleCore.csprojResolveAssemblyReference.cache
│ │ ├── TemporaryGeneratedFile_036C0B5B-1481-4323-8D20-8F5ADCB23D92.cs
│ │ ├── TemporaryGeneratedFile_5937a670-0e60-4077-877b-f7221da3dda1.cs
│ │ └── TemporaryGeneratedFile_E7A71F73-0F8D-4B9B-B56E-8E70B10BC5D3.cs
└── test.txt
└── extreme Scratch projects
├── deadScriptsWithMoreThan70Lines.csv
├── projectsWithMoreThan100procedures.csv
├── projectsWithMoreThan20000LOC.csv
├── projectsWithMoreThan250Variables.csv
├── projectsWithMoreThan300Sprites.csv
├── recursiveProcedureCalls.csv
├── scriptsClonedMoreThan50Times.csv
└── scriptsWithMoreThan100cyclomaticComplexity.csv
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.gz filter=lfs diff=lfs merge=lfs -text
2 | *.zip filter=lfs diff=lfs merge=lfs -text
3 |
--------------------------------------------------------------------------------
/Dataset/CSV files/RawData.7z.001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.001
--------------------------------------------------------------------------------
/Dataset/CSV files/RawData.7z.002:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.002
--------------------------------------------------------------------------------
/Dataset/CSV files/RawData.7z.003:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.003
--------------------------------------------------------------------------------
/Dataset/CSV files/RawData.7z.004:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.004
--------------------------------------------------------------------------------
/Dataset/CSV files/RawData.7z.005:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.005
--------------------------------------------------------------------------------
/Dataset/CSV files/RawData.7z.006:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.006
--------------------------------------------------------------------------------
/Dataset/CSV files/RawData.7z.007:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.007
--------------------------------------------------------------------------------
/Dataset/CSV files/RawData.7z.008:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.008
--------------------------------------------------------------------------------
/Dataset/CSV files/RawData.7z.009:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.009
--------------------------------------------------------------------------------
/Dataset/CSV files/RawData.7z.010:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.010
--------------------------------------------------------------------------------
/Dataset/CSV files/RawData.7z.011:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.011
--------------------------------------------------------------------------------
/Dataset/CSV files/RawData.7z.012:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.012
--------------------------------------------------------------------------------
/Dataset/CSV files/RawData.7z.013:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.013
--------------------------------------------------------------------------------
/Dataset/CSV files/RawData.7z.014:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.014
--------------------------------------------------------------------------------
/Dataset/CSV files/RawData.7z.015:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.015
--------------------------------------------------------------------------------
/Dataset/CSV files/RawData.7z.016:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.016
--------------------------------------------------------------------------------
/Dataset/CSV files/RawData.7z.017:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.017
--------------------------------------------------------------------------------
/Dataset/CSV files/RawData.7z.018:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.018
--------------------------------------------------------------------------------
/Dataset/CSV files/RawData.7z.019:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.019
--------------------------------------------------------------------------------
/Dataset/CSV files/RawData.7z.020:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.020
--------------------------------------------------------------------------------
/Dataset/CSV files/RawData.7z.021:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.021
--------------------------------------------------------------------------------
/Dataset/CSV files/RawData.7z.022:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.022
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | This repository contains the dataset of 250K Scratch projects described in the paper [A Dataset of Scratch Programs: Scraped, Shaped and Scored](https://www.computer.org/csdl/proceedings/msr/2017/1544/00/07962409.pdf).
2 |
3 | The dataset is available as:
4 | * a MySQL database dump, in Gdrive: [/MySQL/](https://drive.google.com/open?id=1zzVzLvzXFYihVyTQaouIEuWMAMf57emY)
5 | * an SQL Server database backup, in Gdrive: [/SQLServer/](https://drive.google.com/open?id=1A0g3HcenH2ohruqDRwIQOJ0gUl7FCUHJ)
6 | * CSV files, in folder [/Dataset/CSV files/](Dataset/CSV%20files) or in Gdrive: [/CSV files/](https://drive.google.com/drive/folders/12L-ot-zOde35hViINe9wzTl9DkVTtDCs?usp=sharing). The version on Gdrive includes headers!
7 |
8 |
9 |
10 | The JSON files of the scraped Scratch projects are available in Gdrive: https://drive.google.com/file/d/0B5RLHmerPR2SZ25XMWI5SGxhbTA/view?usp=sharing
11 |
12 | The source files of the scraping program that we used for obtaining this information from the Scratch website are in the [/Scraper](Scraper) folder.
13 |
--------------------------------------------------------------------------------
/Scraper/App.config:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/Scraper/JSONReader.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections;
3 | using System.Collections.Generic;
4 | using System.IO;
5 | using System.Json;
6 | using System.Linq;
7 | using System.Net;
8 | using Scraper;
9 |
10 |
11 | namespace Kragle
12 | {
13 | public class JSONReader
14 | {
15 |         public class Script
16 |         {
17 |             // Plain data holder: a JsonArray of code plus five descriptive string fields.
18 |             public JsonArray Code;
19 |             public string Scope, ScopeName, Location;
20 |             public string ScriptId, ProgramId;
21 |
22 |             // Copies every constructor argument into the public field of the same name.
23 |             public Script(JsonArray code, string scope, string scopeName,
24 |                 string location, string scriptId, string programId)
25 |             {
26 |                 this.ProgramId = programId;
27 |                 this.ScriptId = scriptId;
28 |                 this.Location = location;
29 |                 this.ScopeName = scopeName;
30 |                 this.Scope = scope;
31 |                 this.Code = code;
32 |             }
33 |         }
34 |
35 | public static void ProcessJSON(string path)
36 | {
37 | DirectoryInfo d = new DirectoryInfo(path);
38 |
39 | FileInfo[] Files = d.GetFiles();
40 | int i = 0;
41 |
42 | foreach (FileInfo file in Files)
43 | {
44 | int dot = file.Name.IndexOf(".");
45 | string id = file.Name.Substring(0, dot);
46 |
47 | string filename = file.FullName;
48 |
49 | System.IO.StreamReader fileRead = new System.IO.StreamReader(filename);
50 | string JSON = fileRead.ReadToEnd();
51 |
52 | var allScripts = new List