├── .gitattributes ├── Dataset └── CSV files │ ├── RawData.7z.001 │ ├── RawData.7z.002 │ ├── RawData.7z.003 │ ├── RawData.7z.004 │ ├── RawData.7z.005 │ ├── RawData.7z.006 │ ├── RawData.7z.007 │ ├── RawData.7z.008 │ ├── RawData.7z.009 │ ├── RawData.7z.010 │ ├── RawData.7z.011 │ ├── RawData.7z.012 │ ├── RawData.7z.013 │ ├── RawData.7z.014 │ ├── RawData.7z.015 │ ├── RawData.7z.016 │ ├── RawData.7z.017 │ ├── RawData.7z.018 │ ├── RawData.7z.019 │ ├── RawData.7z.020 │ ├── RawData.7z.021 │ └── RawData.7z.022 ├── README.md ├── Scraper ├── App.config ├── JSONReader.cs ├── KragleCore.csproj ├── Program.cs ├── Properties │ └── AssemblyInfo.cs ├── PropertiesReader.cs ├── bin │ └── Release │ │ ├── Kragle.exe │ │ ├── Kragle.exe.config │ │ ├── Kragle.pdb │ │ ├── Scraper.exe │ │ ├── Scraper.pdb │ │ ├── System.Json.dll │ │ └── System.Json.xml ├── obj │ ├── Debug │ │ ├── DesignTimeResolveAssemblyReferencesInput.cache │ │ ├── TemporaryGeneratedFile_036C0B5B-1481-4323-8D20-8F5ADCB23D92.cs │ │ ├── TemporaryGeneratedFile_5937a670-0e60-4077-877b-f7221da3dda1.cs │ │ └── TemporaryGeneratedFile_E7A71F73-0F8D-4B9B-B56E-8E70B10BC5D3.cs │ └── Release │ │ ├── DesignTimeResolveAssemblyReferencesInput.cache │ │ ├── Kragle.exe │ │ ├── Kragle.pdb │ │ ├── KragleCore.csproj.FileListAbsolute.txt │ │ ├── KragleCore.csprojResolveAssemblyReference.cache │ │ ├── TemporaryGeneratedFile_036C0B5B-1481-4323-8D20-8F5ADCB23D92.cs │ │ ├── TemporaryGeneratedFile_5937a670-0e60-4077-877b-f7221da3dda1.cs │ │ └── TemporaryGeneratedFile_E7A71F73-0F8D-4B9B-B56E-8E70B10BC5D3.cs └── test.txt └── extreme Scratch projects ├── deadScriptsWithMoreThan70Lines.csv ├── projectsWithMoreThan100procedures.csv ├── projectsWithMoreThan20000LOC.csv ├── projectsWithMoreThan250Variables.csv ├── projectsWithMoreThan300Sprites.csv ├── recursiveProcedureCalls.csv ├── scriptsClonedMoreThan50Times.csv └── scriptsWithMoreThan100cyclomaticComplexity.csv /.gitattributes: 
-------------------------------------------------------------------------------- 1 | *.gz filter=lfs diff=lfs merge=lfs -text 2 | *.zip filter=lfs diff=lfs merge=lfs -text 3 | -------------------------------------------------------------------------------- /Dataset/CSV files/RawData.7z.001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.001 -------------------------------------------------------------------------------- /Dataset/CSV files/RawData.7z.002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.002 -------------------------------------------------------------------------------- /Dataset/CSV files/RawData.7z.003: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.003 -------------------------------------------------------------------------------- /Dataset/CSV files/RawData.7z.004: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.004 -------------------------------------------------------------------------------- /Dataset/CSV files/RawData.7z.005: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.005 -------------------------------------------------------------------------------- /Dataset/CSV files/RawData.7z.006: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.006 -------------------------------------------------------------------------------- /Dataset/CSV files/RawData.7z.007: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.007 -------------------------------------------------------------------------------- /Dataset/CSV files/RawData.7z.008: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.008 -------------------------------------------------------------------------------- /Dataset/CSV files/RawData.7z.009: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.009 -------------------------------------------------------------------------------- /Dataset/CSV files/RawData.7z.010: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.010 -------------------------------------------------------------------------------- /Dataset/CSV files/RawData.7z.011: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.011 -------------------------------------------------------------------------------- 
/Dataset/CSV files/RawData.7z.012: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.012 -------------------------------------------------------------------------------- /Dataset/CSV files/RawData.7z.013: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.013 -------------------------------------------------------------------------------- /Dataset/CSV files/RawData.7z.014: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.014 -------------------------------------------------------------------------------- /Dataset/CSV files/RawData.7z.015: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.015 -------------------------------------------------------------------------------- /Dataset/CSV files/RawData.7z.016: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.016 -------------------------------------------------------------------------------- /Dataset/CSV files/RawData.7z.017: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.017 
-------------------------------------------------------------------------------- /Dataset/CSV files/RawData.7z.018: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.018 -------------------------------------------------------------------------------- /Dataset/CSV files/RawData.7z.019: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.019 -------------------------------------------------------------------------------- /Dataset/CSV files/RawData.7z.020: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.020 -------------------------------------------------------------------------------- /Dataset/CSV files/RawData.7z.021: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.021 -------------------------------------------------------------------------------- /Dataset/CSV files/RawData.7z.022: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUDelftScratchLab/ScratchDataset/7b1d636ac29fc4da3605874b9a56155a310017cb/Dataset/CSV files/RawData.7z.022 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This repository contains the dataset of 250K Scratch projects as described in paper [A Dataset of Scratch Programs: Scraped, Shaped 
and Scored](https://www.computer.org/csdl/proceedings/msr/2017/1544/00/07962409.pdf). 2 | 3 | The dataset is available as: 4 | * a MySQL database dump, on Google Drive: [/MySQL/](https://drive.google.com/open?id=1zzVzLvzXFYihVyTQaouIEuWMAMf57emY) 5 | * an SQL Server database backup, on Google Drive: [/SQLServer/](https://drive.google.com/open?id=1A0g3HcenH2ohruqDRwIQOJ0gUl7FCUHJ) 6 | * CSV files, in the folder [/Dataset/CSV files/](Dataset/CSV%20files) or on Google Drive: [/CSV files/](https://drive.google.com/drive/folders/12L-ot-zOde35hViINe9wzTl9DkVTtDCs?usp=sharing). The version on Google Drive includes headers! 7 | 8 | 9 | 10 | The JSON files of the scraped Scratch projects are available on Google Drive: https://drive.google.com/file/d/0B5RLHmerPR2SZ25XMWI5SGxhbTA/view?usp=sharing 11 | 12 | The source files of the scraping program that we used for obtaining this information from the Scratch website are in the [/Scraper](Scraper) folder. 13 | -------------------------------------------------------------------------------- /Scraper/App.config: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /Scraper/JSONReader.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections; 3 | using System.Collections.Generic; 4 | using System.IO; 5 | using System.Json; 6 | using System.Linq; 7 | using System.Net; 8 | using Scraper; 9 | 10 | 11 | namespace Kragle 12 | { 13 | public class JSONReader 14 | { 15 | public class Script 16 | { 17 | public JsonArray Code; 18 | public string Scope; 19 | public string ScopeName; 20 | public string Location; 21 | public string ScriptId; 22 | public string ProgramId; 23 | 24 | public Script(JsonArray code, string scope, string scopeName, string location, string scriptId, string programId) 25 | { 26 | Code = code; 27 | Scope = scope; 28 | ScopeName = scopeName; 29 | 
Location = location; 30 | ScriptId = scriptId; 31 | ProgramId = programId; 32 | } 33 | } 34 | 35 | public static void ProcessJSON(string path) 36 | { 37 | DirectoryInfo d = new DirectoryInfo(path); 38 | 39 | FileInfo[] Files = d.GetFiles(); 40 | int i = 0; 41 | 42 | foreach (FileInfo file in Files) 43 | { 44 | int dot = file.Name.IndexOf("."); 45 | string id = file.Name.Substring(0, dot); 46 | 47 | string filename = file.FullName; 48 | 49 | System.IO.StreamReader fileRead = new System.IO.StreamReader(filename); 50 | string JSON = fileRead.ReadToEnd(); 51 | 52 | var allScripts = new List