/**
 * Converts a file of newline-delimited JSON records (one JSON object per
 * line) into a flat CSV file.
 *
 * Typical use is two passes over the same input:
 *   1. getAllFields() discovers every key (and nested sub key) that occurs.
 *   2. convertToCSV() writes one CSV row per record using that field tree.
 *
 * A "field tree" is an associative array in which a leaf column maps to
 * null and a nested object maps to another field tree, e.g.
 *   ['id' => null, 'attrs' => ['x' => null, 'y' => null]]
 * which yields the CSV columns "id", "attrs.x", "attrs.y".
 *
 * Progress messages are echoed to standard output while processing.
 */
class JSONParser
{
    /** @var string Path of the JSON-lines input file. */
    private $filePath;

    /** @var int|null Line count of the input; null when it could not be determined. */
    private $totalLines;

    /**
     * @param string $filePath Path of the JSON-lines file to read.
     */
    public function __construct($filePath)
    {
        $this->filePath = $filePath;

        $this->calculateNoOfLines();
    }

    /**
     * Write every record of the input file to a CSV file.
     *
     * The first CSV row holds the dotted column names derived from $fields.
     * Missing (or null) values are written as "N/A"; list values are joined
     * with commas into a single cell.
     *
     * @param string $outputFilePath Destination CSV path (overwritten if it exists).
     * @param array  $fields         Field tree, as returned by getAllFields().
     *
     * @throws \InvalidArgumentException When $fields is not an array.
     * @throws \RuntimeException         When a file cannot be opened or written.
     */
    public function convertToCSV($outputFilePath, $fields)
    {
        if (!is_array($fields)) {
            throw new \InvalidArgumentException('Field definition must be an array.');
        }

        $outputHandle = $this->openFile($outputFilePath, 'w');

        try {
            $this->writeCSVRow($outputHandle, $this->prepareFieldNames($fields));

            $count = $this->forEachRecord(function (array $record) use ($outputHandle, $fields) {
                $this->writeCSVRow($outputHandle, $this->prepareCSVRecord($record, $fields));
            });
        } finally {
            fclose($outputHandle);
        }

        echo "Processed $count lines. 100%.\n\n";
        echo "CSV file written to:\n\t $outputFilePath \n";
    }

    /**
     * Scan the whole input file and collect every key that appears.
     *
     * Keys are merged recursively, so a nested key that first shows up in a
     * later record is still included. Keys keep first-seen order.
     *
     * @return array Field tree (leaf => null, nested object => sub tree).
     *
     * @throws \RuntimeException When the input file cannot be opened.
     */
    public function getAllFields()
    {
        $fields = [];

        $count = $this->forEachRecord(function (array $record) use (&$fields) {
            $fields = $this->mergeFields($fields, $this->getKeys($record));
        });

        echo "Processed $count lines. 100%.\n\n";

        return $fields;
    }

    /**
     * Read the input line by line and hand each decoded record to $onRecord.
     *
     * Lines that do not decode to a non-empty array (blank lines, invalid
     * JSON, bare scalars) are counted but not passed on. Progress is reported
     * every 1000 lines.
     *
     * @param callable $onRecord Invoked as $onRecord(array $record).
     *
     * @return int Number of lines read.
     *
     * @throws \RuntimeException When the input file cannot be opened.
     */
    private function forEachRecord(callable $onRecord)
    {
        $handle = $this->openFile($this->filePath, 'r');
        $count = 0;

        try {
            // Testing the fgets() result (rather than feof() beforehand)
            // avoids processing a phantom line after a trailing newline.
            while (($line = fgets($handle)) !== false) {
                $record = json_decode($line, true);

                if (is_array($record) && !empty($record)) {
                    call_user_func($onRecord, $record);
                }

                $count++;
                if ($count % 1000 === 0) {
                    $this->showProgress($count);
                }
            }
        } finally {
            fclose($handle);
        }

        return $count;
    }

    /**
     * Open a file or fail loudly.
     *
     * @param string $path
     * @param string $mode fopen() mode.
     *
     * @return resource
     *
     * @throws \RuntimeException When fopen() fails.
     */
    private function openFile($path, $mode)
    {
        $handle = fopen($path, $mode);
        if ($handle === false) {
            throw new \RuntimeException("Couldn't get handle for $path");
        }

        return $handle;
    }

    /**
     * Write one CSV row using the historical fputcsv() defaults.
     *
     * The delimiter, enclosure and escape character are spelled out so the
     * output does not depend on the defaults of the running PHP version.
     *
     * @param resource $handle
     * @param array    $row
     *
     * @throws \RuntimeException When the row cannot be written.
     */
    private function writeCSVRow($handle, array $row)
    {
        if (fputcsv($handle, $row, ',', '"', '\\') === false) {
            throw new \RuntimeException('Failed to write CSV row.');
        }
    }

    /**
     * Flatten one record into CSV cells, in the column order of $fields.
     *
     * @param array $record Decoded JSON object (or sub object).
     * @param array $fields Field tree describing the wanted columns.
     *
     * @return array List of cell values.
     */
    private function prepareCSVRecord(array $record, array $fields)
    {
        $row = [];

        foreach ($fields as $key => $children) {
            if ($this->isNested($children)) {
                // Every sub column must still be emitted when the record has
                // no object here, so recurse into an empty array instead.
                $child = (isset($record[$key]) && is_array($record[$key])) ? $record[$key] : [];
                $row = array_merge($row, $this->prepareCSVRecord($child, $children));
            } else {
                $row[] = isset($record[$key]) ? $this->formatValue($record[$key]) : 'N/A';
            }
        }

        return $row;
    }

    /**
     * Turn a decoded JSON value into a single CSV cell.
     *
     * Scalars pass through unchanged. Arrays are joined with commas; array
     * elements that are themselves arrays are JSON-encoded first so they do
     * not degrade to the literal string "Array".
     *
     * @param mixed $value
     *
     * @return mixed
     */
    private function formatValue($value)
    {
        if (!is_array($value)) {
            return $value;
        }

        $parts = [];
        foreach ($value as $item) {
            $parts[] = is_array($item) ? json_encode($item) : $item;
        }

        return implode(',', $parts);
    }

    /**
     * Build the CSV header: one dotted name per leaf of the field tree.
     *
     * @param array  $fields Field tree.
     * @param string $prefix Dotted path of the enclosing keys, with trailing dot.
     *
     * @return array List of column names.
     */
    private function prepareFieldNames(array $fields, $prefix = '')
    {
        $names = [];

        foreach ($fields as $key => $children) {
            if ($this->isNested($children)) {
                $names = array_merge($names, $this->prepareFieldNames($children, $prefix . $key . '.'));
            } else {
                $names[] = $prefix . $key;
            }
        }

        return $names;
    }

    /**
     * Whether a field-tree entry describes a nested object (a non-empty
     * array) rather than a leaf column.
     *
     * @param mixed $fieldValue
     *
     * @return bool
     */
    private function isNested($fieldValue)
    {
        return is_array($fieldValue) && count($fieldValue) > 0;
    }

    /**
     * Whether an array has anything other than the sequential keys 0..n-1.
     * An empty array is reported as not associative.
     *
     * @param array $arr
     *
     * @return bool
     */
    private function isAssoc(array $arr)
    {
        if ($arr === []) {
            return false;
        }

        return array_keys($arr) !== range(0, count($arr) - 1);
    }

    /**
     * Derive the field tree of a single record.
     *
     * @param array $arr Decoded JSON object.
     *
     * @return array Field tree: associative sub arrays recurse, everything
     *               else (scalars, lists, empty arrays) becomes a null leaf.
     */
    private function getKeys($arr)
    {
        $fields = [];

        foreach ($arr as $key => $value) {
            $fields[$key] = (is_array($value) && $this->isAssoc($value))
                ? $this->getKeys($value)
                : null;
        }

        return $fields;
    }

    /**
     * Recursively merge a newly found field tree into the known one.
     *
     * Existing keys keep their position. A key seen so far only as a leaf is
     * upgraded to a sub tree when a later record shows it as an object.
     *
     * @param array $known Field tree collected so far.
     * @param array $found Field tree of the current record.
     *
     * @return array Merged field tree.
     */
    private function mergeFields(array $known, array $found)
    {
        foreach ($found as $key => $children) {
            // array_key_exists(): leaves are stored as null, which isset() would miss.
            if (!array_key_exists($key, $known) || $known[$key] === null) {
                $known[$key] = $children;
            } elseif (is_array($children)) {
                $known[$key] = $this->mergeFields($known[$key], $children);
            }
        }

        return $known;
    }

    /*
    |--------------------------------------------------------------------------
    | OUTPUT UTILITIES
    |--------------------------------------------------------------------------
    */

    /**
     * Count the lines of the input file so progress can be shown as a
     * percentage. Leaves totalLines as null when the file cannot be read.
     *
     * Counting is done in PHP rather than by shelling out to `wc -l`, so the
     * path is never interpolated into a shell command.
     */
    private function calculateNoOfLines()
    {
        $this->totalLines = null;

        if (!is_file($this->filePath) || !is_readable($this->filePath)) {
            return;
        }

        $handle = fopen($this->filePath, 'rb');
        if ($handle === false) {
            return;
        }

        $lines = 0;
        $lastByte = "\n";
        while (!feof($handle)) {
            $chunk = fread($handle, 1048576);
            if ($chunk === false || $chunk === '') {
                break;
            }
            $lines += substr_count($chunk, "\n");
            $lastByte = substr($chunk, -1);
        }
        fclose($handle);

        // A final line without a trailing newline is still a line.
        if ($lastByte !== "\n") {
            $lines++;
        }

        $this->totalLines = $lines;
    }

    /**
     * Echo a progress message, with a percentage when the total is known.
     *
     * @param int $count Number of lines processed so far.
     */
    private function showProgress($count)
    {
        if ($this->totalLines) {
            $percent = number_format(($count / $this->totalLines) * 100, 2, '.', '');
            echo "Processed $count lines. $percent%.\n";
        } else {
            echo "Processed $count lines.\n";
        }

        flush();
    }
}
19 | -------------------------------------------------------------------------------- /convert-to-csv.php: -------------------------------------------------------------------------------- 1 | convertToCSV($outputFilePath, $fields); 27 | -------------------------------------------------------------------------------- /extract-fields.php: -------------------------------------------------------------------------------- 1 | getAllFields(); 15 | 16 | $fieldsJSON = json_encode($fields); 17 | 18 | // Write fields to the file 19 | $outputFilePath = 'fields/' . basename(__DIR__ . '/' . $filePath); 20 | $handle = fopen($outputFilePath, "w"); 21 | fwrite($handle, $fieldsJSON); 22 | fclose($handle); 23 | 24 | echo "Fields written to:\n\t $outputFilePath \n"; --------------------------------------------------------------------------------