├── .gitignore ├── src ├── proto │ ├── protoc-gen-php │ │ ├── .gitignore │ │ ├── php_options.proto │ │ ├── addressbook.proto │ │ ├── README │ │ ├── Makefile │ │ ├── test.php │ │ ├── protocolbuffers.inc.php │ │ ├── strutil.h │ │ ├── protoc-gen-php.cc │ │ └── strutil.cc │ ├── market.proto │ └── protocolbuffers.inc.php └── MarketSession.php ├── examples ├── local.php.example ├── test_categories.php ├── test_screenshot.php ├── test_top.php └── test_search.php ├── composer.json ├── .github └── FUNDING.yml ├── README.md ├── CITATION.cff └── LICENCE /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | .project 3 | -------------------------------------------------------------------------------- /src/proto/protoc-gen-php/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.pb.cc 3 | *.pb.h 4 | protoc-gen-php 5 | -------------------------------------------------------------------------------- /examples/local.php.example: -------------------------------------------------------------------------------- 1 | login(GOOGLE_EMAIL, GOOGLE_PASSWD); 11 | $session->setAndroidId(ANDROID_DEVICEID); 12 | sleep(1);#Reduce Throttling 13 | }catch(Exception $e){ 14 | echo "Exception: ".$e->getMessage()."\n"; 15 | echo "ERROR: cannot login as " . GOOGLE_EMAIL; 16 | exit(1); 17 | } 18 | 19 | $cr = new CategoriesRequest(); 20 | $reqGroup = new Request_RequestGroup(); 21 | $reqGroup->setCategoriesRequest($cr); 22 | 23 | //Fetch Request 24 | try{ 25 | $response = $session->execute($reqGroup); 26 | }catch(Exception $e){ 27 | echo "Exception: ".$e->getMessage(); 28 | } 29 | 30 | $groups = $response->getResponsegroupArray(); 31 | foreach ($groups as $rg) { 32 | $categoriesResponse = $rg->getCategoriesResponse(); 33 | $categories = $categoriesResponse->getCategoriesArray(); 34 | foreach ($categories as $category) { 35 | echo $category->getTitle()."
"; 36 | 37 | $subcategories = $category->getSubCategoriesArray(); 38 | foreach ($subcategories as $subcategory) { 39 | echo "- ".$subcategory->getTitle()."
"; 40 | } 41 | } 42 | } -------------------------------------------------------------------------------- /examples/test_screenshot.php: -------------------------------------------------------------------------------- 1 | login(GOOGLE_EMAIL, GOOGLE_PASSWD); 11 | $session->setAndroidId(ANDROID_DEVICEID); 12 | sleep(1);#Reduce Throttling 13 | }catch(Exception $e){ 14 | echo "Exception: ".$e->getMessage()."\n"; 15 | echo "ERROR: cannot login as " . GOOGLE_EMAIL; 16 | exit(1); 17 | } 18 | 19 | //Build Request 20 | $appId = "7059973813889603239"; 21 | $imageId = 1; 22 | $gir = new GetImageRequest(); 23 | $gir->setImageUsage(GetImageRequest_AppImageUsage::SCREENSHOT); 24 | $gir->setAppId($appId); 25 | $gir->setImageId($imageId); 26 | 27 | $reqGroup = new Request_RequestGroup(); 28 | $reqGroup->setImageRequest($gir); 29 | 30 | //Fetch Request 31 | try{ 32 | $response = $session->execute($reqGroup); 33 | }catch(Exception $e){ 34 | echo "Exception: ".$e->getMessage(); 35 | } 36 | 37 | //Loop And Display 38 | $groups = $response->getResponsegroupArray(); 39 | #echo "".print_r($groups, true).""; 40 | foreach ($groups as $rg) { 41 | $imageResponse = $rg->getImageResponse(); 42 | file_put_contents("../".$appId."_".$imageId.".png", $imageResponse->getImageData()); 43 | 44 | ?>"> temp.hd; \ 56 | done ; 57 | -------------------------------------------------------------------------------- /src/proto/protoc-gen-php/test.php: -------------------------------------------------------------------------------- 1 | "\x01", 7 | 2 => "\x02", 8 | 127 => "\x7F", 9 | 128 => "\x80\x01", 10 | 300 => "\xAC\x02", 11 | ); 12 | 13 | function test_varint() { 14 | global $varint_tests; 15 | 16 | $fp = fopen('php://memory', 'r+b'); 17 | if ($fp === false) 18 | exit('Unable to open stream'); 19 | 20 | foreach ($varint_tests as $i => $enc) { 21 | 22 | // Write the answer into the buffer 23 | fseek($fp, 0, SEEK_SET); 24 | fwrite($fp, $enc); 25 | fseek($fp, 0, SEEK_SET); 26 | 27 | $a = 
Protobuf::read_varint($fp); 28 | if ($a != $i) 29 | exit("Failed to decode varint($i) got $a\n"); 30 | 31 | $len = Protobuf::write_varint($fp, $i); 32 | fseek($fp, 0, SEEK_SET); 33 | $b = fread($fp, $len); 34 | if ($b != $enc) 35 | exit("Failed to encode varint($i)\n"); 36 | 37 | $len = Protobuf::size_varint($i); 38 | 39 | echo "$i len($len) OK\n"; 40 | } 41 | fclose($fp); 42 | } 43 | test_varint(); 44 | */ 45 | 46 | if ($argc > 1) { 47 | $test = $argv[1]; 48 | require("$test.php"); 49 | 50 | if ($test == 'addressbook.proto') { 51 | $fp = fopen('test.book', 'rb'); 52 | 53 | $m = new tutorial_AddressBook($fp); 54 | 55 | var_dump($m); 56 | 57 | fclose($fp); 58 | 59 | } else if ($test == 'market.proto') { 60 | //$fp = fopen('market2-in-1.dec', 'rb'); 61 | $fp = fopen('market2-in-2.dec', 'rb'); 62 | //$fp = fopen('temp', 'rb'); 63 | 64 | $m = new Response($fp); 65 | 66 | echo $m; 67 | 68 | //$mem = fopen('php://memory', 'wb'); 69 | $mem = fopen('temp', 'wb'); 70 | if ($mem === false) 71 | exit('Unable to open output stream'); 72 | 73 | $s = fstat($fp); 74 | echo 'File size: ' . $s['size'] . "\n"; 75 | echo 'Guested size: ' . $m->size() . "\n"; 76 | $m->write($mem); 77 | echo 'Write size: ' . ftell($mem) . "\n"; 78 | 79 | fclose($mem); 80 | fclose($fp); 81 | } 82 | } 83 | 84 | ?> 85 | -------------------------------------------------------------------------------- /examples/test_top.php: -------------------------------------------------------------------------------- 1 | login(GOOGLE_EMAIL, GOOGLE_PASSWD); 11 | $session->setAndroidId(ANDROID_DEVICEID); 12 | sleep(1);#Reduce Throttling 13 | }catch(Exception $e){ 14 | echo "Exception: ".$e->getMessage()."\n"; 15 | echo "ERROR: cannot login as " . 
GOOGLE_EMAIL; 16 | exit(1); 17 | } 18 | 19 | //Build Request 20 | $ar = new AppsRequest(); 21 | $ar->setOrderType(AppsRequest_OrderType::POPULAR); 22 | $ar->setStartIndex(0); 23 | $ar->setEntriesCount(5); 24 | $ar->setViewType(AppsRequest_ViewType::PAID); 25 | $ar->setCategoryId("ARCADE"); 26 | $reqGroup = new Request_RequestGroup(); 27 | $reqGroup->setAppsRequest($ar); 28 | 29 | //Fetch Request 30 | try{ 31 | $response = $session->execute($reqGroup); 32 | }catch(Exception $e){ 33 | echo "Exception: ".$e->getMessage(); 34 | } 35 | 36 | //Loop And Display 37 | $groups = $response->getResponsegroupArray(); 38 | foreach ($groups as $rg) { 39 | $appsResponse = $rg->getAppsResponse(); 40 | $apps = $appsResponse->getAppArray(); 41 | foreach ($apps as $app) { 42 | echo $app->getTitle()." (".$app->getId().")
"; 43 | 44 | //Get comments 45 | echo "
"; 46 | $cr = new CommentsRequest(); 47 | $cr->setAppId($app->getId()); 48 | $cr->setEntriesCount(3); 49 | 50 | $reqGroup = new Request_RequestGroup(); 51 | $reqGroup->setCommentsRequest($cr); 52 | 53 | $response = $session->execute($reqGroup); 54 | $groups = $response->getResponsegroupArray(); 55 | foreach ($groups as $rg) { 56 | $commentsResponse = $rg->getCommentsResponse(); 57 | 58 | $comments = $commentsResponse->getCommentsArray(); 59 | foreach ($comments as $comment) { 60 | echo "".$comment->getAuthorName()." [".str_repeat("*", $comment->getRating())."]
"; 61 | echo $comment->getText()."

"; 62 | } 63 | } 64 | 65 | echo "
"; 66 | } 67 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Android Market API (PHP) 2 | 3 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.14769247.svg)](https://doi.org/10.5281/zenodo.14769247) 4 | 5 | Un-official PHP client for the (legacy) Android Market / Google Play Store — first released in 2012 and still used for archival research, market-data collection, and automation pipelines. 6 | 7 | --- 8 | 9 | ## Table of Contents 10 | 1. [Overview](#overview) 11 | 2. [Connection Settings & Troubleshooting](#connection-settings--troubleshooting) 12 | 3. [Examples](#examples) 13 | 4. [How to Cite](#how-to-cite) 14 | 5. [Issues & Support](#issues--support) 15 | 6. [Credits](#credits) 16 | 7. [License](#license) 17 | 18 | --- 19 | 20 | ## Overview 21 | This library exposes most of the original Android Market RPCs (login, 22 | app details, search, downloads) to PHP. **Google has never released an 23 | official Play Store API**, so this code relies on reverse-engineered 24 | protobuf calls that still work for many use cases. 25 | 26 | --- 27 | 28 | ## Connection Settings & Troubleshooting 29 | Configure **`examples/local.php`** with: 30 | 31 | | Setting | Notes | 32 | |---------|-------| 33 | | **Google Account (`USERNAME`, `PASSWORD`)** | Use an _App Password_ from a Google account with 2-step verification to reduce CAPTCHA blocks. | 34 | | **Android Device ID** | Retrieve via the free app . | 35 | | **Rate-Limiting** | The Play backend will 403/400 if you spam requests. Insert `sleep()` between calls. | 36 | | **CAPTCHA Unlock** | Log in to the same account in a browser and visit . 
| 37 | 38 | --- 39 | 40 | ## Examples 41 | See code for examples 42 | 43 | --- 44 | 45 | ## How to Cite 46 | If this software was helpful in your research, please cite **version v1**: 47 | 48 | ```bibtex 49 | @software{Koc_2025_android_market_api_php, 50 | author = {Vincent Koc}, 51 | title = {{Android Market API (PHP)}}, 52 | version = {v1}, 53 | year = {2025}, 54 | doi = {10.5281/zenodo.14769247}, 55 | url = {https://doi.org/10.5281/zenodo.14769247} 56 | } 57 | -------------------------------------------------------------------------------- /examples/test_search.php: -------------------------------------------------------------------------------- 1 | login(GOOGLE_EMAIL, GOOGLE_PASSWD); 11 | $session->setAndroidId(ANDROID_DEVICEID); 12 | sleep(1);#Reduce Throttling 13 | }catch(Exception $e){ 14 | echo "Exception: ".$e->getMessage()."\n"; 15 | echo "ERROR: cannot login as " . GOOGLE_EMAIL; 16 | exit(1); 17 | } 18 | 19 | //Build Request 20 | $ar = new AppsRequest(); 21 | $ar->setQuery($_GET["search"] ? $_GET["search"] : "froyo"); 22 | #$ar->setOrderType(AppsRequest_OrderType::NONE); 23 | $ar->setStartIndex(0); 24 | $ar->setEntriesCount(5); 25 | 26 | $ar->setWithExtendedInfo(true); 27 | #$ar->setViewType(AppsRequest_ViewType::PAID); 28 | #$ar->setAppType(AppType::WALLPAPER); 29 | 30 | $reqGroup = new Request_RequestGroup(); 31 | $reqGroup->setAppsRequest($ar); 32 | 33 | //Fetch Request 34 | try{ 35 | $response = $session->execute($reqGroup); 36 | }catch(Exception $e){ 37 | echo "Exception: ".$e->getMessage(); 38 | } 39 | 40 | //Loop And Display 41 | $groups = $response->getResponsegroupArray(); 42 | foreach ($groups as $rg) { 43 | $appsResponse = $rg->getAppsResponse(); 44 | $apps = $appsResponse->getAppArray(); 45 | foreach ($apps as $app) { 46 | echo $app->getTitle()." (".$app->getId().")
"; 47 | echo $app->getExtendedInfo()->getDescription()."

"; 48 | 49 | //Get comments 50 | echo "
"; 51 | $cr = new CommentsRequest(); 52 | $cr->setAppId($app->getId()); 53 | $cr->setEntriesCount(3); 54 | 55 | $reqGroup = new Request_RequestGroup(); 56 | $reqGroup->setCommentsRequest($cr); 57 | 58 | $response = $session->execute($reqGroup); 59 | $groups = $response->getResponsegroupArray(); 60 | foreach ($groups as $rg) { 61 | $commentsResponse = $rg->getCommentsResponse(); 62 | 63 | $comments = $commentsResponse->getCommentsArray(); 64 | foreach ($comments as $comment) { 65 | echo "".$comment->getAuthorName()." [".str_repeat("*", $comment->getRating())."]
"; 66 | echo $comment->getText()."

"; 67 | } 68 | } 69 | 70 | echo "
"; 71 | } 72 | } -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # ============================================================ 2 | # CITATION.cff ― Android Market API (PHP) 3 | # Format-spec: Citation File Format v1.2.0 4 | # ============================================================ 5 | 6 | cff-version: "1.2.0" 7 | message: "If you use this software, please cite it using the metadata below." 8 | 9 | title: "Android Market API (PHP)" 10 | version: "v1" 11 | doi: 10.5281/zenodo.14769247 # DOI for this specific release 12 | date-released: 2025-01-30 13 | 14 | type: software 15 | url: "https://github.com/splitfeed/android-market-api-php" 16 | repository: "https://github.com/splitfeed/android-market-api-php" 17 | repository-code: "https://github.com/splitfeed/android-market-api-php" 18 | 19 | authors: 20 | - given-names: Vincent 21 | family-names: Koc 22 | name: "Vincent Koc" 23 | affiliation: "Hyperthink LLC" 24 | 25 | # ------------------------------------------------------------------ 26 | # Alternate / historical identifiers (helps citation crawlers merge) 27 | # ------------------------------------------------------------------ 28 | identifiers: 29 | - type: doi 30 | value: 10.5281/zenodo.14769246 # concept DOI – all versions 31 | description: "DOI representing all versions (latest resolves here)" 32 | - type: doi 33 | value: 10.5281/zenodo.14769247 # version-specific DOI (v1) 34 | description: "DOI for version v1" 35 | - type: url 36 | value: "https://github.com/splitfeed/android-market-api-php" 37 | description: "Primary GitHub repository (current)" 38 | - type: url 39 | value: "https://github.com/ondervincentkoc/android-market-api-php" 40 | description: "Original GitHub location (2012 – 2018)" 41 | - type: url 42 | value: "https://code.google.com/archive/p/android-market-api-php/" 43 | description: "Google Code read-only mirror 
(legacy)" 44 | - type: url 45 | value: "http://android-market-api-php.googlecode.com/files/" 46 | description: "Legacy direct-download directory referenced in early blogs" 47 | 48 | keywords: 49 | - Android 50 | - Google Play 51 | - PHP 52 | - API 53 | - Software 54 | 55 | abstract: | 56 | PHP implementation of the (legacy) Android Market / Google Play API, enabling 57 | programmatic access to app metadata, purchase history, and APK downloads 58 | without relying on the official SDK. First released in 2012; still used in 59 | archival research, market-data collection, and automation pipelines. 60 | 61 | preferred-citation: 62 | type: software 63 | title: "Android Market API (PHP)" 64 | version: "v1" 65 | doi: 10.5281/zenodo.14769247 66 | url: "https://doi.org/10.5281/zenodo.14769247" 67 | authors: 68 | - given-names: Vincent 69 | family-names: Koc 70 | date-released: 2025-01-30 71 | -------------------------------------------------------------------------------- /src/proto/market.proto: -------------------------------------------------------------------------------- 1 | option java_package = "com.gc.android.market.api.model"; 2 | 3 | enum AppType { 4 | NONE = 0; 5 | APPLICATION = 1; 6 | RINGTONE = 2; 7 | WALLPAPER = 3; 8 | GAME = 4; 9 | } 10 | 11 | message AppsRequest { 12 | optional AppType appType = 1; 13 | optional string query = 2; 14 | optional string categoryId = 3; 15 | optional string appId = 4; 16 | optional bool withExtendedInfo = 6; 17 | 18 | enum OrderType { 19 | NONE = 0; 20 | POPULAR = 1; 21 | NEWEST = 2; 22 | FEATURED = 3; 23 | } 24 | 25 | enum ViewType { 26 | ALL = 0; 27 | FREE = 1; 28 | PAID = 2; 29 | } 30 | 31 | optional OrderType orderType = 7 [default = NONE]; 32 | optional uint64 startIndex = 8; 33 | optional int32 entriesCount = 9; 34 | optional ViewType viewType = 10 [default = ALL]; 35 | } 36 | 37 | 38 | message AppsResponse { 39 | repeated App app = 1; 40 | optional int32 entriesCount = 2; 41 | } 42 | 43 | message Category { 44 | optional 
int32 appType = 2; 45 | optional string title = 4; 46 | optional string categoryId = 3; 47 | optional string subtitle = 5; 48 | repeated Category subCategories = 8; 49 | } 50 | 51 | message CommentsRequest { 52 | optional string appId = 1; 53 | optional int32 startIndex = 2; 54 | optional int32 entriesCount = 3; 55 | } 56 | 57 | message CommentsResponse { 58 | repeated Comment comments = 1; 59 | optional int32 entriesCount = 2; 60 | } 61 | 62 | message App { 63 | optional string id = 1; 64 | optional string title = 2; 65 | 66 | optional AppType appType = 3 [default = NONE]; 67 | 68 | optional string creator = 4; 69 | optional string version = 5; 70 | optional string price = 6; 71 | optional string rating = 7; 72 | optional int32 ratingsCount = 8; 73 | 74 | optional group ExtendedInfo = 12 { 75 | optional string description = 13; 76 | optional int32 downloadsCount = 14; 77 | repeated string permissionId = 15; 78 | optional int32 installSize = 16; 79 | optional string packageName = 17; 80 | optional string category = 18; 81 | optional string contactEmail = 20; 82 | optional string downloadsCountText = 23; 83 | optional string contactPhone = 26; 84 | optional string contactWebsite = 27; 85 | 86 | // V2 87 | optional int32 screenshotsCount = 30; 88 | optional string promoText = 31; 89 | 90 | // V3 91 | optional string recentChanges = 38; 92 | optional string promotionalVideo = 43; 93 | } 94 | 95 | optional string creatorId = 22; 96 | optional string packageName = 24; 97 | optional int32 versionCode = 25; 98 | //V2 99 | optional string priceCurrency = 32; 100 | optional int32 priceMicros = 33; 101 | } 102 | 103 | message Comment { 104 | optional string text = 1; 105 | optional int32 rating = 2; 106 | optional string authorName = 3; 107 | optional uint64 creationTime = 4; 108 | optional string authorId = 5; 109 | } 110 | 111 | message CategoriesRequest { 112 | } 113 | 114 | message CategoriesResponse { 115 | repeated Category categories = 1; 116 | } 117 | 118 | message 
SubCategoriesRequest { 119 | optional AppType appType = 1; 120 | } 121 | 122 | message SubCategoriesResponse { 123 | repeated Category category = 1; 124 | optional string subCategoryDisplay = 2; 125 | optional int32 subCategoryId = 3; 126 | } 127 | 128 | // operator : http://www.2030.tk/wiki/Android_market_switch 129 | // operatorNumeric : see http://en.wikipedia.org/wiki/Mobile_Network_Code 130 | message RequestContext { 131 | required string authSubToken = 1; // authsub token for service 'android' 132 | required int32 unknown1 = 2; // always 0 133 | required int32 version = 3; // always 1002 134 | required string androidId = 4; // android id converted to hexadecimal 135 | optional string deviceAndSdkVersion = 5; // ro.product.device ':' ro.build.version.sdk 136 | optional string userLanguage = 6; // ro.product.locale.language 137 | optional string userCountry = 7; // ro.product.locale.region 138 | optional string operatorAlpha = 8; // gsm.operator.alpha 139 | optional string simOperatorAlpha = 9; // gsm.sim.operator.alpha 140 | optional string operatorNumeric = 10; // gsm.operator.numeric 141 | optional string simOperatorNumeric = 11; // sim.gsm.operator.numeric 142 | } 143 | 144 | message GetImageRequest { 145 | optional string appId = 1; 146 | enum AppImageUsage { 147 | ICON = 0; 148 | SCREENSHOT = 1; 149 | SCREENSHOT_THUMBNAIL = 2; 150 | PROMO_BADGE = 3; 151 | BILING_ICON = 4; 152 | } 153 | optional AppImageUsage imageUsage = 3; 154 | optional string imageId = 4; // 0 or 1 155 | } 156 | 157 | message GetImageResponse { 158 | optional bytes imageData = 1; 159 | } 160 | 161 | message Request { 162 | optional RequestContext context = 1; 163 | repeated group RequestGroup = 2 { 164 | optional AppsRequest appsRequest = 4; 165 | optional CommentsRequest commentsRequest = 5; 166 | optional GetImageRequest imageRequest = 11; 167 | optional SubCategoriesRequest subCategoriesRequest = 14; 168 | optional CategoriesRequest categoriesRequest = 21; 169 | } 170 | } 171 | 172 
| message ResponseContext { 173 | optional int32 result = 1; 174 | optional int32 unknown1 = 2; 175 | optional string unknown2 = 3; 176 | optional int32 unknown3 = 4; 177 | } 178 | 179 | message Response { 180 | repeated group ResponseGroup = 1 { 181 | optional ResponseContext context = 2; 182 | optional AppsResponse appsResponse = 3; 183 | optional CommentsResponse commentsResponse = 4; 184 | optional GetImageResponse imageResponse = 10; 185 | optional CategoriesResponse categoriesResponse = 20; 186 | optional SubCategoriesResponse subCategoriesResponse = 13; 187 | } 188 | } -------------------------------------------------------------------------------- /src/proto/protoc-gen-php/protocolbuffers.inc.php: -------------------------------------------------------------------------------- 1 | read($fp, $limit); 26 | if (isset($str)) 27 | fclose($fp); 28 | } 29 | } 30 | } 31 | 32 | /** 33 | * Class to aid in the parsing and creating of Protocol Buffer Messages 34 | * This class should be included by the developer before they use a 35 | * generated protobuf class. 36 | * 37 | * @author Andrew Brampton 38 | * 39 | */ 40 | class Protobuf { 41 | 42 | const TYPE_DOUBLE = 1; // double, exactly eight bytes on the wire. 43 | const TYPE_FLOAT = 2; // float, exactly four bytes on the wire. 44 | const TYPE_INT64 = 3; // int64, varint on the wire. Negative numbers 45 | // take 10 bytes. Use TYPE_SINT64 if negative 46 | // values are likely. 47 | const TYPE_UINT64 = 4; // uint64, varint on the wire. 48 | const TYPE_INT32 = 5; // int32, varint on the wire. Negative numbers 49 | // take 10 bytes. Use TYPE_SINT32 if negative 50 | // values are likely. 51 | const TYPE_FIXED64 = 6; // uint64, exactly eight bytes on the wire. 52 | const TYPE_FIXED32 = 7; // uint32, exactly four bytes on the wire. 53 | const TYPE_BOOL = 8; // bool, varint on the wire. 54 | const TYPE_STRING = 9; // UTF-8 text. 55 | const TYPE_GROUP = 10; // Tag-delimited message. Deprecated. 
56 | const TYPE_MESSAGE = 11; // Length-delimited message. 57 | 58 | const TYPE_BYTES = 12; // Arbitrary byte array. 59 | const TYPE_UINT32 = 13; // uint32, varint on the wire 60 | const TYPE_ENUM = 14; // Enum, varint on the wire 61 | const TYPE_SFIXED32 = 15; // int32, exactly four bytes on the wire 62 | const TYPE_SFIXED64 = 16; // int64, exactly eight bytes on the wire 63 | const TYPE_SINT32 = 17; // int32, ZigZag-encoded varint on the wire 64 | const TYPE_SINT64 = 18; // int64, ZigZag-encoded varint on the wire 65 | 66 | /** 67 | * Returns a string representing this wiretype 68 | */ 69 | public static function get_wiretype($wire_type) { 70 | switch ($wire_type) { 71 | case 0: return 'varint'; 72 | case 1: return '64-bit'; 73 | case 2: return 'length-delimited'; 74 | case 3: return 'group start'; 75 | case 4: return 'group end'; 76 | case 5: return '32-bit'; 77 | default: return 'unknown'; 78 | } 79 | } 80 | 81 | /** 82 | * Returns how big (in bytes) this number would be as a varint: 1 to 9 for values below 2^63, 10 for anything larger 83 | */ 84 | public static function size_varint($i) { 85 | /* $len = 0; 86 | do { 87 | $i = $i >> 7; 88 | $len++; 89 | } while ($i != 0); 90 | return $len; 91 | */ 92 | // TODO Change to a binary search 93 | if ($i < 0x80) 94 | return 1; 95 | if ($i < 0x4000) 96 | return 2; 97 | if ($i < 0x200000) 98 | return 3; 99 | if ($i < 0x10000000) 100 | return 4; 101 | if ($i < 0x800000000) 102 | return 5; 103 | if ($i < 0x40000000000) 104 | return 6; 105 | if ($i < 0x2000000000000) 106 | return 7; 107 | if ($i < 0x100000000000000) 108 | return 8; 109 | if (is_int($i) || $i < 0x8000000000000000) // an int that got this far is below 2^63; is_int() avoids relying on an int-vs-float comparison near PHP_INT_MAX 110 | return 9; 111 | return 10; } // previously fell off the end (null); only floats of 2^63 or more reach here and they need the 10-byte maximum 112 | 113 | /** 114 | * Tries to read a varint from $fp. 115 | * @returns the Varint from the stream, or false if the stream has reached eof. 
116 | */ 117 | public static function read_varint($fp, &$limit = null) { 118 | $value = ''; 119 | $len = 0; 120 | do { // Keep reading until we find the last byte 121 | $b = fread($fp, 1); 122 | if ($b === false) 123 | throw new Exception("read_varint(): Error reading byte"); 124 | if (strlen($b) < 1) 125 | break; 126 | 127 | $value .= $b; 128 | $len++; 129 | } while ($b >= "\x80"); 130 | 131 | if ($len == 0) { 132 | if (feof($fp)) 133 | return false; 134 | throw new Exception("read_varint(): Error reading byte"); 135 | } 136 | 137 | if ($limit !== null) 138 | $limit -= $len; 139 | 140 | $i = 0; 141 | $shift = 0; 142 | for ($j = 0; $j < $len; $j++) { 143 | $i |= ((ord($value[$j]) & 0x7F) << $shift); 144 | $shift += 7; 145 | } 146 | 147 | return $i; 148 | } 149 | // Unimplemented decoders: each stub throws an Exception (PHP can only throw objects, so the former bare-string throw was itself a fatal error). 150 | public static function read_double($fp){throw new Exception("I've not coded it yet Exception");} 151 | public static function read_float ($fp){throw new Exception("I've not coded it yet Exception");} 152 | public static function read_uint64($fp){throw new Exception("I've not coded it yet Exception");} 153 | public static function read_int64 ($fp){throw new Exception("I've not coded it yet Exception");} 154 | public static function read_uint32($fp){throw new Exception("I've not coded it yet Exception");} 155 | public static function read_int32 ($fp){throw new Exception("I've not coded it yet Exception");} 156 | public static function read_zint32($fp){throw new Exception("I've not coded it yet Exception");} 157 | public static function read_zint64($fp){throw new Exception("I've not coded it yet Exception");} 158 | 159 | /** 160 | * Writes a varint to $fp 161 | * returns the number of bytes written 162 | * @param $fp 163 | * @param $i The int to encode 164 | * @return The number of bytes written 165 | */ 166 | public static function write_varint($fp, $i) { 167 | $len = 0; 168 | do { 169 | $v = $i & 0x7F; 170 | $i = $i >> 7; 171 | 172 | if ($i != 0) 173 | $v |= 0x80; 174 | 175 | if (fwrite($fp, chr($v)) !== 1) 176 | throw new Exception("write_varint(): Error writing byte"); 177 | 178 | $len++; 179 | } while ($i != 0); 180 
| 181 | return $len; 182 | } 183 | // Unimplemented encoders: each stub throws an Exception (PHP can only throw objects, so the former bare-string throw was itself a fatal error). 184 | public static function write_double($fp, $d){throw new Exception("I've not coded it yet Exception");} 185 | public static function write_float ($fp, $f){throw new Exception("I've not coded it yet Exception");} 186 | public static function write_uint64($fp, $i){throw new Exception("I've not coded it yet Exception");} 187 | public static function write_int64 ($fp, $i){throw new Exception("I've not coded it yet Exception");} 188 | public static function write_uint32($fp, $i){throw new Exception("I've not coded it yet Exception");} 189 | public static function write_int32 ($fp, $i){throw new Exception("I've not coded it yet Exception");} 190 | public static function write_zint32($fp, $i){throw new Exception("I've not coded it yet Exception");} 191 | public static function write_zint64($fp, $i){throw new Exception("I've not coded it yet Exception");} 192 | 193 | /** 194 | * Seek past a varint 195 | */ 196 | public static function skip_varint($fp) { 197 | $len = 0; 198 | do { // Keep reading until we find the last byte 199 | $b = fread($fp, 1); 200 | if ($b === false) 201 | throw new Exception("skip(varint): Error reading byte"); 202 | $len++; 203 | } while ($b >= "\x80"); 204 | return $len; 205 | } 206 | 207 | /** 208 | * Seek past the current field 209 | */ 210 | public static function skip_field($fp, $wire_type) { 211 | switch ($wire_type) { 212 | case 0: // varint 213 | return Protobuf::skip_varint($fp); 214 | 215 | case 1: // 64bit 216 | if (fseek($fp, 8, SEEK_CUR) === -1) 217 | throw new Exception('skip(' . ProtoBuf::get_wiretype(1) . '): Error seeking'); 218 | return 8; 219 | 220 | case 2: // length delimited 221 | $varlen = 0; 222 | $len = Protobuf::read_varint($fp, $varlen); 223 | if (fseek($fp, $len, SEEK_CUR) === -1) 224 | throw new Exception('skip(' . ProtoBuf::get_wiretype(2) . '): Error seeking'); 225 | return $len - $varlen; 226 | 227 | //case 3: // Start group TODO we must keep looping until we find the closing end grou 228 | 229 | //case 4: // End group - We should never skip a end group! 
230 | // return 0; // Do nothing 231 | 232 | case 5: // 32bit 233 | if (fseek($fp, 4, SEEK_CUR) === -1) 234 | throw new Exception('skip('. ProtoBuf::get_wiretype(5) . '): Error seeking'); 235 | return 4; 236 | 237 | default: 238 | throw new Exception('skip('. ProtoBuf::get_wiretype($wire_type) . '): Unsupported wire_type'); 239 | } 240 | } 241 | 242 | /** 243 | * Read a unknown field from the stream and return its raw bytes 244 | */ 245 | public static function read_field($fp, $wire_type, &$limit = null) { 246 | switch ($wire_type) { 247 | case 0: // varint 248 | return Protobuf::read_varint($fp, $limit); 249 | 250 | case 1: // 64bit 251 | $limit -= 8; 252 | return fread($fp, 8); 253 | 254 | case 2: // length delimited 255 | $len = Protobuf::read_varint($fp, $limit); 256 | $limit -= $len; 257 | return fread($fp, $len); 258 | 259 | //case 3: // Start group TODO we must keep looping until we find the closing end grou 260 | 261 | //case 4: // End group - We should never skip a end group! 262 | // return 0; // Do nothing 263 | 264 | case 5: // 32bit 265 | $limit -= 4; 266 | return fread($fp, 4); 267 | 268 | default: 269 | throw new Exception('read_unknown('. ProtoBuf::get_wiretype($wire_type) . '): Unsupported wire_type'); 270 | } 271 | } 272 | 273 | /** 274 | * Used to aid in pretty printing of Protobuf objects 275 | */ 276 | private static $print_depth = 0; 277 | private static $indent_char = "\t"; 278 | private static $print_limit = 50; 279 | 280 | public static function toString($key, $value) { 281 | if (is_null($value)) 282 | return; 283 | $ret = str_repeat(self::$indent_char, self::$print_depth) . "$key=>"; 284 | if (is_array($value)) { 285 | $ret .= "array(\n"; 286 | self::$print_depth++; 287 | foreach($value as $i => $v) 288 | $ret .= self::toString("[$i]", $v); 289 | self::$print_depth--; 290 | $ret .= str_repeat(self::$indent_char, self::$print_depth) . 
")\n"; 291 | } else { 292 | if (is_object($value)) { 293 | self::$print_depth++; 294 | $ret .= get_class($value) . "(\n"; 295 | $ret .= $value->__toString() . "\n"; 296 | self::$print_depth--; 297 | $ret .= str_repeat(self::$indent_char, self::$print_depth) . ")\n"; 298 | } elseif (is_string($value)) { 299 | $safevalue = addcslashes($value, "\0..\37\177..\377"); 300 | if (strlen($safevalue) > self::$print_limit) { 301 | $safevalue = substr($safevalue, 0, self::$print_limit) . '...'; 302 | } 303 | 304 | $ret .= '"' . $safevalue . '" (' . strlen($value) . " bytes)\n"; 305 | 306 | } elseif (is_bool($value)) { 307 | $ret .= ($value ? 'true' : 'false') . "\n"; 308 | } else { 309 | $ret .= (string)$value . "\n"; 310 | } 311 | } 312 | return $ret; 313 | } 314 | } 315 | ?> 316 | -------------------------------------------------------------------------------- /src/proto/protocolbuffers.inc.php: -------------------------------------------------------------------------------- 1 | read($fp, $limit); 26 | if (isset($str)) 27 | fclose($fp); 28 | } 29 | } 30 | } 31 | 32 | /** 33 | * Class to aid in the parsing and creating of Protocol Buffer Messages 34 | * This class should be included by the developer before they use a 35 | * generated protobuf class. 36 | * 37 | * @author Andrew Brampton 38 | * 39 | */ 40 | class Protobuf { 41 | 42 | const TYPE_DOUBLE = 1; // double, exactly eight bytes on the wire. 43 | const TYPE_FLOAT = 2; // float, exactly four bytes on the wire. 44 | const TYPE_INT64 = 3; // int64, varint on the wire. Negative numbers 45 | // take 10 bytes. Use TYPE_SINT64 if negative 46 | // values are likely. 47 | const TYPE_UINT64 = 4; // uint64, varint on the wire. 48 | const TYPE_INT32 = 5; // int32, varint on the wire. Negative numbers 49 | // take 10 bytes. Use TYPE_SINT32 if negative 50 | // values are likely. 51 | const TYPE_FIXED64 = 6; // uint64, exactly eight bytes on the wire. 52 | const TYPE_FIXED32 = 7; // uint32, exactly four bytes on the wire. 
53 | const TYPE_BOOL = 8; // bool, varint on the wire. 54 | const TYPE_STRING = 9; // UTF-8 text. 55 | const TYPE_GROUP = 10; // Tag-delimited message. Deprecated. 56 | const TYPE_MESSAGE = 11; // Length-delimited message. 57 | 58 | const TYPE_BYTES = 12; // Arbitrary byte array. 59 | const TYPE_UINT32 = 13; // uint32, varint on the wire 60 | const TYPE_ENUM = 14; // Enum, varint on the wire 61 | const TYPE_SFIXED32 = 15; // int32, exactly four bytes on the wire 62 | const TYPE_SFIXED64 = 16; // int64, exactly eight bytes on the wire 63 | const TYPE_SINT32 = 17; // int32, ZigZag-encoded varint on the wire 64 | const TYPE_SINT64 = 18; // int64, ZigZag-encoded varint on the wire 65 | 66 | /** 67 | * Returns a string representing this wiretype 68 | */ 69 | public static function get_wiretype($wire_type) { 70 | switch ($wire_type) { 71 | case 0: return 'varint'; 72 | case 1: return '64-bit'; 73 | case 2: return 'length-delimited'; 74 | case 3: return 'group start'; 75 | case 4: return 'group end'; 76 | case 5: return '32-bit'; 77 | default: return 'unknown'; 78 | } 79 | } 80 | 81 | /** 82 | * Returns how big (in bytes) this number would be as a varint: 1 to 9 for values below 2^63, 10 for anything larger 83 | */ 84 | public static function size_varint($i) { 85 | /* $len = 0; 86 | do { 87 | $i = $i >> 7; 88 | $len++; 89 | } while ($i != 0); 90 | return $len; 91 | */ 92 | // TODO Change to a binary search 93 | if ($i < 0x80) 94 | return 1; 95 | if ($i < 0x4000) 96 | return 2; 97 | if ($i < 0x200000) 98 | return 3; 99 | if ($i < 0x10000000) 100 | return 4; 101 | if ($i < 0x800000000) 102 | return 5; 103 | if ($i < 0x40000000000) 104 | return 6; 105 | if ($i < 0x2000000000000) 106 | return 7; 107 | if ($i < 0x100000000000000) 108 | return 8; 109 | if (is_int($i) || $i < 0x8000000000000000) // an int that got this far is below 2^63; is_int() avoids relying on an int-vs-float comparison near PHP_INT_MAX 110 | return 9; 111 | return 10; } // previously fell off the end (null); only floats of 2^63 or more reach here and they need the 10-byte maximum 112 | 113 | /** 114 | * Tries to read a varint from $fp. 115 | * @returns the Varint from the stream, or false if the stream has reached eof. 
116 | */ 117 | public static function read_varint($fp, &$limit = null) { 118 | $value = ''; 119 | $len = 0; 120 | do { // Keep reading until we find the last byte 121 | $b = fread($fp, 1); 122 | if ($b === false) 123 | throw new Exception("read_varint(): Error reading byte"); 124 | if (strlen($b) < 1) 125 | break; 126 | 127 | $value .= $b; 128 | $len++; 129 | } while ($b >= "\x80"); 130 | 131 | if ($len == 0) { 132 | if (feof($fp)) 133 | return false; 134 | throw new Exception("read_varint(): Error reading byte"); 135 | } 136 | 137 | if ($limit !== null) 138 | $limit -= $len; 139 | 140 | $i = 0.0; 141 | for ($j = $len-1; $j >= 0; $j--)$i = $i * 128 + (ord($value[$j]) & 0x7F); 142 | 143 | return $i; 144 | } 145 | // Unimplemented decoders: each stub throws an Exception (PHP can only throw objects, so the former bare-string throw was itself a fatal error). 146 | public static function read_double($fp){throw new Exception("I've not coded it yet Exception");} 147 | public static function read_float ($fp){throw new Exception("I've not coded it yet Exception");} 148 | public static function read_uint64($fp){throw new Exception("I've not coded it yet Exception");} 149 | public static function read_int64 ($fp){throw new Exception("I've not coded it yet Exception");} 150 | public static function read_uint32($fp){throw new Exception("I've not coded it yet Exception");} 151 | public static function read_int32 ($fp){throw new Exception("I've not coded it yet Exception");} 152 | public static function read_zint32($fp){throw new Exception("I've not coded it yet Exception");} 153 | public static function read_zint64($fp){throw new Exception("I've not coded it yet Exception");} 154 | 155 | /** 156 | * Writes a varint to $fp 157 | * returns the number of bytes written 158 | * @param $fp 159 | * @param $i The int to encode 160 | * @return The number of bytes written 161 | */ 162 | public static function write_varint($fp, $i) { 163 | $len = 0; 164 | do { 165 | $v = $i & 0x7F; 166 | $i = $i >> 7; 167 | 168 | if ($i != 0) 169 | $v |= 0x80; 170 | 171 | if (fwrite($fp, chr($v)) !== 1) 172 | throw new Exception("write_varint(): Error writing byte"); 173 | 174 | $len++; 175 | } while ($i != 0); 176 | 177 | return $len; 178 | } 179 | 180 | public
static function write_double($fp, $d){throw "I've not coded it yet Exception";} 181 | public static function write_float ($fp, $f){throw "I've not coded it yet Exception";} 182 | public static function write_uint64($fp, $i){throw "I've not coded it yet Exception";} 183 | public static function write_int64 ($fp, $i){throw "I've not coded it yet Exception";} 184 | public static function write_uint32($fp, $i){throw "I've not coded it yet Exception";} 185 | public static function write_int32 ($fp, $i){throw "I've not coded it yet Exception";} 186 | public static function write_zint32($fp, $i){throw "I've not coded it yet Exception";} 187 | public static function write_zint64($fp, $i){throw "I've not coded it yet Exception";} 188 | 189 | /** 190 | * Seek past a varint 191 | */ 192 | public static function skip_varint($fp) { 193 | $len = 0; 194 | do { // Keep reading until we find the last byte 195 | $b = fread($fp, 1); 196 | if ($b === false) 197 | throw new Exception("skip(varint): Error reading byte"); 198 | $len++; 199 | } while ($b >= "\x80"); 200 | return $len; 201 | } 202 | 203 | /** 204 | * Seek past the current field 205 | */ 206 | public static function skip_field($fp, $wire_type) { 207 | switch ($wire_type) { 208 | case 0: // varint 209 | return Protobuf::skip_varint($fp); 210 | 211 | case 1: // 64bit 212 | if (fseek($fp, 8, SEEK_CUR) === -1) 213 | throw new Exception('skip(' . ProtoBuf::get_wiretype(1) . '): Error seeking'); 214 | return 8; 215 | 216 | case 2: // length delimited 217 | $varlen = 0; 218 | $len = Protobuf::read_varint($fp, $varlen); 219 | if (fseek($fp, $len, SEEK_CUR) === -1) 220 | throw new Exception('skip(' . ProtoBuf::get_wiretype(2) . '): Error seeking'); 221 | return $len - $varlen; 222 | 223 | //case 3: // Start group TODO we must keep looping until we find the closing end grou 224 | 225 | //case 4: // End group - We should never skip a end group! 
226 | // return 0; // Do nothing 227 | 228 | case 5: // 32bit 229 | if (fseek($fp, 4, SEEK_CUR) === -1) 230 | throw new Exception('skip('. ProtoBuf::get_wiretype(5) . '): Error seeking'); 231 | return 4; 232 | 233 | default: 234 | throw new Exception('skip('. ProtoBuf::get_wiretype($wire_type) . '): Unsupported wire_type'); 235 | } 236 | } 237 | 238 | /** 239 | * Read a unknown field from the stream and return its raw bytes 240 | */ 241 | public static function read_field($fp, $wire_type, &$limit = null) { 242 | switch ($wire_type) { 243 | case 0: // varint 244 | return Protobuf::read_varint($fp, $limit); 245 | 246 | case 1: // 64bit 247 | $limit -= 8; 248 | return fread($fp, 8); 249 | 250 | case 2: // length delimited 251 | $len = Protobuf::read_varint($fp, $limit); 252 | $limit -= $len; 253 | if ($len <= 0) { 254 | return false; 255 | } 256 | return fread($fp, $len); 257 | //case 3: // Start group TODO we must keep looping until we find the closing end grou 258 | case (3||4||7): 259 | return null; 260 | 261 | //case 4: // End group - We should never skip a end group! 262 | // return 0; // Do nothing 263 | 264 | case 5: // 32bit 265 | $limit -= 4; 266 | return fread($fp, 4); 267 | 268 | default: 269 | throw new Exception('read_unknown('. ProtoBuf::get_wiretype($wire_type) . '): Unsupported wire_type'); 270 | } 271 | } 272 | 273 | /** 274 | * Used to aid in pretty printing of Protobuf objects 275 | */ 276 | private static $print_depth = 0; 277 | private static $indent_char = "\t"; 278 | private static $print_limit = 50; 279 | 280 | public static function toString($key, $value) { 281 | if (is_null($value)) 282 | return; 283 | $ret = str_repeat(self::$indent_char, self::$print_depth) . "$key=>"; 284 | if (is_array($value)) { 285 | $ret .= "array(\n"; 286 | self::$print_depth++; 287 | foreach($value as $i => $v) 288 | $ret .= self::toString("[$i]", $v); 289 | self::$print_depth--; 290 | $ret .= str_repeat(self::$indent_char, self::$print_depth) . 
")\n"; 291 | } else { 292 | if (is_object($value)) { 293 | self::$print_depth++; 294 | $ret .= get_class($value) . "(\n"; 295 | $ret .= $value->__toString() . "\n"; 296 | self::$print_depth--; 297 | $ret .= str_repeat(self::$indent_char, self::$print_depth) . ")\n"; 298 | } elseif (is_string($value)) { 299 | $safevalue = addcslashes($value, "\0..\37\177..\377"); 300 | if (strlen($safevalue) > self::$print_limit) { 301 | $safevalue = substr($safevalue, 0, self::$print_limit) . '...'; 302 | } 303 | 304 | $ret .= '"' . $safevalue . '" (' . strlen($value) . " bytes)\n"; 305 | 306 | } elseif (is_bool($value)) { 307 | $ret .= ($value ? 'true' : 'false') . "\n"; 308 | } else { 309 | $ret .= (string)$value . "\n"; 310 | } 311 | } 312 | return $ret; 313 | } 314 | } 315 | ?> 316 | -------------------------------------------------------------------------------- /src/MarketSession.php: -------------------------------------------------------------------------------- 1 | 10 | * 11 | * 12 | */ 13 | class MarketSession { 14 | public $context = NULL; 15 | public $authSubToken = ""; 16 | 17 | /** 18 | * 19 | */ 20 | function __construct () { 21 | $this->context = new RequestContext(); 22 | $this->context->setUnknown1(0); 23 | $this->context->setVersion(8013013); 24 | $this->context->setDeviceAndSdkVersion("crespo:8"); 25 | 26 | $this->context->setUserLanguage("en"); 27 | $this->context->setUserCountry("US"); 28 | 29 | $this->setOperatorTmobile(); 30 | } 31 | 32 | function setOperatorTmobile() { 33 | $this->setOperator("T-Mobile", "310260"); 34 | } 35 | 36 | public function setOperatorSFR() { 37 | $this->setOperator("F SFR", "20810"); 38 | } 39 | 40 | public function setOperatorO2() { 41 | $this->setOperator("o2 - de", "26207"); 42 | } 43 | 44 | public function setOperatorSimyo() { 45 | $this->setOperator("E-Plus", "simyo", "26203", "26203"); 46 | } 47 | 48 | public function setOperatorSunrise() { 49 | $this->setOperator("sunrise", "22802"); 50 | } 51 | 52 | public function 
setOperator($alpha, $simAlpha, $numeric = "", $simNumeric = "") { 53 | if (!$numeric && !$simNumeric) { 54 | $this->context->setOperatorAlpha($alpha); 55 | $this->context->setSimOperatorAlpha($alpha); 56 | 57 | $this->context->setOperatorNumeric($simAlpha); 58 | $this->context->setSimOperatorNumeric($simAlpha); 59 | 60 | } else { 61 | $this->context->setOperatorAlpha($alpha); 62 | $this->context->setSimOperatorAlpha($simAlpha); 63 | 64 | $this->context->setOperatorNumeric($numeric); 65 | $this->context->setSimOperatorNumeric($simNumeric); 66 | } 67 | } 68 | 69 | /** 70 | * 71 | * @param unknown_type $email 72 | * @param unknown_type $password 73 | */ 74 | public function login($email, $password) { 75 | $postFields = array( 76 | "Email" => $email, 77 | "Passwd" => $password, 78 | "service" => "android", 79 | "accountType" => "GOOGLE", 80 | ); 81 | $post = ""; 82 | foreach ($postFields as $field => $val) { 83 | $post .= $field."=".urlencode($val)."&"; 84 | } 85 | 86 | // create a new cURL resource 87 | $ch = curl_init(); 88 | curl_setopt($ch, CURLOPT_URL, "https://www.google.com/accounts/ClientLogin"); 89 | curl_setopt($ch, CURLOPT_HEADER, 0); 90 | curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); 91 | curl_setopt($ch, CURLOPT_POST, 1); 92 | curl_setopt($ch, CURLOPT_POSTFIELDS, $post); 93 | @curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); 94 | curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0); 95 | 96 | 97 | $headers = array( 98 | //"User-Agent: Android-Market/2 (sapphire PLAT-RC33); gzip", 99 | //"Content-Type: application/x-www-form-urlencoded", 100 | //"Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7", 101 | 102 | //New Headers - Old Ones Commented Out for Refernce 103 | "User-Agent: Android-Finsky/3.7.13 (api=3,versionCode=8013013,sdk=15,device=crespo,hardware=herring,product=soju)", 104 | "Content-Type: application/x-www-form-urlencoded", 105 | "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7", 106 | ); 107 | curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); 108 | 109 | $ret = 
curl_exec($ch); 110 | curl_close($ch); 111 | 112 | $aRet = explode("\n", $ret); 113 | foreach ($aRet as $line) { 114 | if (substr($line,0,5) == "Auth=") { 115 | $this->authSubToken = substr($line,5); 116 | $this->context->setAuthSubToken($this->authSubToken); 117 | return $this->authSubToken; 118 | } 119 | } 120 | 121 | return false; 122 | } 123 | 124 | /** 125 | * 126 | * @param integer $id Android Device ID 127 | */ 128 | public function setAndroidId($deviceId) { 129 | $this->context->setAndroidId($deviceId); 130 | } 131 | 132 | /** 133 | * Validate all settings needed to make a request 134 | */ 135 | public function validate() { 136 | return true; 137 | 138 | //Check login 139 | /* 140 | if (!$this->context->hasAuthSubToken) return false; 141 | 142 | //Check androidId 143 | if (!$this->context->hasAndroidId) return false; 144 | 145 | return true; 146 | */ 147 | } 148 | 149 | /** 150 | * 151 | * @param unknown_type $requestGroup 152 | */ 153 | public function execute($requestGroup) { 154 | $request = new Request(); 155 | $request->setContext($this->context); 156 | $request->addRequestGroup($requestGroup); 157 | 158 | return $this->executeProtobuf($request); 159 | } 160 | 161 | /** 162 | * 163 | * @param Request $request 164 | * @return Response 165 | */ 166 | public function executeProtobuf($request) { 167 | if (!$this->validate()) { 168 | throw new Exception("Missing authentication or Android ID"); 169 | } 170 | 171 | $http = $this->executeRawHttpQuery($this->protobufToStr($request)); 172 | 173 | $fp = fopen("php://memory", "w+b"); 174 | fwrite($fp, $http, strlen($http)); 175 | rewind($fp); 176 | $response = new Response(); 177 | $response->read($fp); 178 | 179 | return $response; 180 | } 181 | 182 | private function protobufToStr($protoBuf) { 183 | $fp = fopen("php://memory", "w+b"); 184 | $protoBuf->write($fp); 185 | 186 | rewind($fp); 187 | $str = ''; 188 | while (!feof($fp)) { 189 | $str .= fread($fp, 8192); 190 | } 191 | 192 | return $str; 193 | } 194 | 195 
/**
 * POST a serialized request to the Market API endpoint and return the
 * decompressed response body.
 *
 * @param string $request Serialized protobuf request
 * @return string|false|null The decompressed body; null or false when the
 *                           response could not be decoded as gzip
 * @throws Exception if the transfer fails or the HTTP status is not 200
 */
private function executeRawHttpQuery($request) {
    $post = "version=2&request=".base64_encode($request);

    $headers = array(
        "Content-Type: application/x-www-form-urlencoded",
        "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7",
        'Content-Length: '.strlen($post)
    );

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, "https://android.clients.google.com/market/api/ApiRequest");
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // The request carries the auth token, so the server certificate must be
    // verified (it was previously disabled).
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 1);
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_COOKIE, "ANDROID=".$this->authSubToken);
    //curl_setopt($ch, CURLOPT_USERAGENT, "Android-Market/2 (sapphire PLAT-RC33); gzip");
    curl_setopt($ch, CURLOPT_USERAGENT, "Android-Finsky/3.7.13 (api=3,versionCode=8013013,sdk=15,device=crespo,hardware=herring,product=soju)");
    curl_setopt($ch, CURLOPT_POSTFIELDS, $post);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

    $ret = curl_exec($ch);
    if ($ret === false) {
        // Transport-level failure: report the cURL error rather than a
        // misleading "code 0".
        $curlError = curl_error($ch);
        curl_close($ch);
        throw new Exception("HTTP request failed: $curlError");
    }

    $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch); // close before throwing so the handle is never leaked

    if ($http_code != 200) {
        throw new Exception("HTTP request returned code $http_code");
    }

    return $this->gzdecode($ret);
}

/**
 * Decode a GZIP (RFC 1952) member.
 *
 * Borrowed from http://php.net/manual/en/function.gzdecode.php until a better solution is found
 *
 * Written by katzlbtjunk at hotmail dot com
 *
 * @param string   $data      Complete gzip stream
 * @param string   &$filename Receives the original file name from the header, if present
 * @param string   &$error    Receives a description of the failure, where one is set
 * @param int|null $maxlength Maximum number of bytes to inflate, or null for no limit
 * @return string|false|null The inflated data; null if $data is not gzip
 *                           (or has no body), false if it is malformed
 */
private function gzdecode($data,&$filename='',&$error='',$maxlength=null){
    $len = strlen($data);
    if ($len < 18 || strcmp(substr($data,0,2),"\x1f\x8b")) {
        $error = "Not in GZIP format.";
        return null; // Not GZIP format (See RFC 1952)
    }

    // Fixed 10-byte header: ID1 ID2 CM FLG MTIME(4) XFL OS.
    // Only CM and FLG are needed here.
    $method = ord(substr($data,2,1)); // Compression method
    $flags  = ord(substr($data,3,1)); // Flags

    // "!=" binds tighter than "&", so the parentheses are required. Without
    // them the test was "$flags & 1", which rejected FTEXT and accepted
    // reserved bits.
    if (($flags & 31) != $flags) {
        $error = "Reserved bits not allowed.";
        return null;
    }

    $headerlen = 10;

    if ($flags & 4) {
        // FEXTRA: 2-byte little-endian length, then that many bytes. The
        // length sits right after the fixed header (it was previously read
        // from offset 8, i.e. the XFL/OS bytes).
        if ($len - $headerlen - 2 < 8) {
            return false; // invalid
        }
        $extralen = unpack("v",substr($data,$headerlen,2));
        $extralen = $extralen[1];
        if ($len - $headerlen - 2 - $extralen < 8) {
            return false; // invalid
        }
        $headerlen += 2 + $extralen;
    }

    $filename = "";
    if ($flags & 8) {
        // FNAME: C-style (NUL-terminated) string
        if ($len - $headerlen - 1 < 8) {
            return false; // invalid
        }
        $filenamelen = strpos(substr($data,$headerlen),chr(0));
        if ($filenamelen === false || $len - $headerlen - $filenamelen - 1 < 8) {
            return false; // invalid
        }
        $filename = substr($data,$headerlen,$filenamelen);
        $headerlen += $filenamelen + 1;
    }

    if ($flags & 16) {
        // FCOMMENT: C-style string; its content is not used, only skipped
        if ($len - $headerlen - 1 < 8) {
            return false; // invalid
        }
        $commentlen = strpos(substr($data,$headerlen),chr(0));
        if ($commentlen === false || $len - $headerlen - $commentlen - 1 < 8) {
            return false; // Invalid header format
        }
        $headerlen += $commentlen + 1;
    }

    if ($flags & 2) {
        // FHCRC: the low 16 bits of the CRC32 of the header so far
        if ($len - $headerlen - 2 < 8) {
            return false; // invalid
        }
        $calccrc = crc32(substr($data,0,$headerlen)) & 0xffff;
        $headercrc = unpack("v", substr($data,$headerlen,2));
        $headercrc = $headercrc[1];
        if ($headercrc != $calccrc) {
            $error = "Header checksum failed.";
            return false; // Bad header CRC
        }
        $headerlen += 2;
    }

    // GZIP FOOTER: CRC32 of the uncompressed data, then its length (both little-endian)
    $datacrc = unpack("V",substr($data,-8,4));
    $datacrc = sprintf('%u',$datacrc[1] & 0xFFFFFFFF);
    $isize = unpack("V",substr($data,-4));
    $isize = $isize[1];

    // decompression:
    $bodylen = $len-$headerlen-8;
    if ($bodylen < 1) {
        // IMPLEMENTATION BUG!
        return null;
    }
    $body = substr($data,$headerlen,$bodylen);

    if ($method != 8) {
        // Deflate (8) is the only supported compression method
        $error = "Unknown compression method.";
        return false;
    }

    // gzinflate() takes an int limit; passing null is deprecated on current PHP.
    $data = ($maxlength === null) ? gzinflate($body) : gzinflate($body,$maxlength);
    if ($data === false) {
        $error = "Inflate failed.";
        return false;
    }

    // Verify CRC32 and length against the footer
    $crc = sprintf("%u",crc32($data));
    $crcOK = $crc == $datacrc;
    $lenOK = $isize == strlen($data);
    if (!$lenOK || !$crcOK) {
        $error = ( $lenOK ? '' : 'Length check FAILED. ') . ( $crcOK ? '' : 'Checksum FAILED.');
        return false;
    }
    return $data;
}
}
?>
-------------------------------------------------------------------------------- /LICENCE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it.
By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. 
If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. 
You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 
113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. 
You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 
165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. 
If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 
292 | 293 | PHP Implementation of JAVA Android API for pulling data from 294 | Android Marketplace 295 | Copyright (C) 2014 Onder Vincent Koc & Niklas Nilsson 296 | 297 | This program is free software; you can redistribute it and/or modify 298 | it under the terms of the GNU General Public License as published by 299 | the Free Software Foundation; either version 2 of the License, or 300 | (at your option) any later version. 301 | 302 | This program is distributed in the hope that it will be useful, 303 | but WITHOUT ANY WARRANTY; without even the implied warranty of 304 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 305 | GNU General Public License for more details. 306 | 307 | You should have received a copy of the GNU General Public License along 308 | with this program; if not, write to the Free Software Foundation, Inc., 309 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 310 | 311 | Also add information on how to contact you by electronic and paper mail. 312 | 313 | If the program is interactive, make it output a short notice like this 314 | when it starts in an interactive mode: 315 | 316 | Gnomovision version 69, Copyright (C) year name of author 317 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 318 | This is free software, and you are welcome to redistribute it 319 | under certain conditions; type `show c' for details. 320 | 321 | The hypothetical commands `show w' and `show c' should show the appropriate 322 | parts of the General Public License. Of course, the commands you use may 323 | be called something other than `show w' and `show c'; they could even be 324 | mouse-clicks or menu items--whatever suits your program. 325 | 326 | You should also get your employer (if you work as a programmer) or your 327 | school, if any, to sign a "copyright disclaimer" for the program, if 328 | necessary. 
Here is a sample; alter the names: 329 | 330 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 331 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 332 | 333 | {signature of Ty Coon}, 1 April 1989 334 | Ty Coon, President of Vice 335 | 336 | This General Public License does not permit incorporating your program into 337 | proprietary programs. If your program is a subroutine library, you may 338 | consider it more useful to permit linking proprietary applications with the 339 | library. If this is what you want to do, use the GNU Lesser General 340 | Public License instead of this License. 341 | -------------------------------------------------------------------------------- /src/proto/protoc-gen-php/strutil.h: -------------------------------------------------------------------------------- 1 | // Protocol Buffers - Google's data interchange format 2 | // Copyright 2008 Google Inc. All rights reserved. 3 | // http://code.google.com/p/protobuf/ 4 | // 5 | // Redistribution and use in source and binary forms, with or without 6 | // modification, are permitted provided that the following conditions are 7 | // met: 8 | // 9 | // * Redistributions of source code must retain the above copyright 10 | // notice, this list of conditions and the following disclaimer. 11 | // * Redistributions in binary form must reproduce the above 12 | // copyright notice, this list of conditions and the following disclaimer 13 | // in the documentation and/or other materials provided with the 14 | // distribution. 15 | // * Neither the name of Google Inc. nor the names of its 16 | // contributors may be used to endorse or promote products derived from 17 | // this software without specific prior written permission. 
18 | // 19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | 31 | // from google3/strings/strutil.h 32 | 33 | #ifndef GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ 34 | #define GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ 35 | 36 | #include 37 | #include 38 | #include 39 | 40 | namespace google { 41 | namespace protobuf { 42 | 43 | #ifdef _MSC_VER 44 | #define strtoll _strtoi64 45 | #define strtoull _strtoui64 46 | #elif defined(__DECCXX) && defined(__osf__) 47 | // HP C++ on Tru64 does not have strtoll, but strtol is already 64-bit. 48 | #define strtoll strtol 49 | #define strtoull strtoul 50 | #endif 51 | 52 | // ---------------------------------------------------------------------- 53 | // ascii_isalnum() 54 | // Check if an ASCII character is alphanumeric. We can't use ctype's 55 | // isalnum() because it is affected by locale. This function is applied 56 | // to identifiers in the protocol buffer language, not to natural-language 57 | // strings, so locale should not be taken into account. 58 | // ascii_isdigit() 59 | // Like above, but only accepts digits. 
// ----------------------------------------------------------------------

// True when c is an ASCII letter or an ASCII decimal digit.  Implemented with
// plain range comparisons, so the result never depends on the C locale.
inline bool ascii_isalnum(char c) {
  if (c >= '0' && c <= '9') return true;
  if (c >= 'a' && c <= 'z') return true;
  return c >= 'A' && c <= 'Z';
}

// True when c is one of the ASCII digits '0' through '9'.
inline bool ascii_isdigit(char c) {
  return c >= '0' && c <= '9';
}

// ----------------------------------------------------------------------
// HasPrefixString()
//    Reports whether "str" starts with "prefix".
// StripPrefixString()
//    Returns "str" with a leading "prefix" removed; when "str" does not
//    start with "prefix" the original string is returned unchanged.
// ----------------------------------------------------------------------
inline bool HasPrefixString(const string& str,
                            const string& prefix) {
  const string::size_type prefix_len = prefix.size();
  // A string shorter than the prefix can never start with it.
  if (str.size() < prefix_len) return false;
  return str.compare(0, prefix_len, prefix) == 0;
}

inline string StripPrefixString(const string& str, const string& prefix) {
  if (!HasPrefixString(str, prefix)) return str;
  return str.substr(prefix.size());
}

// ----------------------------------------------------------------------
// HasSuffixString()
//    Return true if str ends in suffix.
// StripSuffixString()
//    Given a string and a putative suffix, returns the string minus the
//    suffix string if the suffix matches, otherwise the original
//    string.
// ----------------------------------------------------------------------
inline bool HasSuffixString(const string& str,
                            const string& suffix) {
  // The size check must come first: it keeps the start offset passed to
  // compare() from underflowing when "suffix" is longer than "str".
  return str.size() >= suffix.size() &&
         str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
}

inline string StripSuffixString(const string& str, const string& suffix) {
  if (HasSuffixString(str, suffix)) {
    return str.substr(0, str.size() - suffix.size());
  } else {
    return str;
  }
}

// ----------------------------------------------------------------------
// StripString
//    Replaces any occurrence of the character 'remove' (or the characters
//    in 'remove') with the character 'replacewith'.
//    Good for keeping html characters or protocol characters (\t) out
//    of places where they might cause a problem.
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT void StripString(string* s, const char* remove,
                                    char replacewith);

// ----------------------------------------------------------------------
// LowerString()
// UpperString()
//    Convert the characters in "s" to lowercase or uppercase.  ASCII-only:
//    these functions intentionally ignore locale because they are applied to
//    identifiers used in the Protocol Buffer language, not to natural-language
//    strings.  The string is modified in place.
// ----------------------------------------------------------------------

inline void LowerString(string * s) {
  string::iterator end = s->end();
  for (string::iterator i = s->begin(); i != end; ++i) {
    // tolower() changes based on locale.  We don't want this!
    if ('A' <= *i && *i <= 'Z') *i += 'a' - 'A';
  }
}

inline void UpperString(string * s) {
  string::iterator end = s->end();
  for (string::iterator i = s->begin(); i != end; ++i) {
    // toupper() changes based on locale.  We don't want this!
    if ('a' <= *i && *i <= 'z') *i += 'A' - 'a';
  }
}

// ----------------------------------------------------------------------
// StringReplace()
//    Give me a string and two patterns "old" and "new", and I replace
//    the first instance of "old" in the string with "new", if it
//    exists.  RETURN a new string, regardless of whether the replacement
//    happened or not.
//    NOTE(review): the "replace_all" flag presumably extends the
//    replacement to every instance of "old" rather than only the first --
//    confirm against the definition in strutil.cc.
// ----------------------------------------------------------------------

LIBPROTOBUF_EXPORT string StringReplace(const string& s, const string& oldsub,
                                        const string& newsub, bool replace_all);

// ----------------------------------------------------------------------
// SplitStringUsing()
//    Split a string using a character delimiter. Append the components
//    to 'res'.  If there are consecutive delimiters, this function skips
//    over all of them.
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT void SplitStringUsing(const string& full, const char* delim,
                                         vector<string>* res);

// ----------------------------------------------------------------------
// JoinStrings()
//    These methods concatenate a vector of strings into a C++ string, using
//    the C-string "delim" as a separator between components. There are two
//    flavors of the function, one flavor returns the concatenated string,
//    another takes a pointer to the target string. In the latter case the
//    target string is cleared and overwritten.
178 | // ---------------------------------------------------------------------- 179 | LIBPROTOBUF_EXPORT void JoinStrings(const vector& components, 180 | const char* delim, string* result); 181 | 182 | inline string JoinStrings(const vector& components, 183 | const char* delim) { 184 | string result; 185 | JoinStrings(components, delim, &result); 186 | return result; 187 | } 188 | 189 | // ---------------------------------------------------------------------- 190 | // UnescapeCEscapeSequences() 191 | // Copies "source" to "dest", rewriting C-style escape sequences 192 | // -- '\n', '\r', '\\', '\ooo', etc -- to their ASCII 193 | // equivalents. "dest" must be sufficiently large to hold all 194 | // the characters in the rewritten string (i.e. at least as large 195 | // as strlen(source) + 1 should be safe, since the replacements 196 | // are always shorter than the original escaped sequences). It's 197 | // safe for source and dest to be the same. RETURNS the length 198 | // of dest. 199 | // 200 | // It allows hex sequences \xhh, or generally \xhhhhh with an 201 | // arbitrary number of hex digits, but all of them together must 202 | // specify a value of a single byte (e.g. \x0045 is equivalent 203 | // to \x45, and \x1234 is erroneous). 204 | // 205 | // It also allows escape sequences of the form \uhhhh (exactly four 206 | // hex digits, upper or lower case) or \Uhhhhhhhh (exactly eight 207 | // hex digits, upper or lower case) to specify a Unicode code 208 | // point. The dest array will contain the UTF8-encoded version of 209 | // that code-point (e.g., if source contains \u2019, then dest will 210 | // contain the three bytes 0xE2, 0x80, and 0x99). For the inverse 211 | // transformation, use UniLib::UTF8EscapeString 212 | // (util/utf8/unilib.h), not CEscapeString. 213 | // 214 | // Errors: In the first form of the call, errors are reported with 215 | // LOG(ERROR). 
The same is true for the second form of the call if 216 | // the pointer to the string vector is NULL; otherwise, error 217 | // messages are stored in the vector. In either case, the effect on 218 | // the dest array is not defined, but rest of the source will be 219 | // processed. 220 | // ---------------------------------------------------------------------- 221 | 222 | LIBPROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest); 223 | LIBPROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest, 224 | vector *errors); 225 | 226 | // ---------------------------------------------------------------------- 227 | // UnescapeCEscapeString() 228 | // This does the same thing as UnescapeCEscapeSequences, but creates 229 | // a new string. The caller does not need to worry about allocating 230 | // a dest buffer. This should be used for non performance critical 231 | // tasks such as printing debug messages. It is safe for src and dest 232 | // to be the same. 233 | // 234 | // The second call stores its errors in a supplied string vector. 235 | // If the string vector pointer is NULL, it reports the errors with LOG(). 236 | // 237 | // In the first and second calls, the length of dest is returned. In the 238 | // the third call, the new string is returned. 239 | // ---------------------------------------------------------------------- 240 | 241 | LIBPROTOBUF_EXPORT int UnescapeCEscapeString(const string& src, string* dest); 242 | LIBPROTOBUF_EXPORT int UnescapeCEscapeString(const string& src, string* dest, 243 | vector *errors); 244 | LIBPROTOBUF_EXPORT string UnescapeCEscapeString(const string& src); 245 | 246 | // ---------------------------------------------------------------------- 247 | // CEscapeString() 248 | // Copies 'src' to 'dest', escaping dangerous characters using 249 | // C-style escape sequences. This is very useful for preparing query 250 | // flags. 'src' and 'dest' should not overlap. 
251 | // Returns the number of bytes written to 'dest' (not including the \0) 252 | // or -1 if there was insufficient space. 253 | // 254 | // Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped. 255 | // ---------------------------------------------------------------------- 256 | LIBPROTOBUF_EXPORT int CEscapeString(const char* src, int src_len, 257 | char* dest, int dest_len); 258 | 259 | // ---------------------------------------------------------------------- 260 | // CEscape() 261 | // More convenient form of CEscapeString: returns result as a "string". 262 | // This version is slower than CEscapeString() because it does more 263 | // allocation. However, it is much more convenient to use in 264 | // non-speed-critical code like logging messages etc. 265 | // ---------------------------------------------------------------------- 266 | LIBPROTOBUF_EXPORT string CEscape(const string& src); 267 | 268 | namespace strings { 269 | // Like CEscape() but does not escape bytes with the upper bit set. 270 | LIBPROTOBUF_EXPORT string Utf8SafeCEscape(const string& src); 271 | 272 | // Like CEscape() but uses hex (\x) escapes instead of octals. 273 | LIBPROTOBUF_EXPORT string CHexEscape(const string& src); 274 | } // namespace strings 275 | 276 | // ---------------------------------------------------------------------- 277 | // strto32() 278 | // strtou32() 279 | // strto64() 280 | // strtou64() 281 | // Architecture-neutral plug compatible replacements for strtol() and 282 | // strtoul(). Long's have different lengths on ILP-32 and LP-64 283 | // platforms, so using these is safer, from the point of view of 284 | // overflow behavior, than using the standard libc functions. 
285 | // ---------------------------------------------------------------------- 286 | LIBPROTOBUF_EXPORT int32 strto32_adaptor(const char *nptr, char **endptr, 287 | int base); 288 | LIBPROTOBUF_EXPORT uint32 strtou32_adaptor(const char *nptr, char **endptr, 289 | int base); 290 | 291 | inline int32 strto32(const char *nptr, char **endptr, int base) { 292 | if (sizeof(int32) == sizeof(long)) 293 | return strtol(nptr, endptr, base); 294 | else 295 | return strto32_adaptor(nptr, endptr, base); 296 | } 297 | 298 | inline uint32 strtou32(const char *nptr, char **endptr, int base) { 299 | if (sizeof(uint32) == sizeof(unsigned long)) 300 | return strtoul(nptr, endptr, base); 301 | else 302 | return strtou32_adaptor(nptr, endptr, base); 303 | } 304 | 305 | // For now, long long is 64-bit on all the platforms we care about, so these 306 | // functions can simply pass the call to strto[u]ll. 307 | inline int64 strto64(const char *nptr, char **endptr, int base) { 308 | GOOGLE_COMPILE_ASSERT(sizeof(int64) == sizeof(long long), 309 | sizeof_int64_is_not_sizeof_long_long); 310 | return strtoll(nptr, endptr, base); 311 | } 312 | 313 | inline uint64 strtou64(const char *nptr, char **endptr, int base) { 314 | GOOGLE_COMPILE_ASSERT(sizeof(uint64) == sizeof(unsigned long long), 315 | sizeof_uint64_is_not_sizeof_long_long); 316 | return strtoull(nptr, endptr, base); 317 | } 318 | 319 | // ---------------------------------------------------------------------- 320 | // FastIntToBuffer() 321 | // FastHexToBuffer() 322 | // FastHex64ToBuffer() 323 | // FastHex32ToBuffer() 324 | // FastTimeToBuffer() 325 | // These are intended for speed. FastIntToBuffer() assumes the 326 | // integer is non-negative. FastHexToBuffer() puts output in 327 | // hex rather than decimal. FastTimeToBuffer() puts the output 328 | // into RFC822 format. 
329 | // 330 | // FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format, 331 | // padded to exactly 16 bytes (plus one byte for '\0') 332 | // 333 | // FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format, 334 | // padded to exactly 8 bytes (plus one byte for '\0') 335 | // 336 | // All functions take the output buffer as an arg. 337 | // They all return a pointer to the beginning of the output, 338 | // which may not be the beginning of the input buffer. 339 | // ---------------------------------------------------------------------- 340 | 341 | // Suggested buffer size for FastToBuffer functions. Also works with 342 | // DoubleToBuffer() and FloatToBuffer(). 343 | static const int kFastToBufferSize = 32; 344 | 345 | LIBPROTOBUF_EXPORT char* FastInt32ToBuffer(int32 i, char* buffer); 346 | LIBPROTOBUF_EXPORT char* FastInt64ToBuffer(int64 i, char* buffer); 347 | char* FastUInt32ToBuffer(uint32 i, char* buffer); // inline below 348 | char* FastUInt64ToBuffer(uint64 i, char* buffer); // inline below 349 | LIBPROTOBUF_EXPORT char* FastHexToBuffer(int i, char* buffer); 350 | LIBPROTOBUF_EXPORT char* FastHex64ToBuffer(uint64 i, char* buffer); 351 | LIBPROTOBUF_EXPORT char* FastHex32ToBuffer(uint32 i, char* buffer); 352 | 353 | // at least 22 bytes long 354 | inline char* FastIntToBuffer(int i, char* buffer) { 355 | return (sizeof(i) == 4 ? 356 | FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer)); 357 | } 358 | inline char* FastUIntToBuffer(unsigned int i, char* buffer) { 359 | return (sizeof(i) == 4 ? 360 | FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer)); 361 | } 362 | inline char* FastLongToBuffer(long i, char* buffer) { 363 | return (sizeof(i) == 4 ? 364 | FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer)); 365 | } 366 | inline char* FastULongToBuffer(unsigned long i, char* buffer) { 367 | return (sizeof(i) == 4 ? 
368 | FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer)); 369 | } 370 | 371 | // ---------------------------------------------------------------------- 372 | // FastInt32ToBufferLeft() 373 | // FastUInt32ToBufferLeft() 374 | // FastInt64ToBufferLeft() 375 | // FastUInt64ToBufferLeft() 376 | // 377 | // Like the Fast*ToBuffer() functions above, these are intended for speed. 378 | // Unlike the Fast*ToBuffer() functions, however, these functions write 379 | // their output to the beginning of the buffer (hence the name, as the 380 | // output is left-aligned). The caller is responsible for ensuring that 381 | // the buffer has enough space to hold the output. 382 | // 383 | // Returns a pointer to the end of the string (i.e. the null character 384 | // terminating the string). 385 | // ---------------------------------------------------------------------- 386 | 387 | LIBPROTOBUF_EXPORT char* FastInt32ToBufferLeft(int32 i, char* buffer); 388 | LIBPROTOBUF_EXPORT char* FastUInt32ToBufferLeft(uint32 i, char* buffer); 389 | LIBPROTOBUF_EXPORT char* FastInt64ToBufferLeft(int64 i, char* buffer); 390 | LIBPROTOBUF_EXPORT char* FastUInt64ToBufferLeft(uint64 i, char* buffer); 391 | 392 | // Just define these in terms of the above. 393 | inline char* FastUInt32ToBuffer(uint32 i, char* buffer) { 394 | FastUInt32ToBufferLeft(i, buffer); 395 | return buffer; 396 | } 397 | inline char* FastUInt64ToBuffer(uint64 i, char* buffer) { 398 | FastUInt64ToBufferLeft(i, buffer); 399 | return buffer; 400 | } 401 | 402 | // ---------------------------------------------------------------------- 403 | // SimpleItoa() 404 | // Description: converts an integer to a string. 
405 | // 406 | // Return value: string 407 | // ---------------------------------------------------------------------- 408 | LIBPROTOBUF_EXPORT string SimpleItoa(int i); 409 | LIBPROTOBUF_EXPORT string SimpleItoa(unsigned int i); 410 | LIBPROTOBUF_EXPORT string SimpleItoa(long i); 411 | LIBPROTOBUF_EXPORT string SimpleItoa(unsigned long i); 412 | LIBPROTOBUF_EXPORT string SimpleItoa(long long i); 413 | LIBPROTOBUF_EXPORT string SimpleItoa(unsigned long long i); 414 | 415 | // ---------------------------------------------------------------------- 416 | // SimpleDtoa() 417 | // SimpleFtoa() 418 | // DoubleToBuffer() 419 | // FloatToBuffer() 420 | // Description: converts a double or float to a string which, if 421 | // passed to NoLocaleStrtod(), will produce the exact same original double 422 | // (except in case of NaN; all NaNs are considered the same value). 423 | // We try to keep the string short but it's not guaranteed to be as 424 | // short as possible. 425 | // 426 | // DoubleToBuffer() and FloatToBuffer() write the text to the given 427 | // buffer and return it. The buffer must be at least 428 | // kDoubleToBufferSize bytes for doubles and kFloatToBufferSize 429 | // bytes for floats. kFastToBufferSize is also guaranteed to be large 430 | // enough to hold either. 431 | // 432 | // Return value: string 433 | // ---------------------------------------------------------------------- 434 | LIBPROTOBUF_EXPORT string SimpleDtoa(double value); 435 | LIBPROTOBUF_EXPORT string SimpleFtoa(float value); 436 | 437 | LIBPROTOBUF_EXPORT char* DoubleToBuffer(double i, char* buffer); 438 | LIBPROTOBUF_EXPORT char* FloatToBuffer(float i, char* buffer); 439 | 440 | // In practice, doubles should never need more than 24 bytes and floats 441 | // should never need more than 14 (including null terminators), but we 442 | // overestimate to be safe. 
443 | static const int kDoubleToBufferSize = 32; 444 | static const int kFloatToBufferSize = 24; 445 | 446 | // ---------------------------------------------------------------------- 447 | // NoLocaleStrtod() 448 | // Exactly like strtod(), except it always behaves as if in the "C" 449 | // locale (i.e. decimal points must be '.'s). 450 | // ---------------------------------------------------------------------- 451 | 452 | LIBPROTOBUF_EXPORT double NoLocaleStrtod(const char* text, char** endptr); 453 | 454 | } // namespace protobuf 455 | } // namespace google 456 | 457 | #endif // GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ 458 | 459 | 460 | -------------------------------------------------------------------------------- /src/proto/protoc-gen-php/protoc-gen-php.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * PHP Protocol Buffer Generator Plugin for protoc 3 | * By Andrew Brampton (c) 2010 4 | * 5 | * TODO 6 | * Support the packed option 7 | * Lots of optimisations 8 | * Extensions 9 | * Services 10 | * Packages 11 | * Better validation (add code to check setted values are valid) 12 | * option optimize_for = CODE_SIZE/SPEED; 13 | */ 14 | #include "strutil.h" // TODO This header is from the offical protobuf source, but it is not normally installed 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | #include // for sprintf 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | 28 | #include 29 | #include 30 | 31 | #include 32 | #include 33 | 34 | #include "php_options.pb.h" 35 | 36 | using namespace google::protobuf; 37 | using namespace google::protobuf::compiler; 38 | using namespace google::protobuf::internal; 39 | 40 | class PHPCodeGenerator : public CodeGenerator { 41 | private: 42 | 43 | void PrintMessage (io::Printer &printer, const Descriptor & message) const; 44 | void PrintMessages (io::Printer &printer, const FileDescriptor & file) const; 45 | 46 | void PrintEnum (io::Printer &printer, const 
EnumDescriptor & e) const; 47 | void PrintEnums (io::Printer &printer, const FileDescriptor & file) const; 48 | 49 | void PrintService (io::Printer &printer, const ServiceDescriptor & service) const; 50 | void PrintServices (io::Printer &printer, const FileDescriptor & file) const; 51 | 52 | string DefaultValueAsString(const FieldDescriptor & field, bool quote_string_type) const; 53 | 54 | // Print the read() method 55 | void PrintMessageRead(io::Printer &printer, const Descriptor & message, vector & required_fields, const FieldDescriptor * parentField) const; 56 | 57 | // Print the write() method 58 | void PrintMessageWrite(io::Printer &printer, const Descriptor & message, const FieldDescriptor * parentField) const; 59 | 60 | // Print the size() method 61 | void PrintMessageSize(io::Printer &printer, const Descriptor & message) const; 62 | 63 | // Maps names into PHP names 64 | template 65 | string ClassName(const DescriptorType & descriptor) const; 66 | 67 | string VariableName(const FieldDescriptor & field) const; 68 | 69 | public: 70 | 71 | PHPCodeGenerator(); 72 | ~PHPCodeGenerator(); 73 | 74 | bool Generate(const FileDescriptor* file, const string& parameter, OutputDirectory* output_directory, string* error) const; 75 | 76 | }; 77 | 78 | PHPCodeGenerator::PHPCodeGenerator() {} 79 | PHPCodeGenerator::~PHPCodeGenerator() {} 80 | 81 | string UnderscoresToCamelCaseImpl(const string& input, bool cap_next_letter) { 82 | string result; 83 | // Note: I distrust ctype.h due to locales. 84 | for (int i = 0; i < input.size(); i++) { 85 | if ('a' <= input[i] && input[i] <= 'z') { 86 | if (cap_next_letter) { 87 | result += input[i] + ('A' - 'a'); 88 | } else { 89 | result += input[i]; 90 | } 91 | cap_next_letter = false; 92 | } else if ('A' <= input[i] && input[i] <= 'Z') { 93 | if (i == 0 && !cap_next_letter) { 94 | // Force first letter to lower-case unless explicitly told to 95 | // capitalize it. 
96 | result += input[i] + ('a' - 'A'); 97 | } else { 98 | // Capital letters after the first are left as-is. 99 | result += input[i]; 100 | } 101 | cap_next_letter = false; 102 | } else if ('0' <= input[i] && input[i] <= '9') { 103 | result += input[i]; 104 | cap_next_letter = true; 105 | } else { 106 | cap_next_letter = true; 107 | } 108 | } 109 | return result; 110 | } 111 | 112 | string UnderscoresToCamelCase(const FieldDescriptor & field) { 113 | return UnderscoresToCamelCaseImpl(field.name(), false); 114 | } 115 | 116 | string UnderscoresToCapitalizedCamelCase(const FieldDescriptor & field) { 117 | return UnderscoresToCamelCaseImpl(field.name(), true); 118 | } 119 | 120 | string LowerString(const string & s) { 121 | string newS (s); 122 | LowerString(&newS); 123 | return newS; 124 | } 125 | 126 | string UpperString(const string & s) { 127 | string newS (s); 128 | UpperString(&newS); 129 | return newS; 130 | } 131 | 132 | // Maps a Message full_name into a PHP name 133 | template 134 | string PHPCodeGenerator::ClassName(const DescriptorType & descriptor) const { 135 | string name (descriptor.full_name()); 136 | replace(name.begin(), name.end(), '.', '_'); 137 | return name; 138 | } 139 | 140 | string PHPCodeGenerator::VariableName(const FieldDescriptor & field) const { 141 | return UnderscoresToCamelCase(field) + '_'; 142 | } 143 | 144 | string PHPCodeGenerator::DefaultValueAsString(const FieldDescriptor & field, bool quote_string_type) const { 145 | switch (field.cpp_type()) { 146 | case FieldDescriptor::CPPTYPE_INT32: 147 | return SimpleItoa(field.default_value_int32()); 148 | 149 | case FieldDescriptor::CPPTYPE_INT64: 150 | return SimpleItoa(field.default_value_int64()); 151 | 152 | case FieldDescriptor::CPPTYPE_UINT32: 153 | return SimpleItoa(field.default_value_uint32()); 154 | 155 | case FieldDescriptor::CPPTYPE_UINT64: 156 | return SimpleItoa(field.default_value_uint64()); 157 | 158 | case FieldDescriptor::CPPTYPE_FLOAT: 159 | return 
SimpleFtoa(field.default_value_float()); 160 | 161 | case FieldDescriptor::CPPTYPE_DOUBLE: 162 | return SimpleDtoa(field.default_value_double()); 163 | 164 | case FieldDescriptor::CPPTYPE_BOOL: 165 | return field.default_value_bool() ? "true" : "false"; 166 | 167 | case FieldDescriptor::CPPTYPE_STRING: 168 | if (quote_string_type) 169 | return "\"" + CEscape(field.default_value_string()) + "\""; 170 | 171 | if (field.type() == FieldDescriptor::TYPE_BYTES) 172 | return CEscape(field.default_value_string()); 173 | 174 | return field.default_value_string(); 175 | 176 | case FieldDescriptor::CPPTYPE_ENUM: 177 | return ClassName(*field.enum_type()) + "::" + field.default_value_enum()->name(); 178 | 179 | case FieldDescriptor::CPPTYPE_MESSAGE: 180 | return "null"; 181 | 182 | } 183 | return ""; 184 | } 185 | 186 | void PHPCodeGenerator::PrintMessageRead(io::Printer &printer, const Descriptor & message, vector & required_fields, const FieldDescriptor * parentField) const { 187 | 188 | // Parse the file options 189 | const PHPFileOptions & options ( message.file()->options().GetExtension(php) ); 190 | bool skip_unknown = options.skip_unknown(); 191 | const char * pb_namespace = options.namespace_().empty() ? "" : "\\"; 192 | 193 | // Read 194 | printer.Print( 195 | "\n" 196 | "function read($fp, &$limit = PHP_INT_MAX) {\n" 197 | ); 198 | printer.Indent(); 199 | 200 | printer.Print("while(!feof($fp) && $limit > 0) {\n"); 201 | printer.Indent(); 202 | 203 | printer.Print( 204 | "$tag = `ns`Protobuf::read_varint($fp, $limit);\n" 205 | "if ($tag === false) break;\n" 206 | "$wire = $tag & 0x07;\n" 207 | "$field = $tag >> 3;\n" 208 | "//var_dump(\"`name`: Found $field type \" . `ns`Protobuf::get_wiretype($wire) . 
\" $limit bytes left\");\n" 209 | "switch($field) {\n", 210 | "name", ClassName(message), 211 | "ns", pb_namespace 212 | ); 213 | printer.Indent(); 214 | 215 | // If we are a group message, we need to add a end group case 216 | if (parentField && parentField->type() == FieldDescriptor::TYPE_GROUP) { 217 | printer.Print("case `index`:\n", "index", SimpleItoa(parentField->number()) ); 218 | printer.Print( " ASSERT('$wire == 4');\n" 219 | " break 2;\n"); 220 | } 221 | 222 | for (int i = 0; i < message.field_count(); ++i) { 223 | const FieldDescriptor &field ( *message.field(i) ); 224 | 225 | string var ( VariableName(field) ); 226 | if (field.is_repeated()) 227 | var += "[]"; 228 | if (field.is_packable()) 229 | throw "Error we do not yet support packed values"; 230 | if (field.is_required()) 231 | required_fields.push_back( &field ); 232 | 233 | string commands; 234 | 235 | switch (field.type()) { 236 | case FieldDescriptor::TYPE_DOUBLE: // double, exactly eight bytes on the wire 237 | commands = "ASSERT('$wire == 1');\n" 238 | "$tmp = `ns`Protobuf::read_double($fp);\n" 239 | "if ($tmp === false)\n" 240 | " throw new Exception('Protobuf::read_double returned false');\n" 241 | "$this->`var` = $tmp;\n" 242 | "$limit-=8;"; 243 | break; 244 | 245 | case FieldDescriptor::TYPE_FLOAT: // float, exactly four bytes on the wire. 246 | commands = "ASSERT('$wire == 5');\n" 247 | "$tmp = `ns`Protobuf::read_float($fp);\n" 248 | "if ($tmp === false)\n" 249 | " throw new Exception('Protobuf::read_float returned false');\n" 250 | "$this->`var` = $tmp;\n" 251 | "$limit-=4;"; 252 | break; 253 | 254 | case FieldDescriptor::TYPE_INT64: // int64, varint on the wire. 255 | case FieldDescriptor::TYPE_UINT64: // uint64, varint on the wire. 256 | case FieldDescriptor::TYPE_INT32: // int32, varint on the wire. 
257 | case FieldDescriptor::TYPE_UINT32: // uint32, varint on the wire 258 | case FieldDescriptor::TYPE_ENUM: // Enum, varint on the wire 259 | commands = "ASSERT('$wire == 0');\n" 260 | "$tmp = `ns`Protobuf::read_varint($fp, $limit);\n" 261 | "if ($tmp === false)\n" 262 | " throw new Exception('Protobuf::read_varint returned false');\n" 263 | "$this->`var` = $tmp;\n"; 264 | break; 265 | 266 | case FieldDescriptor::TYPE_FIXED64: // uint64, exactly eight bytes on the wire. 267 | commands = "ASSERT('$wire == 1');\n" 268 | "$tmp = `ns`Protobuf::read_uint64($fp);\n" 269 | "if ($tmp === false)\n" 270 | " throw new Exception('Protobuf::read_unint64 returned false');\n" 271 | "$this->`var` = $tmp;\n" 272 | "$limit-=8;"; 273 | break; 274 | 275 | case FieldDescriptor::TYPE_SFIXED64: // int64, exactly eight bytes on the wire 276 | commands = "ASSERT('$wire == 1');\n" 277 | "$tmp = `ns`Protobuf::read_int64($fp);\n" 278 | "if ($tmp === false)\n" 279 | " throw new Exception('Protobuf::read_int64 returned false');\n" 280 | "$this->`var` = $tmp;\n" 281 | "$limit-=8;"; 282 | break; 283 | 284 | case FieldDescriptor::TYPE_FIXED32: // uint32, exactly four bytes on the wire. 285 | commands = "ASSERT('$wire == 5');\n" 286 | "$tmp = `ns`Protobuf::read_uint32($fp);\n" 287 | "if ($tmp === false)\n" 288 | " throw new Exception('Protobuf::read_uint32 returned false');\n" 289 | "$this->`var` = $tmp;\n" 290 | "$limit-=4;"; 291 | break; 292 | 293 | case FieldDescriptor::TYPE_SFIXED32: // int32, exactly four bytes on the wire 294 | commands = "ASSERT('$wire == 5');\n" 295 | "$tmp = `ns`Protobuf::read_int32($fp);\n" 296 | "if ($tmp === false)\n" 297 | " throw new Exception('Protobuf::read_int32 returned false');\n" 298 | "this->`var` = $tmp\n;" 299 | "$limit-=4;"; 300 | break; 301 | 302 | case FieldDescriptor::TYPE_BOOL: // bool, varint on the wire. 
303 | commands = "ASSERT('$wire == 0');\n" 304 | "$tmp = `ns`Protobuf::read_varint($fp, $limit);\n" 305 | "if ($tmp === false)\n" 306 | " throw new Exception('Protobuf::read_varint returned false');\n" 307 | "$this->`var` = $tmp > 0 ? true : false;"; 308 | break; 309 | 310 | case FieldDescriptor::TYPE_STRING: // UTF-8 text. 311 | case FieldDescriptor::TYPE_BYTES: // Arbitrary byte array. 312 | commands = "ASSERT('$wire == 2');\n" 313 | "$len = `ns`Protobuf::read_varint($fp, $limit);\n" 314 | "if ($len === false)\n" 315 | " throw new Exception('Protobuf::read_varint returned false');\n" 316 | "if ($len > 0)\n" 317 | " $tmp = fread($fp, $len);\n" 318 | "else\n" 319 | " $tmp = '';\n" 320 | "if ($tmp === false)\n" 321 | " throw new Exception(\"fread($len) returned false\");\n" 322 | "$this->`var` = $tmp;\n" 323 | "$limit-=$len;"; 324 | break; 325 | 326 | case FieldDescriptor::TYPE_GROUP: {// Tag-delimited message. Deprecated. 327 | const Descriptor & d( *field.message_type() ); 328 | commands = "ASSERT('$wire == 3');\n" 329 | "$this->`var` = new " + ClassName(d) + "($fp, $limit);"; 330 | break; 331 | } 332 | 333 | case FieldDescriptor::TYPE_MESSAGE: {// Length-delimited message. 
334 | const Descriptor & d( *field.message_type() ); 335 | commands = "ASSERT('$wire == 2');\n" 336 | "$len = `ns`Protobuf::read_varint($fp, $limit);\n" 337 | "if ($len === false)\n" 338 | " throw new Exception('Protobuf::read_varint returned false');\n" 339 | "$limit-=$len;\n" 340 | "$this->`var` = new " + ClassName(d) + "($fp, $len);\n" 341 | "ASSERT('$len == 0');"; 342 | break; 343 | } 344 | 345 | case FieldDescriptor::TYPE_SINT32: // int32, ZigZag-encoded varint on the wire 346 | commands = "ASSERT('$wire == 5');\n" 347 | "$tmp = `ns`Protobuf::read_zint32($fp);\n" 348 | "if ($tmp === false)\n" 349 | " throw new Exception('Protobuf::read_zint32 returned false');\n" 350 | "$this->`var` = $tmp;\n" 351 | "$limit-=4;"; 352 | break; 353 | 354 | case FieldDescriptor::TYPE_SINT64: // int64, ZigZag-encoded varint on the wire 355 | commands = "ASSERT('$wire == 1');\n" 356 | "$tmp = `ns`Protobuf::read_zint64($fp);\n" 357 | "if ($tmp === false)\n" 358 | " throw new Exception('Protobuf::read_zint64 returned false');\n" 359 | "$this->`var` = $tmp;\n" 360 | "$limit-=8;"; 361 | break; 362 | 363 | default: 364 | throw "Error: Unsupported type";// TODO use the proper exception 365 | } 366 | 367 | printer.Print("case `index`:\n", "index", SimpleItoa(field.number()) ); 368 | 369 | printer.Indent(); 370 | printer.Print(commands.c_str(), "var", var, "ns", pb_namespace); 371 | printer.Print("\nbreak;\n"); 372 | printer.Outdent(); 373 | } 374 | 375 | if (skip_unknown) { 376 | printer.Print( 377 | "default:\n" 378 | " $limit -= `ns`Protobuf::skip_field($fp, $wire);\n", 379 | "name", ClassName(message), 380 | "ns", pb_namespace 381 | ); 382 | } else { 383 | printer.Print( 384 | "default:\n" 385 | " $this->_unknown[$field . '-' . 
`ns`Protobuf::get_wiretype($wire)][] = `ns`Protobuf::read_field($fp, $wire, $limit);\n", 386 | "name", ClassName(message), 387 | "ns", pb_namespace 388 | ); 389 | } 390 | 391 | printer.Outdent(); 392 | printer.Outdent(); 393 | printer.Print( 394 | " }\n" // switch 395 | "}\n" // while 396 | ); 397 | 398 | printer.Outdent(); 399 | printer.Print( 400 | " if (!$this->validateRequired())\n" 401 | " throw new Exception('Required fields are missing');\n" 402 | "}\n" 403 | ); 404 | } 405 | 406 | /** 407 | * Turns a 32 bit number into a string suitable for PHP to print out. 408 | * For example, 0x12345678 would turn into "\x12\x34\x56\78". 409 | * @param tag 410 | * @return 411 | */ 412 | string arrayToPHPString(uint8 *a, size_t len) { 413 | 414 | assert(a != NULL); 415 | 416 | const int dest_length = len * 4 + 1; // Maximum possible expansion 417 | scoped_array dest(new char[dest_length]); 418 | 419 | char *p = dest.get(); 420 | 421 | while(len > 0) { 422 | uint8 c = *a++; 423 | if ((c >= 0 && c <= 31) || c >= 127 ) { 424 | p += sprintf(p, "\\x%02x", c); 425 | } else if (c == '"'){ 426 | *p++ = '\\'; 427 | *p++ = c; 428 | } else { 429 | *p++ = c; 430 | } 431 | 432 | len--; 433 | } 434 | 435 | *p = '\0'; // Null terminate us 436 | 437 | return string(dest.get()); 438 | } 439 | 440 | /** 441 | * Some notes 442 | * Tag 443 | * Field 444 | * Length 445 | * Group + 446 | * Embedded Message + 447 | * Start + (You have to know what type of Message it is, and it is not length prefixed) 448 | * 449 | * The Message class should not print its own length (this should be printed by the parent Message) 450 | * The Group class should only print its field, the parent should print the start/end tag 451 | * Otherwise the Message/Group will print everything of the fields. 
452 | */ 453 | 454 | /** 455 | * Prints the write() method for this Message 456 | * @param printer 457 | * @param message 458 | * @param parentField 459 | */ 460 | void PHPCodeGenerator::PrintMessageWrite(io::Printer &printer, const Descriptor & message, const FieldDescriptor * parentField) const { 461 | 462 | // Parse the file options 463 | const PHPFileOptions & options ( message.file()->options().GetExtension(php) ); 464 | const char * pb_namespace = options.namespace_().empty() ? "" : "\\"; 465 | 466 | // Write 467 | printer.Print( 468 | "\n" 469 | "function write($fp) {\n" 470 | ); 471 | printer.Indent(); 472 | 473 | printer.Print( 474 | "if (!$this->validateRequired())\n" 475 | " throw new Exception('Required fields are missing');\n" 476 | ); 477 | 478 | for (int i = 0; i < message.field_count(); ++i) { 479 | const FieldDescriptor &field ( *message.field(i) ); 480 | 481 | if (field.is_packable()) 482 | throw "Error we do not yet support packed values"; 483 | 484 | // Create the tag 485 | uint8 tag[5]; 486 | uint8 *tmp; 487 | tmp = WireFormatLite::WriteTagToArray( 488 | field.number(), 489 | WireFormat::WireTypeForFieldType(field.type()), 490 | tag); 491 | int tagLen = tmp - tag; 492 | 493 | string commands; 494 | switch (field.type()) { 495 | case FieldDescriptor::TYPE_DOUBLE: // double, exactly eight bytes on the wire 496 | commands = "`ns`Protobuf::write_double($fp, `var`);\n"; 497 | break; 498 | 499 | case FieldDescriptor::TYPE_FLOAT: // float, exactly four bytes on the wire. 500 | commands = "`ns`Protobuf::write_float($fp, `var`);\n"; 501 | break; 502 | 503 | case FieldDescriptor::TYPE_INT64: // int64, varint on the wire. 504 | case FieldDescriptor::TYPE_UINT64: // uint64, varint on the wire. 505 | case FieldDescriptor::TYPE_INT32: // int32, varint on the wire. 
506 | case FieldDescriptor::TYPE_UINT32: // uint32, varint on the wire 507 | case FieldDescriptor::TYPE_ENUM: // Enum, varint on the wire 508 | commands = "`ns`Protobuf::write_varint($fp, `var`);\n"; 509 | break; 510 | 511 | case FieldDescriptor::TYPE_FIXED64: // uint64, exactly eight bytes on the wire. 512 | commands = "`ns`Protobuf::write_uint64($fp, `var`);\n"; 513 | break; 514 | 515 | case FieldDescriptor::TYPE_SFIXED64: // int64, exactly eight bytes on the wire 516 | commands = "`ns`Protobuf::write_int64($fp, `var`);\n"; 517 | break; 518 | 519 | case FieldDescriptor::TYPE_FIXED32: // uint32, exactly four bytes on the wire. 520 | commands = "`ns`Protobuf::write_uint32($fp, `var`);\n"; 521 | break; 522 | 523 | case FieldDescriptor::TYPE_SFIXED32: // int32, exactly four bytes on the wire 524 | commands = "`ns`Protobuf::write_int32($fp, `var`);\n"; 525 | break; 526 | 527 | case FieldDescriptor::TYPE_BOOL: // bool, varint on the wire. 528 | commands = "`ns`Protobuf::write_varint($fp, `var` ? 1 : 0);\n"; 529 | break; 530 | 531 | case FieldDescriptor::TYPE_STRING: // UTF-8 text. 532 | case FieldDescriptor::TYPE_BYTES: // Arbitrary byte array. 533 | commands = "`ns`Protobuf::write_varint($fp, strlen(`var`));\n" 534 | "fwrite($fp, `var`);\n"; 535 | break; 536 | 537 | case FieldDescriptor::TYPE_GROUP: {// Tag-delimited message. Deprecated. 538 | // The start tag has already been printed, but also print the end tag 539 | uint8 endtag[5]; 540 | tmp = WireFormatLite::WriteTagToArray( 541 | field.number(), 542 | WireFormatLite::WIRETYPE_END_GROUP, 543 | endtag); 544 | int endtagLen = tmp - endtag; 545 | commands = "`var`->write($fp); // group\n" 546 | "fwrite($fp, \"" + arrayToPHPString(endtag, endtagLen) + "\");\n"; 547 | break; 548 | } 549 | case FieldDescriptor::TYPE_MESSAGE: // Length-delimited message. 
550 | commands = "`ns`Protobuf::write_varint($fp, `var`->size()); // message\n" 551 | "`var`->write($fp);\n"; 552 | break; 553 | 554 | case FieldDescriptor::TYPE_SINT32: // int32, ZigZag-encoded varint on the wire 555 | commands = "`ns`Protobuf::write_zint32($fp, `var`);\n"; 556 | break; 557 | 558 | case FieldDescriptor::TYPE_SINT64: // int64, ZigZag-encoded varint on the wire 559 | commands = "`ns`Protobuf::write_zint64($fp, `var`);\n"; 560 | break; 561 | 562 | default: 563 | throw "Error: Unsupported type";// TODO use the proper exception 564 | } 565 | 566 | if (field.is_repeated()) { 567 | printer.Print( 568 | "if (!is_null($this->`var`))\n" 569 | " foreach($this->`var` as $v) {\n", 570 | "var", VariableName(field) 571 | ); 572 | printer.Indent(); printer.Indent(); 573 | printer.Print("fwrite($fp, \"`tag`\");\n", "tag", arrayToPHPString(tag, tagLen)); 574 | printer.Print(commands.c_str(), 575 | "var", "$v", 576 | "ns", pb_namespace 577 | ); 578 | printer.Outdent(); printer.Outdent(); 579 | printer.Print(" }\n"); 580 | 581 | } else { 582 | printer.Print( 583 | "if (!is_null($this->`var`)) {\n", 584 | "var", VariableName(field) 585 | ); 586 | printer.Indent(); 587 | printer.Print("fwrite($fp, \"`tag`\");\n", "tag", arrayToPHPString(tag, tagLen)); 588 | printer.Print(commands.c_str(), 589 | "var", "$this->" + VariableName(field), 590 | "ns", pb_namespace 591 | ); 592 | printer.Outdent(); 593 | printer.Print("}\n"); 594 | } 595 | } 596 | 597 | printer.Outdent(); 598 | printer.Print("}\n"); 599 | } 600 | 601 | void PHPCodeGenerator::PrintMessageSize(io::Printer &printer, const Descriptor & message) const { 602 | // Parse the file options 603 | const PHPFileOptions & options ( message.file()->options().GetExtension(php) ); 604 | const char * pb_namespace = options.namespace_().empty() ? 
"" : "\\"; 605 | 606 | // Print the calc size method 607 | printer.Print( 608 | "\n" 609 | "public function size() {\n" 610 | " $size = 0;\n" 611 | ); 612 | printer.Indent(); 613 | 614 | for (int i = 0; i < message.field_count(); ++i) { 615 | const FieldDescriptor &field ( *message.field(i) ); 616 | 617 | // Calc the size of the tag needed 618 | int tag = WireFormat::TagSize(field.number(), field.type()); 619 | 620 | string command; 621 | 622 | switch (WireFormat::WireTypeForField(&field)) { 623 | 624 | case WireFormatLite::WIRETYPE_VARINT: 625 | if (field.type() == FieldDescriptor::TYPE_BOOL) { 626 | tag++; // A bool will always take 1 byte 627 | command = "$size += `tag`;\n"; 628 | } else { 629 | command = "$size += `tag` + `ns`Protobuf::size_varint(`var`);\n"; 630 | } 631 | break; 632 | 633 | case WireFormatLite::WIRETYPE_FIXED32: 634 | tag += 4; 635 | command = "$size += `tag`;\n"; 636 | break; 637 | 638 | case WireFormatLite::WIRETYPE_FIXED64: 639 | tag += 8; 640 | command = "$size += `tag`;\n"; 641 | break; 642 | 643 | case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: 644 | if (field.type() == FieldDescriptor::TYPE_MESSAGE) { 645 | command = "$l = `var`->size();\n"; 646 | } else { 647 | command = "$l = strlen(`var`);\n"; 648 | } 649 | 650 | command += "$size += `tag` + `ns`Protobuf::size_varint($l) + $l;\n"; 651 | break; 652 | 653 | case WireFormatLite::WIRETYPE_START_GROUP: 654 | case WireFormatLite::WIRETYPE_END_GROUP: 655 | // WireFormat::TagSize returns the tag size * two when using groups, to account for both the start and end tag 656 | command += "$size += `tag` + `var`->size();\n"; 657 | break; 658 | 659 | default: 660 | throw "Error: Unsupported wire type";// TODO use the proper exception 661 | } 662 | 663 | map variables; 664 | variables["tag"] = SimpleItoa(tag); 665 | variables["ns"] = pb_namespace; 666 | 667 | if (field.is_repeated()) { 668 | printer.Print( 669 | "if (!is_null($this->`var`))\n" 670 | " foreach($this->`var` as $v) {\n", 671 | "var", 
VariableName(field) 672 | ); 673 | printer.Indent(); printer.Indent(); 674 | 675 | variables["var"] ="$v"; 676 | printer.Print(variables, command.c_str()); 677 | 678 | printer.Outdent(); printer.Outdent(); 679 | printer.Print(" }\n"); 680 | 681 | } else { 682 | printer.Print( 683 | "if (!is_null($this->`var`)) {\n", 684 | "var", VariableName(field) 685 | ); 686 | printer.Indent(); 687 | 688 | variables["var"] = "$this->" + VariableName(field); 689 | printer.Print(variables, command.c_str()); 690 | 691 | printer.Outdent(); 692 | printer.Print("}\n"); 693 | } 694 | } 695 | printer.Outdent(); 696 | printer.Print( 697 | " return $size;\n" 698 | "}\n" 699 | ); 700 | } 701 | 702 | void PHPCodeGenerator::PrintMessage(io::Printer &printer, const Descriptor & message) const { 703 | // Parse the file options 704 | const PHPFileOptions & options ( message.file()->options().GetExtension(php) ); 705 | bool skip_unknown = options.skip_unknown(); 706 | const char * pb_namespace = options.namespace_().empty() ? 
"" : "\\"; 707 | 708 | vector required_fields; 709 | 710 | // Print nested messages 711 | for (int i = 0; i < message.nested_type_count(); ++i) { 712 | printer.Print("\n"); 713 | PrintMessage(printer, *message.nested_type(i)); 714 | } 715 | 716 | // Print nested enum 717 | for (int i = 0; i < message.enum_type_count(); ++i) { 718 | PrintEnum(printer, *message.enum_type(i) ); 719 | } 720 | 721 | // Find out if we are a nested type, if so what kind 722 | const FieldDescriptor * parentField = NULL; 723 | const char * type = "message"; 724 | if (message.containing_type() != NULL) { 725 | const Descriptor & parent ( *message.containing_type() ); 726 | 727 | // Find which field we are 728 | for (int i = 0; i < parent.field_count(); ++i) { 729 | if (parent.field(i)->message_type() == &message) { 730 | parentField = parent.field(i); 731 | break; 732 | } 733 | } 734 | if (parentField->type() == FieldDescriptor::TYPE_GROUP) 735 | type = "group"; 736 | } 737 | 738 | // Start printing the message 739 | printer.Print("// `type` `full_name`\n", 740 | "type", type, 741 | "full_name", message.full_name() 742 | ); 743 | 744 | printer.Print("class `name` {\n", 745 | "name", ClassName(message) 746 | ); 747 | printer.Indent(); 748 | 749 | // Print fields map 750 | /* 751 | printer.Print( 752 | "// Array maps field indexes to members\n" 753 | "private static $_map = array (\n" 754 | ); 755 | printer.Indent(); 756 | for (int i = 0; i < message.field_count(); ++i) { 757 | const FieldDescriptor &field ( *message.field(i) ); 758 | 759 | printer.Print("`index` => '`value`',\n", 760 | "index", SimpleItoa(field.number()), 761 | "value", VariableName(field) 762 | ); 763 | } 764 | printer.Outdent(); 765 | printer.Print(");\n\n"); 766 | */ 767 | if (!skip_unknown) 768 | printer.Print("private $_unknown;\n"); 769 | 770 | // Constructor 771 | printer.Print( 772 | "\n" // TODO maybe some kind of inhertiance would reduce all this code! 
773 | "function __construct($in = NULL, &$limit = PHP_INT_MAX) {\n" 774 | " if($in !== NULL) {\n" 775 | " if (is_string($in)) {\n" 776 | " $fp = fopen('php://memory', 'r+b');\n" 777 | " fwrite($fp, $in);\n" 778 | " rewind($fp);\n" 779 | " } else if (is_resource($in)) {\n" 780 | " $fp = $in;\n" 781 | " } else {\n" 782 | " throw new Exception('Invalid in parameter');\n" 783 | " }\n" 784 | " $this->read($fp, $limit);\n" 785 | " }\n" 786 | "}\n" 787 | ); 788 | 789 | // Print the read/write methods 790 | PrintMessageRead(printer, message, required_fields, parentField); 791 | PrintMessageWrite(printer, message, parentField); 792 | 793 | PrintMessageSize(printer, message); 794 | 795 | // Validate that the required fields are included 796 | printer.Print( 797 | "\n" 798 | "public function validateRequired() {\n" 799 | ); 800 | printer.Indent(); 801 | for (int i = 0; i < required_fields.size(); ++i) { 802 | printer.Print("if ($this->`name` === null) return false;\n", 803 | "name", VariableName(*required_fields[i]) 804 | ); 805 | } 806 | printer.Print("return true;\n"); 807 | printer.Outdent(); 808 | printer.Print("}\n"); 809 | 810 | // Print a toString method 811 | printer.Print( 812 | "\n" 813 | "public function __toString() {\n" 814 | " return ''" 815 | ); 816 | printer.Indent(); 817 | 818 | if (!skip_unknown) 819 | printer.Print("\n . `ns`Protobuf::toString('unknown', $this->_unknown)", "ns", pb_namespace); 820 | 821 | map variables; 822 | 823 | for (int i = 0; i < message.field_count(); ++i) { 824 | const FieldDescriptor &field ( *message.field(i) ); 825 | variables.clear(); 826 | variables["name"] = VariableName(field); 827 | variables["ns"] = pb_namespace; 828 | 829 | if (field.type() == FieldDescriptor::TYPE_ENUM) { 830 | variables["enum"] = ClassName(*field.enum_type()); 831 | printer.Print(variables, 832 | "\n . `ns`Protobuf::toString('`name`', `enum`::toString($this->`name`))" 833 | ); 834 | } else { 835 | printer.Print(variables, 836 | "\n . 
`ns`Protobuf::toString('`name`', $this->`name`)" 837 | ); 838 | } 839 | } 840 | printer.Print(";\n"); 841 | printer.Outdent(); 842 | printer.Print("}\n"); 843 | 844 | // Print fields variables and methods 845 | for (int i = 0; i < message.field_count(); ++i) { 846 | printer.Print("\n"); 847 | 848 | const FieldDescriptor &field ( *message.field(i) ); 849 | 850 | map variables; 851 | variables["name"] = VariableName(field); 852 | variables["capitalized_name"] = UnderscoresToCapitalizedCamelCase(field); 853 | variables["default"] = DefaultValueAsString(field, true); 854 | variables["comment"] = field.DebugString(); 855 | 856 | if (field.type() == FieldDescriptor::TYPE_GROUP) { 857 | size_t p = variables["comment"].find ('{'); 858 | if (p != string::npos) 859 | variables["comment"].resize (p - 1); 860 | } 861 | 862 | // TODO Check that comment is a single line 863 | 864 | switch (field.type()) { 865 | // If its a enum we should store it as a int 866 | // case FieldDescriptor::TYPE_ENUM: 867 | // variables["type"] = field.enum_type()->name() + " "; 868 | // break; 869 | 870 | case FieldDescriptor::TYPE_MESSAGE: 871 | case FieldDescriptor::TYPE_GROUP: 872 | variables["type"] = ClassName(*field.message_type()) + " "; 873 | break; 874 | 875 | default: 876 | variables["type"] = ""; 877 | } 878 | 879 | if (field.is_repeated()) { 880 | // Repeated field 881 | printer.Print(variables, 882 | "// `comment`\n" 883 | "private $`name` = null;\n" 884 | "public function clear`capitalized_name`() { $this->`name` = null; }\n" 885 | 886 | "public function get`capitalized_name`Count() { if ($this->`name` === null ) return 0; else return count($this->`name`); }\n" 887 | "public function get`capitalized_name`($index) { return $this->`name`[$index]; }\n" 888 | "public function get`capitalized_name`Array() { if ($this->`name` === null ) return array(); else return $this->`name`; }\n" 889 | ); 890 | 891 | // TODO Change the set code to validate input depending on the variable type 892 | 
printer.Print(variables, 893 | "public function set`capitalized_name`($index, $value) {$this->`name`[$index] = $value; }\n" 894 | "public function add`capitalized_name`($value) { $this->`name`[] = $value; }\n" 895 | "public function addAll`capitalized_name`(array $values) { foreach($values as $value) {$this->`name`[] = $value;} }\n" 896 | ); 897 | 898 | } else { 899 | // Non repeated field 900 | printer.Print(variables, 901 | "// `comment`\n" 902 | "private $`name` = null;\n" 903 | "public function clear`capitalized_name`() { $this->`name` = null; }\n" 904 | "public function has`capitalized_name`() { return $this->`name` !== null; }\n" 905 | 906 | "public function get`capitalized_name`() { if($this->`name` === null) return `default`; else return $this->`name`; }\n" 907 | ); 908 | 909 | // TODO Change the set code to validate input depending on the variable type 910 | printer.Print(variables, 911 | "public function set`capitalized_name`(`type`$value) { $this->`name` = $value; }\n" 912 | ); 913 | } 914 | } 915 | 916 | // Class Insertion Point 917 | printer.Print( 918 | "\n" 919 | "// @@protoc_insertion_point(class_scope:`full_name`)\n", 920 | "full_name", message.full_name() 921 | ); 922 | 923 | printer.Outdent(); 924 | printer.Print("}\n\n"); 925 | } 926 | 927 | void PHPCodeGenerator::PrintEnum(io::Printer &printer, const EnumDescriptor & e) const { 928 | 929 | printer.Print("// enum `full_name`\n" 930 | "class `name` {\n", 931 | "full_name", e.full_name(), 932 | "name", ClassName(e) 933 | ); 934 | 935 | printer.Indent(); 936 | 937 | // Print fields 938 | for (int j = 0; j < e.value_count(); ++j) { 939 | const EnumValueDescriptor &value ( *e.value(j) ); 940 | 941 | printer.Print( 942 | "const `name` = `number`;\n", 943 | "name", UpperString(value.name()), 944 | "number", SimpleItoa(value.number()) 945 | ); 946 | } 947 | 948 | // Print values array 949 | printer.Print("\npublic static $_values = array(\n"); 950 | printer.Indent(); 951 | for (int j = 0; j < 
e.value_count(); ++j) { 952 | const EnumValueDescriptor &value ( *e.value(j) ); 953 | 954 | printer.Print( 955 | "`number` => self::`name`,\n", 956 | "number", SimpleItoa(value.number()), 957 | "name", UpperString(value.name()) 958 | ); 959 | } 960 | printer.Outdent(); 961 | printer.Print(");\n\n"); 962 | 963 | // Print a toString 964 | printer.Print( 965 | "public static function toString($value) {\n" 966 | " if (is_null($value)) return null;\n" 967 | " if (array_key_exists($value, self::$_values))\n" 968 | " return self::$_values[$value];\n" 969 | " return 'UNKNOWN';\n" 970 | "}\n" 971 | ); 972 | 973 | printer.Outdent(); 974 | printer.Print("}\n\n"); 975 | } 976 | 977 | void PHPCodeGenerator::PrintMessages(io::Printer &printer, const FileDescriptor & file) const { 978 | for (int i = 0; i < file.message_type_count(); ++i) { 979 | PrintMessage(printer, *file.message_type(i)); 980 | } 981 | } 982 | 983 | void PHPCodeGenerator::PrintEnums(io::Printer &printer, const FileDescriptor & file) const { 984 | for (int i = 0; i < file.enum_type_count(); ++i) { 985 | PrintEnum(printer, *file.enum_type(i) ); 986 | } 987 | } 988 | 989 | void PHPCodeGenerator::PrintServices(io::Printer &printer, const FileDescriptor & file) const { 990 | for (int i = 0; i < file.service_count(); ++i) { 991 | printer.Print("////\n//TODO Service\n////\n"); 992 | } 993 | } 994 | 995 | bool PHPCodeGenerator::Generate(const FileDescriptor* file, 996 | const string& parameter, 997 | OutputDirectory* output_directory, 998 | string* error) const { 999 | 1000 | string php_filename ( file->name() + ".php" ); 1001 | 1002 | // Parse the options 1003 | const PHPFileOptions & options ( file->options().GetExtension(php) ); 1004 | const string & namespace_ (options.namespace_()); 1005 | 1006 | // Generate main file. 
1007 | scoped_ptr output( 1008 | output_directory->Open(php_filename) 1009 | ); 1010 | 1011 | io::Printer printer(output.get(), '`'); 1012 | 1013 | try { 1014 | printer.Print( 1015 | "assign( msg ); 1037 | return false; 1038 | } 1039 | 1040 | return true; 1041 | } 1042 | 1043 | 1044 | int main(int argc, char* argv[]) { 1045 | PHPCodeGenerator generator; 1046 | return PluginMain(argc, argv, &generator); 1047 | } 1048 | -------------------------------------------------------------------------------- /src/proto/protoc-gen-php/strutil.cc: -------------------------------------------------------------------------------- 1 | // Protocol Buffers - Google's data interchange format 2 | // Copyright 2008 Google Inc. All rights reserved. 3 | // http://code.google.com/p/protobuf/ 4 | // 5 | // Redistribution and use in source and binary forms, with or without 6 | // modification, are permitted provided that the following conditions are 7 | // met: 8 | // 9 | // * Redistributions of source code must retain the above copyright 10 | // notice, this list of conditions and the following disclaimer. 11 | // * Redistributions in binary form must reproduce the above 12 | // copyright notice, this list of conditions and the following disclaimer 13 | // in the documentation and/or other materials provided with the 14 | // distribution. 15 | // * Neither the name of Google Inc. nor the names of its 16 | // contributors may be used to endorse or promote products derived from 17 | // this software without specific prior written permission. 18 | // 19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | 31 | // from google3/strings/strutil.cc 32 | 33 | #include "strutil.h" 34 | #include 35 | #include // FLT_DIG and DBL_DIG 36 | #include 37 | #include 38 | #include 39 | #include 40 | 41 | #ifdef _WIN32 42 | // MSVC has only _snprintf, not snprintf. 43 | // 44 | // MinGW has both snprintf and _snprintf, but they appear to be different 45 | // functions. The former is buggy. When invoked like so: 46 | // char buffer[32]; 47 | // snprintf(buffer, 32, "%.*g\n", FLT_DIG, 1.23e10f); 48 | // it prints "1.23000e+10". This is plainly wrong: %g should never print 49 | // trailing zeros after the decimal point. For some reason this bug only 50 | // occurs with some input values, not all. In any case, _snprintf does the 51 | // right thing, so we use it. 52 | #define snprintf _snprintf 53 | #endif 54 | 55 | namespace google { 56 | namespace protobuf { 57 | 58 | inline bool IsNaN(double value) { 59 | // NaN is never equal to anything, even itself. 60 | return value != value; 61 | } 62 | 63 | // These are defined as macros on some platforms. #undef them so that we can 64 | // redefine them. 65 | #undef isxdigit 66 | #undef isprint 67 | 68 | // The definitions of these in ctype.h change based on locale. Since our 69 | // string manipulation is all in relation to the protocol buffer and C++ 70 | // languages, we always want to use the C locale. So, we re-define these 71 | // exactly as we want them. 
72 | inline bool isxdigit(char c) { 73 | return ('0' <= c && c <= '9') || 74 | ('a' <= c && c <= 'f') || 75 | ('A' <= c && c <= 'F'); 76 | } 77 | 78 | inline bool isprint(char c) { 79 | return c >= 0x20 && c <= 0x7E; 80 | } 81 | 82 | // ---------------------------------------------------------------------- 83 | // StripString 84 | // Replaces any occurrence of the character 'remove' (or the characters 85 | // in 'remove') with the character 'replacewith'. 86 | // ---------------------------------------------------------------------- 87 | void StripString(string* s, const char* remove, char replacewith) { 88 | const char * str_start = s->c_str(); 89 | const char * str = str_start; 90 | for (str = strpbrk(str, remove); 91 | str != NULL; 92 | str = strpbrk(str + 1, remove)) { 93 | (*s)[str - str_start] = replacewith; 94 | } 95 | } 96 | 97 | // ---------------------------------------------------------------------- 98 | // StringReplace() 99 | // Replace the "old" pattern with the "new" pattern in a string, 100 | // and append the result to "res". If replace_all is false, 101 | // it only replaces the first instance of "old." 102 | // ---------------------------------------------------------------------- 103 | 104 | void StringReplace(const string& s, const string& oldsub, 105 | const string& newsub, bool replace_all, 106 | string* res) { 107 | if (oldsub.empty()) { 108 | res->append(s); // if empty, append the given string. 
109 | return; 110 | } 111 | 112 | string::size_type start_pos = 0; 113 | string::size_type pos; 114 | do { 115 | pos = s.find(oldsub, start_pos); 116 | if (pos == string::npos) { 117 | break; 118 | } 119 | res->append(s, start_pos, pos - start_pos); 120 | res->append(newsub); 121 | start_pos = pos + oldsub.size(); // start searching again after the "old" 122 | } while (replace_all); 123 | res->append(s, start_pos, s.length() - start_pos); 124 | } 125 | 126 | // ---------------------------------------------------------------------- 127 | // StringReplace() 128 | // Give me a string and two patterns "old" and "new", and I replace 129 | // the first instance of "old" in the string with "new", if it 130 | // exists. If "global" is true; call this repeatedly until it 131 | // fails. RETURN a new string, regardless of whether the replacement 132 | // happened or not. 133 | // ---------------------------------------------------------------------- 134 | 135 | string StringReplace(const string& s, const string& oldsub, 136 | const string& newsub, bool replace_all) { 137 | string ret; 138 | StringReplace(s, oldsub, newsub, replace_all, &ret); 139 | return ret; 140 | } 141 | 142 | // ---------------------------------------------------------------------- 143 | // SplitStringUsing() 144 | // Split a string using a character delimiter. Append the components 145 | // to 'result'. 146 | // 147 | // Note: For multi-character delimiters, this routine will split on *ANY* of 148 | // the characters in the string, not the entire string as a single delimiter. 149 | // ---------------------------------------------------------------------- 150 | template 151 | static inline 152 | void SplitStringToIteratorUsing(const string& full, 153 | const char* delim, 154 | ITR& result) { 155 | // Optimize the common case where delim is a single character. 
156 | if (delim[0] != '\0' && delim[1] == '\0') { 157 | char c = delim[0]; 158 | const char* p = full.data(); 159 | const char* end = p + full.size(); 160 | while (p != end) { 161 | if (*p == c) { 162 | ++p; 163 | } else { 164 | const char* start = p; 165 | while (++p != end && *p != c); 166 | *result++ = string(start, p - start); 167 | } 168 | } 169 | return; 170 | } 171 | 172 | string::size_type begin_index, end_index; 173 | begin_index = full.find_first_not_of(delim); 174 | while (begin_index != string::npos) { 175 | end_index = full.find_first_of(delim, begin_index); 176 | if (end_index == string::npos) { 177 | *result++ = full.substr(begin_index); 178 | return; 179 | } 180 | *result++ = full.substr(begin_index, (end_index - begin_index)); 181 | begin_index = full.find_first_not_of(delim, end_index); 182 | } 183 | } 184 | 185 | void SplitStringUsing(const string& full, 186 | const char* delim, 187 | vector* result) { 188 | back_insert_iterator< vector > it(*result); 189 | SplitStringToIteratorUsing(full, delim, it); 190 | } 191 | 192 | // ---------------------------------------------------------------------- 193 | // JoinStrings() 194 | // This merges a vector of string components with delim inserted 195 | // as separaters between components. 196 | // 197 | // ---------------------------------------------------------------------- 198 | template 199 | static void JoinStringsIterator(const ITERATOR& start, 200 | const ITERATOR& end, 201 | const char* delim, 202 | string* result) { 203 | GOOGLE_CHECK(result != NULL); 204 | result->clear(); 205 | int delim_length = strlen(delim); 206 | 207 | // Precompute resulting length so we can reserve() memory in one shot. 208 | int length = 0; 209 | for (ITERATOR iter = start; iter != end; ++iter) { 210 | if (iter != start) { 211 | length += delim_length; 212 | } 213 | length += iter->size(); 214 | } 215 | result->reserve(length); 216 | 217 | // Now combine everything. 
218 | for (ITERATOR iter = start; iter != end; ++iter) { 219 | if (iter != start) { 220 | result->append(delim, delim_length); 221 | } 222 | result->append(iter->data(), iter->size()); 223 | } 224 | } 225 | 226 | void JoinStrings(const vector& components, 227 | const char* delim, 228 | string * result) { 229 | JoinStringsIterator(components.begin(), components.end(), delim, result); 230 | } 231 | 232 | // ---------------------------------------------------------------------- 233 | // UnescapeCEscapeSequences() 234 | // This does all the unescaping that C does: \ooo, \r, \n, etc 235 | // Returns length of resulting string. 236 | // The implementation of \x parses any positive number of hex digits, 237 | // but it is an error if the value requires more than 8 bits, and the 238 | // result is truncated to 8 bits. 239 | // 240 | // The second call stores its errors in a supplied string vector. 241 | // If the string vector pointer is NULL, it reports the errors with LOG(). 242 | // ---------------------------------------------------------------------- 243 | 244 | #define IS_OCTAL_DIGIT(c) (((c) >= '0') && ((c) <= '7')) 245 | 246 | inline int hex_digit_to_int(char c) { 247 | /* Assume ASCII. */ 248 | assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61); 249 | assert(isxdigit(c)); 250 | int x = static_cast(c); 251 | if (x > '9') { 252 | x += 9; 253 | } 254 | return x & 0xf; 255 | } 256 | 257 | // Protocol buffers doesn't ever care about errors, but I don't want to remove 258 | // the code. 
259 | #define LOG_STRING(LEVEL, VECTOR) GOOGLE_LOG_IF(LEVEL, false) 260 | 261 | int UnescapeCEscapeSequences(const char* source, char* dest) { 262 | return UnescapeCEscapeSequences(source, dest, NULL); 263 | } 264 | 265 | int UnescapeCEscapeSequences(const char* source, char* dest, 266 | vector *errors) { 267 | GOOGLE_DCHECK(errors == NULL) << "Error reporting not implemented."; 268 | 269 | char* d = dest; 270 | const char* p = source; 271 | 272 | // Small optimization for case where source = dest and there's no escaping 273 | while ( p == d && *p != '\0' && *p != '\\' ) 274 | p++, d++; 275 | 276 | while (*p != '\0') { 277 | if (*p != '\\') { 278 | *d++ = *p++; 279 | } else { 280 | switch ( *++p ) { // skip past the '\\' 281 | case '\0': 282 | LOG_STRING(ERROR, errors) << "String cannot end with \\"; 283 | *d = '\0'; 284 | return d - dest; // we're done with p 285 | case 'a': *d++ = '\a'; break; 286 | case 'b': *d++ = '\b'; break; 287 | case 'f': *d++ = '\f'; break; 288 | case 'n': *d++ = '\n'; break; 289 | case 'r': *d++ = '\r'; break; 290 | case 't': *d++ = '\t'; break; 291 | case 'v': *d++ = '\v'; break; 292 | case '\\': *d++ = '\\'; break; 293 | case '?': *d++ = '\?'; break; // \? Who knew? 
294 | case '\'': *d++ = '\''; break; 295 | case '"': *d++ = '\"'; break; 296 | case '0': case '1': case '2': case '3': // octal digit: 1 to 3 digits 297 | case '4': case '5': case '6': case '7': { 298 | char ch = *p - '0'; 299 | if ( IS_OCTAL_DIGIT(p[1]) ) 300 | ch = ch * 8 + *++p - '0'; 301 | if ( IS_OCTAL_DIGIT(p[1]) ) // safe (and easy) to do this twice 302 | ch = ch * 8 + *++p - '0'; // now points at last digit 303 | *d++ = ch; 304 | break; 305 | } 306 | case 'x': case 'X': { 307 | if (!isxdigit(p[1])) { 308 | if (p[1] == '\0') { 309 | LOG_STRING(ERROR, errors) << "String cannot end with \\x"; 310 | } else { 311 | LOG_STRING(ERROR, errors) << 312 | "\\x cannot be followed by non-hex digit: \\" << *p << p[1]; 313 | } 314 | break; 315 | } 316 | unsigned int ch = 0; 317 | const char *hex_start = p; 318 | while (isxdigit(p[1])) // arbitrarily many hex digits 319 | ch = (ch << 4) + hex_digit_to_int(*++p); 320 | if (ch > 0xFF) 321 | LOG_STRING(ERROR, errors) << "Value of " << 322 | "\\" << string(hex_start, p+1-hex_start) << " exceeds 8 bits"; 323 | *d++ = ch; 324 | break; 325 | } 326 | #if 0 // TODO(kenton): Support \u and \U? Requires runetochar(). 327 | case 'u': { 328 | // \uhhhh => convert 4 hex digits to UTF-8 329 | char32 rune = 0; 330 | const char *hex_start = p; 331 | for (int i = 0; i < 4; ++i) { 332 | if (isxdigit(p[1])) { // Look one char ahead. 333 | rune = (rune << 4) + hex_digit_to_int(*++p); // Advance p. 334 | } else { 335 | LOG_STRING(ERROR, errors) 336 | << "\\u must be followed by 4 hex digits: \\" 337 | << string(hex_start, p+1-hex_start); 338 | break; 339 | } 340 | } 341 | d += runetochar(d, &rune); 342 | break; 343 | } 344 | case 'U': { 345 | // \Uhhhhhhhh => convert 8 hex digits to UTF-8 346 | char32 rune = 0; 347 | const char *hex_start = p; 348 | for (int i = 0; i < 8; ++i) { 349 | if (isxdigit(p[1])) { // Look one char ahead. 350 | // Don't change rune until we're sure this 351 | // is within the Unicode limit, but do advance p. 
352 | char32 newrune = (rune << 4) + hex_digit_to_int(*++p); 353 | if (newrune > 0x10FFFF) { 354 | LOG_STRING(ERROR, errors) 355 | << "Value of \\" 356 | << string(hex_start, p + 1 - hex_start) 357 | << " exceeds Unicode limit (0x10FFFF)"; 358 | break; 359 | } else { 360 | rune = newrune; 361 | } 362 | } else { 363 | LOG_STRING(ERROR, errors) 364 | << "\\U must be followed by 8 hex digits: \\" 365 | << string(hex_start, p+1-hex_start); 366 | break; 367 | } 368 | } 369 | d += runetochar(d, &rune); 370 | break; 371 | } 372 | #endif 373 | default: 374 | LOG_STRING(ERROR, errors) << "Unknown escape sequence: \\" << *p; 375 | } 376 | p++; // read past letter we escaped 377 | } 378 | } 379 | *d = '\0'; 380 | return d - dest; 381 | } 382 | 383 | // ---------------------------------------------------------------------- 384 | // UnescapeCEscapeString() 385 | // This does the same thing as UnescapeCEscapeSequences, but creates 386 | // a new string. The caller does not need to worry about allocating 387 | // a dest buffer. This should be used for non performance critical 388 | // tasks such as printing debug messages. It is safe for src and dest 389 | // to be the same. 390 | // 391 | // The second call stores its errors in a supplied string vector. 392 | // If the string vector pointer is NULL, it reports the errors with LOG(). 393 | // 394 | // In the first and second calls, the length of dest is returned. In the 395 | // the third call, the new string is returned. 
396 | // ---------------------------------------------------------------------- 397 | int UnescapeCEscapeString(const string& src, string* dest) { 398 | return UnescapeCEscapeString(src, dest, NULL); 399 | } 400 | 401 | int UnescapeCEscapeString(const string& src, string* dest, 402 | vector *errors) { 403 | scoped_array unescaped(new char[src.size() + 1]); 404 | int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), errors); 405 | GOOGLE_CHECK(dest); 406 | dest->assign(unescaped.get(), len); 407 | return len; 408 | } 409 | 410 | string UnescapeCEscapeString(const string& src) { 411 | scoped_array unescaped(new char[src.size() + 1]); 412 | int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), NULL); 413 | return string(unescaped.get(), len); 414 | } 415 | 416 | // ---------------------------------------------------------------------- 417 | // CEscapeString() 418 | // CHexEscapeString() 419 | // Copies 'src' to 'dest', escaping dangerous characters using 420 | // C-style escape sequences. This is very useful for preparing query 421 | // flags. 'src' and 'dest' should not overlap. The 'Hex' version uses 422 | // hexadecimal rather than octal sequences. 423 | // Returns the number of bytes written to 'dest' (not including the \0) 424 | // or -1 if there was insufficient space. 425 | // 426 | // Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped. 
// ----------------------------------------------------------------------

// Escapes the 'src_len' bytes at 'src' into 'dest' (capacity 'dest_len'
// bytes, including the terminating NUL).
//
//   use_hex    emit numeric escapes as \xNN instead of \NNN (octal).
//   utf8_safe  copy bytes >= 0x80 through unchanged instead of escaping them.
//
// Returns the number of bytes written, not counting the terminating NUL, or
// -1 if 'dest' is too small.  On -1 the contents of 'dest' are unspecified
// and not NUL-terminated, but no byte at or beyond dest[dest_len] is ever
// written.
int CEscapeInternal(const char* src, int src_len, char* dest,
                    int dest_len, bool use_hex, bool utf8_safe) {
  static const char kHexDigits[] = "0123456789abcdef";
  const char* src_end = src + src_len;
  int used = 0;
  bool last_hex_escape = false;  // true if last output char was \xNN

  for (; src < src_end; src++) {
    if (dest_len - used < 2)   // Need space for two letter escape
      return -1;

    bool is_hex_escape = false;
    switch (*src) {
      case '\n': dest[used++] = '\\'; dest[used++] = 'n';  break;
      case '\r': dest[used++] = '\\'; dest[used++] = 'r';  break;
      case '\t': dest[used++] = '\\'; dest[used++] = 't';  break;
      case '\"': dest[used++] = '\\'; dest[used++] = '\"'; break;
      case '\'': dest[used++] = '\\'; dest[used++] = '\''; break;
      case '\\': dest[used++] = '\\'; dest[used++] = '\\'; break;
      default: {
        // Work on the byte as an unsigned value: this gives the right numeric
        // escape for bytes >= 0x80 and keeps the character-class calls away
        // from negative arguments.
        const unsigned char byte = static_cast<unsigned char>(*src);
        // Note that if we emit \xNN and the src character after that is a hex
        // digit then that digit must be escaped too to prevent it being
        // interpreted as part of the character code by C.
        if ((!utf8_safe || byte < 0x80) &&
            (!isprint(byte) ||
             (last_hex_escape && isxdigit(byte)))) {
          if (dest_len - used < 4)  // need space for 4 letter escape
            return -1;
          // Emit the four characters directly.  sprintf() would also write a
          // terminating NUL, i.e. a fifth byte, which overruns 'dest' when
          // exactly four bytes remain.
          dest[used++] = '\\';
          if (use_hex) {
            dest[used++] = 'x';
            dest[used++] = kHexDigits[byte >> 4];
            dest[used++] = kHexDigits[byte & 0xf];
          } else {
            dest[used++] = static_cast<char>('0' + (byte >> 6));
            dest[used++] = static_cast<char>('0' + ((byte >> 3) & 7));
            dest[used++] = static_cast<char>('0' + (byte & 7));
          }
          is_hex_escape = use_hex;
        } else {
          dest[used++] = *src;
        }
        break;
      }
    }
    last_hex_escape = is_hex_escape;
  }

  if (dest_len - used < 1)   // make sure that there is room for \0
    return -1;

  dest[used] = '\0';   // doesn't count towards return value though
  return used;
}

// Octal-escaping entry point: CEscapeInternal() with use_hex and utf8_safe
// both false.
int CEscapeString(const char* src, int src_len, char* dest, int dest_len) {
  return CEscapeInternal(src, src_len, dest, dest_len, false, false);
}

// ----------------------------------------------------------------------
// CEscape()
// CHexEscape()
//    Copies 'src' to result, escaping dangerous characters using
//    C-style escape sequences. This is very useful for preparing query
//    flags.  'src' and 'dest' should not overlap. The 'Hex' version uses
//    hexadecimal rather than octal sequences.
//
//    Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped.
486 | // ---------------------------------------------------------------------- 487 | string CEscape(const string& src) { 488 | const int dest_length = src.size() * 4 + 1; // Maximum possible expansion 489 | scoped_array dest(new char[dest_length]); 490 | const int len = CEscapeInternal(src.data(), src.size(), 491 | dest.get(), dest_length, false, false); 492 | GOOGLE_DCHECK_GE(len, 0); 493 | return string(dest.get(), len); 494 | } 495 | 496 | namespace strings { 497 | 498 | string Utf8SafeCEscape(const string& src) { 499 | const int dest_length = src.size() * 4 + 1; // Maximum possible expansion 500 | scoped_array dest(new char[dest_length]); 501 | const int len = CEscapeInternal(src.data(), src.size(), 502 | dest.get(), dest_length, false, true); 503 | GOOGLE_DCHECK_GE(len, 0); 504 | return string(dest.get(), len); 505 | } 506 | 507 | string CHexEscape(const string& src) { 508 | const int dest_length = src.size() * 4 + 1; // Maximum possible expansion 509 | scoped_array dest(new char[dest_length]); 510 | const int len = CEscapeInternal(src.data(), src.size(), 511 | dest.get(), dest_length, true, false); 512 | GOOGLE_DCHECK_GE(len, 0); 513 | return string(dest.get(), len); 514 | } 515 | 516 | } // namespace strings 517 | 518 | // ---------------------------------------------------------------------- 519 | // strto32_adaptor() 520 | // strtou32_adaptor() 521 | // Implementation of strto[u]l replacements that have identical 522 | // overflow and underflow characteristics for both ILP-32 and LP-64 523 | // platforms, including errno preservation in error-free calls. 
524 | // ---------------------------------------------------------------------- 525 | 526 | int32 strto32_adaptor(const char *nptr, char **endptr, int base) { 527 | const int saved_errno = errno; 528 | errno = 0; 529 | const long result = strtol(nptr, endptr, base); 530 | if (errno == ERANGE && result == LONG_MIN) { 531 | return kint32min; 532 | } else if (errno == ERANGE && result == LONG_MAX) { 533 | return kint32max; 534 | } else if (errno == 0 && result < kint32min) { 535 | errno = ERANGE; 536 | return kint32min; 537 | } else if (errno == 0 && result > kint32max) { 538 | errno = ERANGE; 539 | return kint32max; 540 | } 541 | if (errno == 0) 542 | errno = saved_errno; 543 | return static_cast(result); 544 | } 545 | 546 | uint32 strtou32_adaptor(const char *nptr, char **endptr, int base) { 547 | const int saved_errno = errno; 548 | errno = 0; 549 | const unsigned long result = strtoul(nptr, endptr, base); 550 | if (errno == ERANGE && result == ULONG_MAX) { 551 | return kuint32max; 552 | } else if (errno == 0 && result > kuint32max) { 553 | errno = ERANGE; 554 | return kuint32max; 555 | } 556 | if (errno == 0) 557 | errno = saved_errno; 558 | return static_cast(result); 559 | } 560 | 561 | // ---------------------------------------------------------------------- 562 | // FastIntToBuffer() 563 | // FastInt64ToBuffer() 564 | // FastHexToBuffer() 565 | // FastHex64ToBuffer() 566 | // FastHex32ToBuffer() 567 | // ---------------------------------------------------------------------- 568 | 569 | // Offset into buffer where FastInt64ToBuffer places the end of string 570 | // null character. Also used by FastInt64ToBufferLeft. 571 | static const int kFastInt64ToBufferOffset = 21; 572 | 573 | char *FastInt64ToBuffer(int64 i, char* buffer) { 574 | // We could collapse the positive and negative sections, but that 575 | // would be slightly slower for positive numbers... 576 | // 22 bytes is enough to store -2**64, -18446744073709551616. 
577 | char* p = buffer + kFastInt64ToBufferOffset; 578 | *p-- = '\0'; 579 | if (i >= 0) { 580 | do { 581 | *p-- = '0' + i % 10; 582 | i /= 10; 583 | } while (i > 0); 584 | return p + 1; 585 | } else { 586 | // On different platforms, % and / have different behaviors for 587 | // negative numbers, so we need to jump through hoops to make sure 588 | // we don't divide negative numbers. 589 | if (i > -10) { 590 | i = -i; 591 | *p-- = '0' + i; 592 | *p = '-'; 593 | return p; 594 | } else { 595 | // Make sure we aren't at MIN_INT, in which case we can't say i = -i 596 | i = i + 10; 597 | i = -i; 598 | *p-- = '0' + i % 10; 599 | // Undo what we did a moment ago 600 | i = i / 10 + 1; 601 | do { 602 | *p-- = '0' + i % 10; 603 | i /= 10; 604 | } while (i > 0); 605 | *p = '-'; 606 | return p; 607 | } 608 | } 609 | } 610 | 611 | // Offset into buffer where FastInt32ToBuffer places the end of string 612 | // null character. Also used by FastInt32ToBufferLeft 613 | static const int kFastInt32ToBufferOffset = 11; 614 | 615 | // Yes, this is a duplicate of FastInt64ToBuffer. But, we need this for the 616 | // compiler to generate 32 bit arithmetic instructions. It's much faster, at 617 | // least with 32 bit binaries. 618 | char *FastInt32ToBuffer(int32 i, char* buffer) { 619 | // We could collapse the positive and negative sections, but that 620 | // would be slightly slower for positive numbers... 621 | // 12 bytes is enough to store -2**32, -4294967296. 622 | char* p = buffer + kFastInt32ToBufferOffset; 623 | *p-- = '\0'; 624 | if (i >= 0) { 625 | do { 626 | *p-- = '0' + i % 10; 627 | i /= 10; 628 | } while (i > 0); 629 | return p + 1; 630 | } else { 631 | // On different platforms, % and / have different behaviors for 632 | // negative numbers, so we need to jump through hoops to make sure 633 | // we don't divide negative numbers. 
634 | if (i > -10) { 635 | i = -i; 636 | *p-- = '0' + i; 637 | *p = '-'; 638 | return p; 639 | } else { 640 | // Make sure we aren't at MIN_INT, in which case we can't say i = -i 641 | i = i + 10; 642 | i = -i; 643 | *p-- = '0' + i % 10; 644 | // Undo what we did a moment ago 645 | i = i / 10 + 1; 646 | do { 647 | *p-- = '0' + i % 10; 648 | i /= 10; 649 | } while (i > 0); 650 | *p = '-'; 651 | return p; 652 | } 653 | } 654 | } 655 | 656 | char *FastHexToBuffer(int i, char* buffer) { 657 | GOOGLE_CHECK(i >= 0) << "FastHexToBuffer() wants non-negative integers, not " << i; 658 | 659 | static const char *hexdigits = "0123456789abcdef"; 660 | char *p = buffer + 21; 661 | *p-- = '\0'; 662 | do { 663 | *p-- = hexdigits[i & 15]; // mod by 16 664 | i >>= 4; // divide by 16 665 | } while (i > 0); 666 | return p + 1; 667 | } 668 | 669 | char *InternalFastHexToBuffer(uint64 value, char* buffer, int num_byte) { 670 | static const char *hexdigits = "0123456789abcdef"; 671 | buffer[num_byte] = '\0'; 672 | for (int i = num_byte - 1; i >= 0; i--) { 673 | buffer[i] = hexdigits[uint32(value) & 0xf]; 674 | value >>= 4; 675 | } 676 | return buffer; 677 | } 678 | 679 | char *FastHex64ToBuffer(uint64 value, char* buffer) { 680 | return InternalFastHexToBuffer(value, buffer, 16); 681 | } 682 | 683 | char *FastHex32ToBuffer(uint32 value, char* buffer) { 684 | return InternalFastHexToBuffer(value, buffer, 8); 685 | } 686 | 687 | static inline char* PlaceNum(char* p, int num, char prev_sep) { 688 | *p-- = '0' + num % 10; 689 | *p-- = '0' + num / 10; 690 | *p-- = prev_sep; 691 | return p; 692 | } 693 | 694 | // ---------------------------------------------------------------------- 695 | // FastInt32ToBufferLeft() 696 | // FastUInt32ToBufferLeft() 697 | // FastInt64ToBufferLeft() 698 | // FastUInt64ToBufferLeft() 699 | // 700 | // Like the Fast*ToBuffer() functions above, these are intended for speed. 
701 | // Unlike the Fast*ToBuffer() functions, however, these functions write 702 | // their output to the beginning of the buffer (hence the name, as the 703 | // output is left-aligned). The caller is responsible for ensuring that 704 | // the buffer has enough space to hold the output. 705 | // 706 | // Returns a pointer to the end of the string (i.e. the null character 707 | // terminating the string). 708 | // ---------------------------------------------------------------------- 709 | 710 | static const char two_ASCII_digits[100][2] = { 711 | {'0','0'}, {'0','1'}, {'0','2'}, {'0','3'}, {'0','4'}, 712 | {'0','5'}, {'0','6'}, {'0','7'}, {'0','8'}, {'0','9'}, 713 | {'1','0'}, {'1','1'}, {'1','2'}, {'1','3'}, {'1','4'}, 714 | {'1','5'}, {'1','6'}, {'1','7'}, {'1','8'}, {'1','9'}, 715 | {'2','0'}, {'2','1'}, {'2','2'}, {'2','3'}, {'2','4'}, 716 | {'2','5'}, {'2','6'}, {'2','7'}, {'2','8'}, {'2','9'}, 717 | {'3','0'}, {'3','1'}, {'3','2'}, {'3','3'}, {'3','4'}, 718 | {'3','5'}, {'3','6'}, {'3','7'}, {'3','8'}, {'3','9'}, 719 | {'4','0'}, {'4','1'}, {'4','2'}, {'4','3'}, {'4','4'}, 720 | {'4','5'}, {'4','6'}, {'4','7'}, {'4','8'}, {'4','9'}, 721 | {'5','0'}, {'5','1'}, {'5','2'}, {'5','3'}, {'5','4'}, 722 | {'5','5'}, {'5','6'}, {'5','7'}, {'5','8'}, {'5','9'}, 723 | {'6','0'}, {'6','1'}, {'6','2'}, {'6','3'}, {'6','4'}, 724 | {'6','5'}, {'6','6'}, {'6','7'}, {'6','8'}, {'6','9'}, 725 | {'7','0'}, {'7','1'}, {'7','2'}, {'7','3'}, {'7','4'}, 726 | {'7','5'}, {'7','6'}, {'7','7'}, {'7','8'}, {'7','9'}, 727 | {'8','0'}, {'8','1'}, {'8','2'}, {'8','3'}, {'8','4'}, 728 | {'8','5'}, {'8','6'}, {'8','7'}, {'8','8'}, {'8','9'}, 729 | {'9','0'}, {'9','1'}, {'9','2'}, {'9','3'}, {'9','4'}, 730 | {'9','5'}, {'9','6'}, {'9','7'}, {'9','8'}, {'9','9'} 731 | }; 732 | 733 | char* FastUInt32ToBufferLeft(uint32 u, char* buffer) { 734 | int digits; 735 | const char *ASCII_digits = NULL; 736 | // The idea of this implementation is to trim the number of divides to as few 737 | // as 
possible by using multiplication and subtraction rather than mod (%), 738 | // and by outputting two digits at a time rather than one. 739 | // The huge-number case is first, in the hopes that the compiler will output 740 | // that case in one branch-free block of code, and only output conditional 741 | // branches into it from below. 742 | if (u >= 1000000000) { // >= 1,000,000,000 743 | digits = u / 100000000; // 100,000,000 744 | ASCII_digits = two_ASCII_digits[digits]; 745 | buffer[0] = ASCII_digits[0]; 746 | buffer[1] = ASCII_digits[1]; 747 | buffer += 2; 748 | sublt100_000_000: 749 | u -= digits * 100000000; // 100,000,000 750 | lt100_000_000: 751 | digits = u / 1000000; // 1,000,000 752 | ASCII_digits = two_ASCII_digits[digits]; 753 | buffer[0] = ASCII_digits[0]; 754 | buffer[1] = ASCII_digits[1]; 755 | buffer += 2; 756 | sublt1_000_000: 757 | u -= digits * 1000000; // 1,000,000 758 | lt1_000_000: 759 | digits = u / 10000; // 10,000 760 | ASCII_digits = two_ASCII_digits[digits]; 761 | buffer[0] = ASCII_digits[0]; 762 | buffer[1] = ASCII_digits[1]; 763 | buffer += 2; 764 | sublt10_000: 765 | u -= digits * 10000; // 10,000 766 | lt10_000: 767 | digits = u / 100; 768 | ASCII_digits = two_ASCII_digits[digits]; 769 | buffer[0] = ASCII_digits[0]; 770 | buffer[1] = ASCII_digits[1]; 771 | buffer += 2; 772 | sublt100: 773 | u -= digits * 100; 774 | lt100: 775 | digits = u; 776 | ASCII_digits = two_ASCII_digits[digits]; 777 | buffer[0] = ASCII_digits[0]; 778 | buffer[1] = ASCII_digits[1]; 779 | buffer += 2; 780 | done: 781 | *buffer = 0; 782 | return buffer; 783 | } 784 | 785 | if (u < 100) { 786 | digits = u; 787 | if (u >= 10) goto lt100; 788 | *buffer++ = '0' + digits; 789 | goto done; 790 | } 791 | if (u < 10000) { // 10,000 792 | if (u >= 1000) goto lt10_000; 793 | digits = u / 100; 794 | *buffer++ = '0' + digits; 795 | goto sublt100; 796 | } 797 | if (u < 1000000) { // 1,000,000 798 | if (u >= 100000) goto lt1_000_000; 799 | digits = u / 10000; // 10,000 800 | 
*buffer++ = '0' + digits; 801 | goto sublt10_000; 802 | } 803 | if (u < 100000000) { // 100,000,000 804 | if (u >= 10000000) goto lt100_000_000; 805 | digits = u / 1000000; // 1,000,000 806 | *buffer++ = '0' + digits; 807 | goto sublt1_000_000; 808 | } 809 | // we already know that u < 1,000,000,000 810 | digits = u / 100000000; // 100,000,000 811 | *buffer++ = '0' + digits; 812 | goto sublt100_000_000; 813 | } 814 | 815 | char* FastInt32ToBufferLeft(int32 i, char* buffer) { 816 | uint32 u = i; 817 | if (i < 0) { 818 | *buffer++ = '-'; 819 | u = -i; 820 | } 821 | return FastUInt32ToBufferLeft(u, buffer); 822 | } 823 | 824 | char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) { 825 | int digits; 826 | const char *ASCII_digits = NULL; 827 | 828 | uint32 u = static_cast(u64); 829 | if (u == u64) return FastUInt32ToBufferLeft(u, buffer); 830 | 831 | uint64 top_11_digits = u64 / 1000000000; 832 | buffer = FastUInt64ToBufferLeft(top_11_digits, buffer); 833 | u = u64 - (top_11_digits * 1000000000); 834 | 835 | digits = u / 10000000; // 10,000,000 836 | GOOGLE_DCHECK_LT(digits, 100); 837 | ASCII_digits = two_ASCII_digits[digits]; 838 | buffer[0] = ASCII_digits[0]; 839 | buffer[1] = ASCII_digits[1]; 840 | buffer += 2; 841 | u -= digits * 10000000; // 10,000,000 842 | digits = u / 100000; // 100,000 843 | ASCII_digits = two_ASCII_digits[digits]; 844 | buffer[0] = ASCII_digits[0]; 845 | buffer[1] = ASCII_digits[1]; 846 | buffer += 2; 847 | u -= digits * 100000; // 100,000 848 | digits = u / 1000; // 1,000 849 | ASCII_digits = two_ASCII_digits[digits]; 850 | buffer[0] = ASCII_digits[0]; 851 | buffer[1] = ASCII_digits[1]; 852 | buffer += 2; 853 | u -= digits * 1000; // 1,000 854 | digits = u / 10; 855 | ASCII_digits = two_ASCII_digits[digits]; 856 | buffer[0] = ASCII_digits[0]; 857 | buffer[1] = ASCII_digits[1]; 858 | buffer += 2; 859 | u -= digits * 10; 860 | digits = u; 861 | *buffer++ = '0' + digits; 862 | *buffer = 0; 863 | return buffer; 864 | } 865 | 866 | char* 
FastInt64ToBufferLeft(int64 i, char* buffer) { 867 | uint64 u = i; 868 | if (i < 0) { 869 | *buffer++ = '-'; 870 | u = -i; 871 | } 872 | return FastUInt64ToBufferLeft(u, buffer); 873 | } 874 | 875 | // ---------------------------------------------------------------------- 876 | // SimpleItoa() 877 | // Description: converts an integer to a string. 878 | // 879 | // Return value: string 880 | // ---------------------------------------------------------------------- 881 | 882 | string SimpleItoa(int i) { 883 | char buffer[kFastToBufferSize]; 884 | return (sizeof(i) == 4) ? 885 | FastInt32ToBuffer(i, buffer) : 886 | FastInt64ToBuffer(i, buffer); 887 | } 888 | 889 | string SimpleItoa(unsigned int i) { 890 | char buffer[kFastToBufferSize]; 891 | return string(buffer, (sizeof(i) == 4) ? 892 | FastUInt32ToBufferLeft(i, buffer) : 893 | FastUInt64ToBufferLeft(i, buffer)); 894 | } 895 | 896 | string SimpleItoa(long i) { 897 | char buffer[kFastToBufferSize]; 898 | return (sizeof(i) == 4) ? 899 | FastInt32ToBuffer(i, buffer) : 900 | FastInt64ToBuffer(i, buffer); 901 | } 902 | 903 | string SimpleItoa(unsigned long i) { 904 | char buffer[kFastToBufferSize]; 905 | return string(buffer, (sizeof(i) == 4) ? 906 | FastUInt32ToBufferLeft(i, buffer) : 907 | FastUInt64ToBufferLeft(i, buffer)); 908 | } 909 | 910 | string SimpleItoa(long long i) { 911 | char buffer[kFastToBufferSize]; 912 | return (sizeof(i) == 4) ? 913 | FastInt32ToBuffer(i, buffer) : 914 | FastInt64ToBuffer(i, buffer); 915 | } 916 | 917 | string SimpleItoa(unsigned long long i) { 918 | char buffer[kFastToBufferSize]; 919 | return string(buffer, (sizeof(i) == 4) ? 
920 | FastUInt32ToBufferLeft(i, buffer) : 921 | FastUInt64ToBufferLeft(i, buffer)); 922 | } 923 | 924 | // ---------------------------------------------------------------------- 925 | // SimpleDtoa() 926 | // SimpleFtoa() 927 | // DoubleToBuffer() 928 | // FloatToBuffer() 929 | // We want to print the value without losing precision, but we also do 930 | // not want to print more digits than necessary. This turns out to be 931 | // trickier than it sounds. Numbers like 0.2 cannot be represented 932 | // exactly in binary. If we print 0.2 with a very large precision, 933 | // e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167". 934 | // On the other hand, if we set the precision too low, we lose 935 | // significant digits when printing numbers that actually need them. 936 | // It turns out there is no precision value that does the right thing 937 | // for all numbers. 938 | // 939 | // Our strategy is to first try printing with a precision that is never 940 | // over-precise, then parse the result with strtod() to see if it 941 | // matches. If not, we print again with a precision that will always 942 | // give a precise result, but may use more digits than necessary. 943 | // 944 | // An arguably better strategy would be to use the algorithm described 945 | // in "How to Print Floating-Point Numbers Accurately" by Steele & 946 | // White, e.g. as implemented by David M. Gay's dtoa(). It turns out, 947 | // however, that the following implementation is about as fast as 948 | // DMG's code. Furthermore, DMG's code locks mutexes, which means it 949 | // will not scale well on multi-core machines. DMG's code is slightly 950 | // more accurate (in that it will never use more digits than 951 | // necessary), but this is probably irrelevant for most users. 952 | // 953 | // Rob Pike and Ken Thompson also have an implementation of dtoa() in 954 | // third_party/fmt/fltfmt.cc. 
Their implementation is similar to this 955 | // one in that it makes guesses and then uses strtod() to check them. 956 | // Their implementation is faster because they use their own code to 957 | // generate the digits in the first place rather than use snprintf(), 958 | // thus avoiding format string parsing overhead. However, this makes 959 | // it considerably more complicated than the following implementation, 960 | // and it is embedded in a larger library. If speed turns out to be 961 | // an issue, we could re-implement this in terms of their 962 | // implementation. 963 | // ---------------------------------------------------------------------- 964 | 965 | string SimpleDtoa(double value) { 966 | char buffer[kDoubleToBufferSize]; 967 | return DoubleToBuffer(value, buffer); 968 | } 969 | 970 | string SimpleFtoa(float value) { 971 | char buffer[kFloatToBufferSize]; 972 | return FloatToBuffer(value, buffer); 973 | } 974 | 975 | static inline bool IsValidFloatChar(char c) { 976 | return ('0' <= c && c <= '9') || 977 | c == 'e' || c == 'E' || 978 | c == '+' || c == '-'; 979 | } 980 | 981 | void DelocalizeRadix(char* buffer) { 982 | // Fast check: if the buffer has a normal decimal point, assume no 983 | // translation is needed. 984 | if (strchr(buffer, '.') != NULL) return; 985 | 986 | // Find the first unknown character. 987 | while (IsValidFloatChar(*buffer)) ++buffer; 988 | 989 | if (*buffer == '\0') { 990 | // No radix character found. 991 | return; 992 | } 993 | 994 | // We are now pointing at the locale-specific radix character. Replace it 995 | // with '.'. 996 | *buffer = '.'; 997 | ++buffer; 998 | 999 | if (!IsValidFloatChar(*buffer) && *buffer != '\0') { 1000 | // It appears the radix was a multi-byte character. We need to remove the 1001 | // extra bytes. 
1002 | char* target = buffer; 1003 | do { ++buffer; } while (!IsValidFloatChar(*buffer) && *buffer != '\0'); 1004 | memmove(target, buffer, strlen(buffer) + 1); 1005 | } 1006 | } 1007 | 1008 | char* DoubleToBuffer(double value, char* buffer) { 1009 | // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all 1010 | // platforms these days. Just in case some system exists where DBL_DIG 1011 | // is significantly larger -- and risks overflowing our buffer -- we have 1012 | // this assert. 1013 | GOOGLE_COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big); 1014 | 1015 | if (value == numeric_limits::infinity()) { 1016 | strcpy(buffer, "inf"); 1017 | return buffer; 1018 | } else if (value == -numeric_limits::infinity()) { 1019 | strcpy(buffer, "-inf"); 1020 | return buffer; 1021 | } else if (IsNaN(value)) { 1022 | strcpy(buffer, "nan"); 1023 | return buffer; 1024 | } 1025 | 1026 | int snprintf_result = 1027 | snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG, value); 1028 | 1029 | // The snprintf should never overflow because the buffer is significantly 1030 | // larger than the precision we asked for. 1031 | GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize); 1032 | 1033 | // We need to make parsed_value volatile in order to force the compiler to 1034 | // write it out to the stack. Otherwise, it may keep the value in a 1035 | // register, and if it does that, it may keep it as a long double instead 1036 | // of a double. This long double may have extra bits that make it compare 1037 | // unequal to "value" even though it would be exactly equal if it were 1038 | // truncated to a double. 1039 | volatile double parsed_value = strtod(buffer, NULL); 1040 | if (parsed_value != value) { 1041 | int snprintf_result = 1042 | snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG+2, value); 1043 | 1044 | // Should never overflow; see above. 
1045 | GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize); 1046 | } 1047 | 1048 | DelocalizeRadix(buffer); 1049 | return buffer; 1050 | } 1051 | 1052 | bool safe_strtof(const char* str, float* value) { 1053 | char* endptr; 1054 | errno = 0; // errno only gets set on errors 1055 | #if defined(_WIN32) || defined (__hpux) // has no strtof() 1056 | *value = strtod(str, &endptr); 1057 | #else 1058 | *value = strtof(str, &endptr); 1059 | #endif 1060 | return *str != 0 && *endptr == 0 && errno == 0; 1061 | } 1062 | 1063 | char* FloatToBuffer(float value, char* buffer) { 1064 | // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all 1065 | // platforms these days. Just in case some system exists where FLT_DIG 1066 | // is significantly larger -- and risks overflowing our buffer -- we have 1067 | // this assert. 1068 | GOOGLE_COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big); 1069 | 1070 | if (value == numeric_limits::infinity()) { 1071 | strcpy(buffer, "inf"); 1072 | return buffer; 1073 | } else if (value == -numeric_limits::infinity()) { 1074 | strcpy(buffer, "-inf"); 1075 | return buffer; 1076 | } else if (IsNaN(value)) { 1077 | strcpy(buffer, "nan"); 1078 | return buffer; 1079 | } 1080 | 1081 | int snprintf_result = 1082 | snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG, value); 1083 | 1084 | // The snprintf should never overflow because the buffer is significantly 1085 | // larger than the precision we asked for. 1086 | GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize); 1087 | 1088 | float parsed_value; 1089 | if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) { 1090 | int snprintf_result = 1091 | snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG+2, value); 1092 | 1093 | // Should never overflow; see above. 
1094 | GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize); 1095 | } 1096 | 1097 | DelocalizeRadix(buffer); 1098 | return buffer; 1099 | } 1100 | 1101 | // ---------------------------------------------------------------------- 1102 | // NoLocaleStrtod() 1103 | // This code will make you cry. 1104 | // ---------------------------------------------------------------------- 1105 | 1106 | // Returns a string identical to *input except that the character pointed to 1107 | // by radix_pos (which should be '.') is replaced with the locale-specific 1108 | // radix character. 1109 | string LocalizeRadix(const char* input, const char* radix_pos) { 1110 | // Determine the locale-specific radix character by calling sprintf() to 1111 | // print the number 1.5, then stripping off the digits. As far as I can 1112 | // tell, this is the only portable, thread-safe way to get the C library 1113 | // to divuldge the locale's radix character. No, localeconv() is NOT 1114 | // thread-safe. 1115 | char temp[16]; 1116 | int size = sprintf(temp, "%.1f", 1.5); 1117 | GOOGLE_CHECK_EQ(temp[0], '1'); 1118 | GOOGLE_CHECK_EQ(temp[size-1], '5'); 1119 | GOOGLE_CHECK_LE(size, 6); 1120 | 1121 | // Now replace the '.' in the input with it. 1122 | string result; 1123 | result.reserve(strlen(input) + size - 3); 1124 | result.append(input, radix_pos); 1125 | result.append(temp + 1, size - 2); 1126 | result.append(radix_pos + 1); 1127 | return result; 1128 | } 1129 | 1130 | double NoLocaleStrtod(const char* text, char** original_endptr) { 1131 | // We cannot simply set the locale to "C" temporarily with setlocale() 1132 | // as this is not thread-safe. Instead, we try to parse in the current 1133 | // locale first. If parsing stops at a '.' character, then this is a 1134 | // pretty good hint that we're actually in some other locale in which 1135 | // '.' is not the radix character. 
1136 | 1137 | char* temp_endptr; 1138 | double result = strtod(text, &temp_endptr); 1139 | if (original_endptr != NULL) *original_endptr = temp_endptr; 1140 | if (*temp_endptr != '.') return result; 1141 | 1142 | // Parsing halted on a '.'. Perhaps we're in a different locale? Let's 1143 | // try to replace the '.' with a locale-specific radix character and 1144 | // try again. 1145 | string localized = LocalizeRadix(text, temp_endptr); 1146 | const char* localized_cstr = localized.c_str(); 1147 | char* localized_endptr; 1148 | result = strtod(localized_cstr, &localized_endptr); 1149 | if ((localized_endptr - localized_cstr) > 1150 | (temp_endptr - text)) { 1151 | // This attempt got further, so replacing the decimal must have helped. 1152 | // Update original_endptr to point at the right location. 1153 | if (original_endptr != NULL) { 1154 | // size_diff is non-zero if the localized radix has multiple bytes. 1155 | int size_diff = localized.size() - strlen(text); 1156 | // const_cast is necessary to match the strtod() interface. 1157 | *original_endptr = const_cast( 1158 | text + (localized_endptr - localized_cstr - size_diff)); 1159 | } 1160 | } 1161 | 1162 | return result; 1163 | } 1164 | 1165 | } // namespace protobuf 1166 | } // namespace google 1167 | --------------------------------------------------------------------------------