├── .gitignore ├── WinHttpClient.sln └── WinHttpClient ├── RegExp.cpp ├── RegExp.h ├── StringProcess.cpp ├── StringProcess.h ├── WinHttpClient.cpp ├── WinHttpClient.h ├── WinHttpClient.vcxproj └── atlrx.h /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.suo 8 | *.user 9 | *.userosscache 10 | *.sln.docstates 11 | *.vcxproj.filters 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Build results 17 | [Dd]ebug/ 18 | [Dd]ebugPublic/ 19 | [Rr]elease/ 20 | [Rr]eleases/ 21 | x64/ 22 | x86/ 23 | bld/ 24 | [Bb]in/ 25 | [Oo]bj/ 26 | [Ll]og/ 27 | 28 | # Visual Studio 2015 cache/options directory 29 | .vs/ 30 | # Uncomment if you have tasks that create the project's static files in wwwroot 31 | #wwwroot/ 32 | 33 | # MSTest test Results 34 | [Tt]est[Rr]esult*/ 35 | [Bb]uild[Ll]og.* 36 | 37 | # NUNIT 38 | *.VisualState.xml 39 | TestResult.xml 40 | 41 | # Build Results of an ATL Project 42 | [Dd]ebugPS/ 43 | [Rr]eleasePS/ 44 | dlldata.c 45 | 46 | # DNX 47 | project.lock.json 48 | project.fragment.lock.json 49 | artifacts/ 50 | Properties/launchSettings.json 51 | 52 | *_i.c 53 | *_p.c 54 | *_i.h 55 | *.ilk 56 | *.meta 57 | *.obj 58 | *.pch 59 | *.pdb 60 | *.pgc 61 | *.pgd 62 | *.rsp 63 | *.sbr 64 | *.tlb 65 | *.tli 66 | *.tlh 67 | *.tmp 68 | *.tmp_proj 69 | *.log 70 | *.vspscc 71 | *.vssscc 72 | .builds 73 | *.pidb 74 | *.svclog 75 | *.scc 76 | 77 | # Chutzpah Test files 78 | _Chutzpah* 79 | 80 | # Visual C++ cache files 81 | ipch/ 82 | *.aps 83 | *.ncb 84 | *.opendb 85 | *.opensdf 86 | *.sdf 87 | *.cachefile 88 | *.VC.db 89 | *.VC.VC.opendb 90 | 91 | # Visual Studio profiler 92 | *.psess 93 | *.vsp 94 | *.vspx 95 | *.sap 96 | 97 | # TFS 2012 
Local Workspace 98 | $tf/ 99 | 100 | # Guidance Automation Toolkit 101 | *.gpState 102 | 103 | # ReSharper is a .NET coding add-in 104 | _ReSharper*/ 105 | *.[Rr]e[Ss]harper 106 | *.DotSettings.user 107 | 108 | # JustCode is a .NET coding add-in 109 | .JustCode 110 | 111 | # TeamCity is a build add-in 112 | _TeamCity* 113 | 114 | # DotCover is a Code Coverage Tool 115 | *.dotCover 116 | 117 | # Visual Studio code coverage results 118 | *.coverage 119 | *.coveragexml 120 | 121 | # NCrunch 122 | _NCrunch_* 123 | .*crunch*.local.xml 124 | nCrunchTemp_* 125 | 126 | # MightyMoose 127 | *.mm.* 128 | AutoTest.Net/ 129 | 130 | # Web workbench (sass) 131 | .sass-cache/ 132 | 133 | # Installshield output folder 134 | [Ee]xpress/ 135 | 136 | # DocProject is a documentation generator add-in 137 | DocProject/buildhelp/ 138 | DocProject/Help/*.HxT 139 | DocProject/Help/*.HxC 140 | DocProject/Help/*.hhc 141 | DocProject/Help/*.hhk 142 | DocProject/Help/*.hhp 143 | DocProject/Help/Html2 144 | DocProject/Help/html 145 | 146 | # Click-Once directory 147 | publish/ 148 | 149 | # Publish Web Output 150 | *.[Pp]ublish.xml 151 | *.azurePubxml 152 | # TODO: Comment the next line if you want to checkin your web deploy settings 153 | # but database connection strings (with potential passwords) will be unencrypted 154 | *.pubxml 155 | *.publishproj 156 | 157 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 158 | # checkin your Azure Web App publish settings, but sensitive information contained 159 | # in these scripts will be unencrypted 160 | PublishScripts/ 161 | 162 | # NuGet Packages 163 | *.nupkg 164 | # The packages folder can be ignored because of Package Restore 165 | **/packages/* 166 | # except build/, which is used as an MSBuild target. 
167 | !**/packages/build/ 168 | # Uncomment if necessary however generally it will be regenerated when needed 169 | #!**/packages/repositories.config 170 | # NuGet v3's project.json files produces more ignoreable files 171 | *.nuget.props 172 | *.nuget.targets 173 | 174 | # Microsoft Azure Build Output 175 | csx/ 176 | *.build.csdef 177 | 178 | # Microsoft Azure Emulator 179 | ecf/ 180 | rcf/ 181 | 182 | # Windows Store app package directories and files 183 | AppPackages/ 184 | BundleArtifacts/ 185 | Package.StoreAssociation.xml 186 | _pkginfo.txt 187 | 188 | # Visual Studio cache files 189 | # files ending in .cache can be ignored 190 | *.[Cc]ache 191 | # but keep track of directories ending in .cache 192 | !*.[Cc]ache/ 193 | 194 | # Others 195 | ClientBin/ 196 | ~$* 197 | *~ 198 | *.dbmdl 199 | *.dbproj.schemaview 200 | *.jfm 201 | *.pfx 202 | *.publishsettings 203 | node_modules/ 204 | orleans.codegen.cs 205 | 206 | # Since there are multiple workflows, uncomment next line to ignore bower_components 207 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 208 | #bower_components/ 209 | 210 | # RIA/Silverlight projects 211 | Generated_Code/ 212 | 213 | # Backup & report files from converting an old project file 214 | # to a newer Visual Studio version. 
Backup files are not needed, 215 | # because we have git ;-) 216 | _UpgradeReport_Files/ 217 | Backup*/ 218 | UpgradeLog*.XML 219 | UpgradeLog*.htm 220 | 221 | # SQL Server files 222 | *.mdf 223 | *.ldf 224 | 225 | # Business Intelligence projects 226 | *.rdl.data 227 | *.bim.layout 228 | *.bim_*.settings 229 | 230 | # Microsoft Fakes 231 | FakesAssemblies/ 232 | 233 | # GhostDoc plugin setting file 234 | *.GhostDoc.xml 235 | 236 | # Node.js Tools for Visual Studio 237 | .ntvs_analysis.dat 238 | 239 | # Visual Studio 6 build log 240 | *.plg 241 | 242 | # Visual Studio 6 workspace options file 243 | *.opt 244 | 245 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 246 | *.vbw 247 | 248 | # Visual Studio LightSwitch build output 249 | **/*.HTMLClient/GeneratedArtifacts 250 | **/*.DesktopClient/GeneratedArtifacts 251 | **/*.DesktopClient/ModelManifest.xml 252 | **/*.Server/GeneratedArtifacts 253 | **/*.Server/ModelManifest.xml 254 | _Pvt_Extensions 255 | 256 | # Paket dependency manager 257 | .paket/paket.exe 258 | paket-files/ 259 | 260 | # FAKE - F# Make 261 | .fake/ 262 | 263 | # JetBrains Rider 264 | .idea/ 265 | *.sln.iml 266 | 267 | # CodeRush 268 | .cr/ 269 | 270 | # Python Tools for Visual Studio (PTVS) 271 | __pycache__/ 272 | *.pyc 273 | 274 | # Cake - Uncomment if you are using it 275 | # tools/ -------------------------------------------------------------------------------- /WinHttpClient.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 14 4 | VisualStudioVersion = 14.0.25420.1 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "WinHttpClient", "WinHttpClient\WinHttpClient.vcxproj", "{13A7A9A1-B911-4470-99B6-2567A024A1F1}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | 
Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {13A7A9A1-B911-4470-99B6-2567A024A1F1}.Debug|x64.ActiveCfg = Debug|x64 17 | {13A7A9A1-B911-4470-99B6-2567A024A1F1}.Debug|x64.Build.0 = Debug|x64 18 | {13A7A9A1-B911-4470-99B6-2567A024A1F1}.Debug|x86.ActiveCfg = Debug|Win32 19 | {13A7A9A1-B911-4470-99B6-2567A024A1F1}.Debug|x86.Build.0 = Debug|Win32 20 | {13A7A9A1-B911-4470-99B6-2567A024A1F1}.Release|x64.ActiveCfg = Release|x64 21 | {13A7A9A1-B911-4470-99B6-2567A024A1F1}.Release|x64.Build.0 = Release|x64 22 | {13A7A9A1-B911-4470-99B6-2567A024A1F1}.Release|x86.ActiveCfg = Release|Win32 23 | {13A7A9A1-B911-4470-99B6-2567A024A1F1}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | EndGlobal 29 | -------------------------------------------------------------------------------- /WinHttpClient/RegExp.cpp: -------------------------------------------------------------------------------- 1 | #include "RegExp.h" 2 | 3 | inline bool ParseRegExp(const wstring ®Exp, bool caseSensitive, int groupCount, const wstring &source, vector &result, bool allowDuplicate) 4 | { 5 | result.clear(); 6 | if (regExp.size() <= 0) 7 | { 8 | return false; 9 | } 10 | if (groupCount <= 0) 11 | { 12 | return false; 13 | } 14 | if (source.size() <= 0) 15 | { 16 | return false; 17 | } 18 | CAtlRegExp<> re; 19 | REParseError error = re.Parse(regExp.c_str(), caseSensitive); 20 | if (error != REPARSE_ERROR_OK) 21 | { 22 | return false; 23 | } 24 | wchar_t *pSource = new wchar_t[source.size() + 1]; 25 | wchar_t *pSourceEnd = pSource + source.size(); 26 | if (pSource == NULL) 27 | { 28 | return false; 29 | } 30 | wcscpy_s(pSource, source.size() + 1, source.c_str()); 31 | BOOL bSucceed = TRUE; 32 | CAtlREMatchContext<> mc; 33 | const wchar_t *pFrom = pSource; 34 | const 
wchar_t *pTo = NULL; 35 | while (bSucceed) 36 | { 37 | bSucceed = re.Match(pFrom, &mc, &pTo); 38 | if (bSucceed) 39 | { 40 | const wchar_t *pStart = NULL; 41 | const wchar_t *pEnd = NULL; 42 | vector tempMatch; 43 | for (int i = 0; i < groupCount; i++) 44 | { 45 | mc.GetMatch(i, &pStart, &pEnd); 46 | if (pStart != NULL && pEnd != NULL) 47 | { 48 | wstring match(pStart, pEnd - pStart); 49 | tempMatch.push_back(match); 50 | } 51 | else 52 | { 53 | break; 54 | } 55 | } 56 | bool bAdd = true; 57 | if (!allowDuplicate) 58 | { 59 | // Check whether this match already exists in the vector. 60 | for (vector::iterator it = result.begin(); it != result.end();) 61 | { 62 | bool bEqual = true; 63 | for (vector::iterator tempMatchIt = tempMatch.begin(); tempMatchIt != tempMatch.end(); tempMatchIt++, it++) 64 | { 65 | bool bGroupEqual = true; 66 | if (caseSensitive) 67 | { 68 | bGroupEqual = (wcscmp(it->c_str(), tempMatchIt->c_str()) == 0); 69 | } 70 | else 71 | { 72 | bGroupEqual = (_wcsicmp(it->c_str(), tempMatchIt->c_str()) == 0); 73 | } 74 | if (!bGroupEqual) 75 | { 76 | bEqual = false; 77 | } 78 | } 79 | if (bEqual) 80 | { 81 | bAdd = false; 82 | break; 83 | } 84 | } 85 | } 86 | if (bAdd) 87 | { 88 | for (vector::iterator tempMatchIt = tempMatch.begin(); tempMatchIt != tempMatch.end(); tempMatchIt++) 89 | { 90 | result.push_back(*tempMatchIt); 91 | } 92 | } 93 | if (pTo < pSourceEnd) 94 | { 95 | pFrom = pTo; 96 | } 97 | else 98 | { 99 | break; 100 | } 101 | } 102 | else 103 | { 104 | break; 105 | } 106 | } 107 | 108 | delete[] pSource; 109 | 110 | return true; 111 | } -------------------------------------------------------------------------------- /WinHttpClient/RegExp.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2008-2009 Cheng Shi. All rights reserved. 
3 | * Email: shicheng107@hotmail.com 4 | */ 5 | 6 | #ifndef REGEXP_H 7 | #define REGEXP_H 8 | 9 | #include 10 | #include 11 | #include 12 | using namespace std; 13 | 14 | #pragma warning(push) 15 | #pragma warning(disable: 6385 6011 4127) 16 | #include "atlrx.h" 17 | #pragma warning(pop) 18 | 19 | /* 20 | * Parameters 21 | * [in] regExp: Value of type string which is the input regular expression. 22 | * [in] caseSensitive: Value of type bool which indicates whether the parse is case sensitive. 23 | * [in] groupCount: Value of type int which is the group count of the regular expression. 24 | * [in] source: Value of type string reference which is the source to parse. 25 | * [out] result: Value of type vector of strings which is the output of the parse. 26 | * [in] allowDuplicate: Value of type bool which indicates whether duplicate items are added to the output result. 27 | * 28 | * Return Value 29 | * Returns true if the function succeeds, or false otherwise. 30 | * 31 | * Remarks 32 | * The output result is divided into groups. User should get the groups according to the group count. For example: 33 | * 1. RegExp = L"{ab}", source = L"abcabe", then result = L"ab", L"ab". 34 | * 2. RegExp = L"{ab}{cd}", source = L"abcdeabecd", then result = L"ab", L"cd", L"ab", L"cd". 
35 | */ 36 | inline bool ParseRegExp(const wstring ®Exp, bool caseSensitive, int groupCount, const wstring &source, vector &result, bool allowDuplicate = false); 37 | 38 | #endif // REGEXP_H 39 | -------------------------------------------------------------------------------- /WinHttpClient/StringProcess.cpp: -------------------------------------------------------------------------------- 1 | #include "StringProcess.h" 2 | 3 | inline wstring Trim(const wstring &source, const wstring &targets) 4 | { 5 | wstring::size_type start = 0; 6 | wstring::size_type end = 0; 7 | for (start = 0; start < source.size(); start++) 8 | { 9 | bool bIsTarget = false; 10 | for (wstring::size_type i = 0; i < targets.size(); i++) 11 | { 12 | if (source[start] == targets[i]) 13 | { 14 | bIsTarget = true; 15 | break; 16 | } 17 | } 18 | if (!bIsTarget) 19 | { 20 | break; 21 | } 22 | } 23 | for (end = source.size() - 1; (int)end >= 0; end--) 24 | { 25 | bool bIsTarget = false; 26 | for (wstring::size_type i = 0; i < targets.size(); i++) 27 | { 28 | if (source[end] == targets[i]) 29 | { 30 | bIsTarget = true; 31 | break; 32 | } 33 | } 34 | if (!bIsTarget) 35 | { 36 | break; 37 | } 38 | } 39 | wstring result = L""; 40 | if (end >= start && start < source.size() && end >= 0) 41 | { 42 | result = source.substr(start, end - start + 1); 43 | } 44 | 45 | return result; 46 | } 47 | 48 | inline bool PrepareString(wchar_t *dest, size_t *size, const wstring &src) 49 | { 50 | if (dest == NULL) 51 | { 52 | if (size != NULL) 53 | { 54 | *size = src.size(); 55 | } 56 | return false; 57 | } 58 | else 59 | { 60 | if (size != NULL) 61 | { 62 | wcsncpy_s(dest, *size, src.c_str(), _TRUNCATE); 63 | if (*size <= src.size()) 64 | { 65 | ::SetLastError(ERROR_INSUFFICIENT_BUFFER); 66 | return false; 67 | } 68 | } 69 | } 70 | return true; 71 | } 72 | 73 | inline wstring ReplaceString(const wstring &srcStr, const wstring &oldStr, const wstring &newStr) 74 | { 75 | if (srcStr.size() <= 0 || oldStr.size() <= 0) 76 | { 
77 | return srcStr; 78 | } 79 | wstring strReturn = srcStr; 80 | wstring::size_type offset = 0; 81 | wstring::size_type start = strReturn.find(oldStr); 82 | while (start != wstring::npos) 83 | { 84 | offset = start + newStr.size(); 85 | strReturn.replace(start, oldStr.size(), newStr); 86 | start = strReturn.find(oldStr, offset); 87 | } 88 | 89 | return strReturn; 90 | } 91 | 92 | inline int StringToInteger(const wstring &number) 93 | { 94 | if (number.size() <= 0) 95 | { 96 | return 0; 97 | } 98 | wstring num = ReplaceString(number, L",", L""); 99 | num = ReplaceString(num, L" ", L""); 100 | 101 | return _wtoi(num.c_str()); 102 | } 103 | 104 | inline wstring LowerString(const wstring &text) 105 | { 106 | if (text.size() <= 0) 107 | { 108 | return L""; 109 | } 110 | unsigned int iLength = text.size() + 1; 111 | wchar_t *pTemp = new wchar_t[iLength]; 112 | if (pTemp == NULL) 113 | { 114 | return L""; 115 | } 116 | wcscpy_s(pTemp, iLength, text.c_str()); 117 | _wcslwr_s(pTemp, iLength); 118 | wstring retStr = pTemp; 119 | delete[] pTemp; 120 | 121 | return retStr; 122 | } 123 | 124 | inline wstring UpperString(const wstring &text) 125 | { 126 | if (text.size() <= 0) 127 | { 128 | return L""; 129 | } 130 | unsigned int iLength = text.size() + 1; 131 | wchar_t *pTemp = new wchar_t[iLength]; 132 | if (pTemp == NULL) 133 | { 134 | return L""; 135 | } 136 | wcscpy_s(pTemp, iLength, text.c_str()); 137 | _wcsupr_s(pTemp, iLength); 138 | wstring retStr = pTemp; 139 | delete[] pTemp; 140 | 141 | return retStr; 142 | } 143 | 144 | inline wstring GetAnchorText(const wstring &anchor) 145 | { 146 | wstring regExp = L"[ \t\r\n]*{.*?}[ \t\r\n]*"; 147 | vector result; 148 | if (ParseRegExp(regExp, false, 1, anchor, result) && result.size() == 1) 149 | { 150 | wstring text = result[0]; 151 | return text; 152 | } 153 | 154 | return L""; 155 | } 156 | 157 | inline wstring GetAnchorLink(const wstring &anchor) 158 | { 159 | wstring regExp = L".*?"; 160 | vector result; 161 | if 
(ParseRegExp(regExp, false, 1, anchor, result) && result.size() == 1) 162 | { 163 | wstring link = result[0]; 164 | return link; 165 | } 166 | 167 | return L""; 168 | } 169 | 170 | inline bool SeparateString(const wstring &content, const wstring &delimiter, vector &result) 171 | { 172 | if (content.size() <= 0 || delimiter.size() <= 0) 173 | { 174 | return false; 175 | } 176 | 177 | result.clear(); 178 | wstring::size_type start = 0; 179 | wstring::size_type index = 0; 180 | index = content.find(delimiter, start); 181 | while (index != wstring::npos) 182 | { 183 | wstring::size_type size = index - start; 184 | if (size > 0) 185 | { 186 | wstring temp = content.substr(start, size); 187 | if (temp.size() > 0) 188 | { 189 | result.push_back(temp); 190 | } 191 | } 192 | start += size + delimiter.size(); 193 | index = content.find(delimiter, start); 194 | } 195 | if (content.find(delimiter) != wstring::npos) 196 | { 197 | wstring last = content.substr(start); 198 | if (last.size() > 0) 199 | { 200 | result.push_back(last); 201 | } 202 | } 203 | else 204 | { 205 | false; 206 | } 207 | 208 | return true; 209 | } 210 | 211 | inline wstring URLEncoding(const wstring &keyword, bool convertToUTF8) 212 | { 213 | int iLength = 0; 214 | char *szKeyword = NULL; 215 | 216 | if (convertToUTF8) 217 | { 218 | iLength = ::WideCharToMultiByte(CP_UTF8, 219 | 0, 220 | keyword.c_str(), 221 | keyword.length(), 222 | NULL, 223 | 0, 224 | NULL, 225 | NULL); 226 | if (iLength <= 0) 227 | { 228 | return L""; 229 | } 230 | 231 | szKeyword = new char[iLength]; 232 | if (szKeyword == NULL) 233 | { 234 | return L""; 235 | } 236 | iLength = ::WideCharToMultiByte(CP_UTF8, 237 | 0, 238 | keyword.c_str(), 239 | keyword.length(), 240 | szKeyword, 241 | iLength, 242 | NULL, 243 | NULL); 244 | } 245 | else 246 | { 247 | string strKeyword = (char *)(_bstr_t)keyword.c_str(); 248 | iLength = (int)strKeyword.length(); 249 | szKeyword = new char[strKeyword.length() + 1]; 250 | strcpy_s(szKeyword, 
strKeyword.length() + 1, strKeyword.c_str()); 251 | } 252 | 253 | wstring encodedKeyword = L""; 254 | string strEncodedKeyword = ""; 255 | for (int i = 0; i < iLength; i++) 256 | { 257 | unsigned char c = (unsigned char)szKeyword[i]; 258 | char temp[MAX_PATH] = ""; 259 | sprintf_s(temp, MAX_PATH, "%%%2X", c); 260 | if (temp[1] == ' ') 261 | { 262 | temp[1] = '0'; 263 | } 264 | strEncodedKeyword += temp; 265 | } 266 | if (szKeyword != NULL) 267 | { 268 | delete[] szKeyword; 269 | } 270 | encodedKeyword = (wchar_t *)(_bstr_t)strEncodedKeyword.c_str(); 271 | encodedKeyword = ReplaceString(encodedKeyword, L" ", L"+"); 272 | 273 | return encodedKeyword; 274 | } 275 | // Returns a 0-100 score of how much of keyword is found in source, ignoring case; keywords of at most 3 characters score 100 when found and 0 otherwise. 276 | inline unsigned int GetSeparateKeywordMatchGrade(const wstring &source, const wstring &keyword) 277 | { 278 | if (source.length() <= 0 || keyword.length() <= 0) 279 | { 280 | return 0; 281 | } 282 | 283 | wstring lowerSource = LowerString(source); 284 | wstring lowerKeyword = LowerString(keyword); 285 | 286 | unsigned int grade = 0; 287 | if (lowerKeyword.length() <= 3) 288 | { 289 | if (lowerSource.find(lowerKeyword) != wstring::npos) 290 | { 291 | grade = 100; 292 | } 293 | else 294 | { 295 | grade = 0; 296 | } 297 | } 298 | else 299 | { 300 | unsigned int matchLength = 0; 301 | unsigned int index = 0; 302 | while (index < lowerKeyword.length()) 303 | { 304 | unsigned int compareLength = lowerKeyword.length() - index; 305 | while (compareLength > 0 && index < lowerKeyword.length()) 306 | { 307 | wstring subKeyword = lowerKeyword.substr(index, compareLength); 308 | if (lowerSource.find(subKeyword) != wstring::npos) 309 | { 310 | matchLength += (unsigned int)subKeyword.length(); // Count the characters actually matched: substr() clamps at the end of the keyword, so compareLength may exceed them. 311 | index += (unsigned int)subKeyword.length(); 312 | } 313 | else 314 | { 315 | compareLength--; 316 | } 317 | } 318 | index++; 319 | } 320 | grade = matchLength * 100 / lowerKeyword.length(); 321 | } 322 | 323 | return grade; 324 | } 325 | 326 | inline unsigned int GetKeywordMatchGrade(const wstring &source, const wstring & keyword) 327 | { 328 | if
(source.length() <= 0 || keyword.length() <= 0) 329 | { 330 | return 0; 331 | } 332 | 333 | unsigned int grade = 0; 334 | wstring src = source; 335 | while (src.find(L"\t") != wstring::npos) 336 | { 337 | src = ReplaceString(src, L"\t", L" "); 338 | } 339 | while (src.find(L" ") != wstring::npos) 340 | { 341 | src = ReplaceString(src, L" ", L" "); 342 | } 343 | vector results; 344 | if (SeparateString(keyword, L" ", results) && results.size() > 0) 345 | { 346 | unsigned int keywordTotalLength = 0; 347 | for (vector::size_type index = 0; index < results.size(); index++) 348 | { 349 | keywordTotalLength += results[index].length(); 350 | } 351 | for (vector::size_type index = 0; index < results.size(); index++) 352 | { 353 | grade += GetSeparateKeywordMatchGrade(src, results[index]) * results[index].length() / keywordTotalLength; 354 | } 355 | } 356 | else 357 | { 358 | grade = GetSeparateKeywordMatchGrade(src, keyword); 359 | } 360 | 361 | return grade; 362 | } 363 | 364 | inline wstring GetDateString(const COleDateTime &time, const wstring &separator, bool align) 365 | { 366 | wstring date = L""; 367 | wchar_t szTemp[MAX_PATH] = L""; 368 | 369 | swprintf_s(szTemp, MAX_PATH, L"%d", time.GetYear()); 370 | date += szTemp; 371 | date += separator; 372 | 373 | memset(szTemp, 0, sizeof(wchar_t) * MAX_PATH); 374 | swprintf_s(szTemp, MAX_PATH, L"%d", time.GetMonth()); 375 | if (time.GetMonth() < 10 && align) 376 | { 377 | date += L"0"; 378 | } 379 | date += szTemp; 380 | date += separator; 381 | 382 | memset(szTemp, 0, sizeof(wchar_t) * MAX_PATH); 383 | swprintf_s(szTemp, MAX_PATH, L"%d", time.GetDay()); 384 | if (time.GetDay() < 10 && align) 385 | { 386 | date += L"0"; 387 | } 388 | date += szTemp; 389 | 390 | return date; 391 | } 392 | 393 | inline wstring GetDateString(int dayOffset, const wstring &separator, bool align) 394 | { 395 | COleDateTime time = COleDateTime::GetCurrentTime(); 396 | time += COleDateTimeSpan(dayOffset, 0, 0, 0); 397 | 398 | return 
GetDateString(time, separator, align); 399 | } 400 | 401 | inline wstring GetTimeString(const COleDateTime &time, const wstring &separator, bool align) 402 | { 403 | wstring date = L""; 404 | wchar_t szTemp[MAX_PATH] = L""; 405 | 406 | swprintf_s(szTemp, MAX_PATH, L"%d", time.GetHour()); 407 | date += szTemp; 408 | date += separator; 409 | 410 | memset(szTemp, 0, sizeof(wchar_t) * MAX_PATH); 411 | swprintf_s(szTemp, MAX_PATH, L"%d", time.GetMinute()); 412 | if (time.GetMinute() < 10 && align) 413 | { 414 | date += L"0"; 415 | } 416 | date += szTemp; 417 | date += separator; 418 | 419 | memset(szTemp, 0, sizeof(wchar_t) * MAX_PATH); 420 | swprintf_s(szTemp, MAX_PATH, L"%d", time.GetSecond()); 421 | if (time.GetSecond() < 10 && align) 422 | { 423 | date += L"0"; 424 | } 425 | date += szTemp; 426 | 427 | return date; 428 | } 429 | // Returns the MD5 digest of text (narrowed through _bstr_t) as 32 lowercase hex digits, or an empty string when text is empty or a CryptoAPI call fails. 430 | inline wstring MD5(const wstring &text) 431 | { 432 | if (text.size() <= 0) 433 | { 434 | return L""; 435 | } 436 | string asciiText = (char *)(_bstr_t)text.c_str(); 437 | wstring encrypted = L""; 438 | HCRYPTPROV hCryptProv = NULL; 439 | if (::CryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT | CRYPT_MACHINE_KEYSET)) 440 | { 441 | HCRYPTHASH hHash = NULL; 442 | if (::CryptCreateHash(hCryptProv, CALG_MD5, 0, 0, &hHash)) 443 | { 444 | if (::CryptHashData(hHash, (BYTE *)asciiText.c_str(), asciiText.size(), 0)) 445 | { 446 | BYTE result[16]; 447 | DWORD dwSize = 16; 448 | wchar_t temp[3] = L""; 449 | if (::CryptGetHashParam(hHash, HP_HASHVAL, result, &dwSize, 0)) 450 | { 451 | for (unsigned int i = 0; i < 16; i++) 452 | { 453 | memset(temp, 0, 6); 454 | swprintf(temp, 3, L"%02x", result[i]); 455 | encrypted += temp; 456 | } 457 | } 458 | } 459 | ::CryptDestroyHash(hHash); 460 | } 461 | ::CryptReleaseContext(hCryptProv, 0); // Release the provider even when CryptCreateHash fails, otherwise the handle leaks. 462 | } 463 | 464 | return encrypted; 465 | } 466 | 467 | inline wstring FilterFileName(const wstring &name) 468 | { 469 | if (name.size() <= 0) 470 | { 471 | return L""; 472 | } 473 | 474
| wstring filteredName = name; 475 | filteredName = ReplaceString(filteredName, L"/", L"_"); 476 | filteredName = ReplaceString(filteredName, L"\\", L"_"); 477 | filteredName = ReplaceString(filteredName, L":", L"_"); 478 | filteredName = ReplaceString(filteredName, L"*", L"_"); 479 | filteredName = ReplaceString(filteredName, L"?", L"_"); 480 | filteredName = ReplaceString(filteredName, L"\"", L"_"); 481 | filteredName = ReplaceString(filteredName, L"<", L"_"); 482 | filteredName = ReplaceString(filteredName, L">", L"_"); 483 | filteredName = ReplaceString(filteredName, L"|", L"_"); 484 | 485 | return filteredName; 486 | } 487 | 488 | inline wstring GetMagic(unsigned int length) 489 | { 490 | srand(::GetTickCount()); 491 | if (length <= 0) 492 | { 493 | return L""; 494 | } 495 | 496 | wstring margic = L""; 497 | for (unsigned int i = 0; i < length; i++) 498 | { 499 | wchar_t szMargic[50] = L""; 500 | swprintf_s(szMargic, 50, L"%c", rand() % 26 + L'a'); 501 | margic += szMargic; 502 | } 503 | 504 | return margic; 505 | } 506 | // Returns the lowercased URL with every http:// occurrence removed, cut off at the first '/' (or, when there is none, the first backslash); the whole remainder if neither exists. 507 | inline wstring GetHost(const wstring &url) 508 | { 509 | if (url.size() <= 0) 510 | { 511 | return L""; 512 | } 513 | 514 | wstring urlWidthoutHttp = ReplaceString(LowerString(url), L"http://", L""); 515 | 516 | wstring::size_type index = urlWidthoutHttp.find(L"/"); // size_type, not unsigned int: a truncated npos never compares equal to wstring::npos on 64-bit builds. 517 | if (index == wstring::npos) 518 | { 519 | index = urlWidthoutHttp.find(L"\\"); 520 | } 521 | if (index == wstring::npos) 522 | { 523 | return urlWidthoutHttp; 524 | } 525 | 526 | return urlWidthoutHttp.substr(0, index); 527 | } 528 | 529 | inline wstring GetValidFileName(const wstring &fileName) 530 | { 531 | if (fileName.size() == 0) 532 | { 533 | return L""; 534 | } 535 | wstring tempFileName = fileName; 536 | tempFileName = ReplaceString(tempFileName, L"\\", L"_"); 537 | tempFileName = ReplaceString(tempFileName, L"/", L"_"); 538 | tempFileName = ReplaceString(tempFileName, L":", L"_"); 539 | tempFileName = ReplaceString(tempFileName, L"*", L"_"); 540 | tempFileName =
ReplaceString(tempFileName, L"?", L"_"); 541 | tempFileName = ReplaceString(tempFileName, L"\"", L"_"); 542 | tempFileName = ReplaceString(tempFileName, L"<", L"_"); 543 | tempFileName = ReplaceString(tempFileName, L">", L"_"); 544 | tempFileName = ReplaceString(tempFileName, L"|", L"_"); 545 | tempFileName = ReplaceString(tempFileName, L"\r", L"_"); 546 | tempFileName = ReplaceString(tempFileName, L"\n", L"_"); 547 | tempFileName = ReplaceString(tempFileName, L"%", L"_"); 548 | 549 | return tempFileName; 550 | } -------------------------------------------------------------------------------- /WinHttpClient/StringProcess.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2008-2009 Cheng Shi. All rights reserved. 3 | * Email: shicheng107@hotmail.com 4 | */ 5 | 6 | #ifndef STRINGPROCESS_H 7 | #define STRINGPROCESS_H 8 | 9 | #include "RegExp.h" 10 | #include 11 | #include 12 | #include 13 | #include 14 | #pragma warning(push) 15 | #pragma warning(disable: 4127) 16 | #include 17 | #pragma warning(pop) 18 | using namespace std; 19 | 20 | inline wstring Trim(const wstring &source, const wstring &targets); 21 | 22 | inline bool PrepareString(wchar_t *dest, size_t *size, const wstring &src); 23 | 24 | inline wstring ReplaceString(const wstring &srcStr, const wstring &oldStr, const wstring &newStr); 25 | 26 | inline int StringToInteger(const wstring &number); 27 | 28 | inline wstring LowerString(const wstring &text); 29 | 30 | inline wstring UpperString(const wstring &text); 31 | 32 | inline wstring GetAnchorText(const wstring &anchor); 33 | 34 | inline wstring GetAnchorLink(const wstring &anchor); 35 | 36 | inline bool SeparateString(const wstring &content, const wstring &delimiter, vector &result); 37 | 38 | inline wstring URLEncoding(const wstring &keyword, bool convertToUTF8 = true); 39 | 40 | inline unsigned int GetSeparateKeywordMatchGrade(const wstring &source, const wstring &keyword); 41 | 42 | inline unsigned 
int GetKeywordMatchGrade(const wstring &source, const wstring & keyword); 43 | 44 | inline wstring GetDateString(const COleDateTime &time, const wstring &separator = L"-", bool align = true); 45 | 46 | inline wstring GetDateString(int dayOffset, const wstring &separator = L"-", bool align = true); 47 | 48 | inline wstring GetTimeString(const COleDateTime &time, const wstring &separator = L":", bool align = true); 49 | 50 | inline wstring MD5(const wstring &text); 51 | 52 | inline wstring FilterFileName(const wstring &name); 53 | 54 | inline wstring GetMagic(unsigned int length); 55 | 56 | inline wstring GetHost(const wstring &url); 57 | 58 | inline wstring GetValidFileName(const wstring &fileName); 59 | 60 | #endif // STRINGPROCESS_H 61 | -------------------------------------------------------------------------------- /WinHttpClient/WinHttpClient.cpp: -------------------------------------------------------------------------------- 1 | #include "WinHttpClient.h" 2 | 3 | WinHttpClient::WinHttpClient(const wstring &url, PROGRESSPROC progressProc) 4 | : m_requestURL(url), 5 | m_sessionHandle(NULL), 6 | m_requireValidSsl(false), 7 | m_responseHeader(L""), 8 | m_responseContent(L""), 9 | m_responseCharset(L""), 10 | m_requestHost(L""), 11 | m_pResponse(NULL), 12 | m_responseByteCountReceived(0), 13 | m_pfProcessProc(progressProc), 14 | m_responseByteCount(0), 15 | m_responseCookies(L""), 16 | m_additionalRequestCookies(L""), 17 | m_pDataToSend(NULL), 18 | m_dataToSendSize(0), 19 | m_proxy(L""), 20 | m_dwLastError(0), 21 | m_statusCode(L""), 22 | m_userAgent(SZ_AGENT), 23 | m_bForceCharset(false), 24 | m_proxyUsername(L""), 25 | m_proxyPassword(L""), 26 | m_location(L""), 27 | m_resolveTimeout(0), 28 | m_connectTimeout(60000), 29 | m_sendTimeout(30000), 30 | m_receiveTimeout(30000) 31 | { 32 | } 33 | 34 | WinHttpClient::~WinHttpClient(void) 35 | { 36 | if (m_pResponse != NULL) 37 | { 38 | delete[] m_pResponse; 39 | } 40 | if (m_pDataToSend != NULL) 41 | { 42 | delete[] 
m_pDataToSend; 43 | } 44 | 45 | if (m_sessionHandle != NULL) 46 | { 47 | ::WinHttpCloseHandle(m_sessionHandle); 48 | } 49 | } 50 | 51 | bool WinHttpClient::SendHttpRequest(const wstring &httpVerb, bool disableAutoRedirect) 52 | { 53 | if (m_requestURL.size() <= 0) 54 | { 55 | m_dwLastError = ERROR_PATH_NOT_FOUND; 56 | return false; 57 | } 58 | // Make verb uppercase. 59 | wstring verb = httpVerb; 60 | if (_wcsicmp(verb.c_str(), L"GET") == 0) 61 | { 62 | verb = L"GET"; 63 | } 64 | else if (_wcsicmp(verb.c_str(), L"POST") == 0) 65 | { 66 | verb = L"POST"; 67 | } 68 | else 69 | { 70 | m_dwLastError = ERROR_INVALID_PARAMETER; 71 | return false; 72 | } 73 | bool bRetVal = true; 74 | 75 | if (m_sessionHandle == NULL) 76 | { 77 | m_sessionHandle = ::WinHttpOpen(m_userAgent.c_str(), 78 | WINHTTP_ACCESS_TYPE_DEFAULT_PROXY, 79 | WINHTTP_NO_PROXY_NAME, 80 | WINHTTP_NO_PROXY_BYPASS, 81 | 0); 82 | if (m_sessionHandle == NULL) 83 | { 84 | m_dwLastError = ::GetLastError(); 85 | return false; 86 | } 87 | } 88 | 89 | ::WinHttpSetTimeouts(m_sessionHandle, 90 | m_resolveTimeout, 91 | m_connectTimeout, 92 | m_sendTimeout, 93 | m_receiveTimeout); 94 | 95 | wchar_t szHostName[MAX_PATH] = L""; 96 | wchar_t szURLPath[MAX_PATH * 5] = L""; 97 | URL_COMPONENTS urlComp; 98 | memset(&urlComp, 0, sizeof(urlComp)); 99 | urlComp.dwStructSize = sizeof(urlComp); 100 | urlComp.lpszHostName = szHostName; 101 | urlComp.dwHostNameLength = MAX_PATH; 102 | urlComp.lpszUrlPath = szURLPath; 103 | urlComp.dwUrlPathLength = MAX_PATH * 5; 104 | urlComp.dwSchemeLength = 1; // None zero 105 | 106 | if (::WinHttpCrackUrl(m_requestURL.c_str(), m_requestURL.size(), 0, &urlComp)) 107 | { 108 | m_requestHost = szHostName; 109 | HINTERNET hConnect = NULL; 110 | hConnect = ::WinHttpConnect(m_sessionHandle, szHostName, urlComp.nPort, 0); 111 | if (hConnect != NULL) 112 | { 113 | DWORD dwOpenRequestFlag = (urlComp.nScheme == INTERNET_SCHEME_HTTPS) ? 
WINHTTP_FLAG_SECURE : 0; 114 | HINTERNET hRequest = NULL; 115 | hRequest = ::WinHttpOpenRequest(hConnect, 116 | verb.c_str(), 117 | urlComp.lpszUrlPath, 118 | NULL, 119 | WINHTTP_NO_REFERER, 120 | WINHTTP_DEFAULT_ACCEPT_TYPES, 121 | dwOpenRequestFlag); 122 | if (hRequest != NULL) 123 | { 124 | // If HTTPS, then client is very susceptable to invalid certificates 125 | // Easiest to accept anything for now 126 | if (!m_requireValidSsl && urlComp.nScheme == INTERNET_SCHEME_HTTPS) 127 | { 128 | DWORD options = SECURITY_FLAG_IGNORE_CERT_CN_INVALID 129 | | SECURITY_FLAG_IGNORE_CERT_DATE_INVALID 130 | | SECURITY_FLAG_IGNORE_UNKNOWN_CA; 131 | ::WinHttpSetOption(hRequest, 132 | WINHTTP_OPTION_SECURITY_FLAGS, 133 | (LPVOID)&options, 134 | sizeof(DWORD)); 135 | } 136 | 137 | bool bGetReponseSucceed = false; 138 | unsigned int iRetryTimes = 0; 139 | 140 | // Retry for several times if fails. 141 | while (!bGetReponseSucceed && iRetryTimes++ < INT_RETRYTIMES) 142 | { 143 | if (m_additionalRequestHeaders.size() > 0) 144 | { 145 | if (!::WinHttpAddRequestHeaders(hRequest, m_additionalRequestHeaders.c_str(), m_additionalRequestHeaders.size(), WINHTTP_ADDREQ_FLAG_COALESCE_WITH_SEMICOLON)) 146 | { 147 | m_dwLastError = ::GetLastError(); 148 | } 149 | } 150 | if (m_additionalRequestCookies.size() > 0) 151 | { 152 | wstring cookies = L"Cookie: "; 153 | cookies += m_additionalRequestCookies; 154 | if (!::WinHttpAddRequestHeaders(hRequest, cookies.c_str(), cookies.size(), WINHTTP_ADDREQ_FLAG_COALESCE_WITH_SEMICOLON)) 155 | { 156 | m_dwLastError = ::GetLastError(); 157 | } 158 | } 159 | if (m_proxy.size() > 0) 160 | { 161 | WINHTTP_PROXY_INFO proxyInfo; 162 | memset(&proxyInfo, 0, sizeof(proxyInfo)); 163 | proxyInfo.dwAccessType = WINHTTP_ACCESS_TYPE_NAMED_PROXY; 164 | wchar_t szProxy[MAX_PATH] = L""; 165 | wcscpy_s(szProxy, MAX_PATH, m_proxy.c_str()); 166 | proxyInfo.lpszProxy = szProxy; 167 | 168 | if (!::WinHttpSetOption(hRequest, WINHTTP_OPTION_PROXY, &proxyInfo, sizeof(proxyInfo))) 
169 | { 170 | m_dwLastError = ::GetLastError(); 171 | } 172 | 173 | if (m_proxyUsername.size() > 0) 174 | { 175 | if (!::WinHttpSetOption(hRequest, WINHTTP_OPTION_PROXY_USERNAME, (LPVOID)m_proxyUsername.c_str(), m_proxyUsername.size() * sizeof(wchar_t))) 176 | { 177 | m_dwLastError = ::GetLastError(); 178 | } 179 | if (m_proxyPassword.size() > 0) 180 | { 181 | if (!::WinHttpSetOption(hRequest, WINHTTP_OPTION_PROXY_PASSWORD, (LPVOID)m_proxyPassword.c_str(), m_proxyPassword.size() * sizeof(wchar_t))) 182 | { 183 | m_dwLastError = ::GetLastError(); 184 | } 185 | } 186 | } 187 | } 188 | 189 | if (disableAutoRedirect) 190 | { 191 | DWORD dwDisableFeature = WINHTTP_DISABLE_REDIRECTS; 192 | if (!::WinHttpSetOption(hRequest, WINHTTP_OPTION_DISABLE_FEATURE, &dwDisableFeature, sizeof(dwDisableFeature))) 193 | { 194 | m_dwLastError = ::GetLastError(); 195 | } 196 | } 197 | bool bSendRequestSucceed = false; 198 | if (::WinHttpSendRequest(hRequest, 199 | WINHTTP_NO_ADDITIONAL_HEADERS, 200 | 0, 201 | WINHTTP_NO_REQUEST_DATA, 202 | 0, 203 | 0, 204 | NULL)) 205 | { 206 | bSendRequestSucceed = true; 207 | } 208 | else 209 | { 210 | // Query the proxy information from IE setting and set the proxy if any. 
211 | WINHTTP_CURRENT_USER_IE_PROXY_CONFIG proxyConfig; 212 | memset(&proxyConfig, 0, sizeof(proxyConfig)); 213 | if (::WinHttpGetIEProxyConfigForCurrentUser(&proxyConfig)) 214 | { 215 | if (proxyConfig.lpszAutoConfigUrl != NULL) 216 | { 217 | WINHTTP_AUTOPROXY_OPTIONS autoProxyOptions; 218 | memset(&autoProxyOptions, 0, sizeof(autoProxyOptions)); 219 | autoProxyOptions.dwFlags = WINHTTP_AUTOPROXY_AUTO_DETECT | WINHTTP_AUTOPROXY_CONFIG_URL; 220 | autoProxyOptions.dwAutoDetectFlags = WINHTTP_AUTO_DETECT_TYPE_DHCP; 221 | autoProxyOptions.lpszAutoConfigUrl = proxyConfig.lpszAutoConfigUrl; 222 | autoProxyOptions.fAutoLogonIfChallenged = TRUE; 223 | autoProxyOptions.dwReserved = 0; 224 | autoProxyOptions.lpvReserved = NULL; 225 | 226 | WINHTTP_PROXY_INFO proxyInfo; 227 | memset(&proxyInfo, 0, sizeof(proxyInfo)); 228 | 229 | if (::WinHttpGetProxyForUrl(m_sessionHandle, m_requestURL.c_str(), &autoProxyOptions, &proxyInfo)) 230 | { 231 | if (::WinHttpSetOption(hRequest, WINHTTP_OPTION_PROXY, &proxyInfo, sizeof(proxyInfo))) 232 | { 233 | if (::WinHttpSendRequest(hRequest, 234 | WINHTTP_NO_ADDITIONAL_HEADERS, 235 | 0, 236 | WINHTTP_NO_REQUEST_DATA, 237 | 0, 238 | 0, 239 | NULL)) 240 | { 241 | bSendRequestSucceed = true; 242 | } 243 | } 244 | if (proxyInfo.lpszProxy != NULL) 245 | { 246 | ::GlobalFree(proxyInfo.lpszProxy); 247 | } 248 | if (proxyInfo.lpszProxyBypass != NULL) 249 | { 250 | ::GlobalFree(proxyInfo.lpszProxyBypass); 251 | } 252 | } 253 | else 254 | { 255 | m_dwLastError = ::GetLastError(); 256 | } 257 | } 258 | else if (proxyConfig.lpszProxy != NULL) 259 | { 260 | WINHTTP_PROXY_INFO proxyInfo; 261 | memset(&proxyInfo, 0, sizeof(proxyInfo)); 262 | proxyInfo.dwAccessType = WINHTTP_ACCESS_TYPE_NAMED_PROXY; 263 | wchar_t szProxy[MAX_PATH] = L""; 264 | wcscpy_s(szProxy, MAX_PATH, proxyConfig.lpszProxy); 265 | proxyInfo.lpszProxy = szProxy; 266 | 267 | if (proxyConfig.lpszProxyBypass != NULL) 268 | { 269 | wchar_t szProxyBypass[MAX_PATH] = L""; 270 | 
wcscpy_s(szProxyBypass, MAX_PATH, proxyConfig.lpszProxyBypass); 271 | proxyInfo.lpszProxyBypass = szProxyBypass; 272 | } 273 | 274 | if (!::WinHttpSetOption(hRequest, WINHTTP_OPTION_PROXY, &proxyInfo, sizeof(proxyInfo))) 275 | { 276 | m_dwLastError = ::GetLastError(); 277 | } 278 | } 279 | 280 | if (proxyConfig.lpszAutoConfigUrl != NULL) 281 | { 282 | ::GlobalFree(proxyConfig.lpszAutoConfigUrl); 283 | } 284 | if (proxyConfig.lpszProxy != NULL) 285 | { 286 | ::GlobalFree(proxyConfig.lpszProxy); 287 | } 288 | if (proxyConfig.lpszProxyBypass != NULL) 289 | { 290 | ::GlobalFree(proxyConfig.lpszProxyBypass); 291 | } 292 | } 293 | else 294 | { 295 | m_dwLastError = ::GetLastError(); 296 | } 297 | } 298 | if (bSendRequestSucceed) 299 | { 300 | if (m_pDataToSend != NULL) 301 | { 302 | DWORD dwWritten = 0; 303 | if (!::WinHttpWriteData(hRequest, 304 | m_pDataToSend, 305 | m_dataToSendSize, 306 | &dwWritten)) 307 | { 308 | m_dwLastError = ::GetLastError(); 309 | } 310 | } 311 | if (::WinHttpReceiveResponse(hRequest, NULL)) 312 | { 313 | DWORD dwSize = 0; 314 | BOOL bResult = FALSE; 315 | bResult = ::WinHttpQueryHeaders(hRequest, 316 | WINHTTP_QUERY_RAW_HEADERS_CRLF, 317 | WINHTTP_HEADER_NAME_BY_INDEX, 318 | NULL, 319 | &dwSize, 320 | WINHTTP_NO_HEADER_INDEX); 321 | if (bResult || (!bResult && (::GetLastError() == ERROR_INSUFFICIENT_BUFFER))) 322 | { 323 | wchar_t *szHeader = new wchar_t[dwSize]; 324 | if (szHeader != NULL) 325 | { 326 | memset(szHeader, 0, dwSize * sizeof(wchar_t)); 327 | if (::WinHttpQueryHeaders(hRequest, 328 | WINHTTP_QUERY_RAW_HEADERS_CRLF, 329 | WINHTTP_HEADER_NAME_BY_INDEX, 330 | szHeader, 331 | &dwSize, 332 | WINHTTP_NO_HEADER_INDEX)) 333 | { 334 | m_responseHeader.assign(szHeader); 335 | vector result; 336 | wstring regExp = L""; 337 | if (!m_bForceCharset) 338 | { 339 | regExp = L"charset={[A-Za-z0-9\\-_]+}"; 340 | if (ParseRegExp(regExp, false, 1, m_responseHeader, result) && result.size() > 0) 341 | { 342 | m_responseCharset = result[0]; 343 | } 
344 | } 345 | regExp = L"Content-Length: {[0-9]+}"; 346 | if (ParseRegExp(regExp, false, 1, m_responseHeader, result) && result.size() > 0) 347 | { 348 | m_responseByteCount = (unsigned int)_wtoi(result[0].c_str()); 349 | } 350 | regExp = L"Location: {[0-9]+}"; 351 | if (ParseRegExp(regExp, false, 1, m_responseHeader, result) && result.size() > 0) 352 | { 353 | m_location = result[0]; 354 | } 355 | regExp = L"Set-Cookie:\\b*{.+?}\\n"; 356 | if (ParseRegExp(regExp, false, 1, m_responseHeader, result) && result.size() > 0) 357 | { 358 | for (vector::size_type i = 0; i < result.size(); i++) 359 | { 360 | m_responseCookies += result[i]; 361 | if (i != result.size() - 1) 362 | { 363 | m_responseCookies += L"; "; 364 | } 365 | } 366 | m_responseCookies = Trim(m_responseCookies, L" "); 367 | if (m_responseCookies.size() > 0 && m_responseCookies[m_responseCookies.size() - 1] != L';') 368 | { 369 | m_responseCookies += L";"; 370 | } 371 | } 372 | } 373 | delete[] szHeader; 374 | } 375 | } 376 | 377 | dwSize = 0; 378 | bResult = ::WinHttpQueryHeaders(hRequest, 379 | WINHTTP_QUERY_STATUS_CODE, 380 | WINHTTP_HEADER_NAME_BY_INDEX, 381 | NULL, 382 | &dwSize, 383 | WINHTTP_NO_HEADER_INDEX); 384 | if (bResult || (!bResult && (::GetLastError() == ERROR_INSUFFICIENT_BUFFER))) 385 | { 386 | wchar_t *szStatusCode = new wchar_t[dwSize]; 387 | if (szStatusCode != NULL) 388 | { 389 | memset(szStatusCode, 0, dwSize * sizeof(wchar_t)); 390 | if (::WinHttpQueryHeaders(hRequest, 391 | WINHTTP_QUERY_STATUS_CODE, 392 | WINHTTP_HEADER_NAME_BY_INDEX, 393 | szStatusCode, 394 | &dwSize, 395 | WINHTTP_NO_HEADER_INDEX)) 396 | { 397 | m_statusCode = szStatusCode; 398 | } 399 | delete[] szStatusCode; 400 | } 401 | } 402 | 403 | unsigned int iMaxBufferSize = INT_BUFFERSIZE; 404 | unsigned int iCurrentBufferSize = 0; 405 | if (m_pResponse != NULL) 406 | { 407 | delete[] m_pResponse; 408 | m_pResponse = NULL; 409 | } 410 | m_pResponse = new BYTE[iMaxBufferSize]; 411 | if (m_pResponse == NULL) 412 | { 413 
| bRetVal = false; 414 | break; 415 | } 416 | memset(m_pResponse, 0, iMaxBufferSize); 417 | do 418 | { 419 | dwSize = 0; 420 | if (::WinHttpQueryDataAvailable(hRequest, &dwSize)) 421 | { 422 | SetProgress(iCurrentBufferSize); 423 | BYTE *pResponse = new BYTE[dwSize + 1]; 424 | if (pResponse != NULL) 425 | { 426 | memset(pResponse, 0, (dwSize + 1) * sizeof(BYTE)); 427 | DWORD dwRead = 0; 428 | if (::WinHttpReadData(hRequest, 429 | pResponse, 430 | dwSize, 431 | &dwRead)) 432 | { 433 | if (dwRead + iCurrentBufferSize > iMaxBufferSize) 434 | { 435 | BYTE *pOldBuffer = m_pResponse; 436 | m_pResponse = new BYTE[iMaxBufferSize * 2]; 437 | if (m_pResponse == NULL) 438 | { 439 | m_pResponse = pOldBuffer; 440 | bRetVal = false; 441 | break; 442 | } 443 | iMaxBufferSize *= 2; 444 | memset(m_pResponse, 0, iMaxBufferSize); 445 | memcpy(m_pResponse, pOldBuffer, iCurrentBufferSize); 446 | delete[] pOldBuffer; 447 | } 448 | memcpy(m_pResponse + iCurrentBufferSize, pResponse, dwRead); 449 | iCurrentBufferSize += dwRead; 450 | } 451 | delete[] pResponse; 452 | } 453 | } 454 | else 455 | { 456 | m_dwLastError = ::GetLastError(); 457 | } 458 | } while (dwSize > 0); 459 | SetProgress(iCurrentBufferSize); 460 | m_responseByteCountReceived = iCurrentBufferSize; 461 | 462 | UINT codePage = CP_ACP; 463 | DWORD dwFlag = MB_PRECOMPOSED; 464 | if (_wcsnicmp(m_responseCharset.c_str(), L"utf-8", 5) == 0) 465 | { 466 | codePage = CP_UTF8; 467 | dwFlag = 0; 468 | } 469 | int iLength = ::MultiByteToWideChar(codePage, 470 | dwFlag, 471 | (LPCSTR)m_pResponse, 472 | m_responseByteCountReceived + 1, 473 | NULL, 474 | 0); 475 | if (iLength <= 0) 476 | { 477 | // Use CP_ACP if UTF-8 fail 478 | codePage = CP_ACP; 479 | dwFlag = MB_PRECOMPOSED; 480 | iLength = ::MultiByteToWideChar(codePage, 481 | dwFlag, 482 | (LPCSTR)m_pResponse, 483 | m_responseByteCountReceived + 1, 484 | NULL, 485 | 0); 486 | } 487 | if (iLength > 0) 488 | { 489 | wchar_t *wideChar = new wchar_t[iLength]; 490 | if (wideChar != NULL) 
491 | { 492 | memset(wideChar, 0, iLength * sizeof(wchar_t)); 493 | iLength = ::MultiByteToWideChar(codePage, 494 | dwFlag, 495 | (LPCSTR)m_pResponse, 496 | m_responseByteCountReceived + 1, 497 | wideChar, 498 | iLength); 499 | if (iLength > 0) 500 | { 501 | m_responseContent = wideChar; 502 | } 503 | delete[] wideChar; 504 | } 505 | } 506 | bGetReponseSucceed = true; 507 | 508 | // If the resposne html web page size is less than 200, retry. 509 | if (verb == L"GET" && !disableAutoRedirect) 510 | { 511 | wstring regExp = L"{}"; 512 | vector result; 513 | if (ParseRegExp(regExp, false, 1, m_responseContent, result) && result.size() > 0) 514 | { 515 | regExp = L"{}"; 516 | if (!ParseRegExp(regExp, false, 1, m_responseContent, result) || result.size() <= 0) 517 | { 518 | m_dwLastError = ERROR_INVALID_DATA; 519 | bGetReponseSucceed = false; 520 | } 521 | } 522 | } 523 | } 524 | else 525 | { 526 | m_dwLastError = ::GetLastError(); 527 | } 528 | } 529 | } // while 530 | if (!bGetReponseSucceed) 531 | { 532 | bRetVal = false; 533 | } 534 | 535 | ::WinHttpCloseHandle(hRequest); 536 | } 537 | ::WinHttpCloseHandle(hConnect); 538 | } 539 | 540 | } 541 | 542 | return bRetVal; 543 | } 544 | 545 | wstring WinHttpClient::GetResponseHeader(void) 546 | { 547 | return m_responseHeader; 548 | } 549 | 550 | wstring WinHttpClient::GetResponseContent(void) 551 | { 552 | return m_responseContent; 553 | } 554 | 555 | wstring WinHttpClient::GetResponseCharset(void) 556 | { 557 | return m_responseCharset; 558 | } 559 | 560 | wstring WinHttpClient::GetRequestHost(void) 561 | { 562 | return m_requestHost; 563 | } 564 | 565 | bool WinHttpClient::SaveResponseToFile(const wstring &filePath) 566 | { 567 | if (m_pResponse == NULL || m_responseByteCountReceived <= 0) 568 | { 569 | return false; 570 | } 571 | FILE *f = NULL; 572 | int iResult = _wfopen_s(&f, filePath.c_str(), L"wb"); 573 | if (iResult == 0 && f != NULL) 574 | { 575 | fwrite(m_pResponse, m_responseByteCountReceived, 1, f); 576 | 
fclose(f); 577 | return true; 578 | } 579 | 580 | return false; 581 | } 582 | 583 | bool WinHttpClient::SetProgress(unsigned int byteCountReceived) 584 | { 585 | bool bReturn = false; 586 | if (m_pfProcessProc != NULL && m_responseByteCount > 0) 587 | { 588 | double dProgress = (double)byteCountReceived * 100 / m_responseByteCount; 589 | m_pfProcessProc(dProgress); 590 | bReturn = true; 591 | } 592 | 593 | return bReturn; 594 | } 595 | 596 | wstring WinHttpClient::GetResponseCookies(void) 597 | { 598 | return m_responseCookies; 599 | } 600 | 601 | bool WinHttpClient::SetAdditionalRequestCookies(const wstring &cookies) 602 | { 603 | m_additionalRequestCookies = cookies; 604 | 605 | return true; 606 | } 607 | 608 | bool WinHttpClient::SetAdditionalDataToSend(BYTE *data, unsigned int dataSize) 609 | { 610 | if (data == NULL || dataSize < 0) 611 | { 612 | return false; 613 | } 614 | 615 | if (m_pDataToSend != NULL) 616 | { 617 | delete[] m_pDataToSend; 618 | } 619 | m_pDataToSend = NULL; 620 | m_pDataToSend = new BYTE[dataSize]; 621 | if (m_pDataToSend != NULL) 622 | { 623 | memcpy(m_pDataToSend, data, dataSize); 624 | m_dataToSendSize = dataSize; 625 | return true; 626 | } 627 | 628 | return false; 629 | } 630 | 631 | // Reset additional data fields 632 | bool WinHttpClient::ResetAdditionalDataToSend(void) 633 | { 634 | if (m_pDataToSend != NULL) 635 | { 636 | delete[] m_pDataToSend; 637 | } 638 | 639 | m_pDataToSend = NULL; 640 | m_dataToSendSize = 0; 641 | 642 | return true; 643 | } 644 | 645 | // Allow us to reset the url on subsequent requests 646 | bool WinHttpClient::UpdateUrl(const wstring &url) 647 | { 648 | m_requestURL = url; 649 | ResetAdditionalDataToSend(); 650 | 651 | return true; 652 | } 653 | 654 | bool WinHttpClient::SetAdditionalRequestHeaders(const wstring &additionalRequestHeaders) 655 | { 656 | m_additionalRequestHeaders = additionalRequestHeaders; 657 | 658 | return true; 659 | } 660 | 661 | bool WinHttpClient::SetProxy(const wstring &proxy) 662 
| { 663 | m_proxy = proxy; 664 | 665 | return true; 666 | } 667 | 668 | // If we don't require valid SSL Certs then accept any 669 | // certificate on an SSL connection 670 | bool WinHttpClient::SetRequireValidSslCertificates(bool require) 671 | { 672 | m_requireValidSsl = require; 673 | 674 | return true; 675 | } 676 | 677 | const BYTE *WinHttpClient::GetRawResponseContent(void) 678 | { 679 | return m_pResponse; 680 | } 681 | 682 | unsigned int WinHttpClient::GetRawResponseContentLength(void) 683 | { 684 | return m_responseByteCount; 685 | } 686 | 687 | unsigned int WinHttpClient::GetRawResponseReceivedContentLength(void) 688 | { 689 | return m_responseByteCountReceived; 690 | } 691 | 692 | DWORD WinHttpClient::GetLastError(void) 693 | { 694 | return m_dwLastError; 695 | } 696 | 697 | wstring WinHttpClient::GetResponseStatusCode(void) 698 | { 699 | return m_statusCode; 700 | } 701 | 702 | bool WinHttpClient::SetUserAgent(const wstring &userAgent) 703 | { 704 | m_userAgent = userAgent; 705 | 706 | return true; 707 | } 708 | 709 | bool WinHttpClient::SetForceCharset(const wstring &charset) 710 | { 711 | m_responseCharset = charset; 712 | 713 | return true; 714 | } 715 | 716 | bool WinHttpClient::SetProxyUsername(const wstring &username) 717 | { 718 | m_proxyUsername = username; 719 | 720 | return true; 721 | } 722 | 723 | bool WinHttpClient::SetProxyPassword(const std::wstring &password) 724 | { 725 | m_proxyPassword = password; 726 | 727 | return true; 728 | } 729 | 730 | wstring WinHttpClient::GetResponseLocation(void) 731 | { 732 | return m_location; 733 | } 734 | 735 | bool WinHttpClient::SetTimeouts(unsigned int resolveTimeout, 736 | unsigned int connectTimeout, 737 | unsigned int sendTimeout, 738 | unsigned int receiveTimeout) 739 | { 740 | m_resolveTimeout = resolveTimeout; 741 | m_connectTimeout = connectTimeout; 742 | m_sendTimeout = sendTimeout; 743 | m_receiveTimeout = receiveTimeout; 744 | 745 | return true; 746 | } 747 | 
-------------------------------------------------------------------------------- /WinHttpClient/WinHttpClient.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2008-2010 Cheng Shi. All rights reserved. 3 | * Email: shicheng107@hotmail.com 4 | */ 5 | 6 | #ifndef WINHTTPCLIENT_H 7 | #define WINHTTPCLIENT_H 8 | 9 | #pragma comment(lib, "Winhttp.lib") 10 | 11 | #include "RegExp.h" 12 | #include "StringProcess.h" 13 | #include 14 | #include 15 | #include 16 | #include 17 | using namespace std; 18 | 19 | typedef bool (*PROGRESSPROC)(double); 20 | 21 | static const unsigned int INT_RETRYTIMES = 3; 22 | static wchar_t *SZ_AGENT = L"WinHttpClient"; 23 | static const int INT_BUFFERSIZE = 10240; // Initial 10 KB temporary buffer, double if it is not enough. 24 | 25 | class WinHttpClient 26 | { 27 | public: 28 | inline WinHttpClient(const wstring &url, PROGRESSPROC progressProc = NULL); 29 | inline ~WinHttpClient(void); 30 | 31 | // It is a synchronized method and may take a long time to finish. 
32 | inline bool SendHttpRequest(const wstring &httpVerb = L"GET", bool disableAutoRedirect = false); 33 | inline wstring GetResponseHeader(void); 34 | inline wstring GetResponseContent(void); 35 | inline wstring GetResponseCharset(void); 36 | inline wstring GetResponseStatusCode(void); 37 | inline wstring GetResponseLocation(void); 38 | inline wstring GetRequestHost(void); 39 | inline const BYTE *GetRawResponseContent(void); 40 | inline unsigned int GetRawResponseContentLength(void); 41 | inline unsigned int GetRawResponseReceivedContentLength(void); 42 | inline bool SaveResponseToFile(const wstring &filePath); 43 | inline wstring GetResponseCookies(void); 44 | inline bool SetAdditionalRequestCookies(const wstring &cookies); 45 | inline bool SetAdditionalDataToSend(BYTE *data, unsigned int dataSize); 46 | inline bool UpdateUrl(const wstring &url); 47 | inline bool ResetAdditionalDataToSend(void); 48 | inline bool SetAdditionalRequestHeaders(const wstring &additionalRequestHeaders); 49 | inline bool SetRequireValidSslCertificates(bool require); 50 | inline bool SetProxy(const wstring &proxy); 51 | inline DWORD GetLastError(void); 52 | inline bool SetUserAgent(const wstring &userAgent); 53 | inline bool SetForceCharset(const wstring &charset); 54 | inline bool SetProxyUsername(const wstring &username); 55 | inline bool SetProxyPassword(const wstring &password); 56 | inline bool SetTimeouts(unsigned int resolveTimeout = 0, 57 | unsigned int connectTimeout = 60000, 58 | unsigned int sendTimeout = 30000, 59 | unsigned int receiveTimeout = 30000); 60 | 61 | private: 62 | inline WinHttpClient(const WinHttpClient &other); 63 | inline WinHttpClient &operator =(const WinHttpClient &other); 64 | inline bool SetProgress(unsigned int byteCountReceived); 65 | 66 | HINTERNET m_sessionHandle; 67 | bool m_requireValidSsl; 68 | wstring m_requestURL; 69 | wstring m_requestHost; 70 | wstring m_responseHeader; 71 | wstring m_responseContent; 72 | wstring m_responseCharset; 73 | BYTE 
*m_pResponse; 74 | unsigned int m_responseByteCountReceived; // Up to 4GB. 75 | PROGRESSPROC m_pfProcessProc; 76 | unsigned int m_responseByteCount; 77 | wstring m_responseCookies; 78 | wstring m_additionalRequestCookies; 79 | BYTE *m_pDataToSend; 80 | unsigned int m_dataToSendSize; 81 | wstring m_additionalRequestHeaders; 82 | wstring m_proxy; 83 | DWORD m_dwLastError; 84 | wstring m_statusCode; 85 | wstring m_userAgent; 86 | bool m_bForceCharset; 87 | wstring m_proxyUsername; 88 | wstring m_proxyPassword; 89 | wstring m_location; 90 | unsigned int m_resolveTimeout; 91 | unsigned int m_connectTimeout; 92 | unsigned int m_sendTimeout; 93 | unsigned int m_receiveTimeout; 94 | }; 95 | 96 | #endif // WINHTTPCLIENT_H 97 | -------------------------------------------------------------------------------- /WinHttpClient/WinHttpClient.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | {13A7A9A1-B911-4470-99B6-2567A024A1F1} 34 | Win32Proj 35 | WinHttpClient 36 | 8.1 37 | 38 | 39 | 40 | StaticLibrary 41 | true 42 | v140 43 | Unicode 44 | 45 | 46 | StaticLibrary 47 | false 48 | v140 49 | true 50 | Unicode 51 | 52 | 53 | StaticLibrary 54 | true 55 | v140 56 | Unicode 57 | 58 | 59 | StaticLibrary 60 | false 61 | v140 62 | true 63 | Unicode 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | Level3 89 | Disabled 90 | WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions) 91 | 92 | 93 | Windows 94 | 95 | 96 | 97 | 98 | 99 | 100 | Level3 101 | Disabled 102 | _DEBUG;_LIB;%(PreprocessorDefinitions) 103 | 104 | 105 | Windows 106 | 107 | 108 | 109 | 110 | Level3 111 | 112 | 113 | MaxSpeed 114 | true 115 | true 116 | WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions) 117 | 118 | 119 | 
Windows 120 | true 121 | true 122 | 123 | 124 | 125 | 126 | Level3 127 | 128 | 129 | MaxSpeed 130 | true 131 | true 132 | NDEBUG;_LIB;%(PreprocessorDefinitions) 133 | 134 | 135 | Windows 136 | true 137 | true 138 | 139 | 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------- /WinHttpClient/atlrx.h: -------------------------------------------------------------------------------- 1 | // This is a part of the Active Template Library. 2 | // Copyright (C) Microsoft Corporation 3 | // All rights reserved. 4 | // 5 | // This source code is only intended as a supplement to the 6 | // Active Template Library Reference and related 7 | // electronic documentation provided with the library. 8 | // See these sources for detailed information regarding the 9 | // Active Template Library product. 10 | 11 | #ifndef __ATLRX_H__ 12 | #define __ATLRX_H__ 13 | 14 | #pragma once 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | #ifndef ATL_REGEXP_MIN_STACK 21 | #define ATL_REGEXP_MIN_STACK 256 22 | #endif 23 | 24 | /* 25 | Regular Expression Grammar 26 | 27 | R - top level grammar rule 28 | RE - regular expression 29 | AltE - Alternative expression 30 | E - expression 31 | SE - simple expression 32 | 33 | R -> RE 34 | '^'RE (matches begining of string) 35 | 36 | RE -> AltE RE 37 | AltE 38 | 39 | 40 | AltE -> E 41 | E '|' AltE 42 | E -> SE (RepeatOp '?'?)? 43 | SE -> Arg 44 | Group 45 | CharClass 46 | '\'Abbrev (see below) 47 | '\'EscapedChar (any character including reserved symbols) 48 | '\'Digit+ (Arg back reference) 49 | '!' (not) 50 | '.' (any char) 51 | '$' (end of input) 52 | Symbol (any non-reserved character) 53 | Arg -> '{'RE'}' 54 | Group -> '('RE')' 55 | CharClass -> '[' '^'? CharSet ']' 56 | CharSet -> CharItem+ 57 | CharItem -> Char('-'Char)? 58 | RepeatOp -> '*' 59 | '+' 60 | '?' 
61 | Abbrev -> Abbreviation defined in CAtlRECharTraits 62 | Abbrev Expansion Meaning 63 | a ([a-zA-Z0-9]) alpha numeric 64 | b ([ \\t]) white space (blank) 65 | c ([a-zA-Z]) alpha 66 | d ([0-9]) digit 67 | h ([0-9a-fA-F]) hex digit 68 | n (\r|(\r?\n)) newline 69 | q (\"[^\"]*\")|(\'[^\']*\') quoted string 70 | w ([a-zA-Z]+) simple word 71 | z ([0-9]+) integer 72 | */ 73 | 74 | #pragma pack(push,_ATL_PACKING) 75 | namespace ATL { 76 | 77 | //Convertion utility classes used to convert char* to RECHAR. 78 | //Used by rx debugging printing. 79 | template 80 | class CAToREChar 81 | { 82 | public: 83 | CAToREChar(const char* psz) throw() 84 | : m_psz(psz) 85 | { 86 | } 87 | operator const RECHARTYPE*() const throw() { return m_psz; } 88 | const char* m_psz; 89 | }; 90 | 91 | template<> 92 | class CAToREChar 93 | { 94 | public: 95 | CAToREChar(const char* psz) throw() 96 | : m_a2w(psz) 97 | { 98 | } 99 | operator const wchar_t*() const throw() { return (wchar_t*)m_a2w; } 100 | 101 | private: 102 | CA2W m_a2w; 103 | }; 104 | 105 | class CAtlRECharTraitsA 106 | { 107 | public: 108 | typedef char RECHARTYPE; 109 | 110 | static size_t GetBitFieldForRangeArrayIndex(const RECHARTYPE *sz) throw() 111 | { 112 | #ifndef ATL_NO_CHECK_BIT_FIELD 113 | ATLASSERT(UseBitFieldForRange()); 114 | #endif 115 | return static_cast(static_cast(*sz)); 116 | } 117 | static RECHARTYPE *Next(const RECHARTYPE *sz) throw() 118 | { 119 | return (RECHARTYPE *) (sz+1); 120 | } 121 | 122 | static int Strncmp(const RECHARTYPE *szLeft, const RECHARTYPE *szRight, size_t nCount) throw() 123 | { 124 | return strncmp(szLeft, szRight, nCount); 125 | } 126 | 127 | static int Strnicmp(const RECHARTYPE *szLeft, const RECHARTYPE *szRight, size_t nCount) throw() 128 | { 129 | return _strnicmp(szLeft, szRight, nCount); 130 | } 131 | 132 | _ATL_INSECURE_DEPRECATE("CAtlRECharTraitsA::Strlwr must be passed a buffer size.") 133 | static RECHARTYPE *Strlwr(RECHARTYPE *sz) throw() 134 | { 135 | #pragma warning (push) 136 
| #pragma warning(disable : 4996) 137 | return _strlwr(sz); 138 | #pragma warning (pop) 139 | } 140 | 141 | static RECHARTYPE *Strlwr(RECHARTYPE *sz, int nSize) throw() 142 | { 143 | Checked::strlwr_s(sz, nSize); 144 | return sz; 145 | } 146 | 147 | static long Strtol(const RECHARTYPE *sz, RECHARTYPE **szEnd, int nBase) throw() 148 | { 149 | return strtol(sz, szEnd, nBase); 150 | } 151 | 152 | static int Isdigit(RECHARTYPE ch) throw() 153 | { 154 | return isdigit(static_cast(ch)); 155 | } 156 | 157 | static const RECHARTYPE** GetAbbrevs() 158 | { 159 | static const RECHARTYPE *s_szAbbrevs[] = 160 | { 161 | "a([a-zA-Z0-9])", // alpha numeric 162 | "b([ \\t])", // white space (blank) 163 | "c([a-zA-Z])", // alpha 164 | "d([0-9])", // digit 165 | "h([0-9a-fA-F])", // hex digit 166 | "n(\r|(\r?\n))", // newline 167 | "q(\"[^\"]*\")|(\'[^\']*\')", // quoted string 168 | "w([a-zA-Z]+)", // simple word 169 | "z([0-9]+)", // integer 170 | NULL 171 | }; 172 | 173 | return s_szAbbrevs; 174 | } 175 | 176 | static BOOL UseBitFieldForRange() throw() 177 | { 178 | return TRUE; 179 | } 180 | 181 | static int ByteLen(const RECHARTYPE *sz) throw() 182 | { 183 | return int(strlen(sz)); 184 | } 185 | }; 186 | 187 | class CAtlRECharTraitsW 188 | { 189 | public: 190 | typedef WCHAR RECHARTYPE; 191 | 192 | static size_t GetBitFieldForRangeArrayIndex(const RECHARTYPE *sz) throw() 193 | { 194 | #ifndef ATL_NO_CHECK_BIT_FIELD 195 | ATLASSERT(UseBitFieldForRange()); 196 | #endif 197 | return static_cast(*sz); 198 | } 199 | static RECHARTYPE *Next(const RECHARTYPE *sz) throw() 200 | { 201 | return (RECHARTYPE *) (sz+1); 202 | } 203 | 204 | static int Strncmp(const RECHARTYPE *szLeft, const RECHARTYPE *szRight, size_t nCount) throw() 205 | { 206 | return wcsncmp(szLeft, szRight, nCount); 207 | } 208 | 209 | static int Strnicmp(const RECHARTYPE *szLeft, const RECHARTYPE *szRight, size_t nCount) throw() 210 | { 211 | return _wcsnicmp(szLeft, szRight, nCount); 212 | } 213 | 214 | 
_ATL_INSECURE_DEPRECATE("CAtlRECharTraitsW::Strlwr must be passed a buffer size.") 215 | static RECHARTYPE *Strlwr(RECHARTYPE *sz) throw() 216 | { 217 | #pragma warning (push) 218 | #pragma warning(disable : 4996) 219 | return _wcslwr(sz); 220 | #pragma warning (pop) 221 | } 222 | 223 | static RECHARTYPE *Strlwr(RECHARTYPE *sz, int nSize) throw() 224 | { 225 | Checked::wcslwr_s(sz, nSize); 226 | return sz; 227 | } 228 | 229 | static long Strtol(const RECHARTYPE *sz, RECHARTYPE **szEnd, int nBase) throw() 230 | { 231 | return wcstol(sz, szEnd, nBase); 232 | } 233 | 234 | static int Isdigit(RECHARTYPE ch) throw() 235 | { 236 | return iswdigit(ch); 237 | } 238 | 239 | static const RECHARTYPE** GetAbbrevs() 240 | { 241 | static const RECHARTYPE *s_szAbbrevs[] = 242 | { 243 | L"a([a-zA-Z0-9])", // alpha numeric 244 | L"b([ \\t])", // white space (blank) 245 | L"c([a-zA-Z])", // alpha 246 | L"d([0-9])", // digit 247 | L"h([0-9a-fA-F])", // hex digit 248 | L"n(\r|(\r?\n))", // newline 249 | L"q(\"[^\"]*\")|(\'[^\']*\')", // quoted string 250 | L"w([a-zA-Z]+)", // simple word 251 | L"z([0-9]+)", // integer 252 | NULL 253 | }; 254 | 255 | return s_szAbbrevs; 256 | } 257 | 258 | static BOOL UseBitFieldForRange() throw() 259 | { 260 | return FALSE; 261 | } 262 | 263 | static int ByteLen(const RECHARTYPE *sz) throw() 264 | { 265 | return int(wcslen(sz)*sizeof(WCHAR)); 266 | } 267 | }; 268 | 269 | class CAtlRECharTraitsMB 270 | { 271 | public: 272 | typedef unsigned char RECHARTYPE; 273 | 274 | static size_t GetBitFieldForRangeArrayIndex(const RECHARTYPE *sz) throw() 275 | { 276 | #ifndef ATL_NO_CHECK_BIT_FIELD 277 | ATLASSERT(UseBitFieldForRange()); 278 | #endif 279 | 280 | return static_cast(*sz); 281 | } 282 | 283 | static RECHARTYPE *Next(const RECHARTYPE *sz) throw() 284 | { 285 | return _mbsinc(sz); 286 | } 287 | 288 | static int Strncmp(const RECHARTYPE *szLeft, const RECHARTYPE *szRight, size_t nCount) throw() 289 | { 290 | return _mbsncmp(szLeft, szRight, nCount); 291 
| } 292 | 293 | static int Strnicmp(const RECHARTYPE *szLeft, const RECHARTYPE *szRight, size_t nCount) throw() 294 | { 295 | return _mbsnicmp(szLeft, szRight, nCount); 296 | } 297 | 298 | _ATL_INSECURE_DEPRECATE("CAtlRECharTraitsMB::Strlwr must be passed a buffer size.") 299 | static RECHARTYPE *Strlwr(RECHARTYPE *sz) throw() 300 | { 301 | #pragma warning (push) 302 | #pragma warning(disable : 4996) 303 | return _mbslwr(sz); 304 | #pragma warning (pop) 305 | } 306 | 307 | static RECHARTYPE *Strlwr(RECHARTYPE *sz, int nSize) throw() 308 | { 309 | Checked::mbslwr_s(sz, nSize); 310 | return sz; 311 | } 312 | 313 | static long Strtol(const RECHARTYPE *sz, RECHARTYPE **szEnd, int nBase) throw() 314 | { 315 | return strtol((const char *) sz, (char **) szEnd, nBase); 316 | } 317 | 318 | static int Isdigit(RECHARTYPE ch) throw() 319 | { 320 | return _ismbcdigit((unsigned int) ch); 321 | } 322 | 323 | static const RECHARTYPE** GetAbbrevs() 324 | { 325 | return reinterpret_cast(CAtlRECharTraitsA::GetAbbrevs()); 326 | } 327 | 328 | static BOOL UseBitFieldForRange() throw() 329 | { 330 | return FALSE; 331 | } 332 | 333 | static int ByteLen(const RECHARTYPE *sz) throw() 334 | { 335 | return (int)strlen((const char *) sz); 336 | } 337 | }; 338 | 339 | #ifndef _UNICODE 340 | typedef CAtlRECharTraitsA CAtlRECharTraits; 341 | #else // _UNICODE 342 | typedef CAtlRECharTraitsW CAtlRECharTraits; 343 | #endif // !_UNICODE 344 | // Note: If you want to use CAtlRECharTraitsMB you must pass it in 345 | // as a template argument 346 | 347 | template 348 | class CAtlRegExp; // forward declaration 349 | 350 | template 351 | class CAtlREMatchContext 352 | { 353 | public: 354 | friend CAtlRegExp; 355 | typedef typename CharTraits::RECHARTYPE RECHAR; 356 | 357 | struct MatchGroup 358 | { 359 | const RECHAR *szStart; 360 | const RECHAR *szEnd; 361 | }; 362 | 363 | UINT m_uNumGroups; 364 | 365 | MatchGroup m_Match; 366 | 367 | void GetMatch(UINT nIndex, const RECHAR **szStart, const RECHAR 
**szEnd) 368 | { 369 | ATLENSURE(szStart != NULL); 370 | ATLENSURE(szEnd != NULL); 371 | ATLENSURE(nIndex >=0 && nIndex < m_uNumGroups); 372 | *szStart = m_Matches[nIndex].szStart; 373 | *szEnd = m_Matches[nIndex].szEnd; 374 | } 375 | 376 | void GetMatch(UINT nIndex, MatchGroup *pGroup) 377 | { 378 | 379 | ATLENSURE(pGroup != NULL); 380 | ATLENSURE(nIndex >=0&&(static_cast(nIndex))< m_uNumGroups); 381 | pGroup->szStart = m_Matches[nIndex].szStart; 382 | pGroup->szEnd = m_Matches[nIndex].szEnd; 383 | } 384 | 385 | protected: 386 | CAutoVectorPtr m_Mem; 387 | CAutoVectorPtr m_Matches; 388 | CAtlArray m_stack; 389 | size_t m_nTos; 390 | 391 | public: 392 | CAtlREMatchContext(size_t nInitStackSize=ATL_REGEXP_MIN_STACK) 393 | { 394 | m_uNumGroups = 0; 395 | m_nTos = 0; 396 | m_stack.SetCount(nInitStackSize); 397 | m_Match.szStart = NULL; 398 | m_Match.szEnd = NULL; 399 | } 400 | 401 | protected: 402 | BOOL Initialize(UINT uRequiredMem, UINT uNumGroups) throw() 403 | { 404 | m_nTos = 0; 405 | 406 | m_uNumGroups = 0; 407 | m_Matches.Free(); 408 | 409 | if (!m_Matches.Allocate(uNumGroups)) 410 | return FALSE; 411 | 412 | m_uNumGroups = uNumGroups; 413 | 414 | m_Mem.Free(); 415 | 416 | if (!m_Mem.Allocate(uRequiredMem)) 417 | return FALSE; 418 | 419 | memset(m_Mem.m_p, 0x00, uRequiredMem*sizeof(void *)); 420 | 421 | memset(m_Matches, 0x00, m_uNumGroups * sizeof(MatchGroup)); 422 | return TRUE; 423 | } 424 | 425 | BOOL Push(void *p) 426 | { 427 | m_nTos++; 428 | if (m_stack.GetCount() <= (UINT) m_nTos) 429 | { 430 | if (!m_stack.SetCount((m_nTos+1)*2)) 431 | { 432 | m_nTos--; 433 | return FALSE; 434 | } 435 | } 436 | m_stack[m_nTos] = p; 437 | return TRUE; 438 | } 439 | 440 | BOOL Push(size_t n) 441 | { 442 | return Push((void *) n); 443 | } 444 | 445 | void *Pop() throw() 446 | { 447 | if (m_nTos==0) 448 | { 449 | // stack underflow 450 | // this should never happen at match time. 
451 | // (the parsing succeeded when it shouldn't have) 452 | ATLASSERT(FALSE); 453 | return NULL; 454 | } 455 | void *p = m_stack[m_nTos]; 456 | m_nTos--; 457 | return p; 458 | } 459 | }; 460 | 461 | enum REParseError { 462 | REPARSE_ERROR_OK = 0, // No error occurred 463 | REPARSE_ERROR_OUTOFMEMORY, // Out of memory 464 | REPARSE_ERROR_BRACE_EXPECTED, // A closing brace was expected 465 | REPARSE_ERROR_PAREN_EXPECTED, // A closing parenthesis was expected 466 | REPARSE_ERROR_BRACKET_EXPECTED, // A closing bracket was expected 467 | REPARSE_ERROR_UNEXPECTED, // An unspecified fatal error occurred 468 | REPARSE_ERROR_EMPTY_RANGE, // A range expression was empty 469 | REPARSE_ERROR_INVALID_GROUP, // A backreference was made to a group 470 | // that did not exist 471 | REPARSE_ERROR_INVALID_RANGE, // An invalid range was specified 472 | REPARSE_ERROR_EMPTY_REPEATOP, // A possibly empty * or + was detected 473 | REPARSE_ERROR_INVALID_INPUT, // The input string was invalid 474 | }; 475 | 476 | template 477 | class CAtlRegExp 478 | { 479 | public: 480 | CAtlRegExp() throw() 481 | { 482 | m_uNumGroups = 0; 483 | m_uRequiredMem = 0; 484 | m_bCaseSensitive = TRUE; 485 | m_LastError = REPARSE_ERROR_OK; 486 | } 487 | 488 | typedef typename CharTraits::RECHARTYPE RECHAR; 489 | 490 | // CAtlRegExp::Parse 491 | // Parses the regular expression 492 | // returns REPARSE_ERROR_OK if successful, an REParseError otherwise 493 | REParseError Parse(const RECHAR *szRE, BOOL bCaseSensitive=TRUE) 494 | { 495 | ATLASSERT(szRE); 496 | if (!szRE) 497 | return REPARSE_ERROR_INVALID_INPUT; 498 | 499 | Reset(); 500 | 501 | m_bCaseSensitive = bCaseSensitive; 502 | 503 | const RECHAR *szInput = szRE; 504 | 505 | if (!bCaseSensitive) 506 | { 507 | // copy the string 508 | int nSize = CharTraits::ByteLen(szRE)+sizeof(RECHAR); 509 | szInput = (const RECHAR *) malloc(nSize); 510 | if (!szInput) 511 | return REPARSE_ERROR_OUTOFMEMORY; 512 | 513 | Checked::memcpy_s((char *) szInput, nSize, szRE, 
nSize); 514 | 515 | CharTraits::Strlwr(const_cast(szInput), nSize/sizeof(RECHAR)); 516 | } 517 | const RECHAR *sz = szInput; 518 | 519 | int nCall = AddInstruction(RE_CALL); 520 | if (nCall < 0) 521 | return REPARSE_ERROR_OUTOFMEMORY; 522 | 523 | if (*sz == '^') 524 | { 525 | if (AddInstruction(RE_FAIL) < 0) 526 | return REPARSE_ERROR_OUTOFMEMORY; 527 | sz++; 528 | } 529 | else 530 | { 531 | if (AddInstruction(RE_ADVANCE) < 0) 532 | return REPARSE_ERROR_OUTOFMEMORY; 533 | } 534 | 535 | bool bEmpty = true; 536 | ParseRE(&sz, bEmpty); 537 | if (!GetLastParseError()) 538 | { 539 | GetInstruction(nCall).call.nTarget = 2; 540 | 541 | if (AddInstruction(RE_MATCH) < 0) 542 | return REPARSE_ERROR_OUTOFMEMORY; 543 | } 544 | 545 | if (szInput != szRE) 546 | free((void *) szInput); 547 | 548 | return GetLastParseError(); 549 | } 550 | 551 | BOOL Match(const RECHAR *szIn, CAtlREMatchContext *pContext, const RECHAR **ppszEnd=NULL) 552 | { 553 | ATLASSERT(szIn); 554 | ATLASSERT(pContext); 555 | 556 | if (!szIn || !pContext) 557 | return FALSE; 558 | 559 | if (ppszEnd) 560 | *ppszEnd = NULL; 561 | 562 | const RECHAR *szInput = szIn; 563 | 564 | if (!m_bCaseSensitive) 565 | { 566 | int nSize = CharTraits::ByteLen(szIn)+sizeof(RECHAR); 567 | szInput = (const RECHAR *) malloc(nSize); 568 | if (!szInput) 569 | return FALSE; 570 | 571 | Checked::memcpy_s((char *) szInput, nSize, szIn, nSize); 572 | CharTraits::Strlwr(const_cast(szInput), nSize/sizeof(RECHAR)); 573 | } 574 | 575 | if (!pContext->Initialize(m_uRequiredMem, m_uNumGroups)) 576 | { 577 | if (szInput != szIn) 578 | free((void *) szInput); 579 | return FALSE; 580 | } 581 | 582 | size_t ip = 0; 583 | 584 | const RECHAR *sz = szInput; 585 | const RECHAR *szCurrInput = szInput; 586 | 587 | #pragma warning(push) 588 | #pragma warning(disable:4127) // conditional expression is constant 589 | 590 | while (1) 591 | { 592 | #ifdef ATLRX_DEBUG 593 | OnDebugEvent(ip, szInput, sz, pContext); 594 | #endif 595 | if (ip == 0) 596 | 
pContext->m_Match.szStart = sz; 597 | 598 | switch (GetInstruction(ip).type) 599 | { 600 | case RE_NOP: 601 | ip++; 602 | break; 603 | 604 | case RE_SYMBOL: 605 | if (GetInstruction(ip).symbol.nSymbol == static_cast(*sz)) 606 | { 607 | sz = CharTraits::Next(sz); 608 | ip++; 609 | } 610 | else 611 | { 612 | ip = (size_t) pContext->Pop(); 613 | } 614 | break; 615 | 616 | case RE_ANY: 617 | if (*sz) 618 | { 619 | sz = CharTraits::Next(sz); 620 | ip++; 621 | } 622 | else 623 | { 624 | ip = (size_t) pContext->Pop(); 625 | } 626 | break; 627 | 628 | case RE_GROUP_START: 629 | pContext->m_Matches[GetInstruction(ip).group.nGroup].szStart = sz; 630 | ip++; 631 | break; 632 | 633 | case RE_GROUP_END: 634 | pContext->m_Matches[GetInstruction(ip).group.nGroup].szEnd = sz; 635 | ip++; 636 | break; 637 | 638 | case RE_PUSH_CHARPOS: 639 | pContext->Push((void *) sz); 640 | ip++; 641 | break; 642 | 643 | case RE_POP_CHARPOS: 644 | sz = (RECHAR *) pContext->Pop(); 645 | ip++; 646 | break; 647 | 648 | case RE_CALL: 649 | pContext->Push(ip+1); 650 | ip = GetInstruction(ip).call.nTarget; 651 | break; 652 | 653 | case RE_JMP: 654 | ip = GetInstruction(ip).jmp.nTarget; 655 | break; 656 | 657 | case RE_RETURN: 658 | ip = (size_t) pContext->Pop(); 659 | break; 660 | 661 | case RE_PUSH_MEMORY: 662 | pContext->Push((void *) (pContext->m_Mem[GetInstruction(ip).memory.nIndex])); 663 | ip++; 664 | break; 665 | 666 | case RE_POP_MEMORY: 667 | pContext->m_Mem[GetInstruction(ip).memory.nIndex] = pContext->Pop(); 668 | ip++; 669 | break; 670 | 671 | case RE_STORE_CHARPOS: 672 | pContext->m_Mem[GetInstruction(ip).memory.nIndex] = (void *) sz; 673 | ip++; 674 | break; 675 | 676 | case RE_GET_CHARPOS: 677 | sz = (RECHAR *) pContext->m_Mem[GetInstruction(ip).memory.nIndex]; 678 | ip++; 679 | break; 680 | 681 | case RE_STORE_STACKPOS: 682 | pContext->m_Mem[GetInstruction(ip).memory.nIndex] = (void *) pContext->m_nTos; 683 | ip++; 684 | break; 685 | 686 | case RE_GET_STACKPOS: 687 | pContext->m_nTos = 
(size_t) pContext->m_Mem[GetInstruction(ip).memory.nIndex]; 688 | ip++; 689 | break; 690 | 691 | case RE_RET_NOMATCH: 692 | if (sz == (RECHAR *) pContext->m_Mem[GetInstruction(ip).memory.nIndex]) 693 | { 694 | // do a return 695 | ip = (size_t) pContext->Pop(); 696 | } 697 | else 698 | ip++; 699 | break; 700 | 701 | case RE_ADVANCE: 702 | sz = CharTraits::Next(szCurrInput); 703 | szCurrInput = sz; 704 | if (*sz == '\0') 705 | goto Error; 706 | ip = 0; 707 | pContext->m_nTos = 0; 708 | break; 709 | 710 | case RE_FAIL: 711 | goto Error; 712 | 713 | case RE_RANGE: 714 | { 715 | if (*sz == '\0') 716 | { 717 | ip = (size_t) pContext->Pop(); 718 | break; 719 | } 720 | 721 | RECHAR *pBits = reinterpret_cast((&m_Instructions[ip]+1)); 722 | size_t u = CharTraits::GetBitFieldForRangeArrayIndex(sz); 723 | if (pBits[u >> 3] & 1 << (u & 0x7)) 724 | { 725 | ip += InstructionsPerRangeBitField(); 726 | ip++; 727 | sz = CharTraits::Next(sz); 728 | } 729 | else 730 | { 731 | ip = (size_t) pContext->Pop(); 732 | } 733 | } 734 | break; 735 | 736 | case RE_NOTRANGE: 737 | { 738 | if (*sz == '\0') 739 | { 740 | ip = (size_t) pContext->Pop(); 741 | break; 742 | } 743 | 744 | RECHAR *pBits = reinterpret_cast((&m_Instructions[ip]+1)); 745 | size_t u = static_cast(* ((RECHAR *) sz)); 746 | if (pBits[u >> 3] & 1 << (u & 0x7)) 747 | { 748 | ip = (size_t) pContext->Pop(); 749 | } 750 | else 751 | { 752 | ip += InstructionsPerRangeBitField(); 753 | ip++; 754 | sz = CharTraits::Next(sz); 755 | } 756 | } 757 | break; 758 | 759 | case RE_RANGE_EX: 760 | { 761 | if (*sz == '\0') 762 | { 763 | ip = (size_t) pContext->Pop(); 764 | break; 765 | } 766 | 767 | BOOL bMatch = FALSE; 768 | size_t inEnd = GetInstruction(ip).range.nTarget; 769 | ip++; 770 | 771 | while (ip < inEnd) 772 | { 773 | if (static_cast(*sz) >= GetInstruction(ip).memory.nIndex && 774 | static_cast(*sz) <= GetInstruction(ip+1).memory.nIndex) 775 | { 776 | // if we match, we jump to the end 777 | sz = CharTraits::Next(sz); 778 | ip = 
inEnd; 779 | bMatch = TRUE; 780 | } 781 | else 782 | { 783 | ip += 2; 784 | } 785 | } 786 | if (!bMatch) 787 | { 788 | ip = (size_t) pContext->Pop(); 789 | } 790 | } 791 | break; 792 | 793 | case RE_NOTRANGE_EX: 794 | { 795 | if (*sz == '\0') 796 | { 797 | ip = (size_t) pContext->Pop(); 798 | break; 799 | } 800 | 801 | BOOL bMatch = TRUE; 802 | size_t inEnd = GetInstruction(ip).range.nTarget; 803 | ip++; 804 | 805 | while (ip < inEnd) 806 | { 807 | if (static_cast(*sz) >= GetInstruction(ip).memory.nIndex && 808 | static_cast(*sz) <= GetInstruction(ip+1).memory.nIndex) 809 | { 810 | ip = (size_t) pContext->Pop(); 811 | bMatch = FALSE; 812 | break; 813 | } 814 | else 815 | { 816 | // if we match, we jump to the end 817 | ip += 2; 818 | } 819 | } 820 | if (bMatch) 821 | sz = CharTraits::Next(sz); 822 | } 823 | break; 824 | 825 | case RE_PREVIOUS: 826 | { 827 | BOOL bMatch = FALSE; 828 | if (m_bCaseSensitive) 829 | { 830 | bMatch = !CharTraits::Strncmp(sz, pContext->m_Matches[GetInstruction(ip).prev.nGroup].szStart, 831 | pContext->m_Matches[GetInstruction(ip).prev.nGroup].szEnd-pContext->m_Matches[GetInstruction(ip).prev.nGroup].szStart); 832 | } 833 | else 834 | { 835 | bMatch = !CharTraits::Strnicmp(sz, pContext->m_Matches[GetInstruction(ip).prev.nGroup].szStart, 836 | pContext->m_Matches[GetInstruction(ip).prev.nGroup].szEnd-pContext->m_Matches[GetInstruction(ip).prev.nGroup].szStart); 837 | } 838 | if (bMatch) 839 | { 840 | sz += pContext->m_Matches[GetInstruction(ip).prev.nGroup].szEnd-pContext->m_Matches[GetInstruction(ip).prev.nGroup].szStart; 841 | ip++; 842 | break; 843 | } 844 | ip = (size_t) pContext->Pop(); 845 | } 846 | break; 847 | 848 | case RE_MATCH: 849 | pContext->m_Match.szEnd = sz; 850 | if (!m_bCaseSensitive) 851 | FixupMatchContext(pContext, szIn, szInput); 852 | if (ppszEnd) 853 | *ppszEnd = szIn + (sz - szInput); 854 | if (szInput != szIn) 855 | free((void *) szInput); 856 | return TRUE; 857 | break; 858 | 859 | case RE_PUSH_GROUP: 860 | 
pContext->Push((void *) pContext->m_Matches[GetInstruction(ip).group.nGroup].szStart); 861 | pContext->Push((void *) pContext->m_Matches[GetInstruction(ip).group.nGroup].szEnd); 862 | ip++; 863 | break; 864 | 865 | case RE_POP_GROUP: 866 | pContext->m_Matches[GetInstruction(ip).group.nGroup].szEnd = (const RECHAR *) pContext->Pop(); 867 | pContext->m_Matches[GetInstruction(ip).group.nGroup].szStart = (const RECHAR *) pContext->Pop(); 868 | ip++; 869 | break; 870 | 871 | default: 872 | ATLASSERT(FALSE); 873 | break; 874 | } 875 | } 876 | 877 | #pragma warning(pop) // 4127 878 | 879 | ATLASSERT(FALSE); 880 | Error: 881 | pContext->m_Match.szEnd = sz; 882 | if (!m_bCaseSensitive) 883 | FixupMatchContext(pContext, szIn, szInput); 884 | if (ppszEnd) 885 | *ppszEnd = szIn + (sz - szInput); 886 | if (szInput != szIn) 887 | free((void *) szInput); 888 | return FALSE; 889 | } 890 | 891 | protected: 892 | REParseError m_LastError; 893 | 894 | REParseError GetLastParseError() throw() 895 | { 896 | return m_LastError; 897 | } 898 | 899 | void SetLastParseError(REParseError Error) throw() 900 | { 901 | m_LastError = Error; 902 | } 903 | // CAtlRegExp::Reset 904 | // Removes all instructions to allow reparsing into the same instance 905 | void Reset() throw() 906 | { 907 | m_Instructions.RemoveAll(); 908 | m_uRequiredMem = 0; 909 | m_bCaseSensitive = TRUE; 910 | m_uNumGroups = 0; 911 | SetLastParseError(REPARSE_ERROR_OK); 912 | } 913 | 914 | 915 | enum REInstructionType { 916 | RE_NOP, 917 | RE_GROUP_START, 918 | RE_GROUP_END, 919 | RE_SYMBOL, 920 | RE_ANY, 921 | RE_RANGE, 922 | RE_NOTRANGE, 923 | RE_RANGE_EX, 924 | RE_NOTRANGE_EX, 925 | RE_PLUS, 926 | RE_NG_PLUS, 927 | RE_QUESTION, 928 | RE_NG_QUESTION, 929 | RE_JMP, 930 | RE_PUSH_CHARPOS, 931 | RE_POP_CHARPOS, 932 | RE_CALL, 933 | RE_RETURN, 934 | RE_STAR_BEGIN, 935 | RE_NG_STAR_BEGIN, 936 | RE_PUSH_MEMORY, 937 | RE_POP_MEMORY, 938 | RE_STORE_CHARPOS, 939 | RE_STORE_STACKPOS, 940 | RE_GET_CHARPOS, 941 | RE_GET_STACKPOS, 942 | 
RE_RET_NOMATCH, 943 | RE_PREVIOUS, 944 | RE_FAIL, 945 | RE_ADVANCE, 946 | RE_MATCH, 947 | RE_PUSH_GROUP, 948 | RE_POP_GROUP, 949 | }; 950 | 951 | struct INSTRUCTION_SYMBOL 952 | { 953 | size_t nSymbol; 954 | }; 955 | 956 | struct INSTRUCTION_JMP 957 | { 958 | size_t nTarget; 959 | }; 960 | 961 | struct INSTRUCTION_GROUP 962 | { 963 | size_t nGroup; 964 | }; 965 | 966 | struct INSTRUCTION_CALL 967 | { 968 | size_t nTarget; 969 | }; 970 | 971 | struct INSTRUCTION_MEMORY 972 | { 973 | size_t nIndex; 974 | }; 975 | 976 | struct INSTRUCTION_PREVIOUS 977 | { 978 | size_t nGroup; 979 | }; 980 | 981 | struct INSTRUCTION_RANGE_EX 982 | { 983 | size_t nTarget; 984 | }; 985 | 986 | struct INSTRUCTION 987 | { 988 | REInstructionType type; 989 | union 990 | { 991 | INSTRUCTION_SYMBOL symbol; 992 | INSTRUCTION_JMP jmp; 993 | INSTRUCTION_GROUP group; 994 | INSTRUCTION_CALL call; 995 | INSTRUCTION_MEMORY memory; 996 | INSTRUCTION_PREVIOUS prev; 997 | INSTRUCTION_RANGE_EX range; 998 | }; 999 | }; 1000 | 1001 | inline int InstructionsPerRangeBitField() throw() 1002 | { 1003 | return (256/8) / sizeof(INSTRUCTION) + (((256/8) % sizeof(INSTRUCTION)) ? 
1 : 0); 1004 | } 1005 | 1006 | CAtlArray m_Instructions; 1007 | 1008 | UINT m_uNumGroups; 1009 | UINT m_uRequiredMem; 1010 | BOOL m_bCaseSensitive; 1011 | 1012 | 1013 | // class used internally to restore 1014 | // parsing state when unwinding 1015 | class CParseState 1016 | { 1017 | public: 1018 | int m_nNumInstructions; 1019 | UINT m_uNumGroups; 1020 | UINT m_uRequiredMem; 1021 | 1022 | CParseState(CAtlRegExp *pRegExp) throw() 1023 | { 1024 | m_nNumInstructions = (int) pRegExp->m_Instructions.GetCount(); 1025 | m_uNumGroups = pRegExp->m_uNumGroups; 1026 | m_uRequiredMem = pRegExp->m_uRequiredMem; 1027 | } 1028 | 1029 | void Restore(CAtlRegExp *pRegExp) 1030 | { 1031 | pRegExp->m_Instructions.SetCount(m_nNumInstructions); 1032 | pRegExp->m_uNumGroups = m_uNumGroups; 1033 | pRegExp->m_uRequiredMem = m_uRequiredMem; 1034 | } 1035 | }; 1036 | 1037 | int AddInstruction(REInstructionType type) 1038 | { 1039 | if (!m_Instructions.SetCount(m_Instructions.GetCount()+1)) 1040 | { 1041 | SetLastParseError(REPARSE_ERROR_OUTOFMEMORY); 1042 | return -1; 1043 | } 1044 | 1045 | m_Instructions[m_Instructions.GetCount()-1].type = type; 1046 | return (int) m_Instructions.GetCount()-1; 1047 | } 1048 | 1049 | BOOL PeekToken(const RECHAR **ppszRE, int ch) throw() 1050 | { 1051 | if (**ppszRE != ch) 1052 | return FALSE; 1053 | return TRUE; 1054 | } 1055 | 1056 | BOOL MatchToken(const RECHAR **ppszRE, int ch) throw() 1057 | { 1058 | if (!PeekToken(ppszRE, ch)) 1059 | return FALSE; 1060 | *ppszRE = CharTraits::Next(*ppszRE); 1061 | return TRUE; 1062 | } 1063 | 1064 | INSTRUCTION &GetInstruction(size_t nIndex) throw() 1065 | { 1066 | return m_Instructions[nIndex]; 1067 | } 1068 | 1069 | // ParseArg: parse grammar rule Arg 1070 | int ParseArg(const RECHAR **ppszRE, bool &bEmpty) 1071 | { 1072 | int nPushGroup = AddInstruction(RE_PUSH_GROUP); 1073 | if (nPushGroup < 0) 1074 | return -1; 1075 | 1076 | GetInstruction(nPushGroup).group.nGroup = m_uNumGroups; 1077 | 1078 | int p = 
AddInstruction(RE_GROUP_START); 1079 | if (p < 0) 1080 | return -1; 1081 | GetInstruction(p).group.nGroup = m_uNumGroups++; 1082 | 1083 | int nCall = AddInstruction(RE_CALL); 1084 | if (nCall < 0) 1085 | return -1; 1086 | 1087 | int nPopGroup = AddInstruction(RE_POP_GROUP); 1088 | if (nPopGroup < 0) 1089 | return -1; 1090 | GetInstruction(nPopGroup).group.nGroup = GetInstruction(nPushGroup).group.nGroup; 1091 | 1092 | if (AddInstruction(RE_RETURN) < 0) 1093 | return -1; 1094 | 1095 | int nAlt = ParseRE(ppszRE, bEmpty); 1096 | if (nAlt < 0) 1097 | { 1098 | if (GetLastParseError()) 1099 | return -1; 1100 | 1101 | if (!PeekToken(ppszRE, '}')) 1102 | { 1103 | SetLastParseError(REPARSE_ERROR_BRACE_EXPECTED); 1104 | return -1; 1105 | } 1106 | 1107 | // in the case of an empty group, we add a nop 1108 | nAlt = AddInstruction(RE_NOP); 1109 | if (nAlt < 0) 1110 | return -1; 1111 | } 1112 | 1113 | GetInstruction(nCall).call.nTarget = nAlt; 1114 | 1115 | if (!MatchToken(ppszRE, '}')) 1116 | { 1117 | SetLastParseError(REPARSE_ERROR_BRACE_EXPECTED); 1118 | return -1; 1119 | } 1120 | 1121 | int nEnd = AddInstruction(RE_GROUP_END); 1122 | if (nEnd < 0) 1123 | return -1; 1124 | GetInstruction(nEnd).group.nGroup = GetInstruction(p).group.nGroup; 1125 | return nPushGroup; 1126 | } 1127 | 1128 | // ParseGroup: parse grammar rule Group 1129 | int ParseGroup(const RECHAR **ppszRE, bool &bEmpty) 1130 | { 1131 | int nCall = AddInstruction(RE_CALL); 1132 | if (nCall < 0) 1133 | return -1; 1134 | 1135 | if (AddInstruction(RE_RETURN) < 0) 1136 | return -1; 1137 | 1138 | int nAlt = ParseRE(ppszRE, bEmpty); 1139 | if (nAlt < 0) 1140 | { 1141 | if (GetLastParseError()) 1142 | return -1; 1143 | 1144 | if (!PeekToken(ppszRE, ')')) 1145 | { 1146 | SetLastParseError(REPARSE_ERROR_PAREN_EXPECTED); 1147 | return -1; 1148 | } 1149 | 1150 | // in the case of an empty group, we add a nop 1151 | nAlt = AddInstruction(RE_NOP); 1152 | if (nAlt < 0) 1153 | return -1; 1154 | } 1155 | 1156 | 
GetInstruction(nCall).call.nTarget = nAlt; 1157 | 1158 | if (!MatchToken(ppszRE, ')')) 1159 | { 1160 | SetLastParseError(REPARSE_ERROR_PAREN_EXPECTED); 1161 | return -1; 1162 | } 1163 | 1164 | return nCall; 1165 | } 1166 | 1167 | RECHAR GetEscapedChar(RECHAR ch) throw() 1168 | { 1169 | if (ch == 't') 1170 | return '\t'; 1171 | return ch; 1172 | } 1173 | 1174 | // ParseCharItem: parse grammar rule CharItem 1175 | int ParseCharItem(const RECHAR **ppszRE, RECHAR *pchStartChar, RECHAR *pchEndChar) throw() 1176 | { 1177 | if (**ppszRE == '\\') 1178 | { 1179 | *ppszRE = CharTraits::Next(*ppszRE); 1180 | *pchStartChar = GetEscapedChar(**ppszRE); 1181 | } 1182 | else 1183 | *pchStartChar = **ppszRE; 1184 | *ppszRE = CharTraits::Next(*ppszRE); 1185 | 1186 | if (!MatchToken(ppszRE, '-')) 1187 | { 1188 | *pchEndChar = *pchStartChar; 1189 | return 0; 1190 | } 1191 | 1192 | // check for unterminated range 1193 | if (!**ppszRE || PeekToken(ppszRE, ']')) 1194 | { 1195 | SetLastParseError(REPARSE_ERROR_BRACKET_EXPECTED); 1196 | return -1; 1197 | } 1198 | 1199 | *pchEndChar = **ppszRE; 1200 | *ppszRE = CharTraits::Next(*ppszRE); 1201 | 1202 | if (*pchEndChar < *pchStartChar) 1203 | { 1204 | SetLastParseError(REPARSE_ERROR_INVALID_RANGE); 1205 | return -1; 1206 | } 1207 | return 0; 1208 | } 1209 | 1210 | int AddInstructions(int nNumInstructions) 1211 | { 1212 | size_t nCurr = m_Instructions.GetCount(); 1213 | if (!m_Instructions.SetCount(nCurr+nNumInstructions)) 1214 | { 1215 | SetLastParseError(REPARSE_ERROR_OUTOFMEMORY); 1216 | return -1; 1217 | } 1218 | return (int) nCurr; 1219 | } 1220 | 1221 | // ParseCharSet: parse grammar rule CharSet 1222 | int ParseCharSet(const RECHAR **ppszRE, BOOL bNot) 1223 | { 1224 | int p = -1; 1225 | 1226 | unsigned char *pBits = NULL; 1227 | 1228 | if (CharTraits::UseBitFieldForRange()) 1229 | { 1230 | // we use a bit field to represent the characters 1231 | // a 1 bit means match against the character 1232 | // the last 5 bits are used as an index 
into 1233 | // the byte array, and the first 3 bits 1234 | // are used to index into the selected byte 1235 | 1236 | p = AddInstruction(bNot ? RE_NOTRANGE : RE_RANGE); 1237 | if (p < 0) 1238 | return -1; 1239 | 1240 | // add the required space to hold the character 1241 | // set. We use one bit per character for ansi 1242 | if (AddInstructions(InstructionsPerRangeBitField()) < 0) 1243 | return -1; 1244 | 1245 | pBits = (unsigned char *) (&m_Instructions[p+1]); 1246 | memset(pBits, 0x00, 256/8); 1247 | } 1248 | else 1249 | { 1250 | p = AddInstruction(bNot ? RE_NOTRANGE_EX : RE_RANGE_EX); 1251 | if (p < 0) 1252 | return -1; 1253 | } 1254 | 1255 | RECHAR chStart; 1256 | RECHAR chEnd; 1257 | 1258 | while (**ppszRE && **ppszRE != ']') 1259 | { 1260 | if (ParseCharItem(ppszRE, &chStart, &chEnd)) 1261 | return -1; 1262 | 1263 | if (CharTraits::UseBitFieldForRange()) 1264 | { 1265 | for (int i=chStart; i<=chEnd; i++) 1266 | pBits[i >> 3] |= 1 << (i & 0x7); 1267 | } 1268 | else 1269 | { 1270 | int nStart = AddInstruction(RE_NOP); 1271 | if (nStart < 0) 1272 | return -1; 1273 | 1274 | int nEnd = AddInstruction(RE_NOP); 1275 | if (nEnd < 0) 1276 | return -1; 1277 | 1278 | GetInstruction(nStart).memory.nIndex = (int) chStart; 1279 | GetInstruction(nEnd).memory.nIndex = (int) chEnd; 1280 | } 1281 | } 1282 | 1283 | if (!CharTraits::UseBitFieldForRange()) 1284 | GetInstruction(p).range.nTarget = m_Instructions.GetCount(); 1285 | 1286 | return p; 1287 | } 1288 | 1289 | // ParseCharClass: parse grammar rule CharClass 1290 | int ParseCharClass(const RECHAR **ppszRE, bool &bEmpty) 1291 | { 1292 | bEmpty = false; 1293 | if (MatchToken(ppszRE, ']')) 1294 | { 1295 | SetLastParseError(REPARSE_ERROR_EMPTY_RANGE); 1296 | return -1; 1297 | } 1298 | 1299 | BOOL bNot = FALSE; 1300 | if (MatchToken(ppszRE, '^')) 1301 | bNot = TRUE; 1302 | 1303 | if (MatchToken(ppszRE, ']')) 1304 | { 1305 | SetLastParseError(REPARSE_ERROR_EMPTY_RANGE); 1306 | return -1; 1307 | } 1308 | 1309 | int p = 
ParseCharSet(ppszRE, bNot); 1310 | if (p < 0) 1311 | return p; 1312 | if (!MatchToken(ppszRE, ']')) 1313 | { 1314 | SetLastParseError(REPARSE_ERROR_BRACKET_EXPECTED); 1315 | return -1; 1316 | } 1317 | 1318 | return p; 1319 | } 1320 | 1321 | int AddMemInstruction(REInstructionType type) 1322 | { 1323 | int p = AddInstruction(type); 1324 | if (p < 0) 1325 | return p; 1326 | GetInstruction(p).memory.nIndex = m_uRequiredMem++; 1327 | return p; 1328 | } 1329 | 1330 | // helper for parsing !SE 1331 | int ParseNot(const RECHAR **ppszRE, bool &bEmpty) 1332 | { 1333 | int nStoreCP = AddMemInstruction(RE_STORE_CHARPOS); 1334 | int nStoreSP = AddMemInstruction(RE_STORE_STACKPOS); 1335 | 1336 | int nCall = AddInstruction(RE_CALL); 1337 | if (nCall < 0) 1338 | return -1; 1339 | 1340 | int nGetCP = AddInstruction(RE_GET_CHARPOS); 1341 | if (nGetCP < 0) 1342 | return -1; 1343 | GetInstruction(nGetCP).memory.nIndex = GetInstruction(nStoreCP).memory.nIndex; 1344 | 1345 | int nGetSP = AddInstruction(RE_GET_STACKPOS); 1346 | if (nGetSP < 0) 1347 | return -1; 1348 | GetInstruction(nGetSP).memory.nIndex = GetInstruction(nStoreSP).memory.nIndex; 1349 | 1350 | int nJmp = AddInstruction(RE_JMP); 1351 | if (nJmp < 0) 1352 | return -1; 1353 | 1354 | int nSE = ParseSE(ppszRE, bEmpty); 1355 | if (nSE < 0) 1356 | return nSE; 1357 | 1358 | // patch the call 1359 | GetInstruction(nCall).call.nTarget = nSE; 1360 | 1361 | int nGetCP1 = AddInstruction(RE_GET_CHARPOS); 1362 | if (nGetCP1 < 0) 1363 | return -1; 1364 | GetInstruction(nGetCP1).memory.nIndex = GetInstruction(nStoreCP).memory.nIndex; 1365 | 1366 | int nGetSP1 = AddInstruction(RE_GET_STACKPOS); 1367 | if (nGetSP1 < 0) 1368 | return -1; 1369 | GetInstruction(nGetSP1).memory.nIndex = GetInstruction(nStoreSP).memory.nIndex; 1370 | 1371 | int nRet = AddInstruction(RE_RETURN); 1372 | if (nRet < 0) 1373 | return -1; 1374 | 1375 | GetInstruction(nJmp).jmp.nTarget = nRet+1; 1376 | 1377 | return nStoreCP; 1378 | } 1379 | 1380 | // ParseAbbrev: 
parse grammar rule Abbrev 1381 | int ParseAbbrev(const RECHAR **ppszRE, bool &bEmpty) 1382 | { 1383 | const RECHAR **szAbbrevs = CharTraits::GetAbbrevs(); 1384 | 1385 | while (*szAbbrevs) 1386 | { 1387 | if (**ppszRE == **szAbbrevs) 1388 | { 1389 | const RECHAR *szAbbrev = (*szAbbrevs)+1; 1390 | int p = ParseE(&szAbbrev, bEmpty); 1391 | if (p < 0) 1392 | { 1393 | SetLastParseError(REPARSE_ERROR_UNEXPECTED); 1394 | return p; 1395 | } 1396 | *ppszRE = CharTraits::Next(*ppszRE); 1397 | return p; 1398 | } 1399 | szAbbrevs++; 1400 | } 1401 | return -1; 1402 | } 1403 | 1404 | // ParseSE: parse grammar rule SE (simple expression) 1405 | int ParseSE(const RECHAR **ppszRE, bool &bEmpty) 1406 | { 1407 | 1408 | if (MatchToken(ppszRE, '{')) 1409 | return ParseArg(ppszRE, bEmpty); 1410 | if (MatchToken(ppszRE, '(')) 1411 | return ParseGroup(ppszRE, bEmpty); 1412 | if (MatchToken(ppszRE, '[')) 1413 | return ParseCharClass(ppszRE, bEmpty); 1414 | 1415 | if (MatchToken(ppszRE, '\\')) 1416 | { 1417 | if (!CharTraits::Isdigit(**ppszRE)) 1418 | { 1419 | // check for abbreviations 1420 | int p; 1421 | p = ParseAbbrev(ppszRE, bEmpty); 1422 | if (p >= 0) 1423 | return p; 1424 | 1425 | if (GetLastParseError()) 1426 | return -1; 1427 | 1428 | // escaped char 1429 | p = AddInstruction(RE_SYMBOL); 1430 | if (p < 0) 1431 | return -1; 1432 | GetInstruction(p).symbol.nSymbol = (int) **ppszRE; 1433 | *ppszRE = CharTraits::Next(*ppszRE); 1434 | return p; 1435 | } 1436 | // previous match 1437 | bEmpty = false; 1438 | int nPrev = AddInstruction(RE_PREVIOUS); 1439 | if (nPrev < 0) 1440 | return -1; 1441 | 1442 | UINT uValue = (UINT) CharTraits::Strtol(*ppszRE, (RECHAR **) ppszRE, 10); 1443 | if (uValue >= m_uNumGroups) 1444 | { 1445 | SetLastParseError(REPARSE_ERROR_INVALID_GROUP); 1446 | return -1; 1447 | } 1448 | GetInstruction(nPrev).prev.nGroup = (size_t) uValue; 1449 | return nPrev; 1450 | } 1451 | 1452 | if (MatchToken(ppszRE, '!')) 1453 | return ParseNot(ppszRE, bEmpty); 1454 | 1455 | if 
(**ppszRE == '}' || **ppszRE == ']' || **ppszRE == ')') 1456 | { 1457 | return -1; 1458 | } 1459 | 1460 | if (**ppszRE == '\0') 1461 | { 1462 | return -1; 1463 | } 1464 | 1465 | int p; 1466 | if (**ppszRE == '.') 1467 | { 1468 | p = AddInstruction(RE_ANY); 1469 | if (p < 0) 1470 | return -1; 1471 | bEmpty = false; 1472 | } 1473 | else if (**ppszRE == '$' && (*ppszRE)[1] == '\0') 1474 | { 1475 | p = AddInstruction(RE_SYMBOL); 1476 | if (p < 0) 1477 | return -1; 1478 | GetInstruction(p).symbol.nSymbol = 0; 1479 | bEmpty = false; 1480 | } 1481 | else 1482 | { 1483 | p = AddInstruction(RE_SYMBOL); 1484 | if (p < 0) 1485 | return -1; 1486 | GetInstruction(p).symbol.nSymbol = (int) **ppszRE; 1487 | bEmpty = false; 1488 | } 1489 | *ppszRE = CharTraits::Next(*ppszRE); 1490 | return p; 1491 | } 1492 | 1493 | // ParseE: parse grammar rule E (expression) 1494 | int ParseE(const RECHAR **ppszRE, bool &bEmpty) 1495 | { 1496 | CParseState ParseState(this); 1497 | const RECHAR *sz = *ppszRE; 1498 | 1499 | int nSE; 1500 | 1501 | int nFirst = ParseSE(ppszRE, bEmpty); 1502 | if (nFirst < 0) 1503 | return nFirst; 1504 | 1505 | REInstructionType type = RE_MATCH; 1506 | 1507 | if (MatchToken(ppszRE, '*')) 1508 | if(MatchToken(ppszRE, '?')) 1509 | type = RE_NG_STAR_BEGIN; 1510 | else 1511 | type = RE_STAR_BEGIN; 1512 | 1513 | 1514 | else if (MatchToken(ppszRE, '+')) 1515 | if(MatchToken(ppszRE, '?')) 1516 | type = RE_NG_PLUS; 1517 | else 1518 | type = RE_PLUS; 1519 | 1520 | else if (MatchToken(ppszRE, '?')) 1521 | if(MatchToken(ppszRE, '?')) 1522 | type = RE_NG_QUESTION; 1523 | else 1524 | type = RE_QUESTION; 1525 | 1526 | 1527 | if (type == RE_MATCH) 1528 | return nFirst; 1529 | 1530 | if (type == RE_STAR_BEGIN || type == RE_QUESTION|| type == RE_NG_STAR_BEGIN || type == RE_NG_QUESTION) 1531 | { 1532 | ParseState.Restore(this); 1533 | } 1534 | else 1535 | { 1536 | m_uNumGroups = ParseState.m_uNumGroups; 1537 | } 1538 | *ppszRE = sz; 1539 | 1540 | int nE; 1541 | 1542 | if (type == 
RE_NG_STAR_BEGIN || type == RE_NG_PLUS || type == RE_NG_QUESTION) // Non-Greedy 1543 | { 1544 | int nCall = AddInstruction(RE_CALL); 1545 | if (nCall < 0) 1546 | return -1; 1547 | 1548 | bEmpty = false; 1549 | 1550 | nSE = ParseSE(ppszRE, bEmpty); 1551 | if (nSE < 0) 1552 | return nSE; 1553 | 1554 | if (bEmpty && (type == RE_NG_STAR_BEGIN || type == RE_NG_PLUS)) 1555 | { 1556 | SetLastParseError(REPARSE_ERROR_EMPTY_REPEATOP); 1557 | return -1; 1558 | } 1559 | bEmpty = true; 1560 | 1561 | *ppszRE = CharTraits::Next(*ppszRE); 1562 | *ppszRE = CharTraits::Next(*ppszRE); 1563 | 1564 | if (type == RE_NG_STAR_BEGIN || type == RE_NG_PLUS) 1565 | { 1566 | int nJmp = AddInstruction(RE_JMP); 1567 | if (nJmp < 0) 1568 | return -1; 1569 | GetInstruction(nCall).call.nTarget = nJmp+1; 1570 | GetInstruction(nJmp).jmp.nTarget = nCall; 1571 | } 1572 | else 1573 | GetInstruction(nCall).call.nTarget = nSE+1; 1574 | 1575 | if (type == RE_NG_PLUS) 1576 | nE = nFirst; 1577 | else 1578 | nE = nCall; 1579 | } 1580 | else // Greedy 1581 | { 1582 | 1583 | int nPushMem = AddInstruction(RE_PUSH_MEMORY); 1584 | if (nPushMem < 0) 1585 | return -1; 1586 | 1587 | int nStore = AddInstruction(RE_STORE_CHARPOS); 1588 | if (nStore < 0) 1589 | return -1; 1590 | 1591 | if (AddInstruction(RE_PUSH_CHARPOS) < 0) 1592 | return -1; 1593 | 1594 | int nCall = AddInstruction(RE_CALL); 1595 | if (nCall < 0) 1596 | return -1; 1597 | 1598 | if (AddInstruction(RE_POP_CHARPOS) < 0) 1599 | return -1; 1600 | 1601 | int nPopMem = AddInstruction(RE_POP_MEMORY); 1602 | if (nPopMem < 0) 1603 | return -1; 1604 | 1605 | int nJmp = AddInstruction(RE_JMP); 1606 | if (nJmp < 0) 1607 | return -1; 1608 | 1609 | GetInstruction(nPushMem).memory.nIndex = m_uRequiredMem++; 1610 | GetInstruction(nStore).memory.nIndex = GetInstruction(nPushMem).memory.nIndex; 1611 | GetInstruction(nCall).call.nTarget = nJmp+1; 1612 | GetInstruction(nPopMem).memory.nIndex = GetInstruction(nPushMem).memory.nIndex; 1613 | 1614 | bEmpty = false; 1615 | 
1616 | nSE = ParseSE(ppszRE, bEmpty); 1617 | if (nSE < 0) 1618 | return nSE; 1619 | 1620 | if (bEmpty && (type == RE_STAR_BEGIN || type == RE_PLUS)) 1621 | { 1622 | SetLastParseError(REPARSE_ERROR_EMPTY_REPEATOP); 1623 | return -1; 1624 | } 1625 | 1626 | if (type != RE_PLUS && type != RE_NG_PLUS) 1627 | bEmpty = true; 1628 | 1629 | *ppszRE = CharTraits::Next(*ppszRE); 1630 | 1631 | 1632 | int nRetNoMatch = AddInstruction(RE_RET_NOMATCH); 1633 | if (nRetNoMatch < 0) 1634 | return -1; 1635 | 1636 | int nStore1 = AddInstruction(RE_STORE_CHARPOS); 1637 | if (nStore1 < 0) 1638 | return -1; 1639 | 1640 | GetInstruction(nRetNoMatch).memory.nIndex = GetInstruction(nPushMem).memory.nIndex; 1641 | GetInstruction(nStore1).memory.nIndex = GetInstruction(nPushMem).memory.nIndex; 1642 | 1643 | if (type != RE_QUESTION) 1644 | { 1645 | int nJmp1 = AddInstruction(RE_JMP); 1646 | if (nJmp1 < 0) 1647 | return -1; 1648 | GetInstruction(nJmp1).jmp.nTarget = nPushMem; 1649 | } 1650 | 1651 | GetInstruction(nJmp).jmp.nTarget = m_Instructions.GetCount(); 1652 | if (type == RE_PLUS) 1653 | nE = nFirst; 1654 | else 1655 | nE = nPushMem; 1656 | } 1657 | 1658 | return nE; 1659 | } 1660 | 1661 | 1662 | // ParseAltE: parse grammar rule AltE 1663 | int ParseAltE(const RECHAR **ppszRE, bool &bEmpty) 1664 | { 1665 | const RECHAR *sz = *ppszRE; 1666 | CParseState ParseState(this); 1667 | 1668 | int nPush = AddInstruction(RE_PUSH_CHARPOS); 1669 | if (nPush < 0) 1670 | return -1; 1671 | 1672 | int nCall = AddInstruction(RE_CALL); 1673 | if (nCall < 0) 1674 | return -1; 1675 | 1676 | GetInstruction(nCall).call.nTarget = nPush+4; 1677 | if (AddInstruction(RE_POP_CHARPOS) < 0) 1678 | return -1; 1679 | 1680 | int nJmpNext = AddInstruction(RE_JMP); 1681 | if (nJmpNext < 0) 1682 | return -1; 1683 | 1684 | int nE = ParseE(ppszRE, bEmpty); 1685 | if (nE < 0) 1686 | { 1687 | if (GetLastParseError()) 1688 | return -1; 1689 | ParseState.Restore(this); 1690 | return nE; 1691 | } 1692 | 1693 | int nJmpEnd = 
AddInstruction(RE_JMP); 1694 | if (nJmpEnd < 0) 1695 | return -1; 1696 | 1697 | GetInstruction(nJmpNext).jmp.nTarget = nJmpEnd+1; 1698 | 1699 | if (!MatchToken(ppszRE, '|')) 1700 | { 1701 | ParseState.Restore(this); 1702 | *ppszRE = sz; 1703 | 1704 | return ParseE(ppszRE, bEmpty); 1705 | } 1706 | 1707 | bool bEmptyAltE; 1708 | int nAltE = ParseAltE(ppszRE, bEmptyAltE); 1709 | GetInstruction(nJmpEnd).jmp.nTarget = m_Instructions.GetCount(); 1710 | GetInstruction(nJmpNext).jmp.nTarget = nAltE; 1711 | if (nAltE < 0) 1712 | { 1713 | if (GetLastParseError()) 1714 | return -1; 1715 | ParseState.Restore(this); 1716 | return nAltE; 1717 | } 1718 | bEmpty = bEmpty | bEmptyAltE; 1719 | return nPush; 1720 | } 1721 | 1722 | // ParseRE: parse grammar rule RE (regular expression) 1723 | int ParseRE(const RECHAR **ppszRE, bool &bEmpty) 1724 | { 1725 | if (**ppszRE == '\0') 1726 | return -1; 1727 | 1728 | int p = ParseAltE(ppszRE, bEmpty); 1729 | if (p < 0) 1730 | return p; 1731 | 1732 | bool bEmptyRE = true; 1733 | ParseRE(ppszRE, bEmptyRE); 1734 | if (GetLastParseError()) 1735 | return -1; 1736 | bEmpty = bEmpty && bEmptyRE; 1737 | return p; 1738 | } 1739 | 1740 | //pointers to the matched string and matched groups, currently point into an internal allocated 1741 | //buffer that hold a copy of the input string. 1742 | //This function fix these pointers to point into the original, user supplied buffer (first param to Match method). 
1743 | //Example: If a ptr (szStart) currently point to +3, it is fixed to +3 1744 | void FixupMatchContext(CAtlREMatchContext *pContext, const RECHAR *szOrig, const RECHAR *szNew) 1745 | { 1746 | ATLENSURE(pContext); 1747 | ATLASSERT(szOrig); 1748 | ATLASSERT(szNew); 1749 | 1750 | pContext->m_Match.szStart = szOrig + (pContext->m_Match.szStart - szNew); 1751 | pContext->m_Match.szEnd = szOrig + (pContext->m_Match.szEnd - szNew); 1752 | for (UINT i=0; im_uNumGroups; i++) 1753 | { 1754 | if (pContext->m_Matches[i].szStart==NULL || pContext->m_Matches[i].szEnd==NULL) 1755 | { 1756 | continue; //Do not fix unmatched groups. 1757 | } 1758 | pContext->m_Matches[i].szStart = szOrig + (pContext->m_Matches[i].szStart - szNew); 1759 | pContext->m_Matches[i].szEnd = szOrig + (pContext->m_Matches[i].szEnd - szNew); 1760 | } 1761 | } 1762 | // implementation 1763 | // helpers for dumping and debugging the rx engine 1764 | public: 1765 | #ifdef ATL_REGEXP_DUMP 1766 | size_t DumpInstruction(size_t ip) 1767 | { 1768 | printf("%08x ", ip); 1769 | switch (GetInstruction(ip).type) 1770 | { 1771 | case RE_NOP: 1772 | printf("NOP\n"); 1773 | ip++; 1774 | break; 1775 | 1776 | case RE_SYMBOL: 1777 | AtlprintfT(CAToREChar("Symbol %c\n"),GetInstruction(ip).symbol.nSymbol); 1778 | ip++; 1779 | break; 1780 | 1781 | case RE_ANY: 1782 | printf("Any\n"); 1783 | ip++; 1784 | break; 1785 | 1786 | case RE_RANGE: 1787 | printf("Range\n"); 1788 | ip++; 1789 | ip += InstructionsPerRangeBitField(); 1790 | break; 1791 | 1792 | case RE_NOTRANGE: 1793 | printf("NOT Range\n"); 1794 | ip++; 1795 | ip += InstructionsPerRangeBitField(); 1796 | break; 1797 | 1798 | case RE_RANGE_EX: 1799 | printf("RangeEx %08x\n", GetInstruction(ip).range.nTarget); 1800 | ip++; 1801 | break; 1802 | 1803 | case RE_NOTRANGE_EX: 1804 | printf("NotRangeEx %08x\n", GetInstruction(ip).range.nTarget); 1805 | ip++; 1806 | break; 1807 | 1808 | case RE_GROUP_START: 1809 | printf("Start group %d\n", GetInstruction(ip).group.nGroup); 
1810 | ip++; 1811 | break; 1812 | 1813 | case RE_GROUP_END: 1814 | printf("Group end %d\n", GetInstruction(ip).group.nGroup); 1815 | ip++; 1816 | break; 1817 | 1818 | case RE_PUSH_CHARPOS: 1819 | printf("Push char pos\n"); 1820 | ip++; 1821 | break; 1822 | 1823 | case RE_POP_CHARPOS: 1824 | printf("Pop char pos\n"); 1825 | ip++; 1826 | break; 1827 | 1828 | case RE_STORE_CHARPOS: 1829 | printf("Store char pos %d\n", GetInstruction(ip).memory.nIndex); 1830 | ip++; 1831 | break; 1832 | 1833 | case RE_GET_CHARPOS: 1834 | printf("Get char pos %d\n", GetInstruction(ip).memory.nIndex); 1835 | ip++; 1836 | break; 1837 | 1838 | case RE_STORE_STACKPOS: 1839 | printf("Store stack pos %d\n", GetInstruction(ip).memory.nIndex); 1840 | ip++; 1841 | break; 1842 | 1843 | case RE_GET_STACKPOS: 1844 | printf("Get stack pos %d\n", GetInstruction(ip).memory.nIndex); 1845 | ip++; 1846 | break; 1847 | 1848 | case RE_CALL: 1849 | printf("Call %08x\n", GetInstruction(ip).call.nTarget); 1850 | ip++; 1851 | break; 1852 | 1853 | case RE_JMP: 1854 | printf("Jump %08x\n", GetInstruction(ip).jmp.nTarget); 1855 | ip++; 1856 | break; 1857 | 1858 | case RE_RETURN: 1859 | printf("return\n"); 1860 | ip++; 1861 | break; 1862 | 1863 | case RE_PUSH_MEMORY: 1864 | printf("Push memory %08x\n", GetInstruction(ip).memory.nIndex); 1865 | ip++; 1866 | break; 1867 | 1868 | case RE_POP_MEMORY: 1869 | printf("Pop memory %08x\n", GetInstruction(ip).memory.nIndex); 1870 | ip++; 1871 | break; 1872 | 1873 | case RE_RET_NOMATCH: 1874 | printf("Return no match %08x\n", GetInstruction(ip).memory.nIndex); 1875 | ip++; 1876 | break; 1877 | 1878 | case RE_MATCH: 1879 | printf("END\n"); 1880 | ip++; 1881 | break; 1882 | 1883 | case RE_ADVANCE: 1884 | printf("ADVANCE\n"); 1885 | ip++; 1886 | break; 1887 | 1888 | case RE_FAIL: 1889 | printf("FAIL\n"); 1890 | ip++; 1891 | break; 1892 | 1893 | case RE_PREVIOUS: 1894 | printf("Prev %d\n", GetInstruction(ip).prev.nGroup); 1895 | ip++; 1896 | break; 1897 | 1898 | case 
RE_PUSH_GROUP: 1899 | printf("Push group %d\n", GetInstruction(ip).group.nGroup); 1900 | ip++; 1901 | break; 1902 | 1903 | case RE_POP_GROUP: 1904 | printf("Pop group %d\n", GetInstruction(ip).group.nGroup); 1905 | ip++; 1906 | break; 1907 | 1908 | 1909 | default: 1910 | printf("????\n"); 1911 | ip++; 1912 | break; 1913 | } 1914 | return ip; 1915 | } 1916 | 1917 | void Dump(size_t ipCurrent = 0) 1918 | { 1919 | size_t ip = 0; 1920 | 1921 | while (ip < m_Instructions.GetCount()) 1922 | { 1923 | if (ip == ipCurrent) 1924 | printf("->"); 1925 | ip = DumpInstruction(ip); 1926 | } 1927 | } 1928 | #endif 1929 | 1930 | #ifdef ATLRX_DEBUG 1931 | void cls( HANDLE hConsole ) 1932 | { 1933 | COORD coordScreen = { 0, 0 }; /* here's where we'll home the 1934 | cursor */ 1935 | BOOL bSuccess; 1936 | DWORD cCharsWritten; 1937 | CONSOLE_SCREEN_BUFFER_INFO csbi; /* to get buffer info */ 1938 | DWORD dwConSize; /* number of character cells in 1939 | the current buffer */ 1940 | 1941 | /* get the number of character cells in the current buffer */ 1942 | 1943 | bSuccess = GetConsoleScreenBufferInfo( hConsole, &csbi ); 1944 | dwConSize = csbi.dwSize.X * csbi.dwSize.Y; 1945 | 1946 | /* fill the entire screen with blanks */ 1947 | 1948 | bSuccess = FillConsoleOutputCharacter( hConsole, (TCHAR) ' ', 1949 | dwConSize, coordScreen, &cCharsWritten ); 1950 | 1951 | /* get the current text attribute */ 1952 | 1953 | bSuccess = GetConsoleScreenBufferInfo( hConsole, &csbi ); 1954 | 1955 | /* now set the buffer's attributes accordingly */ 1956 | 1957 | bSuccess = FillConsoleOutputAttribute( hConsole, csbi.wAttributes, 1958 | dwConSize, coordScreen, &cCharsWritten ); 1959 | 1960 | /* put the cursor at (0, 0) */ 1961 | 1962 | bSuccess = SetConsoleCursorPosition( hConsole, coordScreen ); 1963 | return; 1964 | } 1965 | 1966 | void DumpStack(CAtlREMatchContext *pContext) 1967 | { 1968 | for (size_t i=pContext->m_nTos; i>0; i--) 1969 | { 1970 | if (pContext->m_stack[i] < (void *) 
m_Instructions.GetCount()) 1971 | printf("0x%p\n", pContext->m_stack[i]); 1972 | else 1973 | { 1974 | // assume a pointer into the input 1975 | AtlprintfT(CAToREChar("%s\n"), pContext->m_stack[i]); 1976 | } 1977 | } 1978 | } 1979 | 1980 | void DumpMemory(CAtlREMatchContext *pContext) 1981 | { 1982 | for (UINT i=0; i(CAToREChar("%d: %s\n"), i, pContext->m_Mem.m_p[i]); 1985 | } 1986 | } 1987 | 1988 | virtual void OnDebugEvent(size_t ip, const RECHAR *szIn, const RECHAR *sz, CAtlREMatchContext *pContext) 1989 | { 1990 | cls(GetStdHandle(STD_OUTPUT_HANDLE)); 1991 | printf("----------Code---------\n"); 1992 | Dump(ip); 1993 | printf("----------Input---------\n"); 1994 | AtlprintfT(CAToREChar("%s\n"), szIn); 1995 | for (int s=0; szIn+s < sz; s++) 1996 | { 1997 | printf(" "); 1998 | } 1999 | printf("^\n"); 2000 | printf("----------Memory---------\n"); 2001 | DumpMemory(pContext); 2002 | printf("----------Stack---------\n"); 2003 | DumpStack(pContext); 2004 | getchar(); 2005 | } 2006 | #endif 2007 | 2008 | }; 2009 | 2010 | } // namespace ATL 2011 | #pragma pack(pop) 2012 | 2013 | #endif // __ATLRX_H__ 2014 | --------------------------------------------------------------------------------