├── .clang-format ├── .gitignore ├── .travis.yml ├── .travis ├── install.sh └── script.sh ├── CMakeLists.txt ├── LICENSE ├── NOTICE ├── README.md ├── autoload ├── cpsm.py └── cpsm.vim ├── bench ├── bench.py ├── bench_cpsm.py └── linuxclock.py ├── cmake ├── FindICU.cmake └── FindPythonConfig.cmake ├── install.sh └── src ├── .ycm_extra_conf.py ├── api.h ├── cpsm_cli_main.cc ├── ctrlp_util.cc ├── ctrlp_util.h ├── matcher.h ├── matcher_test.cc ├── par_util.h ├── path_util.h ├── python_extension.cc ├── str_util.cc └── str_util.h /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | # BasedOnStyle: Google 4 | AccessModifierOffset: -1 5 | AlignAfterOpenBracket: true 6 | AlignEscapedNewlinesLeft: true 7 | AlignOperands: true 8 | AlignTrailingComments: true 9 | AllowAllParametersOfDeclarationOnNextLine: true 10 | AllowShortBlocksOnASingleLine: false 11 | AllowShortCaseLabelsOnASingleLine: false 12 | AllowShortIfStatementsOnASingleLine: true 13 | AllowShortLoopsOnASingleLine: true 14 | AllowShortFunctionsOnASingleLine: All 15 | AlwaysBreakAfterDefinitionReturnType: false 16 | AlwaysBreakTemplateDeclarations: true 17 | AlwaysBreakBeforeMultilineStrings: true 18 | BreakBeforeBinaryOperators: None 19 | BreakBeforeTernaryOperators: true 20 | BreakConstructorInitializersBeforeComma: false 21 | BinPackParameters: true 22 | BinPackArguments: true 23 | ColumnLimit: 80 24 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 25 | ConstructorInitializerIndentWidth: 4 26 | DerivePointerAlignment: true 27 | ExperimentalAutoDetectBinPacking: false 28 | IndentCaseLabels: true 29 | IndentWrappedFunctionNames: false 30 | IndentFunctionDeclarationAfterType: false 31 | MaxEmptyLinesToKeep: 1 32 | KeepEmptyLinesAtTheStartOfBlocks: false 33 | NamespaceIndentation: None 34 | ObjCBlockIndentWidth: 2 35 | ObjCSpaceAfterProperty: false 36 | ObjCSpaceBeforeProtocolList: false 37 | PenaltyBreakBeforeFirstCallParameter: 1 38 | 
PenaltyBreakComment: 300 39 | PenaltyBreakString: 1000 40 | PenaltyBreakFirstLessLess: 120 41 | PenaltyExcessCharacter: 1000000 42 | PenaltyReturnTypeOnItsOwnLine: 200 43 | PointerAlignment: Left 44 | SpacesBeforeTrailingComments: 2 45 | Cpp11BracedListStyle: true 46 | Standard: Auto 47 | IndentWidth: 2 48 | TabWidth: 8 49 | UseTab: Never 50 | BreakBeforeBraces: Attach 51 | SpacesInParentheses: false 52 | SpacesInSquareBrackets: false 53 | SpacesInAngles: false 54 | SpaceInEmptyParentheses: false 55 | SpacesInCStyleCastParentheses: false 56 | SpaceAfterCStyleCast: false 57 | SpacesInContainerLiterals: true 58 | SpaceBeforeAssignmentOperators: true 59 | ContinuationIndentWidth: 4 60 | CommentPragmas: '^ IWYU pragma:' 61 | ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] 62 | SpaceBeforeParens: ControlStatements 63 | DisableFormat: false 64 | ... 65 | 66 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Output directories 2 | bin/ 3 | build/ 4 | 5 | # Vim 6 | *[._]s[a-w][a-z] 7 | 8 | # C/C++ 9 | *.o 10 | *.so 11 | 12 | # Python 13 | *.py[cod] 14 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Language must be "generic" to get minimal Trusty image. 2 | language: generic 3 | 4 | env: 5 | - TEST_PY=py2 6 | - TEST_PY=py3 7 | 8 | os: 9 | - linux 10 | - osx 11 | 12 | # Need Trusty for version of GCC that supports C++11. 
13 | dist: trusty 14 | sudo: required 15 | addons: 16 | apt: 17 | packages: 18 | - cmake 19 | - libboost-dev 20 | - libboost-program-options-dev 21 | - python-dev 22 | - python3-dev 23 | 24 | install: ./.travis/install.sh 25 | 26 | script: ./.travis/script.sh 27 | -------------------------------------------------------------------------------- /.travis/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o errexit 4 | set -o nounset 5 | set -o pipefail 6 | 7 | if [[ "${TRAVIS_OS_NAME}" == 'osx' ]]; then 8 | brew update 9 | # Skip updating Boost, since doing so takes a long time and we'd like to 10 | # know about compatibility breakage anyway. 11 | brew install cmake || brew outdated cmake || brew upgrade cmake 12 | case "${TEST_PY}" in 13 | py2) brew install python || brew outdated python || brew upgrade python;; 14 | py3) brew install python3 || brew outdated python3 || brew upgrade python3;; 15 | esac 16 | fi 17 | -------------------------------------------------------------------------------- /.travis/script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o errexit 4 | set -o nounset 5 | set -o pipefail 6 | 7 | case "${TEST_PY}" in 8 | py2) PY3=OFF ./install.sh;; 9 | py3) PY3=ON ./install.sh;; 10 | *) echo "Unknown TEST_PY: ${TEST_PY}"; false;; 11 | esac 12 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.12) 2 | 3 | project(cpsm) 4 | option(PY3 "Build for python3 instead of python2." 
OFF) 5 | list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) 6 | include_directories(${PROJECT_SOURCE_DIR}/src) 7 | if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) # Default to Release only when the user has not chosen a build type. 8 | set(CMAKE_BUILD_TYPE Release) 9 | endif() 10 | if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU|Clang|Intel") 11 | add_compile_options(-std=c++0x) 12 | elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") 13 | # Versions of MSVC that support C++11 at all support it by default, so hope 14 | # for the best... 15 | else() 16 | message(FATAL_ERROR "Unknown compiler - don't know how to turn on C++11.") 17 | endif() 18 | set(CMAKE_MACOSX_RPATH 1) 19 | 20 | find_package(Threads REQUIRED) 21 | 22 | set(Boost_USE_MULTITHREADED ON) 23 | find_package(Boost REQUIRED COMPONENTS program_options) 24 | include_directories(${Boost_INCLUDE_DIRS}) 25 | 26 | if(PY3) 27 | set(Python_ADDITIONAL_VERSIONS 3.8 3.7 3.6 3.5 3.4 3.3) 28 | find_package(PythonInterp 3 REQUIRED) 29 | else() 30 | set(Python_ADDITIONAL_VERSIONS 2.7 2.6) 31 | find_package(PythonInterp REQUIRED) 32 | endif() 33 | find_package(PythonConfig REQUIRED) 34 | 35 | find_package(ICU) 36 | if(ICU_FOUND) 37 | include_directories(${ICU_INCLUDE_DIRS}) 38 | add_definitions(-DCPSM_CONFIG_ICU=1) 39 | endif() 40 | 41 | add_library(cpsm_core src/str_util.cc) 42 | if(ICU_FOUND) 43 | target_link_libraries(cpsm_core ${ICU_LIBRARIES}) 44 | endif() 45 | target_link_libraries(cpsm_core ${CMAKE_THREAD_LIBS_INIT}) 46 | set_target_properties(cpsm_core PROPERTIES POSITION_INDEPENDENT_CODE ON) # cpsm_core is linked into the cpsm_py shared library below. 47 | 48 | add_library(cpsm_py SHARED src/ctrlp_util.cc src/python_extension.cc) 49 | target_link_libraries(cpsm_py cpsm_core) 50 | set_target_properties(cpsm_py PROPERTIES COMPILE_FLAGS "${PYTHON_COMPILE_FLAGS}" LINK_FLAGS "${PYTHON_LINK_FLAGS}" PREFIX "") # Quoted so empty or list-valued flags stay a single property value. 51 | if(APPLE) 52 | set_target_properties(cpsm_py PROPERTIES SUFFIX ".so") 53 | endif() 54 | install(TARGETS cpsm_py DESTINATION ${PROJECT_SOURCE_DIR}/autoload) 55 | 56 | add_executable(cpsm_cli src/cpsm_cli_main.cc) 57 | 
target_link_libraries(cpsm_cli cpsm_core ${Boost_PROGRAM_OPTIONS_LIBRARIES}) 58 | install(TARGETS cpsm_cli DESTINATION ${PROJECT_SOURCE_DIR}/bin) 59 | 60 | enable_testing() 61 | 62 | add_executable(matcher_test src/matcher_test.cc) 63 | target_link_libraries(matcher_test cpsm_core) 64 | add_test(matcher_test matcher_test) 65 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 
30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 
62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | nixprime/cpsm 2 | Copyright 2015 the Authors 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | cpsm 2 | ==== 3 | 4 | [![Build Status](https://travis-ci.org/nixprime/cpsm.svg?branch=master)](https://travis-ci.org/nixprime/cpsm) 5 | 6 | cpsm is a matcher for [CtrlP][]. Although it supports all kinds of queries, it 7 | is highly optimized for file paths (and, to a lesser extent, similar strings 8 | like identifiers in source code). 9 | 10 | Motivation 11 | ---------- 12 | 13 | There are a lot of different ways to manage multiple files in Vim. The goal of 14 | cpsm is to enable a particular one based on CtrlP: 15 | 16 | 1. Completely forget about the current set of open buffers. 17 | 18 | 2. 
When you want to open a file, invoke CtrlP and type - at most - a handful of 19 | immediately obvious letters in the file's name or path, like the beginning 20 | of its filename. 21 | 22 | 3. Get immediate visual feedback from CtrlP as to whether or not it has 23 | correctly determined what file you want. 24 | 25 | 4. Hit Enter to open the file you wanted in the current window. 26 | 27 | To achieve this, cpsm needs to deliver: 28 | 29 | - high quality search results (at sufficiently high levels of quality, it's 30 | possible to enter a short query, hit Enter without needing to look at and 31 | mentally parse the top match, and have a reasonable amount of confidence that 32 | CtrlP/cpsm got your file right anyway) 33 | 34 | - with as little user input as possible (every keystroke matters because of how 35 | common switching between files is) 36 | 37 | - with as little latency as possible (to support scaling to very large, and 38 | especially very deeply nested, code bases with very long pathnames) 39 | 40 | See the "Performance" section below for both search quality and time 41 | comparisons to other matchers. 42 | 43 | Requirements 44 | ------------ 45 | 46 | - Vim 7.4, compiled with the `+python` flag. 47 | 48 | - A C++ compiler supporting C++11. 49 | 50 | - Boost (Ubuntu: package `libboost-all-dev`). 51 | 52 | - CMake (Ubuntu: package `cmake`). 53 | 54 | - Python headers (Ubuntu: package `python-dev`). 55 | 56 | - Optional, required for Unicode support: ICU (Ubuntu: package `libicu-dev`). 57 | 58 | Pyenv users on Mac OS X: cpsm requires dynamic Python libraries, which pyenv 59 | does not build by default. If CMake indicates that it's using static libraries, 60 | e.g.: 61 | 62 | ``` 63 | -- Found PythonLibs: /Users/<user>/.pyenv/versions/3.5.2/lib/python3.5/config-3.5m/libpython3.5m.a 64 | ``` 65 | 66 | Rebuild with dynamic library support by running `env 67 | PYTHON_CONFIGURE_OPTS="--enable-framework" pyenv install <version>`. 
68 | 69 | Python 3 Support 70 | ---------------- 71 | 72 | If your Vim is compiled against Python 3 instead (`+python3` flag) the install 73 | script should detect this in most cases and everything should just work. You may 74 | need different python headers installed (e.g. `python3-dev` on Ubuntu). 75 | 76 | If the detection does not work for any reason you can set `PY3=ON` or `PY3=OFF` 77 | as appropriate when running `./install.sh` to override it. 78 | 79 | Installation 80 | ------------ 81 | 82 | 1. Install cpsm using your favorite Vim package manager. For example, with 83 | [Vundle](http://github.com/gmarik/Vundle.vim), this consists of adding: 84 | 85 | Plugin 'nixprime/cpsm' 86 | 87 | to your `vimrc` and then running `:PluginInstall` from Vim. 88 | 89 | 2. Build the Python module. On Linux, `cd` into `~/.vim/bundle/cpsm` and run 90 | `./install.sh`. Otherwise, peek inside `install.sh` and see what it does. 91 | 92 | 3. Add: 93 | 94 | let g:ctrlp_match_func = {'match': 'cpsm#CtrlPMatch'} 95 | 96 | to your `vimrc`. 97 | 98 | Options 99 | ------- 100 | 101 | All of the following options are set by adding 102 | 103 | let (option name) = (option value) 104 | 105 | to your .vimrc. 106 | 107 | - As shown below, cpsm will still attempt to perform matching on an empty query 108 | based on the open file in the current buffer. This interacts badly with e.g. 109 | CtrlPMRU. To disable all matching on empty queries, set 110 | `g:cpsm_match_empty_query` to 0. If you want empty query matching to only be 111 | disabled for MRU mode, it's recommended that you configure this in your 112 | bindings, e.g.: 113 | 114 | nnoremap <silent> <C-o> :let g:cpsm_match_empty_query = 0<CR>:CtrlPMRU<CR> 115 | nnoremap <silent> <C-p> :let g:cpsm_match_empty_query = 1<CR>:CtrlP<CR> 116 | 117 | Note that if you do the above, you may need to prevent CtrlP from overriding 118 | your binding by setting `let g:ctrlp_map = ''`. 119 | 120 | - `g:cpsm_highlight_mode` controls how matches are highlighted. 
Valid highlight 121 | modes are: 122 | 123 | - "none": Do not highlight any match characters. 124 | 125 | - "basic": Highlight the entire region between the leftmost and rightmost 126 | matched characters. 127 | 128 | - "detailed": Highlight each matched character. 129 | 130 | The default is "detailed". The highlight group used to highlight matched 131 | characters is "CtrlPMatch" (the same as for CtrlP's default matcher). 132 | 133 | - By default, cpsm will automatically detect the number of matcher threads 134 | based on the available hardware concurrency. To limit the number of threads 135 | that cpsm can use, set `g:cpsm_max_threads`. 136 | 137 | - When `g:cpsm_query_inverting_delimiter` is set to a single character, it can 138 | be used in a query to move the part of the query after it to the part of the 139 | query before it. For example, if `g:cpsm_query_inverting_delimiter` is a 140 | space, then: 141 | 142 | - A query of "foo" is matched normally. 143 | 144 | - A query of "foo bar" is matched as if it were "barfoo". 145 | 146 | - A query of "foo bar qux" is matched as if it were "quxbarfoo". 147 | 148 | If `g:cpsm_query_inverting_delimiter` is unset or empty, this feature is 149 | disabled. 150 | 151 | - To enable Unicode support, set `g:cpsm_unicode` to 1. Unicode support is 152 | currently very limited, and consists mostly of parsing input strings as UTF-8 153 | and handling the case of non-ASCII letters correctly. 154 | 155 | In addition, cpsm respects the value set for `g:ctrlp_match_current_file`. 
156 | 157 | Performance 158 | ----------- 159 | 160 | - The matchers in this comparison: 161 | 162 | - cpsm: cpsm in its default configuration, as accessed through the 163 | cpsm_py Python extension (the same way the Vim plugin works) 164 | 165 | - ctrlp-cmatcher: https://github.com/JazzCore/ctrlp-cmatcher/ 166 | 167 | - ctrlp-py-matcher: https://github.com/FelikZ/ctrlp-py-matcher 168 | 169 | - ctrlp: the default CtrlP matcher 170 | 171 | - fzf: https://github.com/junegunn/fzf 172 | 173 | - All data is measured on Ubuntu 14.04, running in a VirtualBox VM in a Windows 174 | 7 host, on an Intel i5-4670K, with all 4 CPUs visible to the VM. Both the 175 | host and the guest are relatively quiescent while benchmarking. 176 | 177 | - The search corpus consists of the 48728 files in a clean Linux kernel source 178 | repository checked out at the v4.0 tag, as collected by running `ag "" -i 179 | --nocolor --nogroup --hidden --ignore .git -g ""`. 180 | 181 | - For all CtrlP-based matchers, the match mode is "full-line" (the default) and 182 | the limit is 10 (also the default). ctrlp-cmatcher only uses the current 183 | filename to remove it from the list of candidate items; ctrlp-py-matcher 184 | doesn't use it at all; there doesn't seem to be a way to pass this 185 | information to fzf. 186 | 187 | - All times are averages over 100 runs. No timing information is available for 188 | the default CtrlP matcher or fzf because I can't figure out how to run either 189 | in a single-shot standalone configuration. (A quick search finds claims that 190 | ctrlp-cmatcher and ctrlp-py-matcher are both about an order of magnitude 191 | faster than the default matcher. YMMV.) cpsm times include both the default 192 | configuration (automatic selection of number of matcher threads) and with 193 | `max_threads` set to 1. 
194 | 195 | - Results (given as the best match and the average time to return matches): 196 | 197 | - Query "", current file "": 198 | 199 | - cpsm: "Kbuild"; 2.863ms (9.946ms with 1 thread) 200 | 201 | - ctrlp: "security/keys/encrypted-keys/Makefile" 202 | 203 | - fzf: "COPYING" 204 | 205 | - All others: "security/capability.c" in roughly zero time 206 | 207 | - Only cpsm and fzf do any ranking; cpsm is falling back on the shortest 208 | filename in the closest directory to the current file (which is the 209 | repository's root), while fzf picks the lexicographically lowest filename 210 | in the root directory. 211 | 212 | - I think the default CtrlP matcher is returning a different result simply 213 | because it gets filenames in a slightly different order from ag (results 214 | for the default matcher are collected by actually running Vim, while the 215 | others use a precomputed list of items; "security/capability.c" is the 216 | first file ag returned in the precomputed list.) 217 | 218 | - Query "", current file "mm/memcontrol.c": 219 | 220 | - cpsm: "include/linux/memcontrol.h"; 2.953ms (10.490ms with 1 thread) 221 | 222 | - All others: same as above 223 | 224 | - "memcontrol" is a sufficiently unique prefix that cpsm returns (IMO) the 225 | best possible default result, "mm/memcontrol.c"'s corresponding header 226 | file, with no query entered whatsoever and with no special knowledge of 227 | the kernel's source layout. 228 | 229 | - It looks like the default CtrlP matcher doesn't use information about the 230 | currently open file either. 
231 | 232 | - Query "", current file "kernel/signal.c": 233 | 234 | - cpsm: "include/linux/signal.h"; 2.838ms (10.242ms with 1 thread) 235 | 236 | - All others: same as above 237 | 238 | - "signal" is a significantly more common prefix, but cpsm gets probably 239 | the best match (although this is largely luck; "include/linux" is shorter 240 | than the other likeliest possibility, "include/asm-generic", and all else 241 | being equal cpsm prefers shorter matches on the grounds that it's easier 242 | to add more characters to select a longer match.) 243 | 244 | - Query "x86/", current file "kernel/signal.c": 245 | 246 | - cpsm: "arch/x86/um/signal.c"; 1.932ms (6.693ms with 1 thread) 247 | 248 | - ctrlp-cmatcher: "arch/x86/Kbuild"; 25.034ms 249 | 250 | - ctrlp-py-matcher: "arch/x86/Kbuild"; 27.298ms 251 | 252 | - ctrlp: "tools/perf/arch/x86/util/tsc.h" 253 | 254 | - fzf: "Documentation/x86/early-microcode.txt" 255 | 256 | - Without using the current filename, there is nothing the other matchers 257 | can do to disambiguate the query. 258 | 259 | - The next set of cases simulate a user typing progressively more letters in 260 | a desired file's name ("include/linux/rcupdate.h"), when they happen to be 261 | in a different unrelated file. 262 | 263 | - Query "r", current file "kernel/signal.c": 264 | 265 | - cpsm: "kernel/range.c"; 3.980ms (14.070ms with 1 thread) 266 | 267 | - ctrlp-cmatcher: "README"; 19.825ms 268 | 269 | - ctrlp-py-matcher: "README"; 34.215ms 270 | 271 | - ctrlp: "security/keys/encrypted-keys/Makefile" 272 | 273 | - fzf: "CREDITS" 274 | 275 | - cpsm is much faster than either of the other two benchmarkable matchers 276 | with multithreading enabled, and competitive with ctrlp-cmatcher when 277 | locked to a single thread. 
278 | 279 | - Query "rc", current file "kernel/signal.c": 280 | 281 | - cpsm: "kernel/rcu/rcu.h"; 3.891ms (13.827ms with 1 thread) 282 | 283 | - ctrlp-cmatcher: "arch/Kconfig"; 24.391ms 284 | 285 | - ctrlp-py-matcher: "fs/dlm/rcom.h"; 39.328ms 286 | 287 | - ctrlp: "security/capability.c" 288 | 289 | - fzf: "Documentation/circular-buffers.txt" 290 | 291 | - Query "rcu", current file "kernel/signal.c": 292 | 293 | - cpsm: "kernel/rcu/rcu.h"; 2.534ms (9.009ms with 1 thread) 294 | 295 | - ctrlp-cmatcher: "arch/um/Makefile"; 29.619ms 296 | 297 | - ctrlp-py-matcher: "kernel/rcu/rcu.h"; 37.312ms 298 | 299 | - ctrlp: "security/security.c" 300 | 301 | - fzf: "Documentation/circular-buffers.txt" 302 | 303 | - Query "rcup", current file "kernel/signal.c": 304 | 305 | - cpsm: "include/linux/rcupdate.h"; 2.210ms (7.810ms with 1 thread) 306 | 307 | - ctrlp-cmatcher: "kernel/rcu/update.c"; 31.301ms 308 | 309 | - ctrlp-py-matcher: "include/linux/rcupdate.h"; 37.560ms 310 | 311 | - ctrlp: "security/apparmor/include/path.h" 312 | 313 | - fzf: "Documentation/power/suspend-and-cpuhotplug.txt" 314 | 315 | - Skipping the rest of the letter-by-letter results, since cpsm and 316 | ctrlp-py-matcher have already "won": 317 | 318 | - ctrlp-cmatcher stays with "kernel/rcu/update.c" as its best match until 319 | the entire string "rcupdate.h" is used as the query. 320 | 321 | - ctrlp continues to return completely unrelated results for all of the top 322 | 10 until the query "rcupdate", when it suddenly gets the correct best 323 | match. 324 | 325 | - fzf switches to the correct best match after one more letter (query 326 | "rcupd"). 327 | 328 | License 329 | ------- 330 | 331 | This software is licensed under the [Apache License, Version 2.0][LICENSE]. 
332 | 333 | [CtrlP]: http://github.com/kien/ctrlp.vim 334 | [LICENSE]: http://www.apache.org/licenses/LICENSE-2.0 335 | -------------------------------------------------------------------------------- /autoload/cpsm.py: -------------------------------------------------------------------------------- 1 | # cpsm - fuzzy path matcher 2 | # Copyright (C) 2015 the Authors 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import print_function 17 | 18 | import os 19 | import sys 20 | import traceback 21 | import vim 22 | 23 | try: 24 | _vim_eval = vim.api.eval 25 | except AttributeError: 26 | # vim.api is a neovim feature. 27 | _vim_eval = vim.eval 28 | 29 | script_dir = _vim_eval("s:script_dir") 30 | sys.path.append(script_dir) 31 | import cpsm_py 32 | 33 | def ctrlp_match_with(**kwargs): 34 | """ 35 | Wrapper for cpsm_py.ctrlp_match() that converts Vim numbers from strings 36 | back to numbers, and handles exceptions. 37 | """ 38 | try: 39 | for key in ("limit", "ispath", "match_crfile", "max_threads", 40 | "unicode"): 41 | kwargs[key] = int(kwargs[key]) 42 | return cpsm_py.ctrlp_match(**kwargs) 43 | except Exception as ex: 44 | # Log the exception. Unfortunately something CtrlP causes all messages 45 | # to be discarded, so this is only visible in Vim verbose logging. 46 | print("cpsm error:") 47 | traceback.print_exc(file=sys.stdout) 48 | # Return a short error message in the results. 
49 | ex_str = str(ex) 50 | if (sys.exc_info()[0] is TypeError and 51 | "function takes at most" in ex_str): 52 | # Most likely due to a new parameter being added to 53 | # cpsm_py.ctrlp_match. 54 | ex_str = "rebuild cpsm by running %s: %s" % ( 55 | os.path.normpath(os.path.join( 56 | script_dir, "..", "install.sh")), 57 | ex_str) 58 | return ["ERROR:" + ex_str], [] 59 | 60 | def _ctrlp_match_evalinput(): 61 | return ctrlp_match_with(**_vim_eval("s:input")) 62 | 63 | def ctrlp_match(): 64 | """ 65 | Deprecated interface that gets arguments by calling vim.eval() and returns 66 | outputs by calling vim.command(). Kept for Denite. Use ctrlp_match_with() 67 | or cpsm_py.ctrlp_match() in new code. 68 | """ 69 | # TODO: a:regex is unimplemented. 70 | results, regexes = ctrlp_match_with( 71 | items=_vim_eval("a:items"), query=_vim_eval("a:str"), 72 | limit=int(_vim_eval("a:limit")), mmode=_vim_eval("a:mmode"), 73 | ispath=int(_vim_eval("a:ispath")), crfile=_vim_eval("a:crfile"), 74 | highlight_mode=_vim_eval("g:cpsm_highlight_mode"), 75 | match_crfile=int(_vim_eval("s:match_crfile")), 76 | max_threads=int(_vim_eval("g:cpsm_max_threads")), 77 | query_inverting_delimiter=_vim_eval("g:cpsm_query_inverting_delimiter"), 78 | regex_line_prefix=_vim_eval("s:regex_line_prefix"), 79 | unicode=int(_vim_eval("g:cpsm_unicode"))) 80 | vim.command("let s:results = [%s]" % ",".join( 81 | map(_escape_and_quote, results))) 82 | vim.command("let s:regexes = [%s]" % ",".join( 83 | map(_escape_and_quote, regexes))) 84 | 85 | def _escape_and_quote(s): 86 | return '"' + s.replace("\\", "\\\\").replace('"', '\\"') + '"' 87 | -------------------------------------------------------------------------------- /autoload/cpsm.vim: -------------------------------------------------------------------------------- 1 | " cpsm - fuzzy path matcher 2 | " Copyright (C) 2015 the Authors 3 | " 4 | " Licensed under the Apache License, Version 2.0 (the "License"); 5 | " you may not use this file except in 
compliance with the License. 6 | " You may obtain a copy of the License at 7 | " 8 | " http://www.apache.org/licenses/LICENSE-2.0 9 | " 10 | " Unless required by applicable law or agreed to in writing, software 11 | " distributed under the License is distributed on an "AS IS" BASIS, 12 | " WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | " See the License for the specific language governing permissions and 14 | " limitations under the License. 15 | 16 | " Global variables and defaults 17 | if !exists('g:cpsm_highlight_mode') 18 | let g:cpsm_highlight_mode = 'detailed' 19 | endif 20 | if !exists('g:cpsm_match_empty_query') 21 | let g:cpsm_match_empty_query = 1 22 | endif 23 | if !exists('g:cpsm_max_threads') 24 | if has('win32unix') 25 | " Synchronization primitives are extremely slow on Cygwin: 26 | " https://cygwin.com/ml/cygwin/2012-08/msg00200.html 27 | let g:cpsm_max_threads = 1 28 | else 29 | let g:cpsm_max_threads = 0 30 | endif 31 | endif 32 | if !exists('g:cpsm_query_inverting_delimiter') 33 | let g:cpsm_query_inverting_delimiter = '' 34 | endif 35 | if !exists('g:cpsm_unicode') 36 | let g:cpsm_unicode = 0 37 | endif 38 | 39 | let s:script_dir = escape(expand('<sfile>:p:h'), '\') 40 | " s:status is: 41 | " - 0: no Python support, or module loading failed for other reasons 42 | " - 1: cpsm module built with incompatible version of Python 43 | " - 2: cpsm module usable with Python 2 44 | " - 3: cpsm module usable with Python 3 45 | let s:status = 0 46 | if has('python3') 47 | try 48 | execute 'py3file ' . s:script_dir . '/cpsm.py' 49 | let s:status = 3 50 | catch 51 | " Ideally we'd check specifically for the exception 52 | " 'ImportError: dynamic module does not define module export function', 53 | " but Vim's handling of multiline exceptions seems to be completely 54 | " broken. 55 | if !has('python') 56 | let s:status = 1 57 | endif 58 | endtry 59 | endif 60 | if s:status == 0 && has('python') 61 | try 62 | execute 'pyfile ' .
s:script_dir . '/cpsm.py' 63 | let s:status = 2 64 | catch 65 | let s:status = 1 66 | endtry 67 | endif 68 | 69 | function cpsm#CtrlPMatch(items, str, limit, mmode, ispath, crfile, regex) 70 | if !has('python3') && !has('python') 71 | return ['ERROR: cpsm requires Vim built with Python or Python3 support'] 72 | elseif s:status == 0 73 | return ['ERROR: failed to load cpsm module'] 74 | elseif s:status == 1 75 | return ['ERROR: cpsm built with version of Python not supported by Vim'] 76 | endif 77 | 78 | if empty(a:str) && g:cpsm_match_empty_query == 0 79 | let s:results = a:items[0:(a:limit)] 80 | let s:regexes = [] 81 | else 82 | let s:match_crfile = exists('g:ctrlp_match_current_file') ? g:ctrlp_match_current_file : 0 83 | let s:regex_line_prefix = '> ' 84 | if exists('g:ctrlp_line_prefix') 85 | let s:regex_line_prefix = g:ctrlp_line_prefix 86 | endif 87 | let s:input = { 88 | \ 'items': a:items, 89 | \ 'query': a:str, 90 | \ 'limit': a:limit, 91 | \ 'mmode': a:mmode, 92 | \ 'ispath': a:ispath, 93 | \ 'crfile': a:crfile, 94 | \ 'highlight_mode': g:cpsm_highlight_mode, 95 | \ 'match_crfile': s:match_crfile, 96 | \ 'max_threads': g:cpsm_max_threads, 97 | \ 'query_inverting_delimiter': g:cpsm_query_inverting_delimiter, 98 | \ 'regex_line_prefix': s:regex_line_prefix, 99 | \ 'unicode': g:cpsm_unicode, 100 | \ } 101 | if s:status == 3 102 | let s:output = py3eval('_ctrlp_match_evalinput()') 103 | else 104 | let s:output = pyeval('_ctrlp_match_evalinput()') 105 | endif 106 | let s:results = s:output[0] 107 | let s:regexes = s:output[1] 108 | endif 109 | 110 | call clearmatches() 111 | " Apply highlight regexes. 112 | for r in s:regexes 113 | call matchadd('CtrlPMatch', r) 114 | endfor 115 | " CtrlP does this match to hide the leading > in results. 
116 | call matchadd('CtrlPLinePre', '^>') 117 | return s:results 118 | endfunction 119 | -------------------------------------------------------------------------------- /bench/bench_cpsm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # cpsm - fuzzy path matcher 4 | # Copyright (C) 2015 the Authors 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from __future__ import print_function 19 | 20 | import argparse 21 | 22 | import bench 23 | import cpsm_py 24 | import linuxclock 25 | 26 | if __name__ == "__main__": 27 | argp = argparse.ArgumentParser() 28 | argp.add_argument("-c", "--count", nargs="?", type=int, default=1, 29 | help="number of matches to show") 30 | argp.add_argument("-n", "--iterations", nargs="?", type=int, 31 | default=bench.DEFAULT_ITERATIONS, 32 | help="number of iterations per query") 33 | argp.add_argument("-t", "--threads", nargs="?", type=int, default=0, 34 | help="number of matcher threads") 35 | args = argp.parse_args() 36 | for query in bench.QUERIES: 37 | times = [] 38 | for _ in xrange(args.iterations): 39 | start = linuxclock.monotonic() 40 | results, _ = cpsm_py.ctrlp_match(bench.ITEMS, query.query, 41 | limit=bench.LIMIT, ispath=True, 42 | crfile=query.cur_file, 43 | max_threads=args.threads) 44 | finish = linuxclock.monotonic() 45 | times.append(finish - start) 46 | print("%s: avg time %fs, results: %s" % ( 47 | 
query, sum(times) / len(times), results[:args.count])) 48 | -------------------------------------------------------------------------------- /bench/linuxclock.py: -------------------------------------------------------------------------------- 1 | # cpsm - fuzzy path matcher 2 | # Copyright (C) 2015 the Authors 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import print_function 17 | 18 | import ctypes 19 | import os 20 | 21 | # From 22 | CLOCK_REALTIME = 0 23 | CLOCK_MONOTONIC = 1 24 | CLOCK_PROCESS_CPUTIME_ID = 2 25 | CLOCK_THREAD_CPUTIME_ID = 3 26 | CLOCK_MONOTONIC_RAW = 4 27 | CLOCK_REALTIME_COARSE = 5 28 | CLOCK_MONOTONIC_COARSE = 6 29 | CLOCK_BOOTTIME = 7 30 | CLOCK_REALTIME_ALARM = 8 31 | CLOCK_BOOTTIME_ALARM = 9 32 | CLOCK_SGI_CYCLE = 10 33 | CLOCK_TAI = 11 34 | 35 | class Timespec(ctypes.Structure): 36 | _fields_ = [ 37 | ('tv_sec', ctypes.c_long), 38 | ('tv_nsec', ctypes.c_long), 39 | ] 40 | 41 | def to_seconds(self): 42 | return self.tv_sec + (self.tv_nsec * 1e-9) 43 | 44 | _clock_gettime = ctypes.CDLL("librt.so.1", use_errno=True).clock_gettime 45 | _clock_gettime.argtypes = [ctypes.c_int, ctypes.POINTER(Timespec)] 46 | 47 | def gettime(clock): 48 | """Returns the current time on the given clock as a Timespec.""" 49 | t = Timespec() 50 | if _clock_gettime(clock, ctypes.pointer(t)) != 0: 51 | errno = ctypes.get_errno() 52 | raise OSError(errno, os.strerror(errno)) 53 | return t 54 | 55 | def 
monotonic(): 56 | """Returns the value (in fractional seconds) of a monotonic clock.""" 57 | return gettime(CLOCK_MONOTONIC_RAW).to_seconds() 58 | -------------------------------------------------------------------------------- /cmake/FindICU.cmake: -------------------------------------------------------------------------------- 1 | # Finds the International Components for Unicode (ICU) Library 2 | # 3 | # ICU_FOUND - True if ICU found. 4 | # ICU_I18N_FOUND - True if ICU's internationalization library found. 5 | # ICU_INCLUDE_DIRS - Directory to include to get ICU headers 6 | # Note: always include ICU headers as, e.g., 7 | # unicode/utypes.h 8 | # ICU_LIBRARIES - Libraries to link against for the common ICU 9 | # ICU_I18N_LIBRARIES - Libraries to link against for ICU internationalization 10 | # (note: in addition to ICU_LIBRARIES) 11 | # ICU_VERSION - ICU version as "MAJOR.MINOR" ("0.0" if it cannot be determined) 12 | # ICU_MAJOR_VERSION - Major component of ICU_VERSION 13 | # ICU_MINOR_VERSION - Minor component of ICU_VERSION 14 | # Look for the header file. 15 | find_path( 16 | ICU_INCLUDE_DIR 17 | NAMES unicode/utypes.h 18 | DOC "Include directory for the ICU library") 19 | mark_as_advanced(ICU_INCLUDE_DIR) 20 | # Look for the library. 21 | find_library( 22 | ICU_LIBRARY 23 | NAMES icuuc cygicuuc cygicuuc32 24 | DOC "Libraries to link against for the common parts of ICU") 25 | mark_as_advanced(ICU_LIBRARY) 26 | # Copy the results to the output variables.
27 | if (ICU_INCLUDE_DIR AND ICU_LIBRARY) 28 | set(ICU_FOUND 1) 29 | set(ICU_LIBRARIES ${ICU_LIBRARY}) 30 | set(ICU_INCLUDE_DIRS ${ICU_INCLUDE_DIR}) 31 | set(ICU_VERSION 0) 32 | set(ICU_MAJOR_VERSION 0) 33 | set(ICU_MINOR_VERSION 0) 34 | # The version macros are defined in unicode/uvernum.h on current ICU releases 35 | # and in unicode/uversion.h on old ones, so check both. Only the matching 36 | # "#define" lines are extracted; if none is found the version stays at 0.0 37 | # rather than being set to the text of the header. 38 | foreach (_ICU_VERSION_HEADER uvernum.h uversion.h) 39 | set(_ICU_VERSION_FILE "${ICU_INCLUDE_DIR}/unicode/${_ICU_VERSION_HEADER}") 40 | if (EXISTS "${_ICU_VERSION_FILE}") 41 | file(STRINGS "${_ICU_VERSION_FILE}" _ICU_VERSION_CONTENTS REGEX "#[ \t]*define[ \t]+U_ICU_VERSION_(MAJOR|MINOR)_NUM[ \t]+[0-9]+") 42 | if ("${_ICU_VERSION_CONTENTS}" MATCHES "U_ICU_VERSION_MAJOR_NUM[ \t]+([0-9]+)") 43 | set(ICU_MAJOR_VERSION "${CMAKE_MATCH_1}") 44 | if ("${_ICU_VERSION_CONTENTS}" MATCHES "U_ICU_VERSION_MINOR_NUM[ \t]+([0-9]+)") 45 | set(ICU_MINOR_VERSION "${CMAKE_MATCH_1}") 46 | endif () 47 | break() 48 | endif () 49 | endif () 50 | endforeach () 51 | unset(_ICU_VERSION_CONTENTS) 52 | unset(_ICU_VERSION_FILE) 53 | set(ICU_VERSION "${ICU_MAJOR_VERSION}.${ICU_MINOR_VERSION}") 54 | # Look for the ICU internationalization libraries 55 | find_library( 56 | ICU_I18N_LIBRARY 57 | NAMES icuin icui18n cygicuin cygicuin32 58 | DOC "Libraries to link against for ICU internationalization") 59 | mark_as_advanced(ICU_I18N_LIBRARY) 60 | if (ICU_I18N_LIBRARY) 61 | set(ICU_I18N_FOUND 1) 62 | set(ICU_I18N_LIBRARIES ${ICU_I18N_LIBRARY}) 63 | else () 64 | set(ICU_I18N_FOUND 0) 65 | set(ICU_I18N_LIBRARIES) 66 | endif () 67 | else () 68 | set(ICU_FOUND 0) 69 | set(ICU_I18N_FOUND 0) 70 | set(ICU_LIBRARIES) 71 | set(ICU_I18N_LIBRARIES) 72 | set(ICU_INCLUDE_DIRS) 73 | set(ICU_VERSION) 74 | set(ICU_MAJOR_VERSION) 75 | set(ICU_MINOR_VERSION) 76 | endif () 77 | if (ICU_FOUND) 78 | if (NOT ICU_FIND_QUIETLY) 79 | message(STATUS "Found ICU header files in ${ICU_INCLUDE_DIRS}") 80 | message(STATUS "Found ICU libraries: ${ICU_LIBRARIES}") 81 | endif () 82 | else () 83 | if (ICU_FIND_REQUIRED) 84 | message(FATAL_ERROR "Could not find ICU") 85 | else () 86 | message(STATUS "Optional package ICU was not found") 87 | endif () 88 | endif () 89 | -------------------------------------------------------------------------------- /cmake/FindPythonConfig.cmake: -------------------------------------------------------------------------------- 1 | # FindPythonConfig 2 | # ---------------- 3 | # 4 | # This module locates Python libraries.
5 | # 6 | # This code sets the following variables: 7 | # 8 | # PYTHONCONFIG_FOUND - have the Python libs been found 9 | # PYTHON_COMPILE_FLAGS - compiler flags required to include Python headers 10 | # PYTHON_LINK_FLAGS - linker flags required to link Python libraries 11 | # 12 | # If calling both `find_package(PythonInterp)` and 13 | # `find_package(PythonConfig)`, call `find_package(PythonInterp)` first. 14 | 15 | include(FindPackageHandleStandardArgs) 16 | 17 | find_package(PythonInterp) 18 | if(PYTHONINTERP_FOUND) 19 | set(_Python_config "${PYTHON_EXECUTABLE}-config") 20 | execute_process(COMMAND ${_Python_config} "--includes" OUTPUT_VARIABLE PYTHON_COMPILE_FLAGS OUTPUT_STRIP_TRAILING_WHITESPACE) 21 | # "To embed Python into an application, a new --embed option must be passed 22 | # to python3-config --libs --embed to get -lpython3.8 (link the application 23 | # to libpython). To support both 3.8 and older, try python3-config --libs 24 | # --embed first and fallback to python3-config --libs (without --embed) if 25 | # the previous command fails." 
- 26 | # https://docs.python.org/3/whatsnew/3.8.html#debug-build-uses-the-same-abi-as-release-build 27 | # The --embed probe is expected to fail on Python < 3.8, so its stderr is suppressed; the retry below still reports real errors. 28 | execute_process(COMMAND ${_Python_config} "--ldflags" "--embed" OUTPUT_VARIABLE PYTHON_LINK_FLAGS OUTPUT_STRIP_TRAILING_WHITESPACE RESULT_VARIABLE PYTHON_LINK_FLAGS_EMBED_RESULT ERROR_QUIET) 29 | if(NOT PYTHON_LINK_FLAGS_EMBED_RESULT EQUAL 0) 30 | message(STATUS "python-config failed, retrying without --embed") 31 | execute_process(COMMAND ${_Python_config} "--ldflags" OUTPUT_VARIABLE PYTHON_LINK_FLAGS OUTPUT_STRIP_TRAILING_WHITESPACE) 32 | endif() 33 | set(_Python_config_message "${PYTHON_COMPILE_FLAGS}; ${PYTHON_LINK_FLAGS}") 34 | unset(_Python_config) 35 | else() 36 | message(SEND_ERROR "Python interpreter not found") 37 | endif() 38 | find_package_handle_standard_args(PythonConfig DEFAULT_MSG _Python_config_message PYTHON_COMPILE_FLAGS PYTHON_LINK_FLAGS) 39 | unset(_Python_config_message) 40 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o errexit 4 | set -o nounset 5 | set -o pipefail 6 | 7 | function choose_vim { 8 | local vim="${VIM:-}" 9 | if [[ -d "${vim}" ]]; then 10 | # Assume that this is because install.sh is being executed by vim (see 11 | # `:help $VIM`), such that vim is the parent process. 12 | vim="$(readlink /proc/${PPID}/exe)" 13 | # Note that this can fail if /proc/$PPID/exe doesn't exist (e.g. on Mac 14 | # OS X), in which case we fall through to the following. 15 | fi 16 | if [[ -z "${vim}" ]]; then 17 | vim="$(which vim)" 18 | fi 19 | echo "${vim}" 20 | } 21 | 22 | function vim_has { 23 | local vim="$1" 24 | local feature="$2" 25 | 26 | # We can't use `vim --version` because neovim is too unique to print 27 | # +/-python{,3}, so instead we get to play stupid games with script(1).
28 | local uname="$(uname)" 29 | case "${uname}" in 30 | Linux) echo $(script -eqc "${vim} -S <(echo -e \"echo 'x=' . has('${feature}')\\nqa!\")" /dev/null | grep -o 'x=.' | grep -o '[[:digit:]]' -m 1);; 31 | Darwin | FreeBSD) echo $(script -q /dev/null ${vim} -S <(echo -e "echo 'x=' . has('${feature}')\nqa!") | grep -o 'x=.' | grep -o '[[:digit:]]' -m 1);; 32 | *) >&2 echo "ERROR: Unknown uname: ${uname}; Vim feature detection not supported"; false;; 33 | esac 34 | } 35 | 36 | if [ -z "${PY3+x}" ]; then 37 | vim="$(choose_vim)" 38 | echo "PY3 not specified; inferring Python version from ${vim}" 39 | have_py2="$(vim_has ${vim} python)" 40 | have_py3="$(vim_has ${vim} python3)" 41 | if [ "${have_py3}" -eq "1" ]; then 42 | echo "Python 3 selected" 43 | PY3="ON" 44 | elif [ "${have_py2}" -eq "1" ]; then 45 | echo "Python 2 selected" 46 | PY3="OFF" 47 | else 48 | >&2 echo "ERROR: No Python support detected" 49 | false 50 | fi 51 | else 52 | case "${PY3}" in 53 | ON) echo "Python 3 selected by PY3=${PY3}";; 54 | OFF) echo "Python 2 selected by PY3=${PY3}";; 55 | *) >&2 echo "ERROR: invalid PY3=${PY3}"; false;; 56 | esac 57 | fi 58 | 59 | cd "$(dirname "${BASH_SOURCE[0]}")" 60 | rm -rf bin/* build/* 61 | mkdir -p bin build 62 | ( 63 | cd build 64 | cmake -DPY3:BOOL=${PY3} .. 
65 | make install && make test 66 | ) 67 | -------------------------------------------------------------------------------- /src/.ycm_extra_conf.py: -------------------------------------------------------------------------------- 1 | def FlagsForFile(filename, **kwargs): 2 | return { 3 | "flags": [ 4 | "-x", "c++", 5 | "-std=c++11", 6 | "-I", ".", 7 | "-I", "/usr/include/python2.7", 8 | "-Wall", 9 | ], 10 | "do_cache": True, 11 | } 12 | -------------------------------------------------------------------------------- /src/api.h: -------------------------------------------------------------------------------- 1 | // cpsm - fuzzy path matcher 2 | // Copyright (C) 2016 the Authors 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #ifndef CPSM_API_H_ 17 | #define CPSM_API_H_ 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | 26 | #include "matcher.h" 27 | #include "par_util.h" 28 | #include "str_util.h" 29 | 30 | namespace cpsm { 31 | 32 | // User options that influence match behavior. 33 | struct Options { 34 | private: 35 | // The currently open file. 36 | boost::string_ref crfile_; 37 | 38 | // The maximum number of matches to return. If 0, there is no limit. 39 | std::size_t limit_ = 0; 40 | 41 | // If false, do not consider the currently open file as a candidate. 42 | bool match_crfile_ = false; 43 | 44 | // The number of threads the matcher should use. 
45 | unsigned int nr_threads_ = 1; 46 | 47 | // If true, the query and all items are paths. 48 | bool path_ = true; 49 | 50 | // If true, attempt to interpret the query and all items as UTF-8-encoded 51 | // strings. 52 | bool unicode_ = false; 53 | 54 | // If true, pass `match_info` to match sinks. 55 | bool want_match_info_ = false; 56 | 57 | public: 58 | boost::string_ref crfile() const { return crfile_; } 59 | Options& set_crfile(boost::string_ref const crfile) { 60 | crfile_ = crfile; 61 | return *this; 62 | } 63 | 64 | std::size_t limit() const { return limit_; } 65 | Options& set_limit(std::size_t const limit) { 66 | limit_ = limit; 67 | return *this; 68 | } 69 | 70 | bool match_crfile() const { return match_crfile_; } 71 | Options& set_match_crfile(bool const match_crfile) { 72 | match_crfile_ = match_crfile; 73 | return *this; 74 | } 75 | 76 | unsigned int nr_threads() const { return nr_threads_; } 77 | Options& set_nr_threads(unsigned int const nr_threads) { 78 | if (nr_threads < 1) { 79 | throw Error("invalid nr_threads: ", nr_threads); 80 | } 81 | nr_threads_ = nr_threads; 82 | return *this; 83 | } 84 | 85 | bool path() const { return path_; } 86 | Options& set_path(bool const path) { 87 | path_ = path; 88 | return *this; 89 | } 90 | 91 | bool unicode() const { return unicode_; } 92 | Options& set_unicode(bool const unicode) { 93 | unicode_ = unicode; 94 | return *this; 95 | } 96 | 97 | bool want_match_info() const { return want_match_info_; } 98 | Options& set_want_match_info(bool const want_match_info) { 99 | want_match_info_ = want_match_info; 100 | return *this; 101 | } 102 | }; 103 | 104 | namespace detail { 105 | 106 | template 108 | void for_each_match(boost::string_ref const query, Options const& opts, 109 | Source&& src, Sink&& dst); 110 | 111 | } // namespace detail 112 | 113 | // For each item in a list of items, invoke `dst` in descending order of 114 | // compatibility with the given query in the given context with the given 115 | // 
options. 116 | // 117 | // `Item` must be a default-constructable, movable type with the following 118 | // member functions: 119 | // - `match_key`, which returns a `boost::string_ref` representing the string 120 | // that the query should match against. 121 | // - `sort_key`, which returns a value of unspecified type that can be compared 122 | // to other values of the same type with operator `<`. When the matcher is 123 | // otherwise unable to order two matched items, it will prefer the one whose 124 | // `sort_key` compares lower. 125 | // 126 | // `src` must have the following member functions: 127 | // - `bool fill(std::vector& items)`, which inserts new unmatched items 128 | // into `items` (which must initially be empty) and returns true iff it may 129 | // produce more unmatched items in the future. 130 | // - `size_t batch_size() const`, which returns an optional upper bound on the 131 | // number of items inserted by each call to `fill`. 132 | // If `opts.nr_threads() > 1`, `src` must be thread-safe. 133 | // 134 | // `dst` must be a functor compatible with signature `void(Item& item, 135 | // MatchInfo const* match_info)`, where `item` is a matched item and 136 | // `match_info`, if not null, holds the state of the match. `dst` need not be 137 | // thread-safe. 138 | // 139 | // Example: 140 | // 141 | // // Prints the top 10 matches of query against items. 
142 | // for_each_match( 143 | // query, Options().set_limit(10).set_unicode(true), 144 | // [&](std::vector& batch) { 145 | // if (items.empty()) return false; 146 | // batch.push_back(std::move(items.back())); 147 | // items.pop_back(); 148 | // return true; 149 | // }, 150 | // [&](Item item, void*) { 151 | // std::cout << item.item << std::endl; 152 | // }); 153 | template 154 | void for_each_match(boost::string_ref const query, Options const& opts, 155 | Source&& src, Sink&& dst) { 156 | if (opts.path()) { 157 | if (opts.unicode()) { 158 | detail::for_each_match( 159 | query, opts, std::forward(src), std::forward(dst)); 160 | } else { 161 | detail::for_each_match( 162 | query, opts, std::forward(src), std::forward(dst)); 163 | } 164 | } else { 165 | if (opts.unicode()) { 166 | detail::for_each_match( 167 | query, opts, std::forward(src), std::forward(dst)); 168 | } else { 169 | detail::for_each_match( 170 | query, opts, std::forward(src), std::forward(dst)); 171 | } 172 | } 173 | } 174 | 175 | // Simple Item type wrapping a `boost::string_ref`. 176 | class StringRefItem { 177 | public: 178 | StringRefItem() {} 179 | explicit StringRefItem(boost::string_ref const item) : item_(item) {} 180 | 181 | boost::string_ref item() const { return item_; } 182 | boost::string_ref match_key() const { return item_; } 183 | boost::string_ref sort_key() const { return item_; } 184 | 185 | private: 186 | boost::string_ref item_; 187 | }; 188 | 189 | // Thread-unsafe source functor that constructs items from elements of a range 190 | // defined by a pair of iterators. 
191 | template 192 | class RangeSource { 193 | public: 194 | explicit RangeSource(It first, It last) 195 | : it_(std::move(first)), last_(std::move(last)) {} 196 | 197 | bool fill(std::vector& items) { 198 | if (it_ == last_) { 199 | return false; 200 | } 201 | items.emplace_back(*it_); 202 | ++it_; 203 | return it_ != last_; 204 | } 205 | 206 | static constexpr size_t batch_size() { return 1; } 207 | 208 | private: 209 | It it_; 210 | It const last_; 211 | }; 212 | 213 | template 214 | RangeSource source_from_range(It first, It last) { 215 | return RangeSource(std::move(first), std::move(last)); 216 | } 217 | 218 | namespace detail { 219 | 220 | // Type binding a matched item together with its score. 221 | template 222 | struct Matched { 223 | Score score; 224 | Item item; 225 | 226 | Matched() {} 227 | explicit Matched(Score score, Item item) 228 | : score(score), item(std::move(item)) {} 229 | 230 | // Returns true if `x` is a better match than `y`. 231 | static bool is_better(Matched const &x, Matched const &y) { 232 | if (x.score != y.score) { 233 | return x.score > y.score; 234 | } 235 | return x.item.sort_key() < y.item.sort_key(); 236 | } 237 | }; 238 | 239 | template 241 | void for_each_match(boost::string_ref const query, Options const& opts, 242 | Source&& src, Sink&& dst) { 243 | MatcherOptions mopts; 244 | mopts.crfile = opts.crfile(); 245 | mopts.match_crfile = opts.match_crfile(); 246 | 247 | // Match in parallel. 248 | std::vector>> thread_matches(opts.nr_threads()); 249 | std::vector threads; 250 | threads.reserve(opts.nr_threads()); 251 | for (unsigned int i = 0; i < opts.nr_threads(); i++) { 252 | threads.emplace_back([&, i] { 253 | std::vector> matches; 254 | std::vector batch; 255 | // If a limit exists, each thread should only keep that many matches. 
256 | if (opts.limit()) { 257 | matches.reserve(opts.limit() + 1); 258 | } 259 | batch.reserve(src.batch_size()); 260 | Matcher matcher(query, mopts); 261 | bool more; 262 | do { 263 | // Collect and match a batch. 264 | more = src.fill(batch); 265 | for (auto& item : batch) { 266 | if (matcher.match(item.match_key())) { 267 | matches.emplace_back(matcher.score(), std::move(item)); 268 | if (opts.limit()) { 269 | std::push_heap(matches.begin(), matches.end(), 270 | Matched::is_better); 271 | if (matches.size() > opts.limit()) { 272 | std::pop_heap(matches.begin(), matches.end(), 273 | Matched::is_better); 274 | matches.pop_back(); 275 | } 276 | } 277 | } 278 | } 279 | batch.clear(); 280 | } while (more); 281 | thread_matches[i] = std::move(matches); 282 | }); 283 | } 284 | 285 | // Collect matcher threads. 286 | std::size_t nr_matches = 0; 287 | for (unsigned int i = 0; i < opts.nr_threads(); i++) { 288 | auto& thread = threads[i]; 289 | thread.join(); 290 | if (thread.has_exception()) { 291 | throw Error(thread.exception_msg()); 292 | } 293 | nr_matches += thread_matches[i].size(); 294 | } 295 | 296 | // Combine per-thread match lists. 297 | std::vector> all_matches; 298 | all_matches.reserve(nr_matches); 299 | for (auto& matches : thread_matches) { 300 | std::move(matches.begin(), matches.end(), std::back_inserter(all_matches)); 301 | matches.shrink_to_fit(); 302 | } 303 | 304 | // Sort and limit matches. 305 | if (opts.limit() && opts.limit() < all_matches.size()) { 306 | std::partial_sort(all_matches.begin(), all_matches.begin() + opts.limit(), 307 | all_matches.end(), Matched::is_better); 308 | all_matches.resize(opts.limit()); 309 | } else { 310 | std::sort(all_matches.begin(), all_matches.end(), Matched::is_better); 311 | } 312 | 313 | // Emit matches. 
314 | if (opts.want_match_info()) { 315 | Matcher matcher(query, mopts); 316 | for (auto& match : all_matches) { 317 | if (!matcher.match(match.item.match_key())) { 318 | throw Error("failed to re-match known match '", 319 | match.item.match_key(), 320 | "' during match position collection"); 321 | } 322 | dst(match.item, &matcher); 323 | } 324 | } else { 325 | for (auto& match : all_matches) { 326 | dst(match.item, nullptr); 327 | } 328 | } 329 | } 330 | 331 | } // namespace detail 332 | 333 | } // namespace cpsm 334 | 335 | #endif // CPSM_API_H_ 336 | -------------------------------------------------------------------------------- /src/cpsm_cli_main.cc: -------------------------------------------------------------------------------- 1 | // cpsm - fuzzy path matcher 2 | // Copyright (C) 2015 the Authors 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | 25 | #include "api.h" 26 | #include "str_util.h" 27 | 28 | namespace po = boost::program_options; 29 | 30 | int main(int argc, char** argv) { 31 | std::cin.sync_with_stdio(false); 32 | std::cout.sync_with_stdio(false); 33 | std::cerr.sync_with_stdio(false); 34 | 35 | po::options_description opts_desc("Options"); 36 | opts_desc.add_options() 37 | ("crfile", po::value()->default_value(""), 38 | "'currently open file' passed to the matcher") 39 | ("limit", po::value()->default_value(10), 40 | "maximum number of matches to return") 41 | ("query", po::value()->default_value(""), 42 | "query to match items against") 43 | ("help", "display this help and exit") 44 | ; 45 | 46 | po::variables_map opts; 47 | po::store(po::parse_command_line(argc, argv, opts_desc), opts); 48 | po::notify(opts); 49 | 50 | if (opts.count("help")) { 51 | std::cout << opts_desc << std::endl; 52 | return 0; 53 | } 54 | 55 | std::vector lines; 56 | std::string line; 57 | while (std::getline(std::cin, line)) { 58 | lines.emplace_back(std::move(line)); 59 | line.clear(); 60 | } 61 | 62 | auto const crfile = opts["crfile"].as(); 63 | auto const limit = opts["limit"].as(); 64 | auto const query = opts["query"].as(); 65 | auto const mopts = 66 | cpsm::Options().set_crfile(crfile).set_limit(limit).set_want_match_info( 67 | true); 68 | cpsm::for_each_match( 69 | query, mopts, cpsm::source_from_range(lines.cbegin(), 70 | lines.cend()), 71 | [&](cpsm::StringRefItem item, cpsm::MatchInfo const* info) { 72 | std::cout << item.item() << "\n- score: " << info->score() << "; " 73 | << info->score_debug_string() << "\n- match positions: " 74 | << cpsm::str_join(info->match_positions(), ", ") << std::endl; 75 | }); 76 | 77 | return 0; 78 | } 79 | -------------------------------------------------------------------------------- /src/ctrlp_util.cc: 
-------------------------------------------------------------------------------- 1 | // cpsm - fuzzy path matcher 2 | // Copyright (C) 2015 the Authors 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include "ctrlp_util.h" 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | namespace cpsm { 24 | 25 | namespace { 26 | 27 | // Groups match positions into matched intervals. 28 | std::vector> group_positions_detailed( 29 | std::vector const& positions) { 30 | std::vector> groups; 31 | std::size_t begin = 0; 32 | std::size_t end = 0; 33 | for (std::size_t const pos : positions) { 34 | if (pos != end) { 35 | // End of previous group, start of new group. 36 | if (begin != end) { 37 | groups.emplace_back(begin, end); 38 | } 39 | begin = end = pos; 40 | } 41 | end++; 42 | } 43 | if (begin != end) { 44 | groups.emplace_back(begin, end); 45 | } 46 | return groups; 47 | } 48 | 49 | // Returns a single match group spanning from the first to last match. 
50 | std::vector> group_positions_basic( 51 | std::vector const& positions) { 52 | std::vector> group; 53 | if (!positions.empty()) { 54 | group.emplace_back(*positions.cbegin(), (*positions.crbegin()) + 1); 55 | } 56 | return group; 57 | } 58 | 59 | std::vector> group_positions( 60 | boost::string_ref const mode, std::vector const& positions) { 61 | if (mode.empty() || mode == "none") { 62 | return std::vector>(); 63 | } else if (mode == "basic") { 64 | return group_positions_basic(positions); 65 | } else if (mode == "detailed") { 66 | return group_positions_detailed(positions); 67 | } 68 | throw Error("unknown highlight mode '", mode, "'"); 69 | } 70 | 71 | } // anonymous namespace 72 | 73 | CtrlPMatchMode parse_ctrlp_match_mode(boost::string_ref const mmode) { 74 | if (mmode.empty() || mmode == "full-line") { 75 | return CtrlPMatchMode::FULL_LINE; 76 | } else if (mmode == "filename-only") { 77 | return CtrlPMatchMode::FILENAME_ONLY; 78 | } else if (mmode == "first-non-tab") { 79 | return CtrlPMatchMode::FIRST_NON_TAB; 80 | } else if (mmode == "until-last-tab") { 81 | return CtrlPMatchMode::UNTIL_LAST_TAB; 82 | } 83 | throw Error("unknown match mode ", mmode); 84 | } 85 | 86 | void get_highlight_regexes(boost::string_ref const mode, 87 | boost::string_ref const item, 88 | std::vector const& positions, 89 | std::vector& regexes, 90 | boost::string_ref const line_prefix) { 91 | for (auto const group : group_positions(mode, positions)) { 92 | // Each match group's regex has the same structure: 93 | // - "\V": very nomagic (only "\" needs to be escaped) 94 | // - "\C": forces case sensitivity 95 | // - "\^": beginning of string 96 | // - the line prefix 97 | // - characters in the item before the match 98 | // - "\zs": starts the match 99 | // - characters in the match group 100 | // - "\ze": ends the match 101 | // - characters in the item after the match 102 | // - "\$": end of string 103 | std::string regex = R"(\V\C\^)"; 104 | auto const write_char = [&](char c) 
{ 105 | if (c == '\\') { 106 | regex += R"(\\)"; 107 | } else { 108 | regex += c; 109 | } 110 | }; 111 | for (char const c : line_prefix) { 112 | write_char(c); 113 | } 114 | // support highlight for CtrlP with devicons 115 | regex += R"(\%\(\.\+\s\+\)\?)"; 116 | std::size_t i = 0; 117 | for (; i < group.first; i++) { 118 | write_char(item[i]); 119 | } 120 | regex += R"(\zs)"; 121 | for (; i < group.second; i++) { 122 | write_char(item[i]); 123 | } 124 | regex += R"(\ze)"; 125 | for (; i < item.size(); i++) { 126 | write_char(item[i]); 127 | } 128 | regex += R"(\$)"; 129 | regexes.emplace_back(std::move(regex)); 130 | } 131 | } 132 | 133 | } // namespace cpsm 134 | -------------------------------------------------------------------------------- /src/ctrlp_util.h: -------------------------------------------------------------------------------- 1 | // cpsm - fuzzy path matcher 2 | // Copyright (C) 2015 the Authors 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | // CtrlP-specific support utilities. 17 | 18 | #ifndef CPSM_CTRLP_UTIL_H_ 19 | #define CPSM_CTRLP_UTIL_H_ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include 28 | 29 | #include "path_util.h" 30 | #include "str_util.h" 31 | 32 | namespace cpsm { 33 | 34 | enum class CtrlPMatchMode { 35 | // Match the entire line. 36 | FULL_LINE, 37 | 38 | // Match only the filename. 
39 | FILENAME_ONLY, 40 | 41 | // Match until the first tab char. 42 | FIRST_NON_TAB, 43 | 44 | // Match until the last tab char. 45 | UNTIL_LAST_TAB, 46 | }; 47 | 48 | // Parses a CtrlP match mode. 49 | CtrlPMatchMode parse_ctrlp_match_mode(boost::string_ref mmode); 50 | 51 | // Functor types implementing transformations for each CtrlP match mode. 52 | 53 | struct FullLineMatch { 54 | boost::string_ref operator()(boost::string_ref const item) const { 55 | return item; 56 | } 57 | }; 58 | 59 | struct FilenameOnlyMatch { 60 | boost::string_ref operator()(boost::string_ref const item) const { 61 | return ref_str_iters( 62 | path_basename(item.cbegin(), item.cend()), 63 | item.cend()); 64 | } 65 | }; 66 | 67 | struct FirstNonTabMatch { 68 | boost::string_ref operator()(boost::string_ref const item) const { 69 | return ref_str_iters(item.cbegin(), 70 | std::find(item.cbegin(), item.cend(), '\t')); 71 | } 72 | }; 73 | 74 | struct UntilLastTabMatch { 75 | boost::string_ref operator()(boost::string_ref const item) const { 76 | auto const item_rend = item.crend(); 77 | auto const last_tab_rit = std::find(item.crbegin(), item_rend, '\t'); 78 | return ref_str_iters(item.cbegin(), (last_tab_rit == item_rend) 79 | ? item.cend() 80 | : (last_tab_rit + 1).base()); 81 | } 82 | }; 83 | 84 | // Item type that wraps another, but applies a CtrlP match mode to their 85 | // `match_key`s. 86 | template 87 | struct CtrlPItem { 88 | InnerItem inner; 89 | 90 | CtrlPItem() {} 91 | explicit CtrlPItem(InnerItem inner) : inner(std::move(inner)) {} 92 | 93 | boost::string_ref match_key() const { return MatchMode()(inner.match_key()); } 94 | boost::string_ref sort_key() const { return inner.sort_key(); } 95 | }; 96 | 97 | // Appends a set of Vim regexes to highlight the bytes at `positions` in `item` 98 | // for the given highlight mode. `positions` must be sorted. 
99 | void get_highlight_regexes(boost::string_ref mode, boost::string_ref item, 100 | std::vector const& positions, 101 | std::vector& regexes, 102 | boost::string_ref line_prefix); 103 | 104 | } // namespace cpsm 105 | 106 | #endif /* CPSM_CTRLP_UTIL_H_ */ 107 | -------------------------------------------------------------------------------- /src/matcher.h: -------------------------------------------------------------------------------- 1 | // cpsm - fuzzy path matcher 2 | // Copyright (C) 2015 the Authors 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #ifndef CPSM_MATCHER_H_ 17 | #define CPSM_MATCHER_H_ 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | #include 29 | 30 | #include "path_util.h" 31 | #include "str_util.h" 32 | 33 | namespace cpsm { 34 | 35 | // Internal type used for character counts. 36 | // 37 | // This is uint_fast16_t because cpsm is mostly used to match paths, and path 38 | // lengths are not capable of exceeding the range of 16 bits on most major 39 | // operating systems: 40 | // - Linux: PATH_MAX = 4096 41 | // - Mac OS X: PATH_MAX = 1024 42 | // - Windows: MAX_PATH = 260; Unicode interfaces may support paths of up to 43 | // 32767 characters 44 | typedef std::uint_fast16_t CharCount; 45 | 46 | // Options that apply to all items in a search. 47 | struct MatcherOptions { 48 | // The currently open file. 
49 | boost::string_ref crfile; 50 | 51 | // If false, do not consider the currently open file as a candidate. 52 | bool match_crfile = false; 53 | }; 54 | 55 | // Type representing a match's score. 56 | typedef std::uint64_t Score; 57 | 58 | class MatchInfo { 59 | public: 60 | virtual ~MatchInfo() = default; 61 | 62 | // Returns the item's match score (higher is better). 63 | virtual Score score() const = 0; 64 | 65 | // Returns a string summarizing the state used to derive the last item's 66 | // match score. 67 | virtual std::string score_debug_string() const = 0; 68 | 69 | // Returns a sorted vector containing the position of each matched character 70 | // in the item. 71 | virtual std::vector match_positions() const = 0; 72 | }; 73 | 74 | template 75 | class Matcher : public MatchInfo { 76 | public: 77 | typedef typename StringTraits::Char Char; 78 | 79 | explicit Matcher(boost::string_ref const query, MatcherOptions const& opts) 80 | // Queries are smartcased (case-sensitive only if any uppercase appears 81 | // in the query). 82 | : query_(decode(query)), 83 | query_basename_(path_basename(query_.cbegin(), 84 | query_.cend())), 85 | case_sensitive_(std::any_of(query_.cbegin(), query_.cend(), 86 | StringTraits::is_uppercase)), 87 | crfile_(decode(opts.crfile)), 88 | crfile_basename_(path_basename(crfile_.cbegin(), 89 | crfile_.cend())), 90 | crfile_ext_(std::find_if(crfile_.crbegin(), 91 | ReverseIterator(crfile_basename_), 92 | PathTraits::is_extension_separator).base()), 93 | crfile_basename_word_ends_(find_word_endings(crfile_basename_, 94 | crfile_ext_)), 95 | match_crfile_(opts.match_crfile) {} 96 | 97 | // A Matcher can't be trivially copied because it contains iterators into its 98 | // vectors. 99 | Matcher(Matcher const& other) = delete; 100 | Matcher& operator=(Matcher const& other) = delete; 101 | 102 | bool match(boost::string_ref const item) { 103 | item_.clear(); 104 | decode_to(item, item_); 105 | 106 | // Determine if the query matches at all. 
107 | if (!scan()) { 108 | return false; 109 | } 110 | 111 | // Check for compatibility with `crfile`. 112 | if (!check_crfile()) { 113 | return false; 114 | } 115 | 116 | // Beyond this point, the item is definitely a match, and we're only 117 | // evaluating its quality. Reset scoring state (other than what's already 118 | // been assigned by `check_crfile`). 119 | prefix_level_ = PrefixLevel::NONE; 120 | whole_basename_match_ = false; 121 | basename_longest_submatch_ = 0; 122 | basename_match_count_ = 0; 123 | basename_word_gaps_ = 0; 124 | 125 | // Don't waste any time on empty queries, which can't do any further 126 | // differentiation between items. Also return early if the item is empty, 127 | // so that the remainder of the algorithm can assume it isn't. 128 | if (query_.empty() || item_.empty()) { 129 | return true; 130 | } 131 | 132 | // If the match is case-insensitive, the query must not contain any 133 | // uppercase letters. Convert all uppercase characters in the item to 134 | // lowercase so matching below this point is simply equality comparison. 135 | make_item_matchcase(); 136 | 137 | // Try to constrain the match so that matches are required at the start of 138 | // matched path components. 139 | if (!check_component_match_front()) { 140 | // If that fails, conclude that the match is bad and don't do any further 141 | // matching. 142 | return true; 143 | } 144 | 145 | // Try to additionally constrain the match so that all matches in the 146 | // basename (rightmost path component) occur at the beginning of "words". 147 | if (check_basename_match_word_prefix()) { 148 | score_basename_word_prefix_match(); 149 | } else { 150 | // If that fails, fall back to simple greedy matching. 
151 | score_basename_greedy(); 152 | } 153 | 154 | return true; 155 | } 156 | 157 | Score score() const final { 158 | return (Score(prefix_level_) << 62) | 159 | (Score(whole_basename_match_) << 61) | 160 | (mask_to(basename_longest_submatch_, 7) << 54) | 161 | (mask_to(basename_match_count_, 7) << 47) | 162 | (mask_to(penalty(basename_word_gaps_), 7) << 40) | 163 | (mask_to(crfile_basename_shared_words_, 7) << 33) | 164 | (mask_to(penalty(crfile_path_distance_), 11) << 22) | 165 | (mask_to(penalty(unmatched_suffix_len_), 8) << 14) | 166 | mask_to(penalty(item_.size()), 14); 167 | } 168 | 169 | std::string score_debug_string() const final { 170 | return str_cat("prefix_level = ", Score(prefix_level_), 171 | ", whole_basename_match = ", whole_basename_match_, 172 | ", basename_longest_submatch = ", basename_longest_submatch_, 173 | ", basename_match_count = ", basename_match_count_, 174 | ", basename_word_gaps = ", basename_word_gaps_, 175 | ", crfile_basename_shared_words = ", 176 | crfile_basename_shared_words_, ", crfile_path_distance = ", 177 | crfile_path_distance_, ", unmatched_suffix_len = ", 178 | unmatched_suffix_len_, ", item_len = ", item_.size()); 179 | } 180 | 181 | std::vector match_positions() const final { 182 | std::vector posns; 183 | if (prefix_level_ == PrefixLevel::NONE) { 184 | get_match_positions_sorted_no_prefix(posns); 185 | return posns; 186 | } 187 | get_match_positions_component_prefix_dirpath(posns); 188 | if (prefix_level_ == PrefixLevel::BASENAME_WORD) { 189 | get_match_positions_basename_word_prefix(posns); 190 | } else { 191 | get_match_positions_basename_non_word_prefix(posns); 192 | } 193 | std::sort(posns.begin(), posns.end()); 194 | return posns; 195 | } 196 | 197 | private: 198 | typedef std::vector Vec; 199 | typedef typename Vec::const_iterator Iterator; 200 | typedef typename Vec::const_reverse_iterator ReverseIterator; 201 | 202 | static std::vector find_word_endings(Iterator const first, 203 | Iterator const last) { 204 | 
std::vector word_ends; 205 | bool prev_uppercase = false; 206 | bool prev_alphanumeric = false; 207 | for (auto it = first; it != last; ++it) { 208 | auto const c = *it; 209 | bool const next_uppercase = StringTraits::is_uppercase(c); 210 | bool const next_alphanumeric = StringTraits::is_alphanumeric(c); 211 | if (prev_alphanumeric && 212 | (!next_alphanumeric || (!prev_uppercase && next_uppercase))) { 213 | word_ends.push_back(it - 1); 214 | } 215 | prev_uppercase = next_uppercase; 216 | prev_alphanumeric = next_alphanumeric; 217 | } 218 | if (prev_alphanumeric) { 219 | word_ends.push_back(last - 1); 220 | } 221 | return word_ends; 222 | } 223 | 224 | bool scan() { 225 | props_.resize(item_.size()); 226 | auto props_it = props_.begin(); 227 | for (auto item_it = item_.cbegin(), item_last = item_.cend(); 228 | item_it != item_last; ++item_it, ++props_it) { 229 | props_it->uppercase = StringTraits::is_uppercase(*item_it); 230 | } 231 | if (case_sensitive_) { 232 | return scan_match(); 233 | } else { 234 | return scan_match(); 235 | } 236 | } 237 | 238 | template 239 | bool scan_match() const { 240 | auto query_it = query_.cbegin(); 241 | auto const query_last = query_.cend(); 242 | if (query_it == query_last) { 243 | return true; 244 | } 245 | auto props_it = props_.cbegin(); 246 | for (auto item_it = item_.cbegin(), item_last = item_.cend(); 247 | item_it != item_last; ++item_it, ++props_it) { 248 | auto c = *item_it; 249 | // If the match is case-insensitive, the query must not contain any 250 | // uppercase letters. 
251 | if (!CaseSensitive && props_it->uppercase) { 252 | c = StringTraits::uppercase_to_lowercase(c); 253 | } 254 | if (c == *query_it) { 255 | ++query_it; 256 | if (query_it == query_last) { 257 | return true; 258 | } 259 | } 260 | } 261 | return false; 262 | } 263 | 264 | bool check_crfile() { 265 | crfile_path_distance_ = path_distance( 266 | item_.cbegin(), item_.cend(), crfile_.cbegin(), crfile_.cend()); 267 | if (!match_crfile_ && crfile_path_distance_ == 0) { 268 | return false; 269 | } 270 | // If the last character in the item is a path separator, skip it for the 271 | // purposes of determining the item basename to be consistent with 272 | // `consume_path_component_match_front`. 273 | if (!item_.empty() && PathTraits::is_path_separator(item_.back())) { 274 | item_basename_ = 275 | path_basename(item_.cbegin(), item_.cend() - 1); 276 | } else { 277 | item_basename_ = path_basename(item_.cbegin(), item_.cend()); 278 | } 279 | auto props_it = props_.begin() + (item_basename_ - item_.cbegin()); 280 | for (auto item_it = item_basename_, item_last = item_.cend(); 281 | item_it != item_last; ++item_it, ++props_it) { 282 | props_it->alphanumeric = StringTraits::is_alphanumeric(*item_it); 283 | } 284 | crfile_basename_shared_words_ = [this]() -> CharCount { 285 | auto crfile_word_end_it = crfile_basename_word_ends_.cbegin(); 286 | auto const crfile_word_end_last = crfile_basename_word_ends_.cend(); 287 | if (crfile_word_end_it == crfile_word_end_last) { 288 | return 0; 289 | } 290 | for (auto item_it = item_basename_, item_last = item_.cend(), 291 | crfile_it = crfile_basename_, crfile_last = crfile_.cend(); 292 | item_it != item_last && crfile_it != crfile_last && 293 | *item_it == *crfile_it; 294 | ++item_it, ++crfile_it) { 295 | if (crfile_it == *crfile_word_end_it) { 296 | ++crfile_word_end_it; 297 | if (crfile_word_end_it == crfile_word_end_last) { 298 | // Only counts if the next character is plausibly not the 299 | // continuation of a word. 
300 | std::size_t const i = item_it - item_.cbegin(); 301 | if ((i + 1 < item_.size()) && !props_[i + 1].uppercase && 302 | props_[i + 1].alphanumeric) { 303 | --crfile_word_end_it; 304 | } 305 | break; 306 | } 307 | } 308 | } 309 | return crfile_word_end_it - crfile_basename_word_ends_.cbegin(); 310 | }(); 311 | // Ensure that `unmatched_suffix_len_` is initialized even for empty 312 | // queries. 313 | unmatched_suffix_len_ = item_.cend() - item_basename_; 314 | return true; 315 | } 316 | 317 | void make_item_matchcase() { 318 | if (!case_sensitive_) { 319 | auto props_it = props_.cbegin(); 320 | for (auto item_it = item_.begin(), item_last = item_.end(); 321 | item_it != item_last; ++item_it, ++props_it) { 322 | if (props_it->uppercase) { 323 | *item_it = StringTraits::uppercase_to_lowercase(*item_it); 324 | } 325 | } 326 | } 327 | } 328 | 329 | bool check_component_match_front() { 330 | auto item_rit = item_.crbegin(); 331 | auto const item_rlast = item_.crend(); 332 | auto query_rit = query_.crbegin(); 333 | auto const query_rlast = query_.crend(); 334 | 335 | // Consume the basename. 336 | consume_path_component_match_front(item_rit, item_rlast, query_rit, 337 | query_rlast); 338 | qit_basename_ = query_rit.base(); 339 | whole_basename_match_ = qit_basename_ == query_basename_; 340 | basename_match_count_ = query_.cend() - qit_basename_; 341 | 342 | // Try to consume the remainder of the query. 343 | while (query_rit != query_rlast) { 344 | if (item_rit == item_rlast) { 345 | return false; 346 | } 347 | consume_path_component_match_front(item_rit, item_rlast, query_rit, 348 | query_rlast); 349 | } 350 | prefix_level_ = PrefixLevel::COMPONENT; 351 | return true; 352 | } 353 | 354 | // Advances `item_rit` to the next path separator before `item_rlast`. For 355 | // each iterated character in the item matched by a character in the query 356 | // before `query_rlast`, advances `query_rit`. 
At the end of the path 357 | // component, backtrack the match to ensure that if any matches occur, they 358 | // include the last character matched before the path separator. 359 | // 360 | // Precondition: `item_rit != item_rlast`. 361 | // Postcondition: `item_rit` is advanced by at least 1. 362 | void consume_path_component_match_front( 363 | ReverseIterator& item_rit, ReverseIterator const item_rlast, 364 | ReverseIterator& query_rit, ReverseIterator const query_rlast) const { 365 | auto const query_last = query_rit.base(); 366 | while (true) { 367 | if (query_rit != query_rlast && *item_rit == *query_rit) { 368 | ++query_rit; 369 | } 370 | ++item_rit; 371 | if (item_rit == item_rlast || 372 | PathTraits::is_path_separator(*item_rit)) { 373 | break; 374 | } 375 | } 376 | auto const item_pc_front = *item_rit.base(); 377 | auto query_it = query_rit.base(); 378 | for (; query_it != query_last; ++query_it) { 379 | if (item_pc_front == *query_it) { 380 | break; 381 | } 382 | } 383 | query_rit = ReverseIterator(query_it); 384 | } 385 | 386 | bool check_basename_match_word_prefix() { 387 | qit_basename_words_.clear(); 388 | qit_basename_words_.push_back(qit_basename_); 389 | 390 | auto item_it = item_basename_; 391 | auto const item_last = item_.cend(); 392 | if (item_it == item_last) { 393 | return false; 394 | } 395 | auto query_it = qit_basename_; 396 | auto const query_last = query_.cend(); 397 | if (query_it == query_last) { 398 | return false; 399 | } 400 | auto props_it = props_.begin() + (item_basename_ - item_.cbegin()); 401 | 402 | bool prev_uppercase = props_it->uppercase; 403 | bool prev_alphanumeric = props_it->alphanumeric; 404 | props_it->word_start = true; 405 | 406 | // Advances `item_it` and `props_it` to the beginning of the next word. For 407 | // each consecutive iterated character in the item matched by a character 408 | // in the query before `query_last`, advances `query_it`. 
409 | // 410 | // Precondition: `item_it != item_last`; `query_it != query_last`. 411 | // Postcondition: `item_it` is always advanced by at least 1. 412 | auto const consume_word_prefix = [&] { 413 | bool can_match = true; 414 | while (true) { 415 | // Require that all alphanumeric matches in this word be contiguous. 416 | if (can_match || !prev_alphanumeric) { 417 | if (*item_it == *query_it) { 418 | ++query_it; 419 | if (query_it == query_last) { 420 | break; 421 | } 422 | } else { 423 | can_match = false; 424 | } 425 | } 426 | ++item_it; 427 | if (item_it == item_last) { 428 | break; 429 | } 430 | ++props_it; 431 | bool const uppercase = props_it->uppercase; 432 | bool const alphanumeric = props_it->alphanumeric; 433 | bool const word_start = (!prev_uppercase && uppercase) || 434 | (!prev_alphanumeric && alphanumeric); 435 | props_it->word_start = word_start; 436 | prev_uppercase = uppercase; 437 | prev_alphanumeric = alphanumeric; 438 | if (word_start) { 439 | break; 440 | } 441 | } 442 | }; 443 | 444 | consume_word_prefix(); 445 | while (query_it != query_last) { 446 | if (item_it == item_last) { 447 | basename_longest_submatch_ = 0; 448 | basename_word_gaps_ = 0; 449 | return false; 450 | } 451 | // If the next unmatched query character doesn't match the first 452 | // character of the next word, allow partial backtracking (all but the 453 | // first character) of the match in the previous word in order to find a 454 | // match for this one. 
455 | auto const c = *item_it; 456 | if (c != *query_it) { 457 | for (auto steal_rit = ReverseIterator(query_it), 458 | steal_rlast = ReverseIterator(qit_basename_words_.back() + 1); 459 | steal_rit < steal_rlast; ++steal_rit) { 460 | if (c == *steal_rit) { 461 | query_it = (steal_rit + 1).base(); 462 | break; 463 | } 464 | } 465 | } 466 | qit_basename_words_.push_back(query_it); 467 | consume_word_prefix(); 468 | } 469 | prefix_level_ = PrefixLevel::BASENAME_WORD; 470 | // Push `query_it` onto `qit_basename_words_` even though we know it's 471 | // `query_.cend()` to avoid special-casing the end case in 472 | // `score_basename_word_prefix_match`. 473 | qit_basename_words_.push_back(query_it); 474 | return true; 475 | } 476 | 477 | void score_basename_word_prefix_match() { 478 | auto item_it = item_basename_; 479 | auto props_it = props_.cbegin() + (item_basename_ - item_.cbegin()); 480 | auto query_it = qit_basename_; 481 | auto const query_last = query_.cend(); 482 | // +1 because the first iteration of the loop skips the word start at the 483 | // beginning of the basename. 484 | auto qit_words_it = qit_basename_words_.cbegin() + 1; 485 | auto query_word_last = *qit_words_it; 486 | 487 | CharCount current_submatch = 0; 488 | bool any_word_matches = false; 489 | 490 | while (true) { 491 | if (query_it != query_word_last && *item_it == *query_it) { 492 | ++query_it; 493 | current_submatch++; 494 | any_word_matches = true; 495 | if (query_it == query_last) { 496 | break; 497 | } 498 | } else { 499 | basename_longest_submatch_ = 500 | std::max(basename_longest_submatch_, current_submatch); 501 | current_submatch = 0; 502 | } 503 | ++item_it; 504 | // At this point we know that the basename *is* a word prefix match, so 505 | // fully consuming the end of the query should be the only possible way 506 | // to leave this loop. Hence we skip the comparison to `item_.cend()`. 507 | // (The same applies to `qit_words_it` and `qit_basename_words_.cend()` 508 | // below.) 
509 | ++props_it; 510 | if (props_it->word_start) { 511 | if (!any_word_matches) { 512 | basename_word_gaps_++; 513 | } 514 | any_word_matches = false; 515 | ++qit_words_it; 516 | query_word_last = *qit_words_it; 517 | } 518 | } 519 | basename_longest_submatch_ = 520 | std::max(basename_longest_submatch_, current_submatch); 521 | // -1 here because we broke out upon reaching the last match (`query_it == 522 | // query_last`) before incrementing `item_it`. 523 | unmatched_suffix_len_ = item_.cend() - item_it - 1; 524 | } 525 | 526 | void score_basename_greedy() { 527 | auto item_it = item_basename_; 528 | auto const item_last = item_.cend(); 529 | auto query_it = qit_basename_; 530 | auto const query_last = query_.cend(); 531 | if (item_it == item_last || query_it == query_last) { 532 | return; 533 | } 534 | 535 | CharCount current_submatch = 0; 536 | 537 | while (true) { 538 | if (*item_it == *query_it) { 539 | ++query_it; 540 | current_submatch++; 541 | if (query_it == query_last) { 542 | break; 543 | } 544 | } else { 545 | basename_longest_submatch_ = 546 | std::max(basename_longest_submatch_, current_submatch); 547 | current_submatch = 0; 548 | } 549 | ++item_it; 550 | if (item_it == item_last) { 551 | break; 552 | } 553 | } 554 | basename_longest_submatch_ = 555 | std::max(basename_longest_submatch_, current_submatch); 556 | // -1 here because we broke out upon reaching the last match (`query_it == 557 | // query_last`) before incrementing `item_it`. 558 | unmatched_suffix_len_ = item_last - item_it - 1; 559 | } 560 | 561 | // In all of these `get_match_positions_*` functions, we assume that the 562 | // match state is consistent with a successful last match. 
563 | 564 | void get_match_positions_sorted_no_prefix( 565 | std::vector& posns) const { 566 | get_match_positions_greedy(posns, item_.cbegin(), item_.cbegin(), 567 | query_.cbegin(), query_.cend()); 568 | } 569 | 570 | void get_match_positions_component_prefix_dirpath( 571 | std::vector& posns) const { 572 | auto item_rit = ReverseIterator(item_basename_); 573 | auto const item_rlast = item_.crend(); 574 | auto const item_first = item_.cbegin(); 575 | auto query_rit = ReverseIterator(qit_basename_); 576 | auto const query_rlast = query_.crend(); 577 | auto query_pc_last = query_rit.base(); 578 | 579 | while (query_rit != query_rlast) { 580 | consume_path_component_match_front(item_rit, item_rlast, query_rit, 581 | query_rlast); 582 | get_match_positions_greedy(posns, item_first, item_rit.base(), 583 | query_rit.base(), query_pc_last); 584 | query_pc_last = query_rit.base(); 585 | } 586 | } 587 | 588 | void get_match_positions_basename_word_prefix( 589 | std::vector& posns) const { 590 | auto item_it = item_basename_; 591 | auto const item_first = item_.cbegin(); 592 | auto const item_last = item_.cend(); 593 | auto props_it = props_.cbegin() + (item_basename_ - item_.cbegin()); 594 | auto query_it = qit_basename_; 595 | auto query_last_it = qit_basename_words_.cbegin(); 596 | 597 | while (item_it != item_last) { 598 | if (props_it->word_start) { 599 | ++query_last_it; 600 | } 601 | if (query_it != *query_last_it && *item_it == *query_it) { 602 | ++query_it; 603 | posns.push_back(item_it - item_first); 604 | } 605 | ++item_it; 606 | ++props_it; 607 | } 608 | } 609 | 610 | void get_match_positions_basename_non_word_prefix( 611 | std::vector& posns) const { 612 | get_match_positions_greedy(posns, item_.cbegin(), item_basename_, 613 | qit_basename_, query_.cend()); 614 | } 615 | 616 | template 617 | void get_match_positions_greedy(std::vector& posns, 618 | InputIt1 const item_first, InputIt1 item_it, 619 | InputIt2 query_it, 620 | InputIt2 const query_last) const { 
621 | auto const item_last = item_.cend(); 622 | while (item_it != item_last && query_it != query_last) { 623 | if (*item_it == *query_it) { 624 | ++query_it; 625 | posns.push_back(item_it - item_first); 626 | } 627 | ++item_it; 628 | } 629 | } 630 | 631 | static constexpr Score mask_to(Score const x, unsigned const bits) { 632 | return x & ((std::uint64_t(1) << bits) - 1); 633 | } 634 | 635 | template 636 | static constexpr T penalty(T const x) { 637 | return std::numeric_limits::max() - x; 638 | } 639 | 640 | // Internal state of an in-progress match on an item. Note that many of these 641 | // fields are set conditionally; see the implementation for details. 642 | 643 | // Decoded copy of the item being matched. 644 | Vec item_; 645 | 646 | // Iterator into `item_` at the beginning of the item's basename. 647 | Iterator item_basename_; 648 | 649 | // Properties of characters in the item. 650 | struct CharProperties { 651 | // If true, the character is uppercase. 652 | bool uppercase; 653 | 654 | // If true, the character is alphanumeric. 655 | bool alphanumeric; 656 | 657 | // If true, the character is the start of a word. 658 | bool word_start; 659 | }; 660 | std::vector props_; 661 | 662 | // Iterator into `query_` at the first character matching in the item's 663 | // basename. 664 | Iterator qit_basename_; 665 | 666 | // Iterators into `query_` before matching each word in the item's basename. 667 | // Note that if both are set, then `qit_basename_ == 668 | // qit_basename_words_[0]`. 669 | std::vector qit_basename_words_; 670 | 671 | // Metrics used to compute score, in order of descending significance. 672 | 673 | // Incrementally stronger statements about the quality of the match. Find 674 | // locations where this field is assigned for details. Higher numeric value 675 | // is better. 
676 | enum class PrefixLevel { 677 | NONE, 678 | COMPONENT, 679 | BASENAME_WORD, 680 | } prefix_level_; 681 | 682 | // If true, the basename of the query matches entirely in the basename of the 683 | // item. True is better. 684 | bool whole_basename_match_; 685 | 686 | // The length of the longest substring matched in the item's basename. Higher 687 | // is better. 688 | CharCount basename_longest_submatch_; 689 | 690 | // The number of characters matched in the item's basename. Higher is better. 691 | CharCount basename_match_count_; 692 | 693 | // The number of words without any matches between the first and last words 694 | // with matches in the basename. Lower is better. 695 | CharCount basename_word_gaps_; 696 | 697 | // The number of consecutive words shared between the beginning of the item's 698 | // basename and the beginning of the current file's basename. Higher is 699 | // better. 700 | CharCount crfile_basename_shared_words_; 701 | 702 | // The number of path components that must be traversed between the item's 703 | // path and the current file's path. Lower is better. 704 | CharCount crfile_path_distance_; 705 | 706 | // The number of consecutive unmatched characters at the end of the item's 707 | // basename. Since it's easy to add characters at the end of a query to 708 | // refine a search for a longer item, lower values are weakly preferred. 709 | CharCount unmatched_suffix_len_; 710 | 711 | // Matcher state that is persistent between matches. 712 | 713 | // Decoded copy of the query. 714 | Vec const query_; 715 | 716 | // Iterator into `query_` at the beginning of the query's basename. 717 | Iterator const query_basename_; 718 | 719 | // If true, the match is case-sensitive. 720 | bool const case_sensitive_; 721 | 722 | // Decoded copy of the currently open filename. 723 | Vec const crfile_; 724 | 725 | // Iterator into `crfile_` at the beginning of the currently open file's 726 | // basename. 
727 | Iterator const crfile_basename_; 728 | 729 | // Iterator into `crfile_` at the successor to the currently open file's 730 | // rightmost extension separator. 731 | Iterator const crfile_ext_; 732 | 733 | // Iterators into `crfile_` at the last character of each word in the 734 | // currently open file's basename. 735 | std::vector const crfile_basename_word_ends_; 736 | 737 | // If false, reject `crfile_` if it appears as an item. 738 | bool const match_crfile_; 739 | }; 740 | 741 | } // namespace cpsm 742 | 743 | #endif /* CPSM_MATCHER_H_ */ 744 | -------------------------------------------------------------------------------- /src/matcher_test.cc: -------------------------------------------------------------------------------- 1 | // cpsm - fuzzy path matcher 2 | // Copyright (C) 2015 the Authors 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #include "api.h" 24 | #include "str_util.h" 25 | 26 | namespace cpsm { 27 | namespace testing { 28 | 29 | class TestAssertionFailure : public std::exception { 30 | public: 31 | TestAssertionFailure() : msg_("test assertion failed") {} 32 | 33 | template 34 | explicit TestAssertionFailure(Args... 
args) 35 | : msg_(str_cat("test assertion failed: ", args...)) {} 36 | 37 | char const* what() const noexcept override { return msg_.c_str(); } 38 | 39 | private: 40 | std::string msg_; 41 | }; 42 | 43 | struct Matches { 44 | using Vec = std::vector; 45 | using size_type = typename Vec::size_type; 46 | Vec matches; 47 | 48 | typename Vec::const_iterator find(boost::string_ref const item) const { 49 | return std::find(matches.cbegin(), matches.cend(), item); 50 | } 51 | 52 | bool matched(boost::string_ref const item) const { 53 | return find(item) != matches.cend(); 54 | } 55 | 56 | void assert_matched(boost::string_ref const item) const { 57 | if (!matched(item)) { 58 | throw TestAssertionFailure("incorrectly failed to match '", item, "'"); 59 | } 60 | } 61 | 62 | void assert_not_matched(boost::string_ref const item) const { 63 | if (matched(item)) { 64 | throw TestAssertionFailure("incorrectly matched '", item, "'"); 65 | } 66 | } 67 | 68 | size_type match_index(boost::string_ref const item) const { 69 | return find(item) - matches.cbegin(); 70 | } 71 | 72 | void assert_match_index(boost::string_ref const item, 73 | size_type const expected_index) const { 74 | auto const index = match_index(item); 75 | if (index != expected_index) { 76 | throw TestAssertionFailure("expected '", item, "' (index ", index, 77 | ") to have index ", expected_index); 78 | } 79 | } 80 | 81 | void assert_better_match(boost::string_ref const better_item, 82 | boost::string_ref const worse_item) const { 83 | auto const better_index = match_index(better_item); 84 | auto const worse_index = match_index(worse_item); 85 | if (better_index >= worse_index) { 86 | throw TestAssertionFailure( 87 | "expected '", better_item, "' (index ", better_index, 88 | ") to be ranked higher (have a lower index) than '", worse_item, 89 | "' (index ", worse_index, ")"); 90 | } 91 | } 92 | }; 93 | 94 | Matches match_and_log(std::initializer_list items, 95 | boost::string_ref const query) { 96 | Matches m; 97 | 
for_each_match( 98 | query, Options().set_want_match_info(true), 99 | source_from_range(begin(items), end(items)), 100 | [&](StringRefItem item, MatchInfo const* info) { 101 | std::printf("Matched %s (%s)\n", item.item().data(), 102 | info->score_debug_string().c_str()); 103 | m.matches.push_back(copy_string_ref(item.item())); 104 | }); 105 | return m; 106 | } 107 | 108 | void test_match_order() { 109 | auto m = match_and_log({"barfoo", "fbar", "foo/bar", "foo/fbar", "foo/foobar", 110 | "foo/foo_bar", "foo/foo_bar_test", "foo/foo_test_bar", 111 | "foo/FooBar", "foo/abar", "foo/qux", "foob/ar"}, 112 | "fb"); 113 | 114 | m.assert_not_matched("barfoo"); 115 | m.assert_matched("fbar"); 116 | m.assert_matched("foo/bar"); 117 | m.assert_matched("foo/fbar"); 118 | m.assert_matched("foo/foobar"); 119 | m.assert_matched("foo/foo_bar"); 120 | m.assert_matched("foo/foo_bar_test"); 121 | m.assert_matched("foo/foo_test_bar"); 122 | m.assert_matched("foo/FooBar"); 123 | m.assert_matched("foo/abar"); 124 | m.assert_not_matched("foo/qux"); 125 | m.assert_matched("foob/ar"); 126 | 127 | // "fbar" should rank highest due to the query being a full prefix. 128 | m.assert_match_index("fbar", 0); 129 | // "foo/fbar" should rank next highest due to the query being a full prefix, 130 | // but further away from cur_file (the empty string). 131 | m.assert_match_index("foo/fbar", 1); 132 | // "foo/foo_bar" and "foo/FooBar" should both rank next highest due to being 133 | // detectable word boundary matches, though it's unspecified which of the two 134 | // is higher. 135 | m.assert_better_match("foo/fbar", "foo/foo_bar"); 136 | m.assert_better_match("foo/fbar", "foo/FooBar"); 137 | // "foo/foo_bar_test" should rank below either of the above since there are 138 | // more trailing unmatched characters. 
139 | m.assert_better_match("foo/foo_bar", "foo/foo_bar_test"); 140 | m.assert_better_match("foo/FooBar", "foo/foo_bar_test"); 141 | // "foo/foo_bar_test" should rank above "foo/foo_test_bar" since its matched 142 | // characters are in consecutive words. 143 | m.assert_better_match("foo/foo_bar_test", "foo/foo_test_bar"); 144 | // "foo/bar" should rank below all of the above since it breaks the match 145 | // across multiple path components. 146 | m.assert_better_match("foo/foo_test_bar", "foo/bar"); 147 | // "foo/foobar" should rank below all of the above since the 'b' is not a 148 | // detectable word boundary match. 149 | m.assert_better_match("foo/bar", "foo/foobar"); 150 | // "foo/abar" and "foob/ar" should rank lowest since the matched 'b' isn't 151 | // even at the beginning of the filename in either case, though it's 152 | // unspecified which of the two is higher. 153 | m.assert_better_match("foo/bar", "foo/abar"); 154 | m.assert_better_match("foo/bar", "foob/ar"); 155 | } 156 | 157 | void test_special_paths() { 158 | auto m = match_and_log({"", "/", "a/", "/a"}, "a"); 159 | 160 | m.assert_not_matched(""); 161 | m.assert_not_matched("/"); 162 | m.assert_matched("a/"); 163 | m.assert_matched("/a"); 164 | } 165 | 166 | template 167 | size_t run_test(F const& f) { 168 | try { 169 | std::printf("*** Test started\n"); 170 | f(); 171 | std::printf("*** Test passed\n"); 172 | return 0; 173 | } catch (std::exception const& ex) { 174 | std::printf("*** Test failed: %s\n", ex.what()); 175 | return 1; 176 | } 177 | } 178 | 179 | int run_all_tests() { 180 | size_t failed_tests = 0; 181 | failed_tests += run_test(test_match_order); 182 | failed_tests += run_test(test_special_paths); 183 | if (failed_tests == 0) { 184 | std::printf("*** All tests passed\n"); 185 | } else { 186 | std::printf("*** %zu tests failed\n", failed_tests); 187 | } 188 | return failed_tests == 0 ? 
0 : 1; 189 | } 190 | 191 | } // namespace testing 192 | } // namespace cpsm 193 | 194 | int main(int argc, char** argv) { 195 | return cpsm::testing::run_all_tests(); 196 | } 197 | -------------------------------------------------------------------------------- /src/par_util.h: -------------------------------------------------------------------------------- 1 | // cpsm - fuzzy path matcher 2 | // Copyright (C) 2015 the Authors 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #ifndef CPSM_PAR_UTIL_H_ 17 | #define CPSM_PAR_UTIL_H_ 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #include "str_util.h" 24 | 25 | namespace cpsm { 26 | 27 | // Drop-in replacement wrapper around std::thread that handles exceptions safely 28 | // and joins on destruction. (See 29 | // https://akrzemi1.wordpress.com/2012/11/14/not-using-stdthread/.) 30 | class Thread { 31 | public: 32 | Thread() : has_exception_msg_(false) {} 33 | 34 | Thread(Thread&& other) = default; 35 | 36 | template 37 | explicit Thread(F&& f, Args&&... args) 38 | : has_exception_msg_(false), 39 | thread_(&run, this, f, std::forward(args)...) 
{} 40 | 41 | ~Thread() { 42 | if (thread_.joinable()) { 43 | thread_.join(); 44 | } 45 | } 46 | 47 | void swap(Thread& other) { thread_.swap(other.thread_); } 48 | 49 | bool joinable() const { return thread_.joinable(); } 50 | std::thread::id get_id() const { return thread_.get_id(); } 51 | std::thread::native_handle_type native_handle() { 52 | return thread_.native_handle(); 53 | } 54 | static unsigned hardware_concurrency() { 55 | return std::thread::hardware_concurrency(); 56 | } 57 | 58 | void join() { thread_.join(); } 59 | void detach() { thread_.detach(); } 60 | 61 | bool has_exception() const { return has_exception_msg_; } 62 | std::string const& exception_msg() const { return exception_msg_; } 63 | 64 | private: 65 | template 66 | static void run(Thread* thread, F const& f, Args&&... args) { 67 | try { 68 | f(std::forward(args)...); 69 | } catch (std::exception const& ex) { 70 | thread->exception_msg_ = ex.what(); 71 | thread->has_exception_msg_ = true; 72 | } catch (...) { 73 | thread->exception_msg_ = "(unknown exception)"; 74 | thread->has_exception_msg_ = true; 75 | } 76 | } 77 | 78 | std::string exception_msg_; 79 | bool has_exception_msg_; 80 | std::thread thread_; 81 | }; 82 | 83 | void swap(Thread& x, Thread& y) { 84 | x.swap(y); 85 | } 86 | 87 | } // namespace cpsm 88 | 89 | #endif /* CPSM_PAR_UTIL_H_ */ 90 | -------------------------------------------------------------------------------- /src/path_util.h: -------------------------------------------------------------------------------- 1 | // cpsm - fuzzy path matcher 2 | // Copyright (C) 2015 the Authors 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 
6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #ifndef CPSM_PATH_UTIL_H_ 17 | #define CPSM_PATH_UTIL_H_ 18 | 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | 25 | #include "str_util.h" 26 | 27 | namespace cpsm { 28 | 29 | // PathTraits type for platform paths. 30 | struct PlatformPathTraits { 31 | // Returns true if `c` is the conventional coarsest-grained separator of 32 | // parts in a filename. 33 | static constexpr bool is_extension_separator(char const c) { 34 | return c == '.'; 35 | } 36 | 37 | // Returns true if `c` separates path components. 38 | static constexpr bool is_path_separator(char const c) { 39 | #ifdef _WIN32 40 | // TODO: Support shellslash 41 | return c == '\\'; 42 | #else 43 | return c == '/'; 44 | #endif 45 | } 46 | }; 47 | 48 | // PathTraits type for non-paths. 49 | struct NonPathTraits { 50 | static constexpr bool is_extension_separator(char const c) { return false; } 51 | static constexpr bool is_path_separator(char const c) { return false; } 52 | }; 53 | 54 | // If the given path contains a path separator, returns an iterator to after 55 | // the last path separator. Otherwise returns `first`. 56 | template 57 | InputIt path_basename(InputIt first, InputIt last) { 58 | return std::find_if(std::reverse_iterator(last), 59 | std::reverse_iterator(first), 60 | PathTraits::is_path_separator).base(); 61 | } 62 | 63 | // Returns the distance (in path components) between the two given paths. 
64 | template 65 | std::size_t path_distance(InputIt1 first1, InputIt2 last1, InputIt2 first2, 66 | InputIt2 last2) { 67 | auto const mm = boost::algorithm::mismatch(first1, last1, first2, last2); 68 | if (mm.first == last1 && mm.second == last2) { 69 | return 0; 70 | } 71 | return std::count_if(mm.first, last1, PathTraits::is_path_separator) + 72 | std::count_if(mm.second, last2, PathTraits::is_path_separator) + 1; 73 | } 74 | 75 | } // namespace cpsm 76 | 77 | #endif // CPSM_PATH_UTIL_H_ 78 | -------------------------------------------------------------------------------- /src/python_extension.cc: -------------------------------------------------------------------------------- 1 | // cpsm - fuzzy path matcher 2 | // Copyright (C) 2015 the Authors 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #define PY_SSIZE_T_CLEAN 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | 29 | #include "api.h" 30 | #include "ctrlp_util.h" 31 | #include "par_util.h" 32 | #include "str_util.h" 33 | 34 | namespace { 35 | 36 | struct PyObjectDeleter { 37 | void operator()(PyObject* const p) const { Py_DECREF(p); } 38 | }; 39 | 40 | // Reference-owning, self-releasing PyObject smart pointer. 41 | typedef std::unique_ptr PyObjPtr; 42 | 43 | // Wrappers around Python 2/3 string type distinctions. 
44 | 45 | inline bool PyVimString_AsStringAndSize(PyObject* obj, char** data, 46 | Py_ssize_t* size) { 47 | #if PY_MAJOR_VERSION >= 3 48 | *data = const_cast(PyUnicode_AsUTF8AndSize(obj, size)); 49 | return *data != nullptr; 50 | #else 51 | return PyString_AsStringAndSize(obj, data, size) >= 0; 52 | #endif 53 | } 54 | 55 | inline PyObject* PyVimString_FromStringAndSize(char const* data, 56 | Py_ssize_t size) { 57 | #if PY_MAJOR_VERSION >= 3 58 | return PyUnicode_FromStringAndSize(data, size); 59 | #else 60 | return PyString_FromStringAndSize(data, size); 61 | #endif 62 | } 63 | 64 | // Item type that wraps another, and also includes a pointer to a Python 65 | // object. 66 | template 67 | struct PyObjItem { 68 | using Obj = typename std::conditional::type; 69 | 70 | InnerItem inner; 71 | Obj obj; 72 | 73 | PyObjItem() {} 74 | explicit PyObjItem(InnerItem inner, Obj obj) 75 | : inner(std::move(inner)), obj(std::move(obj)) {} 76 | 77 | boost::string_ref match_key() const { return inner.match_key(); } 78 | boost::string_ref sort_key() const { return inner.sort_key(); } 79 | }; 80 | 81 | // Iterators do not necessarily hold a reference on iterated values, so we must 82 | // do so. 83 | template 84 | using PyIterCtrlPItem = 85 | PyObjItem, 86 | /* IsOwned = */ true>; 87 | 88 | // Thread-safe item source that batches items from a Python iterator. 
89 | template 90 | class PyIterCtrlPMatchSource { 91 | public: 92 | using Item = PyIterCtrlPItem; 93 | 94 | explicit PyIterCtrlPMatchSource(PyObject* const iter) : iter_(iter) { 95 | if (!PyIter_Check(iter)) { 96 | throw cpsm::Error("input is not iterable"); 97 | } 98 | } 99 | 100 | bool fill(std::vector& items) { 101 | std::lock_guard lock(mu_); 102 | if (done_) { 103 | return false; 104 | } 105 | auto const add_item = [&](PyObjPtr item_obj) { 106 | if (item_obj == nullptr) { 107 | return false; 108 | } 109 | char* item_data; 110 | Py_ssize_t item_size; 111 | if (!PyVimString_AsStringAndSize(item_obj.get(), &item_data, 112 | &item_size)) { 113 | return false; 114 | } 115 | items.emplace_back( 116 | cpsm::CtrlPItem( 117 | (cpsm::StringRefItem(boost::string_ref(item_data, item_size)))), 118 | std::move(item_obj)); 119 | return true; 120 | }; 121 | for (Py_ssize_t i = 0; i < batch_size(); i++) { 122 | if (!add_item(PyObjPtr(PyIter_Next(iter_)))) { 123 | done_ = true; 124 | return false; 125 | } 126 | } 127 | return true; 128 | } 129 | 130 | static constexpr Py_ssize_t batch_size() { return 512; } 131 | 132 | private: 133 | std::mutex mu_; 134 | PyObject* const iter_; 135 | bool done_ = false; 136 | }; 137 | 138 | // Lists hold references on their elements, so we can use borrowed references. 139 | template 140 | using PyListCtrlPItem = 141 | PyObjItem, 142 | /* IsOwned = */ false>; 143 | 144 | // Thread-safe item source that batches items from a Python list. 
145 | template 146 | class PyListCtrlPMatchSource { 147 | public: 148 | using Item = PyListCtrlPItem; 149 | 150 | explicit PyListCtrlPMatchSource(PyObject* const list) : list_(list) { 151 | size_ = PyList_Size(list); 152 | if (size_ < 0) { 153 | throw cpsm::Error("input is not a list"); 154 | } 155 | } 156 | 157 | bool fill(std::vector& items) { 158 | std::lock_guard lock(mu_); 159 | if (done_) { 160 | return false; 161 | } 162 | auto const add_item = [&](PyObject* item_obj) { 163 | if (item_obj == nullptr) { 164 | return false; 165 | } 166 | char* item_data; 167 | Py_ssize_t item_size; 168 | if (!PyVimString_AsStringAndSize(item_obj, &item_data, &item_size)) { 169 | return false; 170 | } 171 | items.emplace_back( 172 | cpsm::CtrlPItem( 173 | (cpsm::StringRefItem(boost::string_ref(item_data, item_size)))), 174 | item_obj); 175 | return true; 176 | }; 177 | Py_ssize_t const max = std::min(i_ + batch_size(), size_); 178 | for (; i_ < max; i_++) { 179 | if (!add_item(PyList_GetItem(list_, i_))) { 180 | done_ = true; 181 | return false; 182 | } 183 | } 184 | return i_ != size_; 185 | } 186 | 187 | static constexpr Py_ssize_t batch_size() { return 512; } 188 | 189 | private: 190 | std::mutex mu_; 191 | PyObject* const list_; 192 | Py_ssize_t i_ = 0; 193 | Py_ssize_t size_ = 0; 194 | bool done_ = false; 195 | }; 196 | 197 | // `dst` must be a functor compatible with signature `void(boost::string_ref 198 | // item, boost::string_ref match_key, PyObject* obj, cpsm::MatchInfo* info)`. 
199 | template 200 | void for_each_pyctrlp_match(boost::string_ref const query, 201 | cpsm::Options const& opts, 202 | cpsm::CtrlPMatchMode const match_mode, 203 | PyObject* const items_iter, Sink&& dst) { 204 | bool const is_list = PyList_Check(items_iter); 205 | #define DO_MATCH_WITH(MMODE) \ 206 | if (is_list) { \ 207 | cpsm::for_each_match>( \ 208 | query, opts, PyListCtrlPMatchSource(items_iter), \ 209 | [&](PyListCtrlPItem const& item, cpsm::MatchInfo* const info) { \ 210 | dst(item.inner.inner.item(), item.match_key(), item.obj, info); \ 211 | }); \ 212 | } else { \ 213 | cpsm::for_each_match>( \ 214 | query, opts, PyIterCtrlPMatchSource(items_iter), \ 215 | [&](PyIterCtrlPItem const& item, cpsm::MatchInfo* const info) { \ 216 | dst(item.inner.inner.item(), item.match_key(), item.obj.get(), \ 217 | info); \ 218 | }); \ 219 | } 220 | switch (match_mode) { 221 | case cpsm::CtrlPMatchMode::FULL_LINE: 222 | DO_MATCH_WITH(cpsm::FullLineMatch); 223 | break; 224 | case cpsm::CtrlPMatchMode::FILENAME_ONLY: 225 | DO_MATCH_WITH(cpsm::FilenameOnlyMatch); 226 | break; 227 | case cpsm::CtrlPMatchMode::FIRST_NON_TAB: 228 | DO_MATCH_WITH(cpsm::FirstNonTabMatch); 229 | break; 230 | case cpsm::CtrlPMatchMode::UNTIL_LAST_TAB: 231 | DO_MATCH_WITH(cpsm::UntilLastTabMatch); 232 | break; 233 | } 234 | #undef DO_MATCH_WITH 235 | }; 236 | 237 | unsigned int get_nr_threads(unsigned int const max_threads) { 238 | std::size_t nr_threads = cpsm::Thread::hardware_concurrency(); 239 | if (!nr_threads) { 240 | nr_threads = 1; 241 | } 242 | if (max_threads && (nr_threads > max_threads)) { 243 | nr_threads = max_threads; 244 | } 245 | return nr_threads; 246 | } 247 | 248 | } // namespace 249 | 250 | extern "C" { 251 | 252 | constexpr char CTRLP_MATCH_DOC[] = 253 | "ctrlp_match(\n" 254 | " items, query, limit=-1, mmode=None, ispath=False, crfile=None,\n" 255 | " highlight_mode=None, match_crfile=False, max_threads=0,\n" 256 | " query_inverting_delimiter=None, unicode=False)\n" 257 | "\n" 258 
| "Returns a tuple `(results, regexes)` containing information about the items\n" 259 | "in `items` that match `query`, in order of descending match quality.\n" 260 | "\n" 261 | "Options:\n" 262 | "limit -- if positive, the maximum number of results to return\n" 263 | "mmode -- CtrlP match mode (default 'full-line', i.e. full path mode)\n" 264 | "ispath -- if true, all items are paths\n" 265 | "crfile -- if set, the currently open file\n" 266 | "highlight_mode -- controls `regexes`, see README\n" 267 | "match_crfile -- if false, never match `crfile`\n" 268 | "max_threads -- if positive, limit on the number of matcher threads\n" 269 | "query_inverting_delimiter -- see README\n" 270 | "regex_line_prefix -- prefix for each regex in `regexes`\n" 271 | "unicode -- if true, all items are UTF-8-encoded"; 272 | 273 | static PyObject* cpsm_ctrlp_match(PyObject* self, PyObject* args, 274 | PyObject* kwargs) { 275 | static char const* kwlist[] = {"items", "query", "limit", "mmode", "ispath", 276 | "crfile", "highlight_mode", "match_crfile", 277 | "max_threads", "query_inverting_delimiter", 278 | "regex_line_prefix", "unicode", nullptr}; 279 | // Required parameters. 280 | PyObject* items_obj; 281 | char const* query_data; 282 | Py_ssize_t query_size; 283 | // CtrlP-provided options. 284 | int limit_int = -1; 285 | char const* mmode_data = nullptr; 286 | Py_ssize_t mmode_size = 0; 287 | int is_path = 0; 288 | char const* crfile_data = nullptr; 289 | Py_ssize_t crfile_size = 0; 290 | // cpsm-specific options. 
291 | char const* highlight_mode_data = nullptr; 292 | Py_ssize_t highlight_mode_size = 0; 293 | int match_crfile = 0; 294 | int max_threads_int = 0; 295 | char const* query_inverting_delimiter_data = nullptr; 296 | Py_ssize_t query_inverting_delimiter_size = 0; 297 | char const* regex_line_prefix_data = nullptr; 298 | Py_ssize_t regex_line_prefix_size = 0; 299 | int unicode = 0; 300 | if (!PyArg_ParseTupleAndKeywords( 301 | args, kwargs, "Os#|iz#iz#z#iiz#z#i", const_cast(kwlist), 302 | &items_obj, &query_data, &query_size, &limit_int, &mmode_data, 303 | &mmode_size, &is_path, &crfile_data, &crfile_size, 304 | &highlight_mode_data, &highlight_mode_size, &match_crfile, 305 | &max_threads_int, &query_inverting_delimiter_data, 306 | &query_inverting_delimiter_size, ®ex_line_prefix_data, 307 | ®ex_line_prefix_size, &unicode)) { 308 | return nullptr; 309 | } 310 | 311 | try { 312 | std::string query(query_data, query_size); 313 | boost::string_ref query_inverting_delimiter(query_inverting_delimiter_data, 314 | query_inverting_delimiter_size); 315 | if (!query_inverting_delimiter.empty()) { 316 | if (query_inverting_delimiter.size() > 1) { 317 | throw cpsm::Error( 318 | "query inverting delimiter must be a single character"); 319 | } 320 | query = cpsm::str_join(boost::adaptors::reverse(cpsm::str_split( 321 | query, query_inverting_delimiter[0])), 322 | ""); 323 | } 324 | 325 | auto const mopts = 326 | cpsm::Options() 327 | .set_crfile(boost::string_ref(crfile_data, crfile_size)) 328 | .set_limit((limit_int >= 0) ? std::size_t(limit_int) : 0) 329 | .set_match_crfile(match_crfile) 330 | .set_nr_threads( 331 | get_nr_threads((max_threads_int >= 0) 332 | ? 
static_cast(max_threads_int) 333 | : 0)) 334 | .set_path(is_path) 335 | .set_unicode(unicode) 336 | .set_want_match_info(true); 337 | boost::string_ref const highlight_mode(highlight_mode_data, 338 | highlight_mode_size); 339 | 340 | PyObjPtr output_tuple(PyTuple_New(2)); 341 | if (!output_tuple) { 342 | return nullptr; 343 | } 344 | PyObjPtr matches_list(PyList_New(0)); 345 | if (!matches_list) { 346 | return nullptr; 347 | } 348 | std::vector highlight_regexes; 349 | for_each_pyctrlp_match( 350 | query, mopts, 351 | cpsm::parse_ctrlp_match_mode(boost::string_ref(mmode_data, mmode_size)), 352 | items_obj, 353 | [&](boost::string_ref const item, boost::string_ref const match_key, 354 | PyObject* const obj, cpsm::MatchInfo* const info) { 355 | if (PyList_Append(matches_list.get(), obj) < 0) { 356 | throw cpsm::Error("match appending failed"); 357 | } 358 | auto match_positions = info->match_positions(); 359 | // Adjust match positions to account for substringing. 360 | std::size_t const delta = match_key.data() - item.data(); 361 | for (auto& pos : match_positions) { 362 | pos += delta; 363 | } 364 | cpsm::get_highlight_regexes( 365 | highlight_mode, item, match_positions, highlight_regexes, 366 | boost::string_ref(regex_line_prefix_data, 367 | regex_line_prefix_size)); 368 | }); 369 | if (PyTuple_SetItem(output_tuple.get(), 0, matches_list.release())) { 370 | return nullptr; 371 | } 372 | PyObjPtr regexes_list(PyList_New(0)); 373 | if (!regexes_list) { 374 | return nullptr; 375 | } 376 | for (auto const& regex : highlight_regexes) { 377 | PyObjPtr regex_str( 378 | PyVimString_FromStringAndSize(regex.data(), regex.size())); 379 | if (!regex_str) { 380 | return nullptr; 381 | } 382 | if (PyList_Append(regexes_list.get(), regex_str.get()) < 0) { 383 | return nullptr; 384 | } 385 | } 386 | if (PyTuple_SetItem(output_tuple.get(), 1, regexes_list.release())) { 387 | return nullptr; 388 | } 389 | return output_tuple.release(); 390 | } catch (std::exception const& ex) { 
391 | PyErr_SetString(PyExc_RuntimeError, ex.what()); 392 | return nullptr; 393 | } 394 | } 395 | 396 | static PyMethodDef cpsm_py_methods[] = { 397 | {"ctrlp_match", reinterpret_cast(cpsm_ctrlp_match), 398 | METH_VARARGS | METH_KEYWORDS, CTRLP_MATCH_DOC}, 399 | {nullptr, nullptr, 0, nullptr}}; 400 | 401 | #if PY_MAJOR_VERSION >= 3 402 | static struct PyModuleDef moduledef = { 403 | PyModuleDef_HEAD_INIT, 404 | "cpsm_py", 405 | NULL, 406 | -1, 407 | cpsm_py_methods, 408 | NULL, 409 | NULL, 410 | NULL, 411 | NULL 412 | }; 413 | 414 | PyMODINIT_FUNC PyInit_cpsm_py() { return PyModule_Create(&moduledef); } 415 | #else 416 | PyMODINIT_FUNC initcpsm_py() { Py_InitModule("cpsm_py", cpsm_py_methods); } 417 | #endif 418 | 419 | } /* extern "C" */ 420 | -------------------------------------------------------------------------------- /src/str_util.cc: -------------------------------------------------------------------------------- 1 | // cpsm - fuzzy path matcher 2 | // Copyright (C) 2015 the Authors 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #include "str_util.h" 17 | 18 | namespace cpsm { 19 | 20 | std::vector str_split(boost::string_ref str, 21 | char const delimiter) { 22 | std::vector splits; 23 | while (true) { 24 | auto const dpos = str.find_first_of(delimiter); 25 | if (dpos == boost::string_ref::npos) { 26 | break; 27 | } 28 | splits.push_back(str.substr(0, dpos)); 29 | str.remove_prefix(dpos+1); 30 | } 31 | splits.push_back(str); 32 | return splits; 33 | } 34 | 35 | } // namespace cpsm 36 | -------------------------------------------------------------------------------- /src/str_util.h: -------------------------------------------------------------------------------- 1 | // cpsm - fuzzy path matcher 2 | // Copyright (C) 2015 the Authors 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #ifndef CPSM_STR_UTIL_H_ 17 | #define CPSM_STR_UTIL_H_ 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | 28 | #if CPSM_CONFIG_ICU 29 | #include 30 | #endif 31 | 32 | namespace cpsm { 33 | 34 | inline void str_cat_impl(std::stringstream& ss) {} 35 | 36 | template 37 | void str_cat_impl(std::stringstream& ss, T const& x, Args... args) { 38 | ss << x; 39 | str_cat_impl(ss, args...); 40 | } 41 | 42 | // Concatenates an arbitrary number of arguments that can be stringifed through 43 | // a stringstream. 44 | template 45 | std::string str_cat(Args... 
args) { 46 | std::stringstream ss; 47 | str_cat_impl(ss, args...); 48 | return ss.str(); 49 | } 50 | 51 | // Splits a string into substrings separated by a delimiter. 52 | std::vector str_split(boost::string_ref str, 53 | char const delimiter); 54 | 55 | // Joins an iterable over a type that can be stringified through a stringstream 56 | // with the given separator. 57 | template 58 | std::string str_join(T const& xs, boost::string_ref const sep) { 59 | std::stringstream ss; 60 | boost::string_ref s; 61 | for (auto const& x : xs) { 62 | ss << s << x; 63 | s = sep; 64 | } 65 | return ss.str(); 66 | } 67 | 68 | // Exception type used by this package. 69 | class Error : public std::exception { 70 | public: 71 | Error() : msg_("(unknown error)") {} 72 | 73 | template 74 | explicit Error(Args... args) 75 | : msg_(str_cat(args...)) {} 76 | 77 | char const* what() const noexcept override { return msg_.c_str(); } 78 | 79 | private: 80 | std::string msg_; 81 | }; 82 | 83 | // Returns a new `std::string` that is a copy of the data viewed by the given 84 | // `boost::string_ref`. 85 | inline std::string copy_string_ref(boost::string_ref const sref) { 86 | return std::string(sref.data(), sref.size()); 87 | } 88 | 89 | // Constructs a copy of the range defined by the given iterators over a char[]. 90 | template 91 | boost::string_ref ref_str_iters(It first, It last) { 92 | return boost::string_ref(&*first, last - first); 93 | } 94 | 95 | // StringTraits type for paths that are 7-bit clean, which is the common case 96 | // for source code. 97 | struct SimpleStringTraits { 98 | typedef char Char; 99 | 100 | // For each character `c` in `str`, invokes `f(c, pos, len)` where `pos` is 101 | // the offset in bytes of the first byte corresponding to `c` in `str` and 102 | // `len` is its length in bytes. 
103 | template 104 | static void for_each_char(boost::string_ref const str, F const& f) { 105 | for (std::size_t i = 0, end = str.size(); i < end; i++) { 106 | f(str[i], i, 1); 107 | } 108 | } 109 | 110 | // Returns true if the given character represents a letter or number. 111 | static constexpr bool is_alphanumeric(Char const c) { 112 | return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || 113 | (c >= 'A' && c <= 'Z'); 114 | } 115 | 116 | // Returns true if the given character represents an uppercase letter. 117 | static constexpr bool is_uppercase(Char const c) { 118 | return c >= 'A' && c <= 'Z'; 119 | } 120 | 121 | // Returns the lowercase version of the given uppercase letter. 122 | static constexpr Char uppercase_to_lowercase(Char const c) { 123 | return c + ('a' - 'A'); 124 | } 125 | }; 126 | 127 | template 128 | void decode_to(boost::string_ref const str, 129 | std::vector& chars) { 130 | chars.reserve(str.size()); 131 | StringTraits::for_each_char(str, [&](typename StringTraits::Char c, int, 132 | int) { chars.push_back(c); }); 133 | } 134 | 135 | template 136 | std::vector decode(boost::string_ref const str) { 137 | std::vector vec; 138 | decode_to(str, vec); 139 | return vec; 140 | } 141 | 142 | #if CPSM_CONFIG_ICU 143 | 144 | // StringTraits type for UTF-8-encoded strings. Non-UTF-8 bytes are decoded as 145 | // the low surrogate 0xdc00+(byte) so that a match can still be attempted for 146 | // malformed strings. 147 | struct Utf8StringTraits { 148 | typedef char32_t Char; 149 | 150 | template 151 | static void for_each_char(boost::string_ref str, F const& f) { 152 | std::size_t pos = 0; 153 | char32_t b0 = 0; 154 | // Even though most of this function deals with byte-sized quantities, use 155 | // char32_t throughout to avoid casting. 
156 | auto const lookahead = [&](size_t n) -> char32_t { 157 | if (n >= str.size()) { 158 | return 0; 159 | } 160 | return str[n]; 161 | }; 162 | auto const decode_as = [&](char32_t c, std::size_t len) { 163 | f(c, pos, len); 164 | str.remove_prefix(len); 165 | pos += len; 166 | }; 167 | auto const invalid = [&]() { decode_as(0xdc00 + b0, 1); }; 168 | auto const is_continuation = 169 | [](char32_t b) -> bool { return (b & 0xc0) == 0x80; }; 170 | while (!str.empty()) { 171 | auto const b0 = lookahead(0); 172 | if (b0 == 0x00) { 173 | // Input is a string_ref, not a null-terminated string - premature null? 174 | invalid(); 175 | } else if (b0 < 0x80) { 176 | // 1-byte character 177 | decode_as(b0, 1); 178 | } else if (b0 < 0xc2) { 179 | // Continuation or overlong encoding 180 | invalid(); 181 | } else if (b0 < 0xe0) { 182 | // 2-byte sequence 183 | auto const b1 = lookahead(1); 184 | if (!is_continuation(b1)) { 185 | invalid(); 186 | } else { 187 | decode_as(((b0 & 0x1f) << 6) | (b1 & 0x3f), 2); 188 | } 189 | } else if (b0 < 0xf0) { 190 | // 3-byte sequence 191 | auto const b1 = lookahead(1), b2 = lookahead(2); 192 | if (!is_continuation(b1) || !is_continuation(b2)) { 193 | invalid(); 194 | } else if (b0 == 0xe0 && b1 < 0xa0) { 195 | // Overlong encoding 196 | invalid(); 197 | } else { 198 | decode_as(((b0 & 0x0f) << 12) | ((b1 & 0x3f) << 6) | (b2 & 0x3f), 3); 199 | } 200 | } else if (b0 < 0xf5) { 201 | // 4-byte sequence 202 | auto const b1 = lookahead(1), b2 = lookahead(2), b3 = lookahead(3); 203 | if (!is_continuation(b1) || !is_continuation(b2) || 204 | !is_continuation(b3)) { 205 | invalid(); 206 | } else if (b0 == 0xf0 && b1 < 0x90) { 207 | // Overlong encoding 208 | invalid(); 209 | } else if (b0 == 0xf4 && b1 >= 0x90) { 210 | // > U+10FFFF 211 | invalid(); 212 | } else { 213 | decode_as(((b0 & 0x07) << 18) | ((b1 & 0x3f) << 12) | 214 | ((b2 & 0x3f) << 6) | (b3 & 0x3f), 215 | 4); 216 | } 217 | } else { 218 | // > U+10FFFF 219 | invalid(); 220 | } 221 | } 222 
| } 223 | 224 | static bool is_alphanumeric(Char const c) { 225 | return u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM); 226 | } 227 | 228 | static bool is_uppercase(Char const c) { 229 | return u_hasBinaryProperty(c, UCHAR_UPPERCASE); 230 | } 231 | 232 | static Char uppercase_to_lowercase(Char const c) { 233 | return u_tolower(c); 234 | } 235 | }; 236 | 237 | #else // CPSM_CONFIG_ICU 238 | 239 | struct Utf8StringTraits { 240 | typedef char32_t Char; 241 | 242 | [[noreturn]] static void unimplemented() { 243 | throw Error("cpsm built without Unicode support"); 244 | } 245 | 246 | template 247 | static void for_each_char(boost::string_ref str, F const& f) { 248 | unimplemented(); 249 | } 250 | 251 | static bool is_alphanumeric(Char const c) { 252 | unimplemented(); 253 | } 254 | 255 | static bool is_uppercase(Char const c) { 256 | unimplemented(); 257 | } 258 | 259 | static Char uppercase_to_lowercase(Char const c) { 260 | unimplemented(); 261 | } 262 | }; 263 | 264 | #endif // CPSM_CONFIG_ICU 265 | 266 | } // namespace cpsm 267 | 268 | #endif /* CPSM_STR_UTIL_H_ */ 269 | --------------------------------------------------------------------------------