├── .clang-format
├── .gitignore
├── .travis.yml
├── .travis
    ├── install.sh
    └── script.sh
├── CMakeLists.txt
├── LICENSE
├── NOTICE
├── README.md
├── autoload
    ├── cpsm.py
    └── cpsm.vim
├── bench
    ├── bench.py
    ├── bench_cpsm.py
    └── linuxclock.py
├── cmake
    ├── FindICU.cmake
    └── FindPythonConfig.cmake
├── install.sh
└── src
    ├── .ycm_extra_conf.py
    ├── api.h
    ├── cpsm_cli_main.cc
    ├── ctrlp_util.cc
    ├── ctrlp_util.h
    ├── matcher.h
    ├── matcher_test.cc
    ├── par_util.h
    ├── path_util.h
    ├── python_extension.cc
    ├── str_util.cc
    └── str_util.h


/.clang-format:
--------------------------------------------------------------------------------
 1 | ---
 2 | Language:        Cpp
 3 | # BasedOnStyle:  Google
 4 | AccessModifierOffset: -1
 5 | AlignAfterOpenBracket: true
 6 | AlignEscapedNewlinesLeft: true
 7 | AlignOperands:   true
 8 | AlignTrailingComments: true
 9 | AllowAllParametersOfDeclarationOnNextLine: true
10 | AllowShortBlocksOnASingleLine: false
11 | AllowShortCaseLabelsOnASingleLine: false
12 | AllowShortIfStatementsOnASingleLine: true
13 | AllowShortLoopsOnASingleLine: true
14 | AllowShortFunctionsOnASingleLine: All
15 | AlwaysBreakAfterDefinitionReturnType: false
16 | AlwaysBreakTemplateDeclarations: true
17 | AlwaysBreakBeforeMultilineStrings: true
18 | BreakBeforeBinaryOperators: None
19 | BreakBeforeTernaryOperators: true
20 | BreakConstructorInitializersBeforeComma: false
21 | BinPackParameters: true
22 | BinPackArguments: true
23 | ColumnLimit:     80
24 | ConstructorInitializerAllOnOneLineOrOnePerLine: true
25 | ConstructorInitializerIndentWidth: 4
26 | DerivePointerAlignment: true
27 | ExperimentalAutoDetectBinPacking: false
28 | IndentCaseLabels: true
29 | IndentWrappedFunctionNames: false
30 | IndentFunctionDeclarationAfterType: false
31 | MaxEmptyLinesToKeep: 1
32 | KeepEmptyLinesAtTheStartOfBlocks: false
33 | NamespaceIndentation: None
34 | ObjCBlockIndentWidth: 2
35 | ObjCSpaceAfterProperty: false
36 | ObjCSpaceBeforeProtocolList: false
37 | PenaltyBreakBeforeFirstCallParameter: 1
38 | PenaltyBreakComment: 300
39 | PenaltyBreakString: 1000
40 | PenaltyBreakFirstLessLess: 120
41 | PenaltyExcessCharacter: 1000000
42 | PenaltyReturnTypeOnItsOwnLine: 200
43 | PointerAlignment: Left
44 | SpacesBeforeTrailingComments: 2
45 | Cpp11BracedListStyle: true
46 | Standard:        Auto
47 | IndentWidth:     2
48 | TabWidth:        8
49 | UseTab:          Never
50 | BreakBeforeBraces: Attach
51 | SpacesInParentheses: false
52 | SpacesInSquareBrackets: false
53 | SpacesInAngles:  false
54 | SpaceInEmptyParentheses: false
55 | SpacesInCStyleCastParentheses: false
56 | SpaceAfterCStyleCast: false
57 | SpacesInContainerLiterals: true
58 | SpaceBeforeAssignmentOperators: true
59 | ContinuationIndentWidth: 4
60 | CommentPragmas:  '^ IWYU pragma:'
61 | ForEachMacros:   [ foreach, Q_FOREACH, BOOST_FOREACH ]
62 | SpaceBeforeParens: ControlStatements
63 | DisableFormat:   false
64 | ...
65 | 
66 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Output directories
 2 | bin/
 3 | build/
 4 | 
 5 | # Vim
 6 | *[._]s[a-w][a-z]
 7 | 
 8 | # C/C++
 9 | *.o
10 | *.so
11 | 
12 | # Python
13 | *.py[cod]
14 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | # Language must be "generic" to get the minimal Trusty image.
 2 | language: generic
 3 | 
 4 | env:  # TEST_PY is read by .travis/install.sh and .travis/script.sh.
 5 |   - TEST_PY=py2
 6 |   - TEST_PY=py3
 7 | 
 8 | os:  # Platforms to build on.
 9 |   - linux
10 |   - osx
11 | 
12 | # Need Trusty for a version of GCC that supports C++11.
13 | dist: trusty
14 | sudo: required
15 | addons:
16 |   apt:  # Build dependencies installed through apt.
17 |     packages:
18 |       - cmake
19 |       - libboost-dev
20 |       - libboost-program-options-dev
21 |       - python-dev
22 |       - python3-dev
23 | 
24 | install: ./.travis/install.sh  # Installs Homebrew packages when TRAVIS_OS_NAME is osx.
25 | 
26 | script: ./.travis/script.sh  # Runs ./install.sh with PY3 chosen from TEST_PY.
27 | 


--------------------------------------------------------------------------------
/.travis/install.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -o errexit
 4 | set -o nounset
 5 | set -o pipefail
 6 | 
 7 | if [[ "${TRAVIS_OS_NAME}" == 'osx' ]]; then
 8 |     brew update
 9 |     # Skip updating Boost, since doing so takes a long time and we'd like to
10 |     # know about compatibility breakage anyway.
11 |     brew install cmake || brew outdated cmake || brew upgrade cmake
12 |     case "${TEST_PY}" in
13 |         py2) brew install python || brew outdated python || brew upgrade python;;
14 |         py3) brew install python3 || brew outdated python3 || brew upgrade python3;;
15 |     esac
16 | fi
17 | 


--------------------------------------------------------------------------------
/.travis/script.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -o errexit
 4 | set -o nounset
 5 | set -o pipefail
 6 | 
 7 | case "${TEST_PY}" in
 8 |     py2) PY3=OFF ./install.sh;;
 9 |     py3) PY3=ON ./install.sh;;
10 |     *) echo "Unknown TEST_PY: ${TEST_PY}"; false;;
11 | esac
12 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.8.12)
 2 | 
 3 | # Build configuration for cpsm. Targets defined below:
 4 | #   cpsm_core    - core library built from src/str_util.cc
 5 | #   cpsm_py      - Python extension module, installed into autoload/
 6 | #   cpsm_cli     - command-line tool, installed into bin/
 7 | #   matcher_test - test executable registered with CTest
 8 | project(cpsm)
 9 | option(PY3 "Build for python3 instead of python2." OFF)
10 | list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
11 | include_directories(${PROJECT_SOURCE_DIR}/src)
12 | 
13 | # Default to an optimized build, but only when no build type was requested
14 | # (e.g. with -DCMAKE_BUILD_TYPE=Debug) and the generator is single-config;
15 | # multi-config generators do not use CMAKE_BUILD_TYPE.
16 | if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
17 |   set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type." FORCE)
18 | endif()
19 | 
20 | if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU|Clang|Intel")
21 |   add_compile_options(-std=c++0x)
22 | elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
23 |   # Versions of MSVC that support C++11 at all support it by default, so hope
24 |   # for the best...
25 | else()
26 |   message(FATAL_ERROR "Unknown compiler - don't know how to turn on C++11.")
27 | endif()
28 | # Use @rpath-based install names for shared libraries on macOS.
29 | set(CMAKE_MACOSX_RPATH 1)
30 | 
31 | find_package(Threads REQUIRED)
32 | 
33 | set(Boost_USE_MULTITHREADED ON)
34 | find_package(Boost REQUIRED COMPONENTS program_options)
35 | include_directories(${Boost_INCLUDE_DIRS})
36 | 
37 | # Find the interpreter for the requested Python major version. PythonConfig is
38 | # the project-local module cmake/FindPythonConfig.cmake; it presumably defines
39 | # PYTHON_COMPILE_FLAGS and PYTHON_LINK_FLAGS used for cpsm_py below (module not
40 | # shown here - confirm).
41 | if(PY3)
42 |     set(Python_ADDITIONAL_VERSIONS 3.8 3.7 3.6 3.5 3.4 3.3)
43 |     find_package(PythonInterp 3 REQUIRED)
44 | else()
45 |     set(Python_ADDITIONAL_VERSIONS 2.7 2.6)
46 |     find_package(PythonInterp REQUIRED)
47 | endif()
48 | find_package(PythonConfig REQUIRED)
49 | 
50 | # ICU is optional. When it is found, all sources are compiled with
51 | # CPSM_CONFIG_ICU=1 and cpsm_core links against the ICU libraries.
52 | find_package(ICU)
53 | if(ICU_FOUND)
54 |   include_directories(${ICU_INCLUDE_DIRS})
55 |   add_definitions(-DCPSM_CONFIG_ICU=1)
56 | endif()
57 | 
58 | add_library(cpsm_core src/str_util.cc)
59 | if(ICU_FOUND)
60 |   target_link_libraries(cpsm_core ${ICU_LIBRARIES})
61 | endif()
62 | target_link_libraries(cpsm_core ${CMAKE_THREAD_LIBS_INIT})
63 | # cpsm_core is linked into the cpsm_py shared library, so build it as
64 | # position-independent code. The property is portable, unlike a raw "-fPIC"
65 | # flag, which compilers such as MSVC do not understand.
66 | set_target_properties(cpsm_core PROPERTIES POSITION_INDEPENDENT_CODE ON)
67 | 
68 | add_library(cpsm_py SHARED src/ctrlp_util.cc src/python_extension.cc)
69 | target_link_libraries(cpsm_py cpsm_core)
70 | # The flag variables are quoted so that each is always exactly one property
71 | # value; an unquoted empty or list-valued expansion would break the
72 | # property/value pairing. The empty PREFIX drops the default "lib" from the
73 | # module's file name.
74 | set_target_properties(cpsm_py PROPERTIES
75 |   COMPILE_FLAGS "${PYTHON_COMPILE_FLAGS}"
76 |   LINK_FLAGS "${PYTHON_LINK_FLAGS}"
77 |   PREFIX "")
78 | if(APPLE)
79 |   # Use ".so" instead of CMake's default ".dylib" suffix on macOS.
80 |   set_target_properties(cpsm_py PROPERTIES SUFFIX ".so")
81 | endif()
82 | install(TARGETS cpsm_py DESTINATION ${PROJECT_SOURCE_DIR}/autoload)
83 | 
84 | add_executable(cpsm_cli src/cpsm_cli_main.cc)
85 | target_link_libraries(cpsm_cli cpsm_core ${Boost_PROGRAM_OPTIONS_LIBRARIES})
86 | install(TARGETS cpsm_cli DESTINATION ${PROJECT_SOURCE_DIR}/bin)
87 | 
88 | enable_testing()
89 | 
90 | add_executable(matcher_test src/matcher_test.cc)
91 | target_link_libraries(matcher_test cpsm_core)
92 | # The NAME/COMMAND form resolves "matcher_test" to the built target's
93 | # executable instead of relying on a lookup by file name at test time.
94 | add_test(NAME matcher_test COMMAND matcher_test)
95 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 |    APPENDIX: How to apply the Apache License to your work.
180 | 
181 |       To apply the Apache License to your work, attach the following
182 |       boilerplate notice, with the fields enclosed by brackets "[]"
183 |       replaced with your own identifying information. (Don't include
184 |       the brackets!)  The text should be enclosed in the appropriate
185 |       comment syntax for the file format. We also recommend that a
186 |       file or class name and description of purpose be included on the
187 |       same "printed page" as the copyright notice for easier
188 |       identification within third-party archives.
189 | 
190 |    Copyright [yyyy] [name of copyright owner]
191 | 
192 |    Licensed under the Apache License, Version 2.0 (the "License");
193 |    you may not use this file except in compliance with the License.
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 


--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | nixprime/cpsm
2 | Copyright 2015 the Authors
3 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | cpsm
  2 | ====
  3 | 
  4 | [![Build Status](https://travis-ci.org/nixprime/cpsm.svg?branch=master)](https://travis-ci.org/nixprime/cpsm)
  5 | 
  6 | cpsm is a matcher for [CtrlP][]. Although it supports all kinds of queries, it
  7 | is highly optimized for file paths (and, to a lesser extent, similar strings
  8 | like identifiers in source code).
  9 | 
 10 | Motivation
 11 | ----------
 12 | 
 13 | There are a lot of different ways to manage multiple files in Vim. The goal of
 14 | cpsm is to enable a particular one based on CtrlP:
 15 | 
 16 | 1. Completely forget about the current set of open buffers.
 17 | 
 18 | 2. When you want to open a file, invoke CtrlP and type - at most - a handful of
 19 |    immediately obvious letters in the file's name or path, like the beginning
 20 |    of its filename.
 21 | 
 22 | 3. Get immediate visual feedback from CtrlP as to whether or not it has
 23 |    correctly determined what file you want.
 24 | 
 25 | 4. Hit Enter to open the file you wanted in the current window.
 26 | 
 27 | To achieve this, cpsm needs to deliver:
 28 | 
 29 | - high quality search results (at sufficiently high levels of quality, it's
 30 |   possible to enter a short query, hit Enter without needing to look at and
 31 |   mentally parse the top match, and have a reasonable amount of confidence that
 32 |   CtrlP/cpsm got your file right anyway)
 33 | 
 34 | - with as little user input as possible (every keystroke matters because of how
 35 |   common switching between files is)
 36 | 
 37 | - with as little latency as possible (to support scaling to very large, and
 38 |   especially very deeply nested, code bases with very long pathnames)
 39 | 
 40 | See the "Performance" section below for both search quality and time
 41 | comparisons to other matchers.
 42 | 
 43 | Requirements
 44 | ------------
 45 | 
 46 | - Vim 7.4, compiled with the `+python` flag.
 47 | 
 48 | - A C++ compiler supporting C++11.
 49 | 
 50 | - Boost (Ubuntu: package `libboost-all-dev`).
 51 | 
 52 | - CMake (Ubuntu: package `cmake`).
 53 | 
 54 | - Python headers (Ubuntu: package `python-dev`).
 55 | 
 56 | - Optional, required for Unicode support: ICU (Ubuntu: package `libicu-dev`).
 57 | 
 58 | Pyenv users on Mac OS X: cpsm requires dynamic Python libraries, which pyenv
 59 | does not build by default. If CMake indicates that it's using static libraries,
 60 | e.g.:
 61 | 
 62 | ```
 63 | -- Found PythonLibs: /Users/<username>/.pyenv/versions/3.5.2/lib/python3.5/config-3.5m/libpython3.5m.a
 64 | ```
 65 | 
 66 | Rebuild with dynamic library support by running `env
 67 | PYTHON_CONFIGURE_OPTS="--enable-framework" pyenv install <version>`.
 68 | 
 69 | Python 3 Support
 70 | ----------------
 71 | 
 72 | If your Vim is compiled against Python 3 instead (`+python3` flag) the install
 73 | script should detect this in most cases and everything should just work. You may
 74 | need different python headers installed (e.g. `python3-dev` on Ubuntu).
 75 | 
 76 | If the detection does not work for any reason you can set `PY3=ON` or `PY3=OFF`
 77 | as appropriate when running `./install.sh` to override it.
 78 | 
 79 | Installation
 80 | ------------
 81 | 
 82 | 1. Install cpsm using your favorite Vim package manager. For example, with
 83 |    [Vundle](http://github.com/gmarik/Vundle.vim), this consists of adding:
 84 | 
 85 |         Plugin 'nixprime/cpsm'
 86 | 
 87 |    to your `vimrc` and then running `:PluginInstall` from Vim.
 88 | 
 89 | 2. Build the Python module. On Linux, `cd` into `~/.vim/bundle/cpsm` and run
 90 |    `./install.sh`. Otherwise, peek inside `install.sh` and see what it does.
 91 | 
 92 | 3. Add:
 93 | 
 94 |         let g:ctrlp_match_func = {'match': 'cpsm#CtrlPMatch'}
 95 | 
 96 |    to your `vimrc`.
 97 | 
 98 | Options
 99 | -------
100 | 
101 | All of the following options are set by adding
102 | 
103 |     let (option name) = (option value)
104 | 
105 | to your .vimrc.
106 | 
107 | - As shown below, cpsm will still attempt to perform matching on an empty query
108 |   based on the open file in the current buffer. This interacts badly with e.g.
109 |   CtrlPMRU. To disable all matching on empty queries, set
110 |   `g:cpsm_match_empty_query` to 0. If you want empty query matching to only be
111 |   disabled for MRU mode, it's recommended that you configure this in your
112 |   bindings, e.g.:
113 | 
114 |         nnoremap <silent> <C-o> :let g:cpsm_match_empty_query = 0<CR>:CtrlPMRU<CR>
115 |         nnoremap <silent> <C-p> :let g:cpsm_match_empty_query = 1<CR>:CtrlP<CR>
116 | 
117 |   Note that if you do the above, you may need to prevent CtrlP from overriding
118 |   your binding by setting `let g:ctrlp_map = ''`.
119 | 
120 | - `g:cpsm_highlight_mode` controls how matches are highlighted. Valid highlight
121 |   modes are:
122 | 
123 |   - "none": Do not highlight any match characters.
124 | 
125 |   - "basic": Highlight the entire region between the leftmost and rightmost
126 |     matched characters.
127 | 
128 |   - "detailed": Highlight each matched character.
129 | 
130 |   The default is "detailed". The highlight group used to highlight matched
131 |   characters is "CtrlPMatch" (the same as for CtrlP's default matcher).
132 | 
133 | - By default, cpsm will automatically detect the number of matcher threads
134 |   based on the available hardware concurrency. To limit the number of threads
135 |   that cpsm can use, set `g:cpsm_max_threads`.
136 | 
137 | - When `g:cpsm_query_inverting_delimiter` is set to a single character, that
138 |   character splits a query into parts, and the parts are matched in reverse
139 |   order. For example, if `g:cpsm_query_inverting_delimiter` is a
140 |   space, then:
141 | 
142 |   - A query of "foo" is matched normally.
143 | 
144 |   - A query of "foo bar" is matched as if it were "barfoo".
145 | 
146 |   - A query of "foo bar qux" is matched as if it were "quxbarfoo".
147 | 
148 |   If `g:cpsm_query_inverting_delimiter` is unset or empty, this feature is
149 |   disabled.
150 | 
151 | - To enable Unicode support, set `g:cpsm_unicode` to 1. Unicode support is
152 |   currently very limited, and consists mostly of parsing input strings as UTF-8
153 |   and handling the case of non-ASCII letters correctly.
154 | 
155 | In addition, cpsm respects the value set for `g:ctrlp_match_current_file`.
156 | 
157 | Performance
158 | -----------
159 | 
160 | - The matchers in this comparison:
161 | 
162 |   - cpsm: cpsm in its default configuration, as accessed through the
163 |     cpsm_py Python extension (the same way the Vim plugin works)
164 | 
165 |   - ctrlp-cmatcher: https://github.com/JazzCore/ctrlp-cmatcher/
166 | 
167 |   - ctrlp-py-matcher: https://github.com/FelikZ/ctrlp-py-matcher
168 | 
169 |   - ctrlp: the default CtrlP matcher
170 | 
171 |   - fzf: https://github.com/junegunn/fzf
172 | 
173 | - All data is measured on Ubuntu 14.04, running in a VirtualBox VM in a Windows
174 |   7 host, on an Intel i5-4670K, with all 4 CPUs visible to the VM. Both the
175 |   host and the guest are relatively quiescent while benchmarking.
176 | 
177 | - The search corpus consists of the 48728 files in a clean Linux kernel source
178 |   repository checked out at the v4.0 tag, as collected by running `ag "" -i
179 |   --nocolor --nogroup --hidden --ignore .git -g ""`.
180 | 
181 | - For all CtrlP-based matchers, the match mode is "full-line" (the default) and
182 |   the limit is 10 (also the default). ctrlp-cmatcher only uses the current
183 |   filename to remove it from the list of candidate items; ctrlp-py-matcher
184 |   doesn't use it at all; there doesn't seem to be a way to pass this
185 |   information to fzf.
186 | 
187 | - All times are averages over 100 runs. No timing information is available for
188 |   the default CtrlP matcher or fzf because I can't figure out how to run either
189 |   in a single-shot standalone configuration. (A quick search finds claims that
190 |   ctrlp-cmatcher and ctrlp-py-matcher are both about an order of magnitude
191 |   faster than the default matcher. YMMV.) cpsm times include both the default
192 |   configuration (automatic selection of number of matcher threads) and with
193 |   `max_threads` set to 1.
194 | 
195 | - Results (given as the best match and the average time to return matches):
196 | 
197 |   - Query "", current file "":
198 | 
199 |     - cpsm: "Kbuild"; 2.863ms (9.946ms with 1 thread)
200 | 
201 |     - ctrlp: "security/keys/encrypted-keys/Makefile"
202 | 
203 |     - fzf: "COPYING"
204 | 
205 |     - All others: "security/capability.c" in roughly zero time
206 | 
207 |     - Only cpsm and fzf do any ranking; cpsm is falling back on the shortest
208 |       filename in the closest directory to the current file (which is the
209 |       repository's root), while fzf picks the lexicographically lowest filename
210 |       in the root directory.
211 | 
212 |     - I think the default CtrlP matcher is returning a different result simply
213 |       because it gets filenames in a slightly different order from ag (results
214 |       for the default matcher are collected by actually running Vim, while the
215 |       others use a precomputed list of items; "security/capability.c" is the
216 |       first file ag returned in the precomputed list.)
217 | 
218 |   - Query "", current file "mm/memcontrol.c":
219 | 
220 |     - cpsm: "include/linux/memcontrol.h"; 2.953ms (10.490ms with 1 thread)
221 | 
222 |     - All others: same as above
223 | 
224 |     - "memcontrol" is a sufficiently unique prefix that cpsm returns (IMO) the
225 |       best possible default result, "mm/memcontrol.c"'s corresponding header
226 |       file, with no query entered whatsoever and with no special knowledge of
227 |       the kernel's source layout.
228 | 
229 |     - It looks like the default CtrlP matcher doesn't use information about the
230 |       currently open file either.
231 | 
232 |   - Query "", current file "kernel/signal.c":
233 | 
234 |     - cpsm: "include/linux/signal.h"; 2.838ms (10.242ms with 1 thread)
235 | 
236 |     - All others: same as above
237 | 
238 |     - "signal" is a significantly more common prefix, but cpsm gets probably
239 |       the best match (although this is largely luck; "include/linux" is shorter
240 |       than the other likeliest possibility, "include/asm-generic", and all else
241 |       being equal cpsm prefers shorter matches on the grounds that it's easier
242 |       to add more characters to select a longer match.)
243 | 
244 |   - Query "x86/", current file "kernel/signal.c":
245 | 
246 |     - cpsm: "arch/x86/um/signal.c"; 1.932ms (6.693ms with 1 thread)
247 | 
248 |     - ctrlp-cmatcher: "arch/x86/Kbuild"; 25.034ms
249 | 
250 |     - ctrlp-py-matcher: "arch/x86/Kbuild"; 27.298ms
251 | 
252 |     - ctrlp: "tools/perf/arch/x86/util/tsc.h"
253 | 
254 |     - fzf: "Documentation/x86/early-microcode.txt"
255 | 
256 |     - Without using the current filename, there is nothing the other matchers
257 |       can do to disambiguate the query.
258 | 
259 |   - The next set of cases simulates a user typing progressively more letters of
260 |     a desired file's name ("include/linux/rcupdate.h") while they happen to be
261 |     in a different, unrelated file.
262 | 
263 |   - Query "r", current file "kernel/signal.c":
264 | 
265 |     - cpsm: "kernel/range.c"; 3.980ms (14.070ms with 1 thread)
266 | 
267 |     - ctrlp-cmatcher: "README"; 19.825ms
268 | 
269 |     - ctrlp-py-matcher: "README"; 34.215ms
270 | 
271 |     - ctrlp: "security/keys/encrypted-keys/Makefile"
272 | 
273 |     - fzf: "CREDITS"
274 | 
275 |     - cpsm is much faster than either of the other two benchmarkable matchers
276 |       with multithreading enabled, and competitive with ctrlp-cmatcher when
277 |       locked to a single thread.
278 | 
279 |   - Query "rc", current file "kernel/signal.c":
280 | 
281 |     - cpsm: "kernel/rcu/rcu.h"; 3.891ms (13.827ms with 1 thread)
282 | 
283 |     - ctrlp-cmatcher: "arch/Kconfig"; 24.391ms
284 | 
285 |     - ctrlp-py-matcher: "fs/dlm/rcom.h"; 39.328ms
286 | 
287 |     - ctrlp: "security/capability.c"
288 | 
289 |     - fzf: "Documentation/circular-buffers.txt"
290 | 
291 |   - Query "rcu", current file "kernel/signal.c":
292 | 
293 |     - cpsm: "kernel/rcu/rcu.h"; 2.534ms (9.009ms with 1 thread)
294 | 
295 |     - ctrlp-cmatcher: "arch/um/Makefile"; 29.619ms
296 | 
297 |     - ctrlp-py-matcher: "kernel/rcu/rcu.h"; 37.312ms
298 | 
299 |     - ctrlp: "security/security.c"
300 | 
301 |     - fzf: "Documentation/circular-buffers.txt"
302 | 
303 |   - Query "rcup", current file "kernel/signal.c":
304 | 
305 |     - cpsm: "include/linux/rcupdate.h"; 2.210ms (7.810ms with 1 thread)
306 | 
307 |     - ctrlp-cmatcher: "kernel/rcu/update.c"; 31.301ms
308 | 
309 |     - ctrlp-py-matcher: "include/linux/rcupdate.h"; 37.560ms
310 | 
311 |     - ctrlp: "security/apparmor/include/path.h"
312 | 
313 |     - fzf: "Documentation/power/suspend-and-cpuhotplug.txt"
314 | 
315 |   - Skipping the rest of the letter-by-letter results, since cpsm and
316 |     ctrlp-py-matcher have already "won":
317 | 
318 |     - ctrlp-cmatcher stays with "kernel/rcu/update.c" as its best match until
319 |       the entire string "rcupdate.h" is used as the query.
320 | 
321 |     - ctrlp continues to return completely unrelated results for all of the top
322 |       10 until the query "rcupdate", when it suddenly gets the correct best
323 |       match.
324 | 
325 |     - fzf switches to the correct best match after one more letter (query
326 |       "rcupd").
327 | 
328 | License
329 | -------
330 | 
331 | This software is licensed under the [Apache License, Version 2.0][LICENSE].
332 | 
333 | [CtrlP]: http://github.com/kien/ctrlp.vim
334 | [LICENSE]: http://www.apache.org/licenses/LICENSE-2.0
335 | 


--------------------------------------------------------------------------------
/autoload/cpsm.py:
--------------------------------------------------------------------------------
 1 | # cpsm - fuzzy path matcher
 2 | # Copyright (C) 2015 the Authors
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from __future__ import print_function
17 | 
18 | import os
19 | import sys
20 | import traceback
21 | import vim
22 | 
23 | try:
24 |     _vim_eval = vim.api.eval
25 | except AttributeError:
26 |     # vim.api is a neovim feature.
27 |     _vim_eval = vim.eval
28 | 
29 | script_dir = _vim_eval("s:script_dir")
30 | sys.path.append(script_dir)
31 | import cpsm_py
32 | 
33 | def ctrlp_match_with(**kwargs):
34 |     """
35 |     Wrapper for cpsm_py.ctrlp_match() that converts Vim numbers from strings
36 |     back to numbers, and handles exceptions.
37 |     """
38 |     try:
39 |         for key in ("limit", "ispath", "match_crfile", "max_threads",
40 |                     "unicode"):
41 |             kwargs[key] = int(kwargs[key])
42 |         return cpsm_py.ctrlp_match(**kwargs)
43 |     except Exception as ex:
44 |         # Log the exception. Unfortunately something CtrlP causes all messages
45 |         # to be discarded, so this is only visible in Vim verbose logging.
46 |         print("cpsm error:")
47 |         traceback.print_exc(file=sys.stdout)
48 |         # Return a short error message in the results.
49 |         ex_str = str(ex)
50 |         if (sys.exc_info()[0] is TypeError and
51 |             "function takes at most" in ex_str):
52 |             # Most likely due to a new parameter being added to
53 |             # cpsm_py.ctrlp_match.
54 |             ex_str = "rebuild cpsm by running %s: %s" % (
55 |                     os.path.normpath(os.path.join(
56 |                             script_dir, "..", "install.sh")),
57 |                     ex_str)
58 |         return ["ERROR:" + ex_str], []
59 | 
60 | def _ctrlp_match_evalinput():  # called from cpsm.vim via py3eval()/pyeval()
61 |     return ctrlp_match_with(**_vim_eval("s:input"))  # s:input: dict of kwargs
62 | 
63 | def ctrlp_match():
64 |     """
65 |     Deprecated interface that gets arguments by calling vim.eval() and returns
66 |     outputs by calling vim.command(). Kept for Denite. Use ctrlp_match_with()
67 |     or cpsm_py.ctrlp_match() in new code.
68 |     """
69 |     # TODO: a:regex is unimplemented.
70 |     results, regexes = ctrlp_match_with(
71 |             items=_vim_eval("a:items"), query=_vim_eval("a:str"),
72 |             limit=int(_vim_eval("a:limit")), mmode=_vim_eval("a:mmode"),
73 |             ispath=int(_vim_eval("a:ispath")), crfile=_vim_eval("a:crfile"),
74 |             highlight_mode=_vim_eval("g:cpsm_highlight_mode"),
75 |             match_crfile=int(_vim_eval("s:match_crfile")),
76 |             max_threads=int(_vim_eval("g:cpsm_max_threads")),
77 |             query_inverting_delimiter=_vim_eval("g:cpsm_query_inverting_delimiter"),
78 |             regex_line_prefix=_vim_eval("s:regex_line_prefix"),
79 |             unicode=int(_vim_eval("g:cpsm_unicode")))
80 |     vim.command("let s:results = [%s]" % ",".join(
81 |             map(_escape_and_quote, results)))
82 |     vim.command("let s:regexes = [%s]" % ",".join(
83 |             map(_escape_and_quote, regexes)))
84 | 
85 | def _escape_and_quote(s):  # formats s as a double-quoted Vim string literal
86 |     return '"' + s.replace("\\", "\\\\").replace('"', '\\"') + '"'
87 | 


--------------------------------------------------------------------------------
/autoload/cpsm.vim:
--------------------------------------------------------------------------------
  1 | " cpsm - fuzzy path matcher
  2 | " Copyright (C) 2015 the Authors
  3 | "
  4 | " Licensed under the Apache License, Version 2.0 (the "License");
  5 | " you may not use this file except in compliance with the License.
  6 | " You may obtain a copy of the License at
  7 | "
  8 | "     http://www.apache.org/licenses/LICENSE-2.0
  9 | "
 10 | " Unless required by applicable law or agreed to in writing, software
 11 | " distributed under the License is distributed on an "AS IS" BASIS,
 12 | " WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | " See the License for the specific language governing permissions and
 14 | " limitations under the License.
 15 | 
 16 | " Global variables and defaults
 17 | if !exists('g:cpsm_highlight_mode')
 18 |   let g:cpsm_highlight_mode = 'detailed'
 19 | endif
 20 | if !exists('g:cpsm_match_empty_query')
 21 |   let g:cpsm_match_empty_query = 1
 22 | endif
 23 | if !exists('g:cpsm_max_threads')
 24 |   if has('win32unix')
 25 |     " Synchronization primitives are extremely slow on Cygwin:
 26 |     " https://cygwin.com/ml/cygwin/2012-08/msg00200.html
 27 |     let g:cpsm_max_threads = 1
 28 |   else
 29 |     let g:cpsm_max_threads = 0
 30 |   endif
 31 | endif
 32 | if !exists('g:cpsm_query_inverting_delimiter')
 33 |   let g:cpsm_query_inverting_delimiter = ''
 34 | endif
 35 | if !exists('g:cpsm_unicode')
 36 |   let g:cpsm_unicode = 0
 37 | endif
 38 | 
 39 | let s:script_dir = escape(expand('<sfile>:p:h'), '\')
 40 | " s:status is:
 41 | " - 0: no Python support, or module loading failed for other reasons
 42 | " - 1: cpsm module built with incompatible version of Python
 43 | " - 2: cpsm module usable with Python 2
 44 | " - 3: cpsm module usable with Python 3
 45 | let s:status = 0
 46 | if has('python3')
 47 |   try
 48 |     execute 'py3file ' . s:script_dir . '/cpsm.py'
 49 |     let s:status = 3
 50 |   catch
 51 |     " Ideally we'd check specifically for 'ImportError: dynamic module does
 52 |     " not define module export function', but Vim's handling of multiline
 53 |     " exceptions seems to be completely broken. Leave s:status at 0 when
 54 |     " Python 2 is available so that the pyfile attempt below still runs.
 55 |     if !has('python')
 56 |       let s:status = 1
 57 |     endif
 58 |   endtry
 59 | endif
 60 | if s:status == 0 && has('python')
 61 |   try
 62 |     execute 'pyfile ' . s:script_dir . '/cpsm.py'
 63 |     let s:status = 2
 64 |   catch
 65 |     let s:status = 1
 66 |   endtry
 67 | endif
 68 | 
 69 | " CtrlP matcher entry point: returns the matched items (or a one-element
 70 | " 'ERROR: ...' list) and installs highlight matches. a:regex is unused.
 71 | function cpsm#CtrlPMatch(items, str, limit, mmode, ispath, crfile, regex)
 72 |   if !has('python3') && !has('python')
 73 |     return ['ERROR: cpsm requires Vim built with Python or Python3 support']
 74 |   elseif s:status == 0
 75 |     return ['ERROR: failed to load cpsm module']
 76 |   elseif s:status == 1
 77 |     return ['ERROR: cpsm built with version of Python not supported by Vim']
 78 |   endif
 79 | 
 80 |   if empty(a:str) && g:cpsm_match_empty_query == 0
 81 |     " Slices include both endpoints, so end at limit - 1 to honor a:limit.
 82 |     let s:results = a:items[0:(a:limit - 1)]
 83 |     let s:regexes = []
 84 |   else
 85 |     let s:match_crfile = exists('g:ctrlp_match_current_file') ? g:ctrlp_match_current_file : 0
 86 |     let s:regex_line_prefix = exists('g:ctrlp_line_prefix') ? g:ctrlp_line_prefix : '> '
 87 |     let s:input = {
 88 |     \   'items': a:items,
 89 |     \   'query': a:str,
 90 |     \   'limit': a:limit,
 91 |     \   'mmode': a:mmode,
 92 |     \   'ispath': a:ispath,
 93 |     \   'crfile': a:crfile,
 94 |     \   'highlight_mode': g:cpsm_highlight_mode,
 95 |     \   'match_crfile': s:match_crfile,
 96 |     \   'max_threads': g:cpsm_max_threads,
 97 |     \   'query_inverting_delimiter': g:cpsm_query_inverting_delimiter,
 98 |     \   'regex_line_prefix': s:regex_line_prefix,
 99 |     \   'unicode': g:cpsm_unicode,
100 |     \ }
101 |     if s:status == 3
102 |       let s:output = py3eval('_ctrlp_match_evalinput()')
103 |     else
104 |       let s:output = pyeval('_ctrlp_match_evalinput()')
105 |     endif
106 |     let s:results = s:output[0]
107 |     let s:regexes = s:output[1]
108 |   endif
109 | 
110 |   call clearmatches()
111 |   " Apply highlight regexes.
112 |   for r in s:regexes
113 |     call matchadd('CtrlPMatch', r)
114 |   endfor
115 |   " CtrlP does this match to hide the leading > in results.
116 |   call matchadd('CtrlPLinePre', '^>')
117 |   return s:results
118 | endfunction
119 | 


--------------------------------------------------------------------------------
/bench/bench_cpsm.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # cpsm - fuzzy path matcher
 4 | # Copyright (C) 2015 the Authors
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | from __future__ import print_function
19 | 
20 | import argparse
21 | 
22 | import bench
23 | import cpsm_py
24 | import linuxclock
25 | 
26 | if __name__ == "__main__":
27 |     argp = argparse.ArgumentParser()
28 |     argp.add_argument("-c", "--count", nargs="?", type=int, default=1,
29 |                       help="number of matches to show")
30 |     argp.add_argument("-n", "--iterations", nargs="?", type=int,
31 |                       default=bench.DEFAULT_ITERATIONS,
32 |                       help="number of iterations per query")
33 |     argp.add_argument("-t", "--threads", nargs="?", type=int, default=0,
34 |                       help="number of matcher threads")
35 |     args = argp.parse_args()
36 |     for query in bench.QUERIES:
37 |         times = []
38 |         for _ in xrange(args.iterations):
39 |             start = linuxclock.monotonic()
40 |             results, _ = cpsm_py.ctrlp_match(bench.ITEMS, query.query,
41 |                                              limit=bench.LIMIT, ispath=True,
42 |                                              crfile=query.cur_file,
43 |                                              max_threads=args.threads)
44 |             finish = linuxclock.monotonic()
45 |             times.append(finish - start)
46 |         print("%s: avg time %fs, results: %s" % (
47 |                 query, sum(times) / len(times), results[:args.count]))
48 | 


--------------------------------------------------------------------------------
/bench/linuxclock.py:
--------------------------------------------------------------------------------
 1 | # cpsm - fuzzy path matcher
 2 | # Copyright (C) 2015 the Authors
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from __future__ import print_function
17 | 
18 | import ctypes
19 | import os
20 | 
21 | # From <linux/time.h>
22 | CLOCK_REALTIME = 0
23 | CLOCK_MONOTONIC = 1
24 | CLOCK_PROCESS_CPUTIME_ID = 2
25 | CLOCK_THREAD_CPUTIME_ID = 3
26 | CLOCK_MONOTONIC_RAW = 4
27 | CLOCK_REALTIME_COARSE = 5
28 | CLOCK_MONOTONIC_COARSE = 6
29 | CLOCK_BOOTTIME = 7
30 | CLOCK_REALTIME_ALARM = 8
31 | CLOCK_BOOTTIME_ALARM = 9
32 | CLOCK_SGI_CYCLE = 10
33 | CLOCK_TAI = 11
34 | 
35 | class Timespec(ctypes.Structure):
36 |     _fields_ = [
37 |         ('tv_sec', ctypes.c_long),
38 |         ('tv_nsec', ctypes.c_long),
39 |     ]
40 | 
41 |     def to_seconds(self):
42 |         return self.tv_sec + (self.tv_nsec * 1e-9)
43 | 
44 | _clock_gettime = ctypes.CDLL("librt.so.1", use_errno=True).clock_gettime
45 | _clock_gettime.argtypes = [ctypes.c_int, ctypes.POINTER(Timespec)]
46 | 
47 | def gettime(clock):
48 |     """Returns the current time on the given clock as a Timespec."""
49 |     t = Timespec()
50 |     if _clock_gettime(clock, ctypes.pointer(t)) != 0:
51 |         errno = ctypes.get_errno()
52 |         raise OSError(errno, os.strerror(errno))
53 |     return t
54 | 
55 | def monotonic():
56 |     """Returns CLOCK_MONOTONIC_RAW's reading, in fractional seconds."""
57 |     return gettime(CLOCK_MONOTONIC_RAW).to_seconds()
58 | 


--------------------------------------------------------------------------------
/cmake/FindICU.cmake:
--------------------------------------------------------------------------------
 1 | # Finds the International Components for Unicode (ICU) Library
 2 | #
 3 | #  ICU_FOUND          - True if ICU found.
 4 | #  ICU_I18N_FOUND     - True if ICU's internationalization library found.
 5 | #  ICU_INCLUDE_DIRS   - Directory to include to get ICU headers
 6 | #                       Note: always include ICU headers as, e.g.,
 7 | #                       unicode/utypes.h
 8 | #  ICU_LIBRARIES      - Libraries to link against for the common ICU
 9 | #  ICU_I18N_LIBRARIES - Libraries to link against for ICU internationalization
10 | #                       (note: in addition to ICU_LIBRARIES)
11 | # Look for the header file.
12 | find_path(
13 |     ICU_INCLUDE_DIR
14 |     NAMES unicode/utypes.h
15 |     DOC "Include directory for the ICU library")
16 | mark_as_advanced(ICU_INCLUDE_DIR)
17 | # Look for the library.
18 | find_library(
19 |     ICU_LIBRARY
20 |     NAMES icuuc cygicuuc cygicuuc32
21 |     DOC "Libraries to link against for the common parts of ICU")
22 | mark_as_advanced(ICU_LIBRARY)
23 | # Copy the results to the output variables.
24 | if (ICU_INCLUDE_DIR AND ICU_LIBRARY)
25 |     set(ICU_FOUND 1)
26 |     set(ICU_LIBRARIES ${ICU_LIBRARY})
27 |     set(ICU_INCLUDE_DIRS ${ICU_INCLUDE_DIR})
28 |     # The version macros live in uvernum.h in current ICU releases and in
29 |     # uversion.h in old ones, so scan both. The version stays 0.0 if neither
30 |     # header defines them.
31 |     set(ICU_MAJOR_VERSION 0)
32 |     set(ICU_MINOR_VERSION 0)
33 |     foreach (_ICU_VERSION_HEADER uversion.h uvernum.h)
34 |         if (EXISTS "${ICU_INCLUDE_DIR}/unicode/${_ICU_VERSION_HEADER}")
35 |             file(READ "${ICU_INCLUDE_DIR}/unicode/${_ICU_VERSION_HEADER}" _ICU_VERSION_CONTENTS)
36 |             if (_ICU_VERSION_CONTENTS MATCHES "#define U_ICU_VERSION_MAJOR_NUM[ \t]+([0-9]+).*#define U_ICU_VERSION_MINOR_NUM[ \t]+([0-9]+)")
37 |                 set(ICU_MAJOR_VERSION "${CMAKE_MATCH_1}")
38 |                 set(ICU_MINOR_VERSION "${CMAKE_MATCH_2}")
39 |             endif ()
40 |         endif ()
41 |     endforeach ()
42 |     set(ICU_VERSION "${ICU_MAJOR_VERSION}.${ICU_MINOR_VERSION}")
43 |     # Look for the ICU internationalization libraries
44 |     find_library(
45 |         ICU_I18N_LIBRARY
46 |         NAMES icuin icui18n cygicuin cygicuin32
47 |         DOC "Libraries to link against for ICU internationalization")
48 |     mark_as_advanced(ICU_I18N_LIBRARY)
49 |     set(ICU_I18N_FOUND 0)
50 |     set(ICU_I18N_LIBRARIES)
51 |     if (ICU_I18N_LIBRARY)
52 |         set(ICU_I18N_FOUND 1)
53 |         set(ICU_I18N_LIBRARIES ${ICU_I18N_LIBRARY})
54 |     endif ()
55 | else ()
56 |     set(ICU_FOUND 0)
57 |     set(ICU_I18N_FOUND 0)
58 |     foreach (_ICU_VAR LIBRARIES I18N_LIBRARIES INCLUDE_DIRS VERSION MAJOR_VERSION MINOR_VERSION)
59 |         unset(ICU_${_ICU_VAR})
60 |     endforeach ()
61 | endif ()
62 | if (ICU_FOUND AND NOT ICU_FIND_QUIETLY)
63 |     message(STATUS "Found ICU header files in ${ICU_INCLUDE_DIRS}")
64 |     message(STATUS "Found ICU libraries: ${ICU_LIBRARIES}")
65 | elseif (NOT ICU_FOUND AND ICU_FIND_REQUIRED)
66 |     message(FATAL_ERROR "Could not find ICU")
67 | elseif (NOT ICU_FOUND)
68 |     message(STATUS "Optional package ICU was not found")
69 | endif ()
70 | 


--------------------------------------------------------------------------------
/cmake/FindPythonConfig.cmake:
--------------------------------------------------------------------------------
 1 | # FindPythonConfig
 2 | # ----------------
 3 | #
 4 | # This module locates Python libraries.
 5 | #
 6 | # This code sets the following variables:
 7 | #
 8 | # PYTHONCONFIG_FOUND - have the Python libs been found
 9 | # PYTHON_COMPILE_FLAGS - compiler flags required to include Python headers
10 | # PYTHON_LINK_FLAGS - linker flags required to link Python libraries
11 | #
12 | # If calling both `find_package(PythonInterp)` and
13 | # `find_package(PythonConfig)`, call `find_package(PythonInterp)` first.
14 | 
15 | include(FindPackageHandleStandardArgs)
16 | 
17 | find_package(PythonInterp)
18 | if(PYTHONINTERP_FOUND)
19 |   set(_Python_config "${PYTHON_EXECUTABLE}-config")
20 |   execute_process(COMMAND ${_Python_config} "--includes"
21 |     OUTPUT_VARIABLE PYTHON_COMPILE_FLAGS OUTPUT_STRIP_TRAILING_WHITESPACE)
22 |   # Since Python 3.8, python-config only emits -lpythonX.Y when given --embed.
23 |   # Try --embed first and fall back to plain --ldflags for older versions; see
24 |   # https://docs.python.org/3/whatsnew/3.8.html#debug-build-uses-the-same-abi-as-release-build
25 |   execute_process(COMMAND ${_Python_config} "--ldflags" "--embed"
26 |     OUTPUT_VARIABLE PYTHON_LINK_FLAGS OUTPUT_STRIP_TRAILING_WHITESPACE
27 |     RESULT_VARIABLE PYTHON_LINK_FLAGS_EMBED_RESULT)
28 |   if(NOT PYTHON_LINK_FLAGS_EMBED_RESULT EQUAL 0)
29 |     message(STATUS "python-config failed, retrying without --embed")
30 |     execute_process(COMMAND ${_Python_config} "--ldflags" OUTPUT_VARIABLE PYTHON_LINK_FLAGS OUTPUT_STRIP_TRAILING_WHITESPACE)
31 |   endif()
32 |   set(_Python_config_message "${PYTHON_COMPILE_FLAGS}; ${PYTHON_LINK_FLAGS}")
33 |   unset(_Python_config)
34 | else()
35 |   message(SEND_ERROR "Python interpreter not found")
36 | endif()
37 | find_package_handle_standard_args(PythonConfig DEFAULT_MSG _Python_config_message PYTHON_COMPILE_FLAGS PYTHON_LINK_FLAGS)
38 | unset(_Python_config_message)
39 | 


--------------------------------------------------------------------------------
/install.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -o errexit
 4 | set -o nounset
 5 | set -o pipefail
 6 | 
 7 | # Prints the vim executable to probe for Python support.
 8 | function choose_vim {
 9 |     local vim="${VIM:-}"
10 |     if [[ -d "${vim}" ]]; then
11 |         # $VIM being a directory presumably means vim is running install.sh
12 |         # (see `:help $VIM`), so use the parent process's executable. Where
13 |         # /proc/$PPID/exe is missing (e.g. Mac OS X), fall through to PATH.
14 |         vim="$(readlink "/proc/${PPID}/exe")"
15 |     fi
16 |     if [[ -z "${vim}" ]]; then
17 |         vim="$(command -v vim)"
18 |     fi
19 |     echo "${vim}"
20 | }
21 | 
22 | function vim_has {
23 |     local vim="$1"
24 |     local feature="$2"
25 | 
26 |     # `vim --version` is unreliable (neovim doesn't print +/-python{,3}), so run
27 |     # vim under script(1), make it echo 'x=' . has(feature), and grep the digit.
28 |     local uname="$(uname)"
29 |     case "${uname}" in
30 |         Linux) echo $(script -eqc "${vim} -S <(echo -e \"echo 'x=' . has('${feature}')\\nqa!\")" /dev/null | grep -o 'x=.' | grep -o '[[:digit:]]' -m 1);;
31 |         Darwin | FreeBSD) echo $(script -q /dev/null ${vim} -S <(echo -e "echo 'x=' . has('${feature}')\nqa!") | grep -o 'x=.' | grep -o '[[:digit:]]' -m 1);;
32 |         *) >&2 echo "ERROR: Unknown uname: ${uname}; Vim feature detection not supported"; false;;
33 |     esac
34 | }
35 | 
36 | if [ -z "${PY3+x}" ]; then
37 |     vim="$(choose_vim)"
38 |     echo "PY3 not specified; inferring Python version from ${vim}"
39 |     have_py2="$(vim_has "${vim}" python)"
40 |     have_py3="$(vim_has "${vim}" python3)"
41 |     if [ "${have_py3}" = "1" ]; then  # string test: output may be empty
42 |         echo "Python 3 selected"
43 |         PY3="ON"
44 |     elif [ "${have_py2}" = "1" ]; then
45 |         echo "Python 2 selected"
46 |         PY3="OFF"
47 |     else
48 |         >&2 echo "ERROR: No Python support detected"
49 |         false
50 |     fi
51 | else
52 |     case "${PY3}" in
53 |         ON) echo "Python 3 selected by PY3=${PY3}";;
54 |         OFF) echo "Python 2 selected by PY3=${PY3}";;
55 |         *) >&2 echo "ERROR: invalid PY3=${PY3}"; false;;
56 |     esac
57 | fi
58 | 
59 | cd "$(dirname "${BASH_SOURCE[0]}")"
60 | rm -rf bin/* build/*
61 | mkdir -p bin build
62 | (
63 |     cd build
64 |     cmake -DPY3:BOOL="${PY3}" ..
65 |     make install && make test
66 | )
67 | 


--------------------------------------------------------------------------------
/src/.ycm_extra_conf.py:
--------------------------------------------------------------------------------
 1 | def FlagsForFile(filename, **kwargs):
 2 |     return {
 3 |             "flags": [
 4 |                     "-x", "c++",
 5 |                     "-std=c++11",
 6 |                     "-I", ".",
 7 |                     "-I", "/usr/include/python2.7",
 8 |                     "-Wall",
 9 |             ],
10 |             "do_cache": True,
11 |     }
12 | 


--------------------------------------------------------------------------------
/src/api.h:
--------------------------------------------------------------------------------
  1 | // cpsm - fuzzy path matcher
  2 | // Copyright (C) 2016 the Authors
  3 | //
  4 | // Licensed under the Apache License, Version 2.0 (the "License");
  5 | // you may not use this file except in compliance with the License.
  6 | // You may obtain a copy of the License at
  7 | //
  8 | //     http://www.apache.org/licenses/LICENSE-2.0
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software
 11 | // distributed under the License is distributed on an "AS IS" BASIS,
 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | // See the License for the specific language governing permissions and
 14 | // limitations under the License.
 15 | 
 16 | #ifndef CPSM_API_H_
 17 | #define CPSM_API_H_
 18 | 
 19 | #include <algorithm>
 20 | #include <cstddef>
 21 | #include <cstdint>
 22 | #include <vector>
 23 | 
 24 | #include <boost/utility/string_ref.hpp>
 25 | 
 26 | #include "matcher.h"
 27 | #include "par_util.h"
 28 | #include "str_util.h"
 29 | 
 30 | namespace cpsm {
 31 | 
 32 | // User options that influence match behavior.
 33 | struct Options {
 34 |  private:
 35 |   // The currently open file.
 36 |   boost::string_ref crfile_;
 37 | 
 38 |   // The maximum number of matches to return. If 0, there is no limit.
 39 |   std::size_t limit_ = 0;
 40 | 
 41 |   // If false, do not consider the currently open file as a candidate.
 42 |   bool match_crfile_ = false;
 43 | 
 44 |   // The number of threads the matcher should use.
 45 |   unsigned int nr_threads_ = 1;
 46 | 
 47 |   // If true, the query and all items are paths.
 48 |   bool path_ = true;
 49 | 
 50 |   // If true, attempt to interpret the query and all items as UTF-8-encoded
 51 |   // strings.
 52 |   bool unicode_ = false;
 53 | 
 54 |   // If true, pass `match_info` to match sinks.
 55 |   bool want_match_info_ = false;
 56 | 
 57 |  public:
 58 |   boost::string_ref crfile() const { return crfile_; }
 59 |   Options& set_crfile(boost::string_ref const crfile) {
 60 |     crfile_ = crfile;
 61 |     return *this;
 62 |   }
 63 | 
 64 |   std::size_t limit() const { return limit_; }
 65 |   Options& set_limit(std::size_t const limit) {
 66 |     limit_ = limit;
 67 |     return *this;
 68 |   }
 69 | 
 70 |   bool match_crfile() const { return match_crfile_; }
 71 |   Options& set_match_crfile(bool const match_crfile) {
 72 |     match_crfile_ = match_crfile;
 73 |     return *this;
 74 |   }
 75 | 
 76 |   unsigned int nr_threads() const { return nr_threads_; }
 77 |   Options& set_nr_threads(unsigned int const nr_threads) {
 78 |     if (nr_threads < 1) {
 79 |       throw Error("invalid nr_threads: ", nr_threads);
 80 |     }
 81 |     nr_threads_ = nr_threads;
 82 |     return *this;
 83 |   }
 84 | 
 85 |   bool path() const { return path_; }
 86 |   Options& set_path(bool const path) {
 87 |     path_ = path;
 88 |     return *this;
 89 |   }
 90 | 
 91 |   bool unicode() const { return unicode_; }
 92 |   Options& set_unicode(bool const unicode) {
 93 |     unicode_ = unicode;
 94 |     return *this;
 95 |   }
 96 | 
 97 |   bool want_match_info() const { return want_match_info_; }
 98 |   Options& set_want_match_info(bool const want_match_info) {
 99 |     want_match_info_ = want_match_info;
100 |     return *this;
101 |   }
102 | };
103 | 
104 | namespace detail {
105 | 
106 | template <typename PathTraits, typename StringTraits, typename Item,
107 |           typename Source, typename Sink>
108 | void for_each_match(boost::string_ref const query, Options const& opts,
109 |                     Source&& src, Sink&& dst);
110 | 
111 | }  // namespace detail
112 | 
113 | // For each item in a list of items, invoke `dst` in descending order of
114 | // compatibility with the given query in the given context with the given
115 | // options.
116 | //
117 | // `Item` must be a default-constructible, movable type with the following
118 | // member functions:
119 | // - `match_key`, which returns a `boost::string_ref` representing the string
120 | //   that the query should match against.
121 | // - `sort_key`, which returns a value of unspecified type that can be compared
122 | //   to other values of the same type with operator `<`. When the matcher is
123 | //   otherwise unable to order two matched items, it will prefer the one whose
124 | //   `sort_key` compares lower.
125 | //
126 | // `src` must have the following member functions:
127 | // - `bool fill(std::vector<Item>& items)`, which inserts new unmatched items
128 | // into `items` (which must initially be empty) and returns true iff it may
129 | // produce more unmatched items in the future.
130 | // - `size_t batch_size() const`, which returns an optional upper bound on the
131 | // number of items inserted by each call to `fill`.
132 | // If `opts.nr_threads() > 1`, `src` must be thread-safe.
133 | //
134 | // `dst` must be a functor compatible with signature `void(Item& item,
135 | // MatchInfo const* match_info)`, where `item` is a matched item and
136 | // `match_info`, if not null, holds the state of the match. `dst` need not be
137 | // thread-safe.
138 | //
139 | // Example:
140 | //
141 | //   // Prints the top 10 matches of query against `items`, a container of
142 | //   // `boost::string_ref`. `RangeSource` is not thread-safe, so this relies
143 | //   // on `nr_threads()` being left at its default of 1.
144 | //   for_each_match<StringRefItem>(
145 | //       query, Options().set_limit(10).set_unicode(true),
146 | //       source_from_range<StringRefItem>(items.begin(), items.end()),
147 | //       [&](StringRefItem& item, MatchInfo const*) {
148 | //         std::cout << item.item() << std::endl;
149 | //       });
150 | //
151 | // (`StringRefItem` and `source_from_range` are helpers defined in this
152 | // header.)
153 | template <typename Item, typename Source, typename Sink>
154 | void for_each_match(boost::string_ref const query, Options const& opts,
155 |                     Source&& src, Sink&& dst) {
156 |   if (opts.path()) {
157 |     if (opts.unicode()) {
158 |       detail::for_each_match<PlatformPathTraits, Utf8StringTraits, Item>(
159 |           query, opts, std::forward<Source>(src), std::forward<Sink>(dst));
160 |     } else {
161 |       detail::for_each_match<PlatformPathTraits, SimpleStringTraits, Item>(
162 |           query, opts, std::forward<Source>(src), std::forward<Sink>(dst));
163 |     }
164 |   } else {
165 |     if (opts.unicode()) {
166 |       detail::for_each_match<NonPathTraits, Utf8StringTraits, Item>(
167 |           query, opts, std::forward<Source>(src), std::forward<Sink>(dst));
168 |     } else {
169 |       detail::for_each_match<NonPathTraits, SimpleStringTraits, Item>(
170 |           query, opts, std::forward<Source>(src), std::forward<Sink>(dst));
171 |     }
172 |   }
173 | }
174 | 
175 | // Item type wrapping a `boost::string_ref` used as both match and sort key.
176 | class StringRefItem {
177 |  public:
178 |   StringRefItem() {}
179 |   explicit StringRefItem(boost::string_ref const item) : item_(item) {}
180 | 
181 |   boost::string_ref item() const { return item_; }
182 |   boost::string_ref match_key() const { return item_; }
183 |   boost::string_ref sort_key() const { return item_; }
184 | 
185 |  private:
186 |   boost::string_ref item_;
187 | };
188 | 
// Source functor that walks the iterator range [first, last) and constructs
// one `Item` from each element. It keeps an unsynchronized cursor, so it is
// not safe to call `fill` from several threads at once.
template <typename Item, typename It>
class RangeSource {
 public:
  explicit RangeSource(It first, It last)
      : it_(std::move(first)), last_(std::move(last)) {}

  // Appends an item built from the next element of the range to `items`, if
  // there is one. Returns true only if an item was appended and further
  // elements remain after it.
  bool fill(std::vector<Item>& items) {
    if (it_ != last_) {
      items.emplace_back(*it_);
      return ++it_ != last_;
    }
    return false;
  }

  // Number of items a single `fill` call appends at most.
  static constexpr size_t batch_size() { return 1; }

 private:
  It it_;          // next element to hand out
  It const last_;  // end of the range
};
212 | 
213 | template <typename Item, typename It>
214 | RangeSource<Item, It> source_from_range(It first, It last) {
215 |   return RangeSource<Item, It>(std::move(first), std::move(last));
216 | }
217 | 
218 | namespace detail {
219 | 
220 | // Type binding a matched item together with its score.
221 | template <typename Item>
222 | struct Matched {
223 |   Score score;
224 |   Item item;
225 | 
226 |   Matched() {}
227 |   explicit Matched(Score score, Item item)
228 |       : score(score), item(std::move(item)) {}
229 | 
230 |   // Returns true if `x` is a better match than `y`.
231 |   static bool is_better(Matched<Item> const &x, Matched<Item> const &y) {
232 |     if (x.score != y.score) {
233 |       return x.score > y.score;
234 |     }
235 |     return x.item.sort_key() < y.item.sort_key();
236 |   }
237 | };
238 | 
239 | template <typename PathTraits, typename StringTraits, typename Item,
240 |           typename Source, typename Sink>
241 | void for_each_match(boost::string_ref const query, Options const& opts,
242 |                     Source&& src, Sink&& dst) {
243 |   MatcherOptions mopts;
244 |   mopts.crfile = opts.crfile();
245 |   mopts.match_crfile = opts.match_crfile();
246 | 
247 |   // Match in parallel.
248 |   std::vector<std::vector<Matched<Item>>> thread_matches(opts.nr_threads());
249 |   std::vector<Thread> threads;
250 |   threads.reserve(opts.nr_threads());
251 |   for (unsigned int i = 0; i < opts.nr_threads(); i++) {
252 |     threads.emplace_back([&, i] {
253 |       std::vector<Matched<Item>> matches;
254 |       std::vector<Item> batch;
255 |       // If a limit exists, each thread should only keep that many matches.
256 |       if (opts.limit()) {
257 |         matches.reserve(opts.limit() + 1);
258 |       }
259 |       batch.reserve(src.batch_size());
260 |       Matcher<PathTraits, StringTraits> matcher(query, mopts);
261 |       bool more;
262 |       do {
263 |         // Collect and match a batch.
264 |         more = src.fill(batch);
265 |         for (auto& item : batch) {
266 |           if (matcher.match(item.match_key())) {
267 |             matches.emplace_back(matcher.score(), std::move(item));
268 |             if (opts.limit()) {
269 |               std::push_heap(matches.begin(), matches.end(),
270 |                              Matched<Item>::is_better);
271 |               if (matches.size() > opts.limit()) {
272 |                 std::pop_heap(matches.begin(), matches.end(),
273 |                               Matched<Item>::is_better);
274 |                 matches.pop_back();
275 |               }
276 |             }
277 |           }
278 |         }
279 |         batch.clear();
280 |       } while (more);
281 |       thread_matches[i] = std::move(matches);
282 |     });
283 |   }
284 | 
285 |   // Collect matcher threads.
286 |   std::size_t nr_matches = 0;
287 |   for (unsigned int i = 0; i < opts.nr_threads(); i++) {
288 |     auto& thread = threads[i];
289 |     thread.join();
290 |     if (thread.has_exception()) {
291 |       throw Error(thread.exception_msg());
292 |     }
293 |     nr_matches += thread_matches[i].size();
294 |   }
295 | 
296 |   // Combine per-thread match lists.
297 |   std::vector<Matched<Item>> all_matches;
298 |   all_matches.reserve(nr_matches);
299 |   for (auto& matches : thread_matches) {
300 |     std::move(matches.begin(), matches.end(), std::back_inserter(all_matches));
301 |     matches.shrink_to_fit();
302 |   }
303 | 
304 |   // Sort and limit matches.
305 |   if (opts.limit() && opts.limit() < all_matches.size()) {
306 |     std::partial_sort(all_matches.begin(), all_matches.begin() + opts.limit(),
307 |                       all_matches.end(), Matched<Item>::is_better);
308 |     all_matches.resize(opts.limit());
309 |   } else {
310 |     std::sort(all_matches.begin(), all_matches.end(), Matched<Item>::is_better);
311 |   }
312 | 
313 |   // Emit matches.
314 |   if (opts.want_match_info()) {
315 |     Matcher<PathTraits, StringTraits> matcher(query, mopts);
316 |     for (auto& match : all_matches) {
317 |       if (!matcher.match(match.item.match_key())) {
318 |         throw Error("failed to re-match known match '",
319 |                     match.item.match_key(),
320 |                     "' during match position collection");
321 |       }
322 |       dst(match.item, &matcher);
323 |     }
324 |   } else {
325 |     for (auto& match : all_matches) {
326 |       dst(match.item, nullptr);
327 |     }
328 |   }
329 | }
330 | 
331 | }  // namespace detail
332 | 
333 | }  // namespace cpsm
334 | 
335 | #endif  // CPSM_API_H_
336 | 


--------------------------------------------------------------------------------
/src/cpsm_cli_main.cc:
--------------------------------------------------------------------------------
 1 | // cpsm - fuzzy path matcher
 2 | // Copyright (C) 2015 the Authors
 3 | //
 4 | // Licensed under the Apache License, Version 2.0 (the "License");
 5 | // you may not use this file except in compliance with the License.
 6 | // You may obtain a copy of the License at
 7 | //
 8 | //     http://www.apache.org/licenses/LICENSE-2.0
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software
11 | // distributed under the License is distributed on an "AS IS" BASIS,
12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | // See the License for the specific language governing permissions and
14 | // limitations under the License.
15 | 
16 | #include <algorithm>
17 | #include <iostream>
18 | #include <string>
19 | #include <utility>
20 | #include <vector>
21 | 
22 | #include <boost/program_options.hpp>
23 | #include <boost/utility/string_ref.hpp>
24 | 
25 | #include "api.h"
26 | #include "str_util.h"
27 | 
28 | namespace po = boost::program_options;
29 | 
30 | int main(int argc, char** argv) {
31 |   std::cin.sync_with_stdio(false);
32 |   std::cout.sync_with_stdio(false);
33 |   std::cerr.sync_with_stdio(false);
34 | 
35 |   po::options_description opts_desc("Options");
36 |   opts_desc.add_options()
37 |       ("crfile", po::value<std::string>()->default_value(""),
38 |        "'currently open file' passed to the matcher")
39 |       ("limit", po::value<std::size_t>()->default_value(10),
40 |        "maximum number of matches to return")
41 |       ("query", po::value<std::string>()->default_value(""),
42 |        "query to match items against")
43 |       ("help", "display this help and exit")
44 |       ;
45 | 
46 |   po::variables_map opts;
47 |   po::store(po::parse_command_line(argc, argv, opts_desc), opts);
48 |   po::notify(opts);
49 | 
50 |   if (opts.count("help")) {
51 |     std::cout << opts_desc << std::endl;
52 |     return 0;
53 |   }
54 | 
55 |   std::vector<std::string> lines;
56 |   std::string line;
57 |   while (std::getline(std::cin, line)) {
58 |     lines.emplace_back(std::move(line));
59 |     line.clear();
60 |   }
61 | 
62 |   auto const crfile = opts["crfile"].as<std::string>();
63 |   auto const limit = opts["limit"].as<std::size_t>();
64 |   auto const query = opts["query"].as<std::string>();
65 |   auto const mopts =
66 |       cpsm::Options().set_crfile(crfile).set_limit(limit).set_want_match_info(
67 |           true);
68 |   cpsm::for_each_match<cpsm::StringRefItem>(
69 |       query, mopts, cpsm::source_from_range<cpsm::StringRefItem>(lines.cbegin(),
70 |                                                                  lines.cend()),
71 |       [&](cpsm::StringRefItem item, cpsm::MatchInfo const* info) {
72 |         std::cout << item.item() << "\n- score: " << info->score() << "; "
73 |                   << info->score_debug_string() << "\n- match positions: "
74 |                   << cpsm::str_join(info->match_positions(), ", ") << std::endl;
75 |       });
76 | 
77 |   return 0;
78 | }
79 | 


--------------------------------------------------------------------------------
/src/ctrlp_util.cc:
--------------------------------------------------------------------------------
  1 | // cpsm - fuzzy path matcher
  2 | // Copyright (C) 2015 the Authors
  3 | //
  4 | // Licensed under the Apache License, Version 2.0 (the "License");
  5 | // you may not use this file except in compliance with the License.
  6 | // You may obtain a copy of the License at
  7 | //
  8 | //     http://www.apache.org/licenses/LICENSE-2.0
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software
 11 | // distributed under the License is distributed on an "AS IS" BASIS,
 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | // See the License for the specific language governing permissions and
 14 | // limitations under the License.
 15 | 
 16 | #include "ctrlp_util.h"
 17 | 
 18 | #include <algorithm>
 19 | #include <sstream>
 20 | #include <stdexcept>
 21 | #include <utility>
 22 | 
 23 | namespace cpsm {
 24 | 
 25 | namespace {
 26 | 
 27 | // Groups match positions into matched intervals.
 28 | std::vector<std::pair<std::size_t, std::size_t>> group_positions_detailed(
 29 |     std::vector<std::size_t> const& positions) {
 30 |   std::vector<std::pair<std::size_t, std::size_t>> groups;
 31 |   std::size_t begin = 0;
 32 |   std::size_t end = 0;
 33 |   for (std::size_t const pos : positions) {
 34 |     if (pos != end) {
 35 |       // End of previous group, start of new group.
 36 |       if (begin != end) {
 37 |         groups.emplace_back(begin, end);
 38 |       }
 39 |       begin = end = pos;
 40 |     }
 41 |     end++;
 42 |   }
 43 |   if (begin != end) {
 44 |     groups.emplace_back(begin, end);
 45 |   }
 46 |   return groups;
 47 | }
 48 | 
 49 | // Returns a single match group spanning from the first to last match.
 50 | std::vector<std::pair<std::size_t, std::size_t>> group_positions_basic(
 51 |     std::vector<std::size_t> const& positions) {
 52 |   std::vector<std::pair<std::size_t, std::size_t>> group;
 53 |   if (!positions.empty()) {
 54 |     group.emplace_back(*positions.cbegin(), (*positions.crbegin()) + 1);
 55 |   }
 56 |   return group;
 57 | }
 58 | 
 59 | std::vector<std::pair<std::size_t, std::size_t>> group_positions(
 60 |     boost::string_ref const mode, std::vector<std::size_t> const& positions) {
 61 |   if (mode.empty() || mode == "none") {
 62 |     return std::vector<std::pair<std::size_t, std::size_t>>();
 63 |   } else if (mode == "basic") {
 64 |     return group_positions_basic(positions);
 65 |   } else if (mode == "detailed") {
 66 |     return group_positions_detailed(positions);
 67 |   }
 68 |   throw Error("unknown highlight mode '", mode, "'");
 69 | }
 70 | 
 71 | }  // anonymous namespace
 72 | 
 73 | CtrlPMatchMode parse_ctrlp_match_mode(boost::string_ref const mmode) {
 74 |   if (mmode.empty() || mmode == "full-line") {
 75 |     return CtrlPMatchMode::FULL_LINE;
 76 |   } else if (mmode == "filename-only") {
 77 |     return CtrlPMatchMode::FILENAME_ONLY;
 78 |   } else if (mmode == "first-non-tab") {
 79 |     return CtrlPMatchMode::FIRST_NON_TAB;
 80 |   } else if (mmode == "until-last-tab") {
 81 |     return CtrlPMatchMode::UNTIL_LAST_TAB;
 82 |   }
 83 |   throw Error("unknown match mode ", mmode);
 84 | }
 85 | 
 86 | void get_highlight_regexes(boost::string_ref const mode,
 87 |                            boost::string_ref const item,
 88 |                            std::vector<std::size_t> const& positions,
 89 |                            std::vector<std::string>& regexes,
 90 |                            boost::string_ref const line_prefix) {
 91 |   for (auto const group : group_positions(mode, positions)) {
 92 |     // Each match group's regex has the same structure:
 93 |     // - "\V": very nomagic (only "\" needs to be escaped)
 94 |     // - "\C": forces case sensitivity
 95 |     // - "\^": beginning of string
 96 |     // - the line prefix
 97 |     // - characters in the item before the match
 98 |     // - "\zs": starts the match
 99 |     // - characters in the match group
100 |     // - "\ze": ends the match
101 |     // - characters in the item after the match
102 |     // - "\$": end of string
103 |     std::string regex = R"(\V\C\^)";
104 |     auto const write_char = [&](char c) {
105 |       if (c == '\\') {
106 |         regex += R"(\\)";
107 |       } else {
108 |         regex += c;
109 |       }
110 |     };
111 |     for (char const c : line_prefix) {
112 |       write_char(c);
113 |     }
114 |     // support highlight for CtrlP with devicons
115 |     regex += R"(\%\(\.\+\s\+\)\?)";
116 |     std::size_t i = 0;
117 |     for (; i < group.first; i++) {
118 |       write_char(item[i]);
119 |     }
120 |     regex += R"(\zs)";
121 |     for (; i < group.second; i++) {
122 |       write_char(item[i]);
123 |     }
124 |     regex += R"(\ze)";
125 |     for (; i < item.size(); i++) {
126 |       write_char(item[i]);
127 |     }
128 |     regex += R"(\$)";
129 |     regexes.emplace_back(std::move(regex));
130 |   }
131 | }
132 | 
133 | }  // namespace cpsm
134 | 


--------------------------------------------------------------------------------
/src/ctrlp_util.h:
--------------------------------------------------------------------------------
  1 | // cpsm - fuzzy path matcher
  2 | // Copyright (C) 2015 the Authors
  3 | //
  4 | // Licensed under the Apache License, Version 2.0 (the "License");
  5 | // you may not use this file except in compliance with the License.
  6 | // You may obtain a copy of the License at
  7 | //
  8 | //     http://www.apache.org/licenses/LICENSE-2.0
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software
 11 | // distributed under the License is distributed on an "AS IS" BASIS,
 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | // See the License for the specific language governing permissions and
 14 | // limitations under the License.
 15 | 
 16 | // CtrlP-specific support utilities.
 17 | 
 18 | #ifndef CPSM_CTRLP_UTIL_H_
 19 | #define CPSM_CTRLP_UTIL_H_
 20 | 
 21 | #include <algorithm>
 22 | #include <functional>
 23 | #include <string>
 24 | #include <utility>
 25 | #include <vector>
 26 | 
 27 | #include <boost/utility/string_ref.hpp>
 28 | 
 29 | #include "path_util.h"
 30 | #include "str_util.h"
 31 | 
 32 | namespace cpsm {
 33 | 
// Selects which part of a line is matched against the query. Each mode has a
// corresponding functor type (`FullLineMatch`, `FilenameOnlyMatch`,
// `FirstNonTabMatch`, `UntilLastTabMatch`) and is named by a string accepted by
// `parse_ctrlp_match_mode`.
enum class CtrlPMatchMode {
  // Match the entire line. Parsed from "full-line" or an empty string.
  FULL_LINE,

  // Match only the filename. Parsed from "filename-only".
  FILENAME_ONLY,

  // Match the part of the line before the first tab char (the whole line if
  // there is none). Parsed from "first-non-tab".
  FIRST_NON_TAB,

  // Match the part of the line before the last tab char (the whole line if
  // there is none). Parsed from "until-last-tab".
  UNTIL_LAST_TAB,
};
 47 | 
// Parses a CtrlP match mode. Accepts "full-line" (or an empty string),
// "filename-only", "first-non-tab" and "until-last-tab"; throws `Error` for
// any other value.
CtrlPMatchMode parse_ctrlp_match_mode(boost::string_ref mmode);
 50 | 
 51 | // Functor types implementing transformations for each CtrlP match mode.
 52 | 
 53 | struct FullLineMatch {
 54 |   boost::string_ref operator()(boost::string_ref const item) const {
 55 |     return item;
 56 |   }
 57 | };
 58 | 
 59 | struct FilenameOnlyMatch {
 60 |   boost::string_ref operator()(boost::string_ref const item) const {
 61 |     return ref_str_iters(
 62 |         path_basename<PlatformPathTraits>(item.cbegin(), item.cend()),
 63 |         item.cend());
 64 |   }
 65 | };
 66 | 
 67 | struct FirstNonTabMatch {
 68 |   boost::string_ref operator()(boost::string_ref const item) const {
 69 |     return ref_str_iters(item.cbegin(),
 70 |                          std::find(item.cbegin(), item.cend(), '\t'));
 71 |   }
 72 | };
 73 | 
 74 | struct UntilLastTabMatch {
 75 |   boost::string_ref operator()(boost::string_ref const item) const {
 76 |     auto const item_rend = item.crend();
 77 |     auto const last_tab_rit = std::find(item.crbegin(), item_rend, '\t');
 78 |     return ref_str_iters(item.cbegin(), (last_tab_rit == item_rend)
 79 |                                             ? item.cend()
 80 |                                             : (last_tab_rit + 1).base());
 81 |   }
 82 | };
 83 | 
 84 | // Item type that wraps another, but applies a CtrlP match mode to their
 85 | // `match_key`s.
 86 | template <typename InnerItem, typename MatchMode>
 87 | struct CtrlPItem {
 88 |   InnerItem inner;
 89 | 
 90 |   CtrlPItem() {}
 91 |   explicit CtrlPItem(InnerItem inner) : inner(std::move(inner)) {}
 92 | 
 93 |   boost::string_ref match_key() const { return MatchMode()(inner.match_key()); }
 94 |   boost::string_ref sort_key() const { return inner.sort_key(); }
 95 | };
 96 | 
// Appends a set of Vim regexes to highlight the bytes at `positions` in `item`
// for the given highlight mode. `positions` must be sorted.
//
// `mode` is "detailed" (one regex per run of consecutive positions), "basic"
// (a single regex spanning the first to the last position), or "none" / empty
// (nothing is appended); any other value throws `Error`. `line_prefix` is
// literal text that each regex expects at the start of the line, before the
// item.
void get_highlight_regexes(boost::string_ref mode, boost::string_ref item,
                           std::vector<std::size_t> const& positions,
                           std::vector<std::string>& regexes,
                           boost::string_ref line_prefix);
103 | 
104 | }  // namespace cpsm
105 | 
106 | #endif /* CPSM_CTRLP_UTIL_H_ */
107 | 


--------------------------------------------------------------------------------
/src/matcher.h:
--------------------------------------------------------------------------------
  1 | // cpsm - fuzzy path matcher
  2 | // Copyright (C) 2015 the Authors
  3 | //
  4 | // Licensed under the Apache License, Version 2.0 (the "License");
  5 | // you may not use this file except in compliance with the License.
  6 | // You may obtain a copy of the License at
  7 | //
  8 | //     http://www.apache.org/licenses/LICENSE-2.0
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software
 11 | // distributed under the License is distributed on an "AS IS" BASIS,
 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | // See the License for the specific language governing permissions and
 14 | // limitations under the License.
 15 | 
 16 | #ifndef CPSM_MATCHER_H_
 17 | #define CPSM_MATCHER_H_
 18 | 
 19 | #include <algorithm>
 20 | #include <cstdint>
 21 | #include <functional>
 22 | #include <iterator>
 23 | #include <limits>
 24 | #include <string>
 25 | #include <utility>
 26 | #include <vector>
 27 | 
 28 | #include <boost/utility/string_ref.hpp>
 29 | 
 30 | #include "path_util.h"
 31 | #include "str_util.h"
 32 | 
 33 | namespace cpsm {
 34 | 
 35 | // Internal type used for character counts.
 36 | //
 37 | // This is uint_fast16_t because cpsm is mostly used to match paths, and path
 38 | // lengths are not capable of exceeding the range of 16 bits on most major
 39 | // operating systems:
 40 | // - Linux: PATH_MAX = 4096
 41 | // - Mac OS X: PATH_MAX = 1024
 42 | // - Windows: MAX_PATH = 260; Unicode interfaces may support paths of up to
 43 | //   32767 characters
 44 | typedef std::uint_fast16_t CharCount;
 45 | 
// Options that apply to all items in a search.
struct MatcherOptions {
  // The currently open file. This is a non-owning reference; the `Matcher`
  // constructor decodes it into the matcher's own storage.
  boost::string_ref crfile;

  // If false, do not consider the currently open file as a candidate: items
  // whose path distance to `crfile` is 0 are rejected.
  bool match_crfile = false;
};
 54 | 
 55 | // Type representing a match's score.
 56 | typedef std::uint64_t Score;
 57 | 
// Read-only interface describing the outcome of a match. It is implemented by
// `Matcher`, whose accessors report on the item most recently passed to
// `Matcher::match`.
class MatchInfo {
 public:
  virtual ~MatchInfo() = default;

  // Returns the item's match score (higher is better).
  virtual Score score() const = 0;

  // Returns a string summarizing the state used to derive the last item's
  // match score.
  virtual std::string score_debug_string() const = 0;

  // Returns a sorted vector containing the position of each matched character
  // in the item.
  virtual std::vector<std::size_t> match_positions() const = 0;
};
 73 | 
 74 | template <typename PathTraits, typename StringTraits>
 75 | class Matcher : public MatchInfo {
 76 |  public:
 77 |   typedef typename StringTraits::Char Char;
 78 | 
  // Prepares a matcher for `query`. The query and `opts.crfile` are decoded
  // into the matcher's own character vectors, and everything that depends only
  // on them is computed once here: the start of each basename, whether
  // matching is case-sensitive, and the extension position and word endings of
  // the crfile basename.
  //
  // NOTE(review): later initializers read members set by earlier ones, so this
  // relies on the members being declared in the same order as they are listed
  // here (declarations not visible in this chunk) - confirm.
  explicit Matcher(boost::string_ref const query, MatcherOptions const& opts)
      // Queries are smartcased (case-sensitive only if any uppercase appears
      // in the query).
      : query_(decode<StringTraits>(query)),
        query_basename_(path_basename<PathTraits>(query_.cbegin(),
                                                  query_.cend())),
        case_sensitive_(std::any_of(query_.cbegin(), query_.cend(),
                                    StringTraits::is_uppercase)),
        crfile_(decode<StringTraits>(opts.crfile)),
        crfile_basename_(path_basename<PathTraits>(crfile_.cbegin(),
                                                   crfile_.cend())),
        // Search backwards from the end of crfile for an extension separator,
        // stopping at the start of the basename. `base()` turns the result
        // into a forward iterator: one past the separator if one was found,
        // otherwise the start of the basename.
        crfile_ext_(std::find_if(crfile_.crbegin(),
                                 ReverseIterator(crfile_basename_),
                                 PathTraits::is_extension_separator).base()),
        crfile_basename_word_ends_(find_word_endings(crfile_basename_,
                                                     crfile_ext_)),
        match_crfile_(opts.match_crfile) {}
 96 | 
 97 |   // A Matcher can't be trivially copied because it contains iterators into its
 98 |   // vectors.
 99 |   Matcher(Matcher const& other) = delete;
100 |   Matcher& operator=(Matcher const& other) = delete;
101 | 
  // Matches `item` against the query this matcher was constructed with.
  //
  // Returns false if the query's characters do not all occur in order in the
  // item, or if the item is rejected by `check_crfile`. Otherwise returns true
  // and leaves the matcher's scoring state describing this item, which is what
  // `score()`, `score_debug_string()` and `match_positions()` read. Each call
  // replaces the state left by the previous one.
  bool match(boost::string_ref const item) {
    item_.clear();
    decode_to<StringTraits>(item, item_);

    // Determine if the query matches at all.
    if (!scan()) {
      return false;
    }

    // Check for compatibility with `crfile`.
    if (!check_crfile()) {
      return false;
    }

    // Beyond this point, the item is definitely a match, and we're only
    // evaluating its quality. Reset scoring state (other than what's already
    // been assigned by `check_crfile`).
    prefix_level_ = PrefixLevel::NONE;
    whole_basename_match_ = false;
    basename_longest_submatch_ = 0;
    basename_match_count_ = 0;
    basename_word_gaps_ = 0;

    // Don't waste any time on empty queries, which can't do any further
    // differentiation between items. Also return early if the item is empty,
    // so that the remainder of the algorithm can assume it isn't.
    if (query_.empty() || item_.empty()) {
      return true;
    }

    // If the match is case-insensitive, the query must not contain any
    // uppercase letters. Convert all uppercase characters in the item to
    // lowercase so matching below this point is simply equality comparison.
    make_item_matchcase();

    // Try to constrain the match so that matches are required at the start of
    // matched path components.
    if (!check_component_match_front()) {
      // If that fails, conclude that the match is bad and don't do any further
      // matching.
      return true;
    }

    // Try to additionally constrain the match so that all matches in the
    // basename (rightmost path component) occur at the beginning of "words".
    if (check_basename_match_word_prefix()) {
      score_basename_word_prefix_match();
    } else {
      // If that fails, fall back to simple greedy matching.
      score_basename_greedy();
    }

    return true;
  }
156 | 
  // Packs the scoring state of the last matched item into a single 64-bit
  // value. Fields are laid out from most to least significant, so an earlier
  // field always outweighs all later ones when scores are compared:
  //
  //   bits 62-63  prefix_level_
  //   bit  61     whole_basename_match_
  //   bits 54-60  basename_longest_submatch_
  //   bits 47-53  basename_match_count_
  //   bits 40-46  penalty(basename_word_gaps_)
  //   bits 33-39  crfile_basename_shared_words_
  //   bits 22-32  penalty(crfile_path_distance_)
  //   bits 14-21  penalty(unmatched_suffix_len_)
  //   bits  0-13  penalty(item_.size())
  //
  // NOTE(review): `mask_to(x, n)` presumably limits `x` to `n` bits, and
  // `penalty` presumably inverts a value so that smaller inputs score higher;
  // both are defined outside this chunk - confirm.
  Score score() const final {
    return (Score(prefix_level_) << 62) |
           (Score(whole_basename_match_) << 61) |
           (mask_to(basename_longest_submatch_, 7) << 54) |
           (mask_to(basename_match_count_, 7) << 47) |
           (mask_to(penalty(basename_word_gaps_), 7) << 40) |
           (mask_to(crfile_basename_shared_words_, 7) << 33) |
           (mask_to(penalty(crfile_path_distance_), 11) << 22) |
           (mask_to(penalty(unmatched_suffix_len_), 8) << 14) |
           mask_to(penalty(item_.size()), 14);
  }
168 | 
169 |   std::string score_debug_string() const final {
170 |     return str_cat("prefix_level = ", Score(prefix_level_),
171 |                    ", whole_basename_match = ", whole_basename_match_,
172 |                    ", basename_longest_submatch = ", basename_longest_submatch_,
173 |                    ", basename_match_count = ", basename_match_count_,
174 |                    ", basename_word_gaps = ", basename_word_gaps_,
175 |                    ", crfile_basename_shared_words = ",
176 |                    crfile_basename_shared_words_, ", crfile_path_distance = ",
177 |                    crfile_path_distance_, ", unmatched_suffix_len = ",
178 |                    unmatched_suffix_len_, ", item_len = ", item_.size());
179 |   }
180 | 
181 |   std::vector<std::size_t> match_positions() const final {
182 |     std::vector<std::size_t> posns;
183 |     if (prefix_level_ == PrefixLevel::NONE) {
184 |       get_match_positions_sorted_no_prefix(posns);
185 |       return posns;
186 |     }
187 |     get_match_positions_component_prefix_dirpath(posns);
188 |     if (prefix_level_ == PrefixLevel::BASENAME_WORD) {
189 |       get_match_positions_basename_word_prefix(posns);
190 |     } else {
191 |       get_match_positions_basename_non_word_prefix(posns);
192 |     }
193 |     std::sort(posns.begin(), posns.end());
194 |     return posns;
195 |   }
196 | 
197 |  private:
198 |   typedef std::vector<Char> Vec;
199 |   typedef typename Vec::const_iterator Iterator;
200 |   typedef typename Vec::const_reverse_iterator ReverseIterator;
201 | 
202 |   static std::vector<Iterator> find_word_endings(Iterator const first,
203 |                                                  Iterator const last) {
204 |     std::vector<Iterator> word_ends;
205 |     bool prev_uppercase = false;
206 |     bool prev_alphanumeric = false;
207 |     for (auto it = first; it != last; ++it) {
208 |       auto const c = *it;
209 |       bool const next_uppercase = StringTraits::is_uppercase(c);
210 |       bool const next_alphanumeric = StringTraits::is_alphanumeric(c);
211 |       if (prev_alphanumeric &&
212 |           (!next_alphanumeric || (!prev_uppercase && next_uppercase))) {
213 |         word_ends.push_back(it - 1);
214 |       }
215 |       prev_uppercase = next_uppercase;
216 |       prev_alphanumeric = next_alphanumeric;
217 |     }
218 |     if (prev_alphanumeric) {
219 |       word_ends.push_back(last - 1);
220 |     }
221 |     return word_ends;
222 |   }
223 | 
224 |   bool scan() {
225 |     props_.resize(item_.size());
226 |     auto props_it = props_.begin();
227 |     for (auto item_it = item_.cbegin(), item_last = item_.cend();
228 |          item_it != item_last; ++item_it, ++props_it) {
229 |       props_it->uppercase = StringTraits::is_uppercase(*item_it);
230 |     }
231 |     if (case_sensitive_) {
232 |       return scan_match<true>();
233 |     } else {
234 |       return scan_match<false>();
235 |     }
236 |   }
237 | 
238 |   template <bool CaseSensitive>
239 |   bool scan_match() const {
240 |     auto query_it = query_.cbegin();
241 |     auto const query_last = query_.cend();
242 |     if (query_it == query_last) {
243 |       return true;
244 |     }
245 |     auto props_it = props_.cbegin();
246 |     for (auto item_it = item_.cbegin(), item_last = item_.cend();
247 |          item_it != item_last; ++item_it, ++props_it) {
248 |       auto c = *item_it;
249 |       // If the match is case-insensitive, the query must not contain any
250 |       // uppercase letters.
251 |       if (!CaseSensitive && props_it->uppercase) {
252 |         c = StringTraits::uppercase_to_lowercase(c);
253 |       }
254 |       if (c == *query_it) {
255 |         ++query_it;
256 |         if (query_it == query_last) {
257 |           return true;
258 |         }
259 |       }
260 |     }
261 |     return false;
262 |   }
263 | 
  // Evaluates the current item in relation to the currently open file
  // (`crfile`) and initializes the item-dependent state that does not depend
  // on the query:
  // - `crfile_path_distance_`: result of `path_distance` between the item and
  //   crfile
  // - `item_basename_`: start of the item's basename
  // - the `alphanumeric` property of every character from the basename on
  // - `crfile_basename_shared_words_`: number of crfile basename words covered
  //   by the common prefix of the two basenames
  // - `unmatched_suffix_len_`: number of characters from the basename start to
  //   the end of the item
  //
  // Returns false only if `match_crfile_` is false and the path distance is 0
  // (presumably meaning the item is crfile itself - confirm against
  // `path_distance`); in that case the remaining state is not updated.
  bool check_crfile() {
    crfile_path_distance_ = path_distance<PathTraits>(
        item_.cbegin(), item_.cend(), crfile_.cbegin(), crfile_.cend());
    if (!match_crfile_ && crfile_path_distance_ == 0) {
      return false;
    }
    // If the last character in the item is a path separator, skip it for the
    // purposes of determining the item basename to be consistent with
    // `consume_path_component_match_front`.
    if (!item_.empty() && PathTraits::is_path_separator(item_.back())) {
      item_basename_ =
          path_basename<PathTraits>(item_.cbegin(), item_.cend() - 1);
    } else {
      item_basename_ = path_basename<PathTraits>(item_.cbegin(), item_.cend());
    }
    // Only the basename's characters get their `alphanumeric` property
    // computed here.
    auto props_it = props_.begin() + (item_basename_ - item_.cbegin());
    for (auto item_it = item_basename_, item_last = item_.cend();
         item_it != item_last; ++item_it, ++props_it) {
      props_it->alphanumeric = StringTraits::is_alphanumeric(*item_it);
    }
    crfile_basename_shared_words_ = [this]() -> CharCount {
      auto crfile_word_end_it = crfile_basename_word_ends_.cbegin();
      auto const crfile_word_end_last = crfile_basename_word_ends_.cend();
      if (crfile_word_end_it == crfile_word_end_last) {
        return 0;
      }
      // Walk the common prefix of the item basename and the crfile basename,
      // counting each crfile word ending that the prefix reaches.
      for (auto item_it = item_basename_, item_last = item_.cend(),
                crfile_it = crfile_basename_, crfile_last = crfile_.cend();
           item_it != item_last && crfile_it != crfile_last &&
               *item_it == *crfile_it;
           ++item_it, ++crfile_it) {
        if (crfile_it == *crfile_word_end_it) {
          ++crfile_word_end_it;
          if (crfile_word_end_it == crfile_word_end_last) {
            // Only counts if the next character is plausibly not the
            // continuation of a word.
            std::size_t const i = item_it - item_.cbegin();
            if ((i + 1 < item_.size()) && !props_[i + 1].uppercase &&
                props_[i + 1].alphanumeric) {
              --crfile_word_end_it;
            }
            break;
          }
        }
      }
      return crfile_word_end_it - crfile_basename_word_ends_.cbegin();
    }();
    // Ensure that `unmatched_suffix_len_` is initialized even for empty
    // queries.
    unmatched_suffix_len_ = item_.cend() - item_basename_;
    return true;
  }
316 | 
317 |   void make_item_matchcase() {
318 |     if (!case_sensitive_) {
319 |       auto props_it = props_.cbegin();
320 |       for (auto item_it = item_.begin(), item_last = item_.end();
321 |            item_it != item_last; ++item_it, ++props_it) {
322 |         if (props_it->uppercase) {
323 |           *item_it = StringTraits::uppercase_to_lowercase(*item_it);
324 |         }
325 |       }
326 |     }
327 |   }
328 | 
329 |   bool check_component_match_front() {
330 |     auto item_rit = item_.crbegin();
331 |     auto const item_rlast = item_.crend();
332 |     auto query_rit = query_.crbegin();
333 |     auto const query_rlast = query_.crend();
334 | 
335 |     // Consume the basename.
336 |     consume_path_component_match_front(item_rit, item_rlast, query_rit,
337 |                                        query_rlast);
338 |     qit_basename_ = query_rit.base();
339 |     whole_basename_match_ = qit_basename_ == query_basename_;
340 |     basename_match_count_ = query_.cend() - qit_basename_;
341 | 
342 |     // Try to consume the remainder of the query.
343 |     while (query_rit != query_rlast) {
344 |       if (item_rit == item_rlast) {
345 |         return false;
346 |       }
347 |       consume_path_component_match_front(item_rit, item_rlast, query_rit,
348 |                                          query_rlast);
349 |     }
350 |     prefix_level_ = PrefixLevel::COMPONENT;
351 |     return true;
352 |   }
353 | 
354 |   // Advances `item_rit` to the next path separator before `item_rlast`. For
355 |   // each iterated character in the item matched by a character in the query
356 |   // before `query_rlast`, advances `query_rit`. At the end of the path
357 |   // component, backtrack the match to ensure that if any matches occur, they
358 |   // include the last character matched before the path separator.
359 |   //
360 |   // Precondition: `item_rit != item_rlast`.
361 |   // Postcondition: `item_rit` is advanced by at least 1.
362 |   void consume_path_component_match_front(
363 |       ReverseIterator& item_rit, ReverseIterator const item_rlast,
364 |       ReverseIterator& query_rit, ReverseIterator const query_rlast) const {
365 |     auto const query_last = query_rit.base();
366 |     while (true) {
367 |       if (query_rit != query_rlast && *item_rit == *query_rit) {
368 |         ++query_rit;
369 |       }
370 |       ++item_rit;
371 |       if (item_rit == item_rlast ||
372 |           PathTraits::is_path_separator(*item_rit)) {
373 |         break;
374 |       }
375 |     }
376 |     auto const item_pc_front = *item_rit.base();
377 |     auto query_it = query_rit.base();
378 |     for (; query_it != query_last; ++query_it) {
379 |       if (item_pc_front == *query_it) {
380 |         break;
381 |       }
382 |     }
383 |     query_rit = ReverseIterator(query_it);
384 |   }
385 | 
  // Checks whether the part of the query assigned to the basename (from
  // `qit_basename_` to the end of `query_`) can be matched against the item's
  // basename word by word, where the alphanumeric matches inside each word
  // must be contiguous.
  //
  // A word starts at the first basename character, wherever a non-uppercase
  // character is followed by an uppercase one, and wherever a
  // non-alphanumeric character is followed by an alphanumeric one.
  //
  // Side effects:
  //   - `props_[...].word_start` is written for every basename character
  //     visited before the query is exhausted.
  //   - `qit_basename_words_` is rebuilt: one query position per visited word
  //     (the position before matching that word), plus a final entry equal to
  //     `query_.cend()` on success.
  //   - On success `prefix_level_` becomes `PrefixLevel::BASENAME_WORD`.
  //   - If the basename runs out before the query does,
  //     `basename_longest_submatch_` and `basename_word_gaps_` are reset to 0.
  //
  // Returns false if the basename is empty, if no query characters are
  // assigned to the basename, or if the query cannot be consumed.
  bool check_basename_match_word_prefix() {
    qit_basename_words_.clear();
    qit_basename_words_.push_back(qit_basename_);

    auto item_it = item_basename_;
    auto const item_last = item_.cend();
    if (item_it == item_last) {
      return false;
    }
    auto query_it = qit_basename_;
    auto const query_last = query_.cend();
    if (query_it == query_last) {
      return false;
    }
    // `props_` parallels `item_`, so offset it to the basename's first
    // character.
    auto props_it = props_.begin() + (item_basename_ - item_.cbegin());

    // Properties of the previously visited character; shared with the lambda
    // below so that word boundaries are detected across calls.
    bool prev_uppercase = props_it->uppercase;
    bool prev_alphanumeric = props_it->alphanumeric;
    // The first character of the basename always starts a word.
    props_it->word_start = true;

    // Advances `item_it` and `props_it` to the beginning of the next word. For
    // each consecutive iterated character in the item matched by a character
    // in the query before `query_last`, advances `query_it`.
    //
    // Precondition: `item_it != item_last`; `query_it != query_last`.
    // Postcondition: `item_it` is always advanced by at least 1.
    auto const consume_word_prefix = [&] {
      bool can_match = true;
      while (true) {
        // Require that all alphanumeric matches in this word be contiguous.
        if (can_match || !prev_alphanumeric) {
          if (*item_it == *query_it) {
            ++query_it;
            if (query_it == query_last) {
              // Query exhausted: stop without advancing `item_it`.
              break;
            }
          } else {
            can_match = false;
          }
        }
        ++item_it;
        if (item_it == item_last) {
          break;
        }
        ++props_it;
        bool const uppercase = props_it->uppercase;
        bool const alphanumeric = props_it->alphanumeric;
        bool const word_start = (!prev_uppercase && uppercase) ||
                                (!prev_alphanumeric && alphanumeric);
        props_it->word_start = word_start;
        prev_uppercase = uppercase;
        prev_alphanumeric = alphanumeric;
        if (word_start) {
          break;
        }
      }
    };

    consume_word_prefix();
    while (query_it != query_last) {
      if (item_it == item_last) {
        // The basename ran out with query characters left over: not a word
        // prefix match.
        basename_longest_submatch_ = 0;
        basename_word_gaps_ = 0;
        return false;
      }
      // If the next unmatched query character doesn't match the first
      // character of the next word, allow partial backtracking (all but the
      // first character) of the match in the previous word in order to find a
      // match for this one.
      auto const c = *item_it;
      if (c != *query_it) {
        // Walk backwards over the query characters consumed by the previous
        // word, stopping before that word's first matched character.
        for (auto steal_rit = ReverseIterator(query_it),
                  steal_rlast = ReverseIterator(qit_basename_words_.back() + 1);
             steal_rit < steal_rlast; ++steal_rit) {
          if (c == *steal_rit) {
            // Rewind the query so that this character is matched by the new
            // word instead.
            query_it = (steal_rit + 1).base();
            break;
          }
        }
      }
      qit_basename_words_.push_back(query_it);
      consume_word_prefix();
    }
    prefix_level_ = PrefixLevel::BASENAME_WORD;
    // Push `query_it` onto `qit_basename_words_` even though we know it's
    // `query_.cend()` to avoid special-casing the end case in
    // `score_basename_word_prefix_match`.
    qit_basename_words_.push_back(query_it);
    return true;
  }
476 | 
  // Computes the scoring metrics for a basename that has already been accepted
  // by `check_basename_match_word_prefix`.
  //
  // Replays the match over the basename using the `word_start` flags in
  // `props_` and the per-word query limits in `qit_basename_words_`, and
  // updates:
  //   - `basename_longest_submatch_`: raised to the longest run of consecutive
  //     matched characters seen here (it is only ever increased, never reset,
  //     by this function);
  //   - `basename_word_gaps_`: incremented each time a word boundary is
  //     crossed and the word just left contained no match;
  //   - `unmatched_suffix_len_`: number of item characters after the last
  //     matched one.
  //
  // This function performs no bounds checks on the item or on
  // `qit_basename_words_`; see the comment inside the loop.
  void score_basename_word_prefix_match() {
    auto item_it = item_basename_;
    auto props_it = props_.cbegin() + (item_basename_ - item_.cbegin());
    auto query_it = qit_basename_;
    auto const query_last = query_.cend();
    // +1 because the first iteration of the loop skips the word start at the
    // beginning of the basename.
    auto qit_words_it = qit_basename_words_.cbegin() + 1;
    // One past the last query character the current word is allowed to match.
    auto query_word_last = *qit_words_it;

    CharCount current_submatch = 0;
    bool any_word_matches = false;

    while (true) {
      if (query_it != query_word_last && *item_it == *query_it) {
        ++query_it;
        current_submatch++;
        any_word_matches = true;
        if (query_it == query_last) {
          break;
        }
      } else {
        // The current run of consecutive matches ends here.
        basename_longest_submatch_ =
            std::max(basename_longest_submatch_, current_submatch);
        current_submatch = 0;
      }
      ++item_it;
      // At this point we know that the basename *is* a word prefix match, so
      // fully consuming the end of the query should be the only possible way
      // to leave this loop. Hence we skip the comparison to `item_.cend()`.
      // (The same applies to `qit_words_it` and `qit_basename_words_.cend()`
      // below.)
      ++props_it;
      if (props_it->word_start) {
        if (!any_word_matches) {
          basename_word_gaps_++;
        }
        any_word_matches = false;
        // Move on to the query limit recorded for the word now starting.
        ++qit_words_it;
        query_word_last = *qit_words_it;
      }
    }
    // Account for the run that was still open when the query was exhausted.
    basename_longest_submatch_ =
        std::max(basename_longest_submatch_, current_submatch);
    // -1 here because we broke out upon reaching the last match (`query_it ==
    // query_last`) before incrementing `item_it`.
    unmatched_suffix_len_ = item_.cend() - item_it - 1;
  }
525 | 
526 |   void score_basename_greedy() {
527 |     auto item_it = item_basename_;
528 |     auto const item_last = item_.cend();
529 |     auto query_it = qit_basename_;
530 |     auto const query_last = query_.cend();
531 |     if (item_it == item_last || query_it == query_last) {
532 |       return;
533 |     }
534 | 
535 |     CharCount current_submatch = 0;
536 | 
537 |     while (true) {
538 |       if (*item_it == *query_it) {
539 |         ++query_it;
540 |         current_submatch++;
541 |         if (query_it == query_last) {
542 |           break;
543 |         }
544 |       } else {
545 |         basename_longest_submatch_ =
546 |             std::max(basename_longest_submatch_, current_submatch);
547 |         current_submatch = 0;
548 |       }
549 |       ++item_it;
550 |       if (item_it == item_last) {
551 |         break;
552 |       }
553 |     }
554 |     basename_longest_submatch_ =
555 |         std::max(basename_longest_submatch_, current_submatch);
556 |     // -1 here because we broke out upon reaching the last match (`query_it ==
557 |     // query_last`) before incrementing `item_it`.
558 |     unmatched_suffix_len_ = item_last - item_it - 1;
559 |   }
560 | 
561 |   // In all of these `get_match_positions_*` functions, we assume that the
562 |   // match state is consistent with a successful last match.
563 | 
564 |   void get_match_positions_sorted_no_prefix(
565 |       std::vector<std::size_t>& posns) const {
566 |     get_match_positions_greedy(posns, item_.cbegin(), item_.cbegin(),
567 |                                query_.cbegin(), query_.cend());
568 |   }
569 | 
570 |   void get_match_positions_component_prefix_dirpath(
571 |       std::vector<std::size_t>& posns) const {
572 |     auto item_rit = ReverseIterator(item_basename_);
573 |     auto const item_rlast = item_.crend();
574 |     auto const item_first = item_.cbegin();
575 |     auto query_rit = ReverseIterator(qit_basename_);
576 |     auto const query_rlast = query_.crend();
577 |     auto query_pc_last = query_rit.base();
578 | 
579 |     while (query_rit != query_rlast) {
580 |       consume_path_component_match_front(item_rit, item_rlast, query_rit,
581 |                                          query_rlast);
582 |       get_match_positions_greedy(posns, item_first, item_rit.base(),
583 |                                  query_rit.base(), query_pc_last);
584 |       query_pc_last = query_rit.base();
585 |     }
586 |   }
587 | 
588 |   void get_match_positions_basename_word_prefix(
589 |       std::vector<std::size_t>& posns) const {
590 |     auto item_it = item_basename_;
591 |     auto const item_first = item_.cbegin();
592 |     auto const item_last = item_.cend();
593 |     auto props_it = props_.cbegin() + (item_basename_ - item_.cbegin());
594 |     auto query_it = qit_basename_;
595 |     auto query_last_it = qit_basename_words_.cbegin();
596 | 
597 |     while (item_it != item_last) {
598 |       if (props_it->word_start) {
599 |         ++query_last_it;
600 |       }
601 |       if (query_it != *query_last_it && *item_it == *query_it) {
602 |         ++query_it;
603 |         posns.push_back(item_it - item_first);
604 |       }
605 |       ++item_it;
606 |       ++props_it;
607 |     }
608 |   }
609 | 
610 |   void get_match_positions_basename_non_word_prefix(
611 |       std::vector<std::size_t>& posns) const {
612 |     get_match_positions_greedy(posns, item_.cbegin(), item_basename_,
613 |                                qit_basename_, query_.cend());
614 |   }
615 | 
616 |   template <typename InputIt1, typename InputIt2>
617 |   void get_match_positions_greedy(std::vector<std::size_t>& posns,
618 |                                   InputIt1 const item_first, InputIt1 item_it,
619 |                                   InputIt2 query_it,
620 |                                   InputIt2 const query_last) const {
621 |     auto const item_last = item_.cend();
622 |     while (item_it != item_last && query_it != query_last) {
623 |       if (*item_it == *query_it) {
624 |         ++query_it;
625 |         posns.push_back(item_it - item_first);
626 |       }
627 |       ++item_it;
628 |     }
629 |   }
630 | 
631 |   static constexpr Score mask_to(Score const x, unsigned const bits) {
632 |     return x & ((std::uint64_t(1) << bits) - 1);
633 |   }
634 | 
635 |   template <typename T>
636 |   static constexpr T penalty(T const x) {
637 |     return std::numeric_limits<T>::max() - x;
638 |   }
639 | 
640 |   // Internal state of an in-progress match on an item. Note that many of these
641 |   // fields are set conditionally; see the implementation for details.
642 | 
643 |   // Decoded copy of the item being matched.
644 |   Vec item_;
645 | 
646 |   // Iterator into `item_` at the beginning of the item's basename.
647 |   Iterator item_basename_;
648 | 
  // Properties of characters in the item. `props_` is used in parallel with
  // `item_`: the entry at a given index describes the character of `item_` at
  // the same index.
  struct CharProperties {
    // If true, the character is uppercase. `make_item_matchcase` lowercases
    // exactly the characters that carry this flag.
    bool uppercase;

    // If true, the character is alphanumeric.
    bool alphanumeric;

    // If true, the character is the start of a word. Written by
    // `check_basename_match_word_prefix` for the basename characters it
    // visits, and read back by `score_basename_word_prefix_match` and
    // `get_match_positions_basename_word_prefix`.
    bool word_start;
  };
  std::vector<CharProperties> props_;
661 | 
662 |   // Iterator into `query_` at the first character matching in the item's
663 |   // basename.
664 |   Iterator qit_basename_;
665 | 
666 |   // Iterators into `query_` before matching each word in the item's basename.
667 |   // Note that if both are set, then `qit_basename_ ==
668 |   // qit_basename_words_[0]`.
669 |   std::vector<Iterator> qit_basename_words_;
670 | 
671 |   // Metrics used to compute score, in order of descending significance.
672 | 
  // Incrementally stronger statements about the quality of the match. Find
  // locations where this field is assigned for details. Higher numeric value
  // is better.
  enum class PrefixLevel {
    // Lowest level. Presumably the value used when neither of the checks
    // below succeeded - confirm where `prefix_level_` is initialized.
    NONE,
    // Assigned by `check_component_match_front` once the whole query has been
    // consumed component by component.
    COMPONENT,
    // Assigned by `check_basename_match_word_prefix` once the basename part
    // of the query has been consumed word by word.
    BASENAME_WORD,
  } prefix_level_;
681 | 
682 |   // If true, the basename of the query matches entirely in the basename of the
683 |   // item. True is better.
684 |   bool whole_basename_match_;
685 | 
686 |   // The length of the longest substring matched in the item's basename. Higher
687 |   // is better.
688 |   CharCount basename_longest_submatch_;
689 | 
690 |   // The number of characters matched in the item's basename. Higher is better.
691 |   CharCount basename_match_count_;
692 | 
693 |   // The number of words without any matches between the first and last words
694 |   // with matches in the basename. Lower is better.
695 |   CharCount basename_word_gaps_;
696 | 
697 |   // The number of consecutive words shared between the beginning of the item's
698 |   // basename and the beginning of the current file's basename. Higher is
699 |   // better.
700 |   CharCount crfile_basename_shared_words_;
701 | 
702 |   // The number of path components that must be traversed between the item's
703 |   // path and the current file's path. Lower is better.
704 |   CharCount crfile_path_distance_;
705 | 
706 |   // The number of consecutive unmatched characters at the end of the item's
707 |   // basename. Since it's easy to add characters at the end of a query to
708 |   // refine a search for a longer item, lower values are weakly preferred.
709 |   CharCount unmatched_suffix_len_;
710 | 
711 |   // Matcher state that is persistent between matches.
712 | 
713 |   // Decoded copy of the query.
714 |   Vec const query_;
715 | 
716 |   // Iterator into `query_` at the beginning of the query's basename.
717 |   Iterator const query_basename_;
718 | 
719 |   // If true, the match is case-sensitive.
720 |   bool const case_sensitive_;
721 | 
722 |   // Decoded copy of the currently open filename.
723 |   Vec const crfile_;
724 | 
725 |   // Iterator into `crfile_` at the beginning of the currently open file's
726 |   // basename.
727 |   Iterator const crfile_basename_;
728 | 
729 |   // Iterator into `crfile_` at the successor to the currently open file's
730 |   // rightmost extension separator.
731 |   Iterator const crfile_ext_;
732 | 
733 |   // Iterators into `crfile_` at the last character of each word in the
734 |   // currently open file's basename.
735 |   std::vector<Iterator> const crfile_basename_word_ends_;
736 | 
737 |   // If false, reject `crfile_` if it appears as an item.
738 |   bool const match_crfile_;
739 | };
740 | 
741 | }  // namespace cpsm
742 | 
743 | #endif /* CPSM_MATCHER_H_ */
744 | 


--------------------------------------------------------------------------------
/src/matcher_test.cc:
--------------------------------------------------------------------------------
  1 | // cpsm - fuzzy path matcher
  2 | // Copyright (C) 2015 the Authors
  3 | //
  4 | // Licensed under the Apache License, Version 2.0 (the "License");
  5 | // you may not use this file except in compliance with the License.
  6 | // You may obtain a copy of the License at
  7 | //
  8 | //     http://www.apache.org/licenses/LICENSE-2.0
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software
 11 | // distributed under the License is distributed on an "AS IS" BASIS,
 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | // See the License for the specific language governing permissions and
 14 | // limitations under the License.
 15 | 
 16 | #include <algorithm>
 17 | #include <cinttypes>
 18 | #include <cstdio>
 19 | #include <stdexcept>
 20 | #include <string>
 21 | #include <vector>
 22 | 
 23 | #include "api.h"
 24 | #include "str_util.h"
 25 | 
 26 | namespace cpsm {
 27 | namespace testing {
 28 | 
 29 | class TestAssertionFailure : public std::exception {
 30 |  public:
 31 |   TestAssertionFailure() : msg_("test assertion failed") {}
 32 | 
 33 |   template <typename... Args>
 34 |   explicit TestAssertionFailure(Args... args)
 35 |       : msg_(str_cat("test assertion failed: ", args...)) {}
 36 | 
 37 |   char const* what() const noexcept override { return msg_.c_str(); }
 38 | 
 39 |  private:
 40 |   std::string msg_;
 41 | };
 42 | 
 43 | struct Matches {
 44 |   using Vec = std::vector<std::string>;
 45 |   using size_type = typename Vec::size_type;
 46 |   Vec matches;
 47 | 
 48 |   typename Vec::const_iterator find(boost::string_ref const item) const {
 49 |     return std::find(matches.cbegin(), matches.cend(), item);
 50 |   }
 51 | 
 52 |   bool matched(boost::string_ref const item) const {
 53 |     return find(item) != matches.cend();
 54 |   }
 55 | 
 56 |   void assert_matched(boost::string_ref const item) const {
 57 |     if (!matched(item)) {
 58 |       throw TestAssertionFailure("incorrectly failed to match '", item, "'");
 59 |     }
 60 |   }
 61 | 
 62 |   void assert_not_matched(boost::string_ref const item) const {
 63 |     if (matched(item)) {
 64 |       throw TestAssertionFailure("incorrectly matched '", item, "'");
 65 |     }
 66 |   }
 67 | 
 68 |   size_type match_index(boost::string_ref const item) const {
 69 |     return find(item) - matches.cbegin();
 70 |   }
 71 | 
 72 |   void assert_match_index(boost::string_ref const item,
 73 |                           size_type const expected_index) const {
 74 |     auto const index = match_index(item);
 75 |     if (index != expected_index) {
 76 |       throw TestAssertionFailure("expected '", item, "' (index ", index,
 77 |                                  ") to have index ", expected_index);
 78 |     }
 79 |   }
 80 | 
 81 |   void assert_better_match(boost::string_ref const better_item,
 82 |                            boost::string_ref const worse_item) const {
 83 |     auto const better_index = match_index(better_item);
 84 |     auto const worse_index = match_index(worse_item);
 85 |     if (better_index >= worse_index) {
 86 |       throw TestAssertionFailure(
 87 |           "expected '", better_item, "' (index ", better_index,
 88 |           ") to be ranked higher (have a lower index) than '", worse_item,
 89 |           "' (index ", worse_index, ")");
 90 |     }
 91 |   }
 92 | };
 93 | 
 94 | Matches match_and_log(std::initializer_list<boost::string_ref> items,
 95 |                       boost::string_ref const query) {
 96 |   Matches m;
 97 |   for_each_match<StringRefItem>(
 98 |       query, Options().set_want_match_info(true),
 99 |       source_from_range<StringRefItem>(begin(items), end(items)),
100 |       [&](StringRefItem item, MatchInfo const* info) {
101 |         std::printf("Matched %s (%s)\n", item.item().data(),
102 |                     info->score_debug_string().c_str());
103 |         m.matches.push_back(copy_string_ref(item.item()));
104 |       });
105 |   return m;
106 | }
107 | 
108 | void test_match_order() {
109 |   auto m = match_and_log({"barfoo", "fbar", "foo/bar", "foo/fbar", "foo/foobar",
110 |                           "foo/foo_bar", "foo/foo_bar_test", "foo/foo_test_bar",
111 |                           "foo/FooBar", "foo/abar", "foo/qux", "foob/ar"},
112 |                          "fb");
113 | 
114 |   m.assert_not_matched("barfoo");
115 |   m.assert_matched("fbar");
116 |   m.assert_matched("foo/bar");
117 |   m.assert_matched("foo/fbar");
118 |   m.assert_matched("foo/foobar");
119 |   m.assert_matched("foo/foo_bar");
120 |   m.assert_matched("foo/foo_bar_test");
121 |   m.assert_matched("foo/foo_test_bar");
122 |   m.assert_matched("foo/FooBar");
123 |   m.assert_matched("foo/abar");
124 |   m.assert_not_matched("foo/qux");
125 |   m.assert_matched("foob/ar");
126 | 
127 |   // "fbar" should rank highest due to the query being a full prefix.
128 |   m.assert_match_index("fbar", 0);
129 |   // "foo/fbar" should rank next highest due to the query being a full prefix,
130 |   // but further away from cur_file (the empty string).
131 |   m.assert_match_index("foo/fbar", 1);
132 |   // "foo/foo_bar" and "foo/FooBar" should both rank next highest due to being
133 |   // detectable word boundary matches, though it's unspecified which of the two
134 |   // is higher.
135 |   m.assert_better_match("foo/fbar", "foo/foo_bar");
136 |   m.assert_better_match("foo/fbar", "foo/FooBar");
137 |   // "foo/foo_bar_test" should rank below either of the above since there are
138 |   // more trailing unmatched characters.
139 |   m.assert_better_match("foo/foo_bar", "foo/foo_bar_test");
140 |   m.assert_better_match("foo/FooBar", "foo/foo_bar_test");
141 |   // "foo/foo_bar_test" should rank above "foo/foo_test_bar" since its matched
142 |   // characters are in consecutive words.
143 |   m.assert_better_match("foo/foo_bar_test", "foo/foo_test_bar");
144 |   // "foo/bar" should rank below all of the above since it breaks the match
145 |   // across multiple path components.
146 |   m.assert_better_match("foo/foo_test_bar", "foo/bar");
147 |   // "foo/foobar" should rank below all of the above since the 'b' is not a
148 |   // detectable word boundary match.
149 |   m.assert_better_match("foo/bar", "foo/foobar");
150 |   // "foo/abar" and "foob/ar" should rank lowest since the matched 'b' isn't
151 |   // even at the beginning of the filename in either case, though it's
152 |   // unspecified which of the two is higher.
153 |   m.assert_better_match("foo/bar", "foo/abar");
154 |   m.assert_better_match("foo/bar", "foob/ar");
155 | }
156 | 
157 | void test_special_paths() {
158 |   auto m = match_and_log({"", "/", "a/", "/a"}, "a");
159 | 
160 |   m.assert_not_matched("");
161 |   m.assert_not_matched("/");
162 |   m.assert_matched("a/");
163 |   m.assert_matched("/a");
164 | }
165 | 
// Runs the test callable `f`, logging its start and outcome to stdout.
// Returns 0 if `f` returns normally and 1 if it throws something derived from
// `std::exception`; any other exception propagates to the caller.
template <typename F>
size_t run_test(F const& f) {
  std::printf("*** Test started\n");
  size_t failures = 0;
  try {
    f();
    std::printf("*** Test passed\n");
  } catch (std::exception const& ex) {
    std::printf("*** Test failed: %s\n", ex.what());
    failures = 1;
  }
  return failures;
}
178 | 
179 | int run_all_tests() {
180 |   size_t failed_tests = 0;
181 |   failed_tests += run_test(test_match_order);
182 |   failed_tests += run_test(test_special_paths);
183 |   if (failed_tests == 0) {
184 |     std::printf("*** All tests passed\n");
185 |   } else {
186 |     std::printf("*** %zu tests failed\n", failed_tests);
187 |   }
188 |   return failed_tests == 0 ? 0 : 1;
189 | }
190 | 
191 | }  // namespace testing
192 | }  // namespace cpsm
193 | 
194 | int main(int argc, char** argv) {
195 |   return cpsm::testing::run_all_tests();
196 | }
197 | 


--------------------------------------------------------------------------------
/src/par_util.h:
--------------------------------------------------------------------------------
 1 | // cpsm - fuzzy path matcher
 2 | // Copyright (C) 2015 the Authors
 3 | //
 4 | // Licensed under the Apache License, Version 2.0 (the "License");
 5 | // you may not use this file except in compliance with the License.
 6 | // You may obtain a copy of the License at
 7 | //
 8 | //     http://www.apache.org/licenses/LICENSE-2.0
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software
11 | // distributed under the License is distributed on an "AS IS" BASIS,
12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | // See the License for the specific language governing permissions and
14 | // limitations under the License.
15 | 
16 | #ifndef CPSM_PAR_UTIL_H_
17 | #define CPSM_PAR_UTIL_H_
18 | 
#include <exception>
#include <memory>
#include <string>
#include <thread>
#include <utility>
#include <vector>
22 | 
23 | #include "str_util.h"
24 | 
25 | namespace cpsm {
26 | 
// Drop-in replacement wrapper around std::thread that handles exceptions safely
// and joins on destruction. (See
// https://akrzemi1.wordpress.com/2012/11/14/not-using-stdthread/.)
//
// An exception escaping the thread function is caught inside the worker and
// recorded; query it with `has_exception()` / `exception_msg()` after the
// thread has been joined (the accessors themselves do not synchronize with a
// running worker).
//
// The recorded state lives in a heap block shared between this object and the
// worker, so the worker never refers to the `Thread` object itself. Moving or
// swapping a `Thread` while its worker is running (for example when a
// `std::vector<Thread>` grows) therefore keeps the state attached to the
// thread it belongs to, and a detached worker keeps its state alive.
class Thread {
 public:
  // Creates an object that does not represent a thread.
  Thread() : state_(std::make_shared<State>()) {}

  // Takes over `other`'s thread and exception state. `other` is left not
  // joinable and reports no exception.
  Thread(Thread&& other) = default;

  // Starts a thread running `f(args...)`. As with `std::thread`, `f` and
  // `args` are decay-copied into the new thread and passed on as rvalues; use
  // `std::ref` to pass a reference.
  template <typename F, typename... Args>
  explicit Thread(F&& f, Args&&... args)
      : state_(std::make_shared<State>()),
        thread_(Runner(), state_, std::forward<F>(f),
                std::forward<Args>(args)...) {}

  // Joins the thread if it is still joinable.
  ~Thread() {
    if (thread_.joinable()) {
      thread_.join();
    }
  }

  // Exchanges both the underlying threads and their exception states.
  void swap(Thread& other) {
    state_.swap(other.state_);
    thread_.swap(other.thread_);
  }

  bool joinable() const { return thread_.joinable(); }
  std::thread::id get_id() const { return thread_.get_id(); }
  std::thread::native_handle_type native_handle() {
    return thread_.native_handle();
  }
  static unsigned hardware_concurrency() {
    return std::thread::hardware_concurrency();
  }

  void join() { thread_.join(); }
  void detach() { thread_.detach(); }

  // Returns true if the thread function exited by throwing.
  bool has_exception() const { return state_ && state_->has_exception_msg; }

  // Returns the recorded exception message (empty if there is none).
  std::string const& exception_msg() const {
    static std::string const no_message;
    return state_ ? state_->exception_msg : no_message;
  }

 private:
  // Outcome of the thread function, shared between owner and worker.
  struct State {
    State() : has_exception_msg(false) {}

    std::string exception_msg;
    bool has_exception_msg;
  };

  // Callable handed to `std::thread`. Its argument types are deduced when the
  // new thread invokes it, i.e. from the decay-copied values, so it accepts
  // whatever `std::thread` passes on.
  struct Runner {
    template <typename F, typename... Args>
    void operator()(std::shared_ptr<State> const& state, F&& f,
                    Args&&... args) const {
      try {
        std::forward<F>(f)(std::forward<Args>(args)...);
      } catch (std::exception const& ex) {
        state->exception_msg = ex.what();
        state->has_exception_msg = true;
      } catch (...) {
        state->exception_msg = "(unknown exception)";
        state->has_exception_msg = true;
      }
    }
  };

  // Declared before `thread_` so that it is initialized before the worker
  // starts. Null only in a moved-from object.
  std::shared_ptr<State> state_;
  std::thread thread_;
};
82 | 
83 | void swap(Thread& x, Thread& y) {
84 |   x.swap(y);
85 | }
86 | 
87 | }  // namespace cpsm
88 | 
89 | #endif /* CPSM_PAR_UTIL_H_ */
90 | 


--------------------------------------------------------------------------------
/src/path_util.h:
--------------------------------------------------------------------------------
 1 | // cpsm - fuzzy path matcher
 2 | // Copyright (C) 2015 the Authors
 3 | //
 4 | // Licensed under the Apache License, Version 2.0 (the "License");
 5 | // you may not use this file except in compliance with the License.
 6 | // You may obtain a copy of the License at
 7 | //
 8 | //     http://www.apache.org/licenses/LICENSE-2.0
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software
11 | // distributed under the License is distributed on an "AS IS" BASIS,
12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | // See the License for the specific language governing permissions and
14 | // limitations under the License.
15 | 
16 | #ifndef CPSM_PATH_UTIL_H_
17 | #define CPSM_PATH_UTIL_H_
18 | 
19 | #include <algorithm>
20 | #include <cstddef>
21 | 
22 | #include <boost/algorithm/cxx14/mismatch.hpp>
23 | #include <boost/utility/string_ref.hpp>
24 | 
25 | #include "str_util.h"
26 | 
27 | namespace cpsm {
28 | 
// PathTraits type for platform paths.
struct PlatformPathTraits {
 private:
  // The character that separates path components on the target platform:
  // a backslash when `_WIN32` is defined, a forward slash otherwise.
  static constexpr char native_path_separator() {
#ifdef _WIN32
    // TODO: Support shellslash
    return '\\';
#else
    return '/';
#endif
  }

 public:
  // Returns true if `c` is the conventional coarsest-grained separator of
  // parts in a filename, i.e. a dot.
  static constexpr bool is_extension_separator(char const c) {
    return '.' == c;
  }

  // Returns true if `c` separates path components.
  static constexpr bool is_path_separator(char const c) {
    return native_path_separator() == c;
  }
};
47 | 
// PathTraits type for non-paths: no character is ever treated as an extension
// separator or as a path separator.
struct NonPathTraits {
  static constexpr bool is_extension_separator(char const /*c*/) {
    return false;
  }

  static constexpr bool is_path_separator(char const /*c*/) {
    return false;
  }
};
53 | 
// Returns an iterator to the start of the basename of the path in
// [`first`, `last`): the position just after the last character for which
// `PathTraits::is_path_separator` is true, or `first` if there is no such
// character. Requires bidirectional iterators.
template <typename PathTraits, typename InputIt>
InputIt path_basename(InputIt first, InputIt last) {
  // Walk backwards; `candidate` is always one past the character examined.
  InputIt candidate = last;
  while (candidate != first) {
    InputIt previous = candidate;
    --previous;
    if (PathTraits::is_path_separator(*previous)) {
      return candidate;
    }
    candidate = previous;
  }
  return first;
}
62 | 
// Returns the distance (in path components) between the two given paths.
//
// The common prefix of [`first1`, `last1`) and [`first2`, `last2`) is
// skipped. If nothing remains of either path the distance is 0; otherwise it
// is the number of path separators (per `PathTraits::is_path_separator`)
// remaining in both paths, plus 1.
//
// The two ranges may use different iterator types.
template <typename PathTraits, typename InputIt1, typename InputIt2>
std::size_t path_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2,
                          InputIt2 last2) {
  // Advance both ranges past their common prefix (equivalent to a
  // four-iterator mismatch).
  while (first1 != last1 && first2 != last2 && *first1 == *first2) {
    ++first1;
    ++first2;
  }
  if (first1 == last1 && first2 == last2) {
    // The paths are identical.
    return 0;
  }
  return std::count_if(first1, last1, PathTraits::is_path_separator) +
         std::count_if(first2, last2, PathTraits::is_path_separator) + 1;
}
74 | 
75 | }  // namespace cpsm
76 | 
77 | #endif  // CPSM_PATH_UTIL_H_
78 | 


--------------------------------------------------------------------------------
/src/python_extension.cc:
--------------------------------------------------------------------------------
  1 | // cpsm - fuzzy path matcher
  2 | // Copyright (C) 2015 the Authors
  3 | //
  4 | // Licensed under the Apache License, Version 2.0 (the "License");
  5 | // you may not use this file except in compliance with the License.
  6 | // You may obtain a copy of the License at
  7 | //
  8 | //     http://www.apache.org/licenses/LICENSE-2.0
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software
 11 | // distributed under the License is distributed on an "AS IS" BASIS,
 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | // See the License for the specific language governing permissions and
 14 | // limitations under the License.
 15 | 
 16 | #define PY_SSIZE_T_CLEAN
 17 | #include <Python.h>
 18 | 
 19 | #include <algorithm>
 20 | #include <memory>
 21 | #include <mutex>
 22 | #include <stdexcept>
 23 | #include <string>
 24 | #include <utility>
 25 | 
 26 | #include <boost/range/adaptor/reversed.hpp>
 27 | #include <boost/utility/string_ref.hpp>
 28 | 
 29 | #include "api.h"
 30 | #include "ctrlp_util.h"
 31 | #include "par_util.h"
 32 | #include "str_util.h"
 33 | 
 34 | namespace {
 35 | 
 36 | struct PyObjectDeleter {  // Deleter that drops one reference on the object.
 37 |   void operator()(PyObject* const obj) const { Py_DECREF(obj); }
 38 | };
 39 | 
 40 | // Reference-owning, self-releasing PyObject smart pointer.
 41 | typedef std::unique_ptr<PyObject, PyObjectDeleter> PyObjPtr;
 42 | 
 43 | // Wrappers around Python 2/3 string type distinctions.
 44 | 
 45 | inline bool PyVimString_AsStringAndSize(PyObject* obj, char** data,
 46 |                                         Py_ssize_t* size) {
 47 | #if PY_MAJOR_VERSION < 3
 48 |   return PyString_AsStringAndSize(obj, data, size) >= 0;
 49 | #else
 50 |   // Python 3 str: expose its UTF-8 bytes; a null pointer means failure.
 51 |   return (*data = const_cast<char*>(PyUnicode_AsUTF8AndSize(obj, size))) !=
 52 |          nullptr;
 53 | #endif
 54 | }
 54 | 
 55 | inline PyObject* PyVimString_FromStringAndSize(char const* data,
 56 |                                                Py_ssize_t size) {
 57 | #if PY_MAJOR_VERSION < 3
 58 |   return PyString_FromStringAndSize(data, size);
 59 | #else
 60 |   return PyUnicode_FromStringAndSize(data, size);
 61 | #endif
 62 | }
 63 | 
 64 | // Item type that wraps another item and pairs it with a Python object, held
 65 | // either as an owned reference (PyObjPtr) or as a borrowed raw pointer.
 66 | template <typename InnerItem, bool IsOwned>
 67 | struct PyObjItem {
 68 |   using Obj = typename std::conditional<IsOwned, PyObjPtr, PyObject*>::type;
 69 | 
 70 |   InnerItem inner;
 71 |   Obj obj;
 72 | 
 73 |   PyObjItem() : inner(), obj() {}  // Borrowed `obj` starts null, not garbage.
 74 |   explicit PyObjItem(InnerItem inner, Obj obj)
 75 |       : inner(std::move(inner)), obj(std::move(obj)) {}
 76 | 
 77 |   boost::string_ref match_key() const { return inner.match_key(); }
 78 |   boost::string_ref sort_key() const { return inner.sort_key(); }
 79 | };
 80 | 
 81 | // Iterators do not necessarily hold a reference on iterated values, so we must
 82 | // do so.
 83 | template <typename MatchMode>
 84 | using PyIterCtrlPItem =
 85 |     PyObjItem<cpsm::CtrlPItem<cpsm::StringRefItem, MatchMode>,
 86 |               /* IsOwned = */ true>;
 87 | 
 88 | // Thread-safe item source that batches items from a Python iterator.
 89 | template <typename MatchMode>
 90 | class PyIterCtrlPMatchSource {
 91 |  public:
 92 |   using Item = PyIterCtrlPItem<MatchMode>;
 93 | 
 94 |   // Throws cpsm::Error unless `iter` passes PyIter_Check().
 95 |   explicit PyIterCtrlPMatchSource(PyObject* const iter) : iter_(iter) {
 96 |     if (!PyIter_Check(iter)) {
 97 |       throw cpsm::Error("input is not iterable");
 98 |     }
 99 |   }
100 | 
101 |   // Appends up to batch_size() items from the iterator to `items`. Returns
102 |   // false once PyIter_Next() yields nothing or an item cannot be read as a
103 |   // string; every later call then returns false immediately.
104 |   bool fill(std::vector<Item>& items) {
105 |     using Inner = cpsm::CtrlPItem<cpsm::StringRefItem, MatchMode>;
106 |     // mu_ guards iter_ and done_ for the whole batch.
107 |     std::lock_guard<std::mutex> lock(mu_);
108 |     if (done_) {
109 |       return false;
110 |     }
111 |     for (Py_ssize_t remaining = batch_size(); remaining > 0; remaining--) {
112 |       // Take ownership of whatever PyIter_Next() hands back (possibly null).
113 |       PyObjPtr next(PyIter_Next(iter_));
114 |       char* data = nullptr;
115 |       Py_ssize_t size = 0;
116 |       if (next == nullptr ||
117 |           !PyVimString_AsStringAndSize(next.get(), &data, &size)) {
118 |         done_ = true;
119 |         return false;
120 |       }
121 |       // The string_ref points at data obtained from `next`; moving `next`
122 |       // into the item keeps that object referenced for as long as the item.
123 |       items.emplace_back(
124 |           Inner(cpsm::StringRefItem(boost::string_ref(data, size))),
125 |           std::move(next));
126 |     }
127 |     return true;
128 |   }
129 | 
130 |   static constexpr Py_ssize_t batch_size() { return 512; }
131 | 
132 |  private:
133 |   std::mutex mu_;
134 |   PyObject* const iter_;
135 |   bool done_ = false;
136 | };
137 | 
138 | // Lists hold references on their elements, so we can use borrowed references.
139 | template <typename MatchMode>
140 | using PyListCtrlPItem =
141 |     PyObjItem<cpsm::CtrlPItem<cpsm::StringRefItem, MatchMode>,
142 |               /* IsOwned = */ false>;
143 | 
144 | // Thread-safe item source that batches items from a Python list.
145 | template <typename MatchMode>
146 | class PyListCtrlPMatchSource {
147 |  public:
148 |   using Item = PyListCtrlPItem<MatchMode>;
149 | 
150 |   // Throws cpsm::Error if PyList_Size() reports a failure for `list`.
151 |   explicit PyListCtrlPMatchSource(PyObject* const list)
152 |       : list_(list), size_(PyList_Size(list)) {
153 |     if (size_ < 0) {
154 |       throw cpsm::Error("input is not a list");
155 |     }
156 |   }
157 | 
158 |   // Appends the next batch_size() list elements (fewer at the tail) to
159 |   // `items`. Returns true only while unread elements remain afterwards; an
160 |   // element that cannot be read as a string ends the stream early.
161 |   bool fill(std::vector<Item>& items) {
162 |     using Inner = cpsm::CtrlPItem<cpsm::StringRefItem, MatchMode>;
163 |     // mu_ guards the cursor i_ and done_ for the whole batch.
164 |     std::lock_guard<std::mutex> lock(mu_);
165 |     if (done_) {
166 |       return false;
167 |     }
168 |     Py_ssize_t const stop = std::min(i_ + batch_size(), size_);
169 |     while (i_ < stop) {
170 |       PyObject* const elem = PyList_GetItem(list_, i_);
171 |       char* data = nullptr;
172 |       Py_ssize_t size = 0;
173 |       if (elem == nullptr ||
174 |           !PyVimString_AsStringAndSize(elem, &data, &size)) {
175 |         // The cursor is left on the offending element.
176 |         done_ = true;
177 |         return false;
178 |       }
179 |       // `elem` is stored as a plain pointer; the list keeps it referenced.
180 |       items.emplace_back(
181 |           Inner(cpsm::StringRefItem(boost::string_ref(data, size))), elem);
182 |       i_++;
183 |     }
184 |     return i_ != size_;
185 |   }
186 | 
187 |   static constexpr Py_ssize_t batch_size() { return 512; }
188 | 
189 |  private:
190 |   std::mutex mu_;
191 |   PyObject* const list_;
192 |   Py_ssize_t i_ = 0;
193 |   Py_ssize_t size_ = 0;
194 |   bool done_ = false;
195 | };
196 | 
197 | // `dst` must be a functor compatible with signature `void(boost::string_ref
198 | // item, boost::string_ref match_key, PyObject* obj, cpsm::MatchInfo* info)`.
199 | template <typename Sink>
200 | void for_each_pyctrlp_match(boost::string_ref const query,
201 |                             cpsm::Options const& opts,
202 |                             cpsm::CtrlPMatchMode const match_mode,
203 |                             PyObject* const items_iter, Sink&& dst) {
204 |   bool const is_list = PyList_Check(items_iter);  // Else: use as an iterator.
205 | #define DO_MATCH_WITH(MMODE)                                                   \
206 |   if (is_list) {                                                               \
207 |     cpsm::for_each_match<PyListCtrlPItem<MMODE>>(                              \
208 |         query, opts, PyListCtrlPMatchSource<MMODE>(items_iter),                \
209 |         [&](PyListCtrlPItem<MMODE> const& item, cpsm::MatchInfo* const info) { \
210 |           dst(item.inner.inner.item(), item.match_key(), item.obj, info);      \
211 |         });                                                                    \
212 |   } else {                                                                     \
213 |     cpsm::for_each_match<PyIterCtrlPItem<MMODE>>(                              \
214 |         query, opts, PyIterCtrlPMatchSource<MMODE>(items_iter),                \
215 |         [&](PyIterCtrlPItem<MMODE> const& item, cpsm::MatchInfo* const info) { \
216 |           dst(item.inner.inner.item(), item.match_key(), item.obj.get(),       \
217 |               info);                                                           \
218 |         });                                                                    \
219 |   }
220 |   switch (match_mode) {  // Pick the item type matching the runtime mode.
221 |     case cpsm::CtrlPMatchMode::FULL_LINE:
222 |       DO_MATCH_WITH(cpsm::FullLineMatch);
223 |       break;
224 |     case cpsm::CtrlPMatchMode::FILENAME_ONLY:
225 |       DO_MATCH_WITH(cpsm::FilenameOnlyMatch);
226 |       break;
227 |     case cpsm::CtrlPMatchMode::FIRST_NON_TAB:
228 |       DO_MATCH_WITH(cpsm::FirstNonTabMatch);
229 |       break;
230 |     case cpsm::CtrlPMatchMode::UNTIL_LAST_TAB:
231 |       DO_MATCH_WITH(cpsm::UntilLastTabMatch);
232 |       break;
233 |   }
234 | #undef DO_MATCH_WITH
235 | }
236 | 
237 | // Returns the number of matcher threads to use: the detected hardware
238 | // concurrency (1 if that is reported as 0), capped at `max_threads` when
239 | // `max_threads` is nonzero.
240 | unsigned int get_nr_threads(unsigned int const max_threads) {
241 |   std::size_t const detected = cpsm::Thread::hardware_concurrency();
242 |   std::size_t const available = (detected == 0) ? 1 : detected;
243 |   bool const capped = (max_threads != 0) && (available > max_threads);
244 |   std::size_t const chosen = capped ? max_threads : available;
245 |   return chosen;
246 | }
247 | 
248 | }  // namespace
249 | 
250 | extern "C" {
251 | 
252 | constexpr char CTRLP_MATCH_DOC[] =  // Docstring installed for ctrlp_match.
253 | "ctrlp_match(\n"
254 | "    items, query, limit=-1, mmode=None, ispath=False, crfile=None,\n"
255 | "    highlight_mode=None, match_crfile=False, max_threads=0,\n"
256 | "    query_inverting_delimiter=None, regex_line_prefix=None, unicode=False)\n"
257 | "\n"
258 | "Returns a tuple `(results, regexes)` containing information about the items\n"
259 | "in `items` that match `query`, in order of descending match quality.\n"
260 | "\n"
261 | "Options:\n"
262 | "limit -- if positive, the maximum number of results to return\n"
263 | "mmode -- CtrlP match mode (default 'full-line', i.e. full path mode)\n"
264 | "ispath -- if true, all items are paths\n"
265 | "crfile -- if set, the currently open file\n"
266 | "highlight_mode -- controls `regexes`, see README\n"
267 | "match_crfile -- if false, never match `crfile`\n"
268 | "max_threads -- if positive, limit on the number of matcher threads\n"
269 | "query_inverting_delimiter -- see README\n"
270 | "regex_line_prefix -- prefix for each regex in `regexes`\n"
271 | "unicode -- if true, all items are UTF-8-encoded";
272 | 
273 | // Python entry point for ctrlp_match(); CTRLP_MATCH_DOC describes arguments.
274 | static PyObject* cpsm_ctrlp_match(PyObject* self, PyObject* args,
275 |                                   PyObject* kwargs) {
276 |   static char const* kwlist[] = {"items", "query", "limit", "mmode", "ispath",
277 |                                  "crfile", "highlight_mode", "match_crfile",
278 |                                  "max_threads", "query_inverting_delimiter",
279 |                                  "regex_line_prefix", "unicode", nullptr};
280 |   // Required parameters.
281 |   PyObject* items_obj;
282 |   char const* query_data;
283 |   Py_ssize_t query_size;
284 |   // CtrlP-provided options.
285 |   int limit_int = -1;
286 |   char const* mmode_data = nullptr;
287 |   Py_ssize_t mmode_size = 0;
288 |   int is_path = 0;
289 |   char const* crfile_data = nullptr;
290 |   Py_ssize_t crfile_size = 0;
291 |   // cpsm-specific options.
292 |   char const* highlight_mode_data = nullptr;
293 |   Py_ssize_t highlight_mode_size = 0;
294 |   int match_crfile = 0;
295 |   int max_threads_int = 0;
296 |   char const* query_inverting_delimiter_data = nullptr;
297 |   Py_ssize_t query_inverting_delimiter_size = 0;
298 |   char const* regex_line_prefix_data = nullptr;
299 |   Py_ssize_t regex_line_prefix_size = 0;
300 |   int unicode = 0;
301 |   if (!PyArg_ParseTupleAndKeywords(
302 |           args, kwargs, "Os#|iz#iz#z#iiz#z#i", const_cast<char**>(kwlist),
303 |           &items_obj, &query_data, &query_size, &limit_int, &mmode_data,
304 |           &mmode_size, &is_path, &crfile_data, &crfile_size,
305 |           &highlight_mode_data, &highlight_mode_size, &match_crfile,
306 |           &max_threads_int, &query_inverting_delimiter_data,
307 |           &query_inverting_delimiter_size, &regex_line_prefix_data,
308 |           &regex_line_prefix_size, &unicode)) {
309 |     return nullptr;
310 |   }
311 | 
312 |   try {
313 |     std::string query(query_data, query_size);
314 |     boost::string_ref query_inverting_delimiter(query_inverting_delimiter_data,
315 |                                                 query_inverting_delimiter_size);
316 |     if (!query_inverting_delimiter.empty()) {
317 |       if (query_inverting_delimiter.size() > 1) {
318 |         throw cpsm::Error(
319 |             "query inverting delimiter must be a single character");
320 |       }
321 |       query = cpsm::str_join(boost::adaptors::reverse(cpsm::str_split(
322 |                                  query, query_inverting_delimiter[0])),
323 |                              "");
324 |     }
325 | 
326 |     auto const mopts =
327 |         cpsm::Options()
328 |             .set_crfile(boost::string_ref(crfile_data, crfile_size))
329 |             .set_limit((limit_int >= 0) ? std::size_t(limit_int) : 0)
330 |             .set_match_crfile(match_crfile)
331 |             .set_nr_threads(
332 |                  get_nr_threads((max_threads_int >= 0)
333 |                                     ? static_cast<unsigned int>(max_threads_int)
334 |                                     : 0))
335 |             .set_path(is_path)
336 |             .set_unicode(unicode)
337 |             .set_want_match_info(true);
338 |     boost::string_ref const highlight_mode(highlight_mode_data,
339 |                                            highlight_mode_size);
340 | 
341 |     PyObjPtr output_tuple(PyTuple_New(2));
342 |     if (!output_tuple) return nullptr;
343 |     PyObjPtr matches_list(PyList_New(0));
344 |     if (!matches_list) return nullptr;
345 |     std::vector<std::string> highlight_regexes;
346 |     for_each_pyctrlp_match(
347 |         query, mopts,
348 |         cpsm::parse_ctrlp_match_mode(boost::string_ref(mmode_data, mmode_size)),
349 |         items_obj,
350 |         [&](boost::string_ref const item, boost::string_ref const match_key,
351 |             PyObject* const obj, cpsm::MatchInfo* const info) {
352 |           if (PyList_Append(matches_list.get(), obj) < 0) {
353 |             throw cpsm::Error("match appending failed");
354 |           }
355 |           auto match_positions = info->match_positions();
356 |           // Adjust match positions to account for substringing.
357 |           std::size_t const delta = match_key.data() - item.data();
358 |           for (auto& pos : match_positions) {
359 |             pos += delta;
360 |           }
361 |           cpsm::get_highlight_regexes(
362 |               highlight_mode, item, match_positions, highlight_regexes,
363 |               boost::string_ref(regex_line_prefix_data,
364 |                                 regex_line_prefix_size));
365 |         });
366 |     // A source stops early with a Python exception pending if an item is not
367 |     // a string or the iterator raises: propagate it, not a partial result.
368 |     if (PyErr_Occurred()) return nullptr;
369 |     if (PyTuple_SetItem(output_tuple.get(), 0, matches_list.release())) {
370 |       return nullptr;
371 |     }
372 |     PyObjPtr regexes_list(PyList_New(0));
373 |     if (!regexes_list) {
374 |       return nullptr;
375 |     }
376 |     for (auto const& regex : highlight_regexes) {
377 |       PyObjPtr regex_str(
378 |           PyVimString_FromStringAndSize(regex.data(), regex.size()));
379 |       if (!regex_str) {
380 |         return nullptr;
381 |       }
382 |       if (PyList_Append(regexes_list.get(), regex_str.get()) < 0) {
383 |         return nullptr;
384 |       }
385 |     }
386 |     if (PyTuple_SetItem(output_tuple.get(), 1, regexes_list.release())) {
387 |       return nullptr;
388 |     }
389 |     return output_tuple.release();
390 |   } catch (std::exception const& ex) {
391 |     PyErr_SetString(PyExc_RuntimeError, ex.what());
392 |     return nullptr;
393 |   }
394 | }
395 | 
396 | static PyMethodDef cpsm_py_methods[] = {  // Method table of module cpsm_py.
397 |     {"ctrlp_match", reinterpret_cast<PyCFunction>(cpsm_ctrlp_match),
398 |      METH_VARARGS | METH_KEYWORDS, CTRLP_MATCH_DOC},
399 |     {nullptr, nullptr, 0, nullptr}};  // All-null entry terminates the table.
400 | 
401 | #if PY_MAJOR_VERSION < 3
402 | PyMODINIT_FUNC initcpsm_py() { Py_InitModule("cpsm_py", cpsm_py_methods); }
403 | #else
404 | static struct PyModuleDef moduledef = {
405 |     PyModuleDef_HEAD_INIT,
406 |     "cpsm_py",        /* m_name */
407 |     nullptr,          /* m_doc */
408 |     -1,               /* m_size */
409 |     cpsm_py_methods,  /* m_methods */
410 |     nullptr,          /* remaining optional fields are left unset */
411 |     nullptr,
412 |     nullptr,
413 |     nullptr
414 | };
415 | 
416 | PyMODINIT_FUNC PyInit_cpsm_py() { return PyModule_Create(&moduledef); }
417 | #endif
418 | 
419 | } /* extern "C" */
420 | 


--------------------------------------------------------------------------------
/src/str_util.cc:
--------------------------------------------------------------------------------
 1 | // cpsm - fuzzy path matcher
 2 | // Copyright (C) 2015 the Authors
 3 | //
 4 | // Licensed under the Apache License, Version 2.0 (the "License");
 5 | // you may not use this file except in compliance with the License.
 6 | // You may obtain a copy of the License at
 7 | //
 8 | //     http://www.apache.org/licenses/LICENSE-2.0
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software
11 | // distributed under the License is distributed on an "AS IS" BASIS,
12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | // See the License for the specific language governing permissions and
14 | // limitations under the License.
15 | 
16 | #include "str_util.h"
17 | 
18 | namespace cpsm {
19 | 
20 | std::vector<boost::string_ref> str_split(boost::string_ref str,
21 |                                          char const delimiter) {
22 |   // Peel one field off the front per iteration. Whatever is left once no
23 |   // delimiter remains (possibly nothing) becomes the final field, so the
24 |   // result always holds one more field than `str` has delimiters.
25 |   std::vector<boost::string_ref> fields;
26 |   for (auto cut = str.find_first_of(delimiter);
27 |        cut != boost::string_ref::npos; cut = str.find_first_of(delimiter)) {
28 |     fields.push_back(str.substr(0, cut));
29 |     str.remove_prefix(cut + 1);
30 |   }
31 |   fields.push_back(str);
32 |   return fields;
33 | }
34 | 
35 | }  // namespace cpsm
36 | 


--------------------------------------------------------------------------------
/src/str_util.h:
--------------------------------------------------------------------------------
  1 | // cpsm - fuzzy path matcher
  2 | // Copyright (C) 2015 the Authors
  3 | //
  4 | // Licensed under the Apache License, Version 2.0 (the "License");
  5 | // you may not use this file except in compliance with the License.
  6 | // You may obtain a copy of the License at
  7 | //
  8 | //     http://www.apache.org/licenses/LICENSE-2.0
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software
 11 | // distributed under the License is distributed on an "AS IS" BASIS,
 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | // See the License for the specific language governing permissions and
 14 | // limitations under the License.
 15 | 
 16 | #ifndef CPSM_STR_UTIL_H_
 17 | #define CPSM_STR_UTIL_H_
 18 | 
 19 | #include <cstdint>
 20 | #include <sstream>
 21 | #include <stdexcept>
 22 | #include <string>
 23 | #include <utility>
 24 | #include <vector>
 25 | 
 26 | #include <boost/utility/string_ref.hpp>
 27 | 
 28 | #if CPSM_CONFIG_ICU
 29 | #include <unicode/uchar.h>
 30 | #endif
 31 | 
 32 | namespace cpsm {
 33 | 
 34 | inline void str_cat_impl(std::stringstream&) {}  // Base case: nothing left.
 35 | 
 36 | template <typename T, typename... Args>
 37 | void str_cat_impl(std::stringstream& ss, T const& x, Args const&... args) {
 38 |   ss << x;
 39 |   str_cat_impl(ss, args...);
 40 | }
 41 | 
 42 | // Concatenates an arbitrary number of arguments that can be stringified
 43 | // through a stringstream; str_cat() with no arguments returns "".
 44 | template <typename... Args>
 45 | std::string str_cat(Args... args) {
 46 |   std::stringstream ss;
 47 |   str_cat_impl(ss, args...);
 48 |   return ss.str();
 49 | }
 50 | 
 51 | // Splits a string into substrings separated by a delimiter.
 52 | std::vector<boost::string_ref> str_split(boost::string_ref str,
 53 |                                          char const delimiter);
 54 | 
 55 | // Joins the elements of an iterable, each stringified through a
 56 | // stringstream, placing `sep` between consecutive elements.
 57 | template <typename T>
 58 | std::string str_join(T const& xs, boost::string_ref const sep) {
 59 |   std::stringstream out;
 60 |   bool first = true;
 61 |   for (auto const& x : xs) {
 62 |     out << (first ? boost::string_ref() : sep) << x;
 63 |     first = false;
 64 |   }
 65 |   return out.str();
 66 | }
 67 | 
 68 | // Exception type used by this package. what() yields the stringified
 69 | // constructor arguments joined together, or "(unknown error)" without any.
 70 | class Error : public std::exception {
 71 |  public:
 72 |   Error() : msg_("(unknown error)") {}
 73 | 
 74 |   template <typename... Args>
 75 |   explicit Error(Args... args) : msg_(str_cat(args...)) {}
 76 | 
 77 |   char const* what() const noexcept override { return msg_.c_str(); }
 78 | 
 79 |  private:
 80 |   std::string msg_;
 81 | };
 82 | 
 83 | // Returns a new `std::string` holding a copy of the characters viewed by the
 84 | // given `boost::string_ref`.
 85 | inline std::string copy_string_ref(boost::string_ref const sref) {
 86 |   return std::string(sref.begin(), sref.end());
 87 | }
 88 | 
 89 | // Returns a string_ref viewing (not copying) the chars in [first, last).
 90 | template <typename It>  // NOTE: `*first` is evaluated even if first == last.
 91 | boost::string_ref ref_str_iters(It first, It last) {
 92 |   return boost::string_ref(&*first, last - first);
 93 | }
 94 | 
 95 | // StringTraits type for paths that are 7-bit clean, which is the common case
 96 | // for source code.
 97 | struct SimpleStringTraits {
 98 |   typedef char Char;
 99 | 
100 |   // Invokes `f(c, pos, len)` for each character `c` of `str`, where `pos` is
101 |   // the byte offset of `c` within `str` and `len` is its length in bytes
102 |   // (always 1 here, since every byte is treated as one character).
103 |   template <typename F>
104 |   static void for_each_char(boost::string_ref const str, F const& f) {
105 |     std::size_t pos = 0;
106 |     for (char const c : str) f(c, pos++, 1);
107 |   }
108 | 
109 |   // Returns true if `c` is an ASCII letter or an ASCII digit.
110 |   static constexpr bool is_alphanumeric(Char const c) {
111 |     return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') ||
112 |            ('0' <= c && c <= '9');
113 |   }
114 | 
115 |   // Returns true if `c` is an ASCII uppercase letter.
116 |   static constexpr bool is_uppercase(Char const c) {
117 |     return 'A' <= c && c <= 'Z';
118 |   }
119 | 
120 |   // Returns the lowercase version of the given uppercase letter (no check is
121 |   // made that `c` actually is one).
122 |   static constexpr Char uppercase_to_lowercase(Char const c) {
123 |     return c - 'A' + 'a';
124 |   }
125 | };
126 | 
127 | template <typename StringTraits>  // Appends the decoded `str` to `chars`.
128 | void decode_to(boost::string_ref const str,
129 |                std::vector<typename StringTraits::Char>& chars) {
130 |   chars.reserve(str.size());  // Each decoded character consumes >= 1 byte.
131 |   StringTraits::for_each_char(str, [&chars](typename StringTraits::Char ch,
132 |                                             int, int) { chars.push_back(ch); });
133 | }
134 | 
135 | template <typename StringTraits>
136 | std::vector<typename StringTraits::Char> decode(boost::string_ref const str) {
137 |   std::vector<typename StringTraits::Char> decoded;
138 |   decode_to<StringTraits>(str, decoded);
139 |   return decoded;  // Fresh vector holding the decoded form of `str`.
140 | }
141 | 
142 | #if CPSM_CONFIG_ICU
143 | 
144 | // StringTraits type for UTF-8-encoded strings. Non-UTF-8 bytes are decoded as
145 | // the low surrogate 0xdc00+(byte) so that a match can still be attempted for
146 | // malformed strings.
147 | struct Utf8StringTraits {
148 |   typedef char32_t Char;
149 | 
150 |   template <typename F>
151 |   static void for_each_char(boost::string_ref str, F const& f) {
152 |     std::size_t pos = 0;
153 |     char32_t b0 = 0;
154 |     // Even though most of this function deals with byte-sized quantities, use
155 |     // char32_t throughout to avoid casting.
156 |     auto const lookahead = [&](size_t n) -> char32_t {
157 |       if (n >= str.size()) {
158 |         return 0;
159 |       }
160 |       return static_cast<unsigned char>(str[n]);  // Avoid sign extension.
161 |     };
162 |     auto const decode_as = [&](char32_t c, std::size_t len) {
163 |       f(c, pos, len);
164 |       str.remove_prefix(len);
165 |       pos += len;
166 |     };
167 |     auto const invalid = [&]() { decode_as(0xdc00 + b0, 1); };
168 |     auto const is_continuation =
169 |         [](char32_t b) -> bool { return (b & 0xc0) == 0x80; };
170 |     while (!str.empty()) {
171 |       b0 = lookahead(0);  // Assign, don't shadow: invalid() reads this b0.
172 |       if (b0 == 0x00) {
173 |         // Input is a string_ref, not a null-terminated string - premature null?
174 |         invalid();
175 |       } else if (b0 < 0x80) {
176 |         // 1-byte character
177 |         decode_as(b0, 1);
178 |       } else if (b0 < 0xc2) {
179 |         // Continuation or overlong encoding
180 |         invalid();
181 |       } else if (b0 < 0xe0) {
182 |         // 2-byte sequence
183 |         auto const b1 = lookahead(1);
184 |         if (!is_continuation(b1)) {
185 |           invalid();
186 |         } else {
187 |           decode_as(((b0 & 0x1f) << 6) | (b1 & 0x3f), 2);
188 |         }
189 |       } else if (b0 < 0xf0) {
190 |         // 3-byte sequence
191 |         auto const b1 = lookahead(1), b2 = lookahead(2);
192 |         if (!is_continuation(b1) || !is_continuation(b2)) {
193 |           invalid();
194 |         } else if (b0 == 0xe0 && b1 < 0xa0) {
195 |           // Overlong encoding
196 |           invalid();
197 |         } else {
198 |           decode_as(((b0 & 0x0f) << 12) | ((b1 & 0x3f) << 6) | (b2 & 0x3f), 3);
199 |         }
200 |       } else if (b0 < 0xf5) {
201 |         // 4-byte sequence
202 |         auto const b1 = lookahead(1), b2 = lookahead(2), b3 = lookahead(3);
203 |         if (!is_continuation(b1) || !is_continuation(b2) ||
204 |             !is_continuation(b3)) {
205 |           invalid();
206 |         } else if (b0 == 0xf0 && b1 < 0x90) {
207 |           // Overlong encoding
208 |           invalid();
209 |         } else if (b0 == 0xf4 && b1 >= 0x90) {
210 |           // > U+10FFFF
211 |           invalid();
212 |         } else {
213 |           decode_as(((b0 & 0x07) << 18) | ((b1 & 0x3f) << 12) |
214 |                         ((b2 & 0x3f) << 6) | (b3 & 0x3f),
215 |                     4);
216 |         }
217 |       } else {
218 |         // > U+10FFFF
219 |         invalid();
220 |       }
221 |     }
222 |   }
223 | 
224 |   static bool is_alphanumeric(Char const c) {
225 |     return u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM);
226 |   }
227 | 
228 |   static bool is_uppercase(Char const c) {
229 |     return u_hasBinaryProperty(c, UCHAR_UPPERCASE);
230 |   }
231 | 
232 |   static Char uppercase_to_lowercase(Char const c) {
233 |     return u_tolower(c);
234 |   }
235 | };
236 | 
237 | #else  // CPSM_CONFIG_ICU
238 | 
239 | // Stand-in used when CPSM_CONFIG_ICU is off. It declares the same members
240 | // as the other StringTraits types in this header, but each of them throws
241 | // cpsm::Error instead of doing any work.
242 | struct Utf8StringTraits {
243 |   typedef char32_t Char;
244 | 
245 |   // Always throws; never returns.
246 |   [[noreturn]] static void unimplemented() {
247 |     throw Error("cpsm built without Unicode support");
248 |   }
249 | 
250 |   // Parameters are left unnamed below because none of them is ever used.
251 |   template <typename F>
252 |   static void for_each_char(boost::string_ref, F const&) {
253 |     unimplemented();
254 |   }
255 | 
256 |   static bool is_alphanumeric(Char const) { unimplemented(); }
257 | 
258 |   static bool is_uppercase(Char const) { unimplemented(); }
259 | 
260 |   static Char uppercase_to_lowercase(Char const) { unimplemented(); }
261 | 
262 | };
263 | 
264 | #endif // CPSM_CONFIG_ICU
265 | 
266 | }  // namespace cpsm
267 | 
268 | #endif /* CPSM_STR_UTIL_H_ */
269 | 


--------------------------------------------------------------------------------