├── .editorconfig
├── .eslintrc
├── .gitignore
├── .npmignore
├── .travis.yml
├── LICENSE
├── README.md
├── example
└── index.html
├── lib
└── commonregex.js
├── package.json
└── test
└── CommonRegexJS_test.js
/.editorconfig:
--------------------------------------------------------------------------------
1 | # http://editorconfig.org
2 | root = true
3 |
4 | [*]
5 | charset = utf-8
6 | end_of_line = lf
7 | indent_style = space
8 | indent_size = 2
9 | trim_trailing_whitespace = true
10 | insert_final_newline = true
11 |
12 | [*.md]
13 | trim_trailing_whitespace = false
14 |
--------------------------------------------------------------------------------
/.eslintrc:
--------------------------------------------------------------------------------
1 | {
2 | "env": {
3 | "node": true,
4 | "es6": true,
5 | "browser": true,
6 | "amd": true,
7 | "mocha": true
8 | },
9 | "extends": "eslint:recommended",
10 | "rules": {
11 | "indent": [2, 2],
12 | "linebreak-style": [2, "unix"],
13 | "quotes": [2, "single"],
14 | "no-undef": 2,
15 | "semi": [2, "always"],
16 | "object-curly-spacing": [2, "always"],
17 | "no-useless-concat": 2,
18 | "prefer-template": 0
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | .idea/
3 | *.log
4 | .DS_Store
5 |
--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
1 | npm-debug.log
2 | node_modules
3 | example
4 | docs
5 | .idea
6 |
7 |
8 | # don't ignore .npmignore files
9 | !.npmignore
10 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: node_js
2 |
3 | node_js:
4 | - "0.10"
5 | - "0.12"
6 | - "4"
7 | - "5"
8 | - "6"
9 |
10 | script:
11 | - npm test
12 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | CommonRegexJS
2 | =============
3 |
4 | [CommonRegex](https://github.com/madisonmay/CommonRegex/ "CommonRegex") port for JavaScript
5 |
6 | Find many kinds of common information in a string.
7 |
8 | Pull requests welcome!
9 |
10 | Please note that this is currently English/US specific.
11 |
12 | [![Build Status](https://travis-ci.org/talyssonoc/commonregexjs.svg?branch=master)](https://travis-ci.org/talyssonoc/commonregexjs)
13 |
14 | Usage
15 | =====
16 |
17 | You can use CommonRegexJS normally, using a script tag:
18 |
19 | ```html
20 | <script src="commonregex.js"></script>
21 | ```
22 |
23 | Or import it with RequireJS by putting the `commonregex.js` file inside your RequireJS base directory and using it like this:
24 |
25 | ```js
26 | requirejs(['commonregex'], function(CommonRegex) {
27 | //Use CommonRegex normally here
28 | });
29 | ```
30 |
31 | Or install via NPM with
32 |
33 | ```sh
34 | $ npm install commonregex
35 | ```
36 |
37 | And use like this:
38 |
39 | ```js
40 | import CommonRegex from 'commonregex'; // For ES6 environments
41 |
42 | var CommonRegex = require('commonregex'); // For ES5 environments
43 | ```
44 |
45 | API
46 | ===
47 |
48 | You can instantiate a CommonRegex object by passing a string to the constructor, then use the object's properties to access the matches found in that string. The methods can also be called with another string as a parameter to get the matches for that string instead. Alternatively, you can omit the string in the constructor and just use the methods.
49 |
50 | Available properties and their equivalent methods:
51 |
52 | * `#dates` or `#getDates([text])`
53 | * `#times` or `#getTimes([text])`
54 | * `#phones` or `#getPhones([text])`
55 | * `#links` or `#getLinks([text])`
56 | * `#emails` or `#getEmails([text])`
57 | * `#IPv4` or `#getIPv4([text])`
58 | * `#IPv6` or `#getIPv6([text])`
59 | * `#hexColors` or `#getHexColors([text])`
60 | * `#acronyms` or `#getAcronyms([text])`
61 | * `#money` or `#getMoney([text])`
62 | * `#percentages` or `#getPercentages([text])` (matches percentages between 0.00% and 100.00%)
63 | * `#creditCards` or `#getCreditCards([text])`
64 | * `#addresses` or `#getAddresses([text])`
65 |
66 | The regexes used internally are available at `CommonRegex.regexes`.
67 |
68 | Examples
69 | ========
70 |
71 | var text = 'John, please get that article on www.linkedin.com to me by 5:00PM\n'
72 | + 'on Jan 9th 2012. 4:00 would be ideal, actually. If you have any questions,\n'
73 | + 'you can reach my associate at (012)-345-6789 or associative@mail.com.\n'
74 | + 'I\'ll be on UK during the whole week on a J.R.R. Tolkien convention.';
75 |
76 | var commonRegex = new CommonRegex(text);
77 | console.log(commonRegex.dates);
78 | //logs ["Jan 9th 2012"]
79 | console.log(commonRegex.times);
80 | //logs ["5:00PM", "4:00"]
81 | console.log(commonRegex.phones);
82 | //logs ["(012)-345-6789"]
83 | console.log(commonRegex.links);
84 | //logs ["www.linkedin.com"]
85 | console.log(commonRegex.emails);
86 | //logs ["associative@mail.com"]
87 | console.log(commonRegex.getAcronyms());
88 | //logs ["UK", "J.R.R."]
89 |
90 | Alternatively, you can generate a single CommonRegex instance and use it to parse multiple segments of text.
91 |
92 | var commonRegex = new CommonRegex();
93 | console.log(commonRegex.getTimes('When are you free? Do you want to meet up for coffee at 4:00?'));
94 | //logs ["4:00"]
95 | console.log(commonRegex.getMoney('They said the price was US$5,000.90, actually it is US$3,900.5. It\'s $1100.4 less, can you imagine this?'));
96 | //logs ["US$5,000.90", "US$3,900.5", "$1100.4"]
97 | console.log(commonRegex.getPercentages('I\'m 99.9999999% sure that I\'ll get a raise of 5%.'));
98 | //logs ["99.9999999%", "5%"]
99 | console.log(commonRegex.getIPv6('The IPv6 address for localhost is 0:0:0:0:0:0:0:1, or alternatively, ::1.'));
100 | //logs ["0:0:0:0:0:0:0:1", "::1"]
101 |
102 |
103 | CommonRegex Ports
104 | =================
105 | There are CommonRegex ports for other languages, see [here](https://github.com/madisonmay/CommonRegex/#commonregex-ports "CommonRegex ports")
106 |
--------------------------------------------------------------------------------
/example/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | CommonRegex JS port example
4 |
5 |
6 |
29 |
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/lib/commonregex.js:
--------------------------------------------------------------------------------
/**
 * CommonRegex: finds common kinds of information (dates, times, phones,
 * links, emails, IP addresses, ...) in a string.
 *
 * UMD wrapper: registers as an anonymous AMD module when `define.amd` exists,
 * assigns `module.exports` under CommonJS, and otherwise attaches
 * `CommonRegex` to the global root object.
 */
(function(root, factory) {
  if (typeof define === 'function' && define.amd) {
    // AMD. Register as an anonymous module.
    define(factory);
  } else if (typeof exports === 'object') {
    // CommonJS
    module.exports = factory();
  } else {
    // Browser globals
    root.CommonRegex = factory();
  }
}(
  // Top-level `this` is the global object in classic scripts but `undefined`
  // in ES modules and strict wrappers, so fall back to `globalThis` / `self`.
  this || (typeof globalThis !== 'undefined' ? globalThis : (typeof self !== 'undefined' ? self : undefined)),
  function() {
    /**
     * @param {String} [_text] Text to search. When provided (anything other
     *   than `undefined`), one lazy, cached property per entry of
     *   `CommonRegex.regexes` (`dates`, `times`, ...) is defined on the instance.
     */
    function CommonRegex(_text) {
      this.text = _text || '';

      if (_text !== undefined) {

        // Use lazy evaluation: a regex only runs the first time its property
        // is read, and the result is cached on the instance under `_<name>`.
        regexesNames.forEach(function(r) {
          Object.defineProperty(this, r, {
            get: function() {
              var propertyName = '_' + r;

              if (!this[propertyName]) {
                this[propertyName] = this.getMatches(this.text, CommonRegex.regexes[r]);
              }

              return this[propertyName];
            }
          });
        }.bind(this));
      }
    }

    /**
     * Used to get all the matches of a regex from a string
     * @param  {String} text  Text to look for the matches; a falsy value falls
     *                        back to the instance text
     * @param  {Regexp} regex Regex to match the text
     * @return {Array}        Array of matches (empty when nothing matches)
     */
    CommonRegex.prototype.getMatches = function getMatches(_text, regex) {
      var text = _text || this.text || '';
      // Coerce so non-string input (e.g. a number) yields matches instead of
      // throwing "text.match is not a function".
      var matches = String(text).match(regex);

      return matches || [];
    };

    /**
     * Regexes used internally, keyed by the kind of information they find.
     * The key names drive both the lazy instance properties and the
     * generated `get<Name>` methods.
     */
    CommonRegex.regexes = {

      dates: (function() {
        // Wraps a pattern in an optional non-capturing group.
        function opt(regex) {
          return '(?:' + regex + ')?';
        }

        // Wraps a pattern in a non-capturing group.
        function group(regex) {
          return '(?:' + regex + ')';
        }

        // Joins patterns as alternatives.
        function any(regexes) {
          return regexes.join('|');
        }

        var monthRegex = '(?:jan\\.?|january|feb\\.?|february|mar\\.?|march|apr\\.?|april|may|jun\\.?|june|jul\\.?|july|aug\\.?|august|sep\\.?|september|oct\\.?|october|nov\\.?|november|dec\\.?|december)';
        var dayRegex = '[0-3]?\\d(?:st|nd|rd|th)?';
        var yearRegex = '\\d{4}';

        // "<day> [of] <month>" or "<month> <day>", optional comma and year,
        // or a numeric d-m-y / d/m/y form.
        var datesRegex = group(any([dayRegex + '\\s+(?:of\\s+)?' + monthRegex, monthRegex + '\\s+' + dayRegex])) + '(?:\\,)?\\s*' + opt(yearRegex) + '|[0-3]?\\d[-/][0-3]?\\d[-/]\\d{2,4}';

        return new RegExp(datesRegex, 'gim');
      }()),

      times: /\b((0?[0-9]|1[0-2])(:[0-5][0-9])?(am|pm)|([01]?[0-9]|2[0-3]):[0-5][0-9])/gim,

      phones: /(\d?[^\s\w]*(?:\(?\d{3}\)?\W*)?\d{3}\W*\d{4})/gim,

      links: /((?:https?:\/\/|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\))+(?:\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?\xab\xbb\u201c\u201d\u2018\u2019]))/gim,

      // Local part is a dot-atom (dots allowed between atoms, not at the
      // edges); domain labels may contain inner hyphens. Group 1 is still the
      // whole address.
      emails: /([a-z0-9!#$%&'*+\/=?\^_`{|}~\-]+(?:\.[a-z0-9!#$%&'*+\/=?\^_`{|}~\-]+)*@([a-z0-9](?:[a-z0-9\-]*[a-z0-9])?\.)+([a-z0-9](?:[a-z0-9\-]*[a-z0-9])?))/gim,

      IPv4: /\b((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b/gm,

      IPv6: /((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(([0-9A-Fa-f]{1,4}:){0,5}:((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(::([0-9A-Fa-f]{1,4}:){0,5}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))\b/gim,

      hexColors: /#(?:[0-9a-fA-F]{3}){1,2}\b/gim,

      acronyms: /\b(([A-Z]\.)+|([A-Z]){2,})/gm,

      money: /((^|\b)US?)?\$\s?[0-9]{1,3}((,[0-9]{3})+|([0-9]{3})+)?(\.[0-9]{1,2})?\b/gm,

      percentages: /(100(\.0+)?|[0-9]{1,2}(\.[0-9]+)?)%/gm,

      creditCards: /((?:(?:\d{4}[- ]){3}\d{4}|\d{16}))(?![\d])/gm,

      addresses: /\d{1,4} [\w\s]{1,20}(?:(street|avenue|road|highway|square|trail|drive|court|parkway|boulevard|circle)\b|(st|ave|rd|hwy|sq|trl|dr|ct|pkwy|blvd|cir)\.(?=\b)?)/gim
    };

    var regexesNames = Object.keys(CommonRegex.regexes);

    // Upper-cases the first character, e.g. "dates" -> "Dates".
    var capitalize = function capitalize(string) {
      return string.charAt(0).toUpperCase() + string.slice(1);
    };

    // Add a method relative to each one of the regexes:
    // `get<Name>([text])` returns the matches in `text` when given, otherwise
    // the matches for the instance's own text.
    regexesNames.forEach(function(r) {
      CommonRegex.prototype['get' + capitalize(r)] = function(text) {
        if (text) {
          return this.getMatches(text, CommonRegex.regexes[r]);
        }

        // Instances built without text have no lazy property, so compute
        // from `this.text` and always hand back an array, never `undefined`.
        var cached = this[r];

        return cached !== undefined ? cached : this.getMatches(this.text, CommonRegex.regexes[r]);
      };
    });

    return CommonRegex;
  }
));
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "commonregex",
3 | "description": "Find a lot of kinds of common information in a string.",
4 | "version": "0.3.1",
5 | "homepage": "https://github.com/talyssonoc/commonregexjs",
6 | "author": {
7 | "name": "talyssonoc",
8 | "email": "talyssonoc@gmail.com"
9 | },
10 | "repository": {
11 | "type": "git",
12 | "url": "git://github.com/talyssonoc/commonregexjs.git"
13 | },
14 | "bugs": {
15 | "url": "https://github.com/talyssonoc/commonregexjs/issues"
16 | },
17 | "license": "MIT",
18 | "main": "lib/commonregex",
19 | "engines": {
20 | "node": ">= 0.10.0"
21 | },
22 | "scripts": {
23 | "test": "mocha",
24 | "lint": "eslint lib/commonregex.js"
25 | },
26 | "dependencies": {},
27 | "devDependencies": {
28 | "chai": "^3.5.0",
29 | "eslint": "^2.10.2",
30 | "mocha": "^2.4.5"
31 | },
32 | "keywords": [
33 | "regex",
34 | "regexp"
35 | ]
36 | }
37 |
--------------------------------------------------------------------------------
/test/CommonRegexJS_test.js:
--------------------------------------------------------------------------------
1 | /*
2 | * CommonRegexJS
3 | * https://github.com/talyssonoc/commonregexjs
4 | *
5 | * Licensed under the MIT license.
6 | */
7 |
8 | 'use strict';
9 |
10 | var expect = require('chai').expect;
11 | var CommonRegex = require('../lib/commonregex');
12 |
describe('CommonRegex', function() {
  // Paragraph shared by the specs that read the lazy instance properties.
  var sampleText = 'John, please get that article on www.linkedin.com to me by 5:00PM\n'
    + 'on Jan 9th 2012. 4:00 would be ideal, actually. If you have any questions,\n'
    + 'you can reach my associate at (012)-345-6789 or associative@mail.com.\n'
    + 'I\'ll be in UK during the whole week at a J.R.R. Tolkien convention, starting friday at 7PM.';

  var commonRegex;

  // A fresh instance per spec, so cached matches never leak between specs.
  beforeEach(function() {
    commonRegex = new CommonRegex(sampleText);
  });

  describe('#dates', function() {
    it('should find dates', function() {
      var found = commonRegex.dates;

      expect(found).to.eql(['Jan 9th 2012']);
    });
  });

  describe('#times', function() {
    it('should find times', function() {
      var found = commonRegex.times;

      expect(found).to.eql(['5:00PM', '4:00', '7PM']);
    });
  });

  describe('#phones', function() {
    it('should find phone numbers', function() {
      var found = commonRegex.phones;

      expect(found).to.eql(['(012)-345-6789']);
    });
  });

  describe('#links', function() {
    it('should find links', function() {
      var found = commonRegex.links;

      expect(found).to.eql(['www.linkedin.com']);
    });
  });

  describe('#emails', function() {
    it('should find emails', function() {
      var found = commonRegex.emails;

      expect(found).to.eql(['associative@mail.com']);
    });
  });

  describe('#IPv4', function() {
    it('should find IPv4 addresses', function() {
      var input = 'The IPv4 address for localhost is 127.0.0.1.';

      expect(commonRegex.getIPv4(input)).to.eql(['127.0.0.1']);
    });
  });

  describe('#IPv6', function() {
    it('should find IPv6 addresses', function() {
      var input = 'The IPv6 address for localhost is 0:0:0:0:0:0:0:1, or alternatively ::1, but not :1:.';

      expect(commonRegex.getIPv6(input)).to.eql(['0:0:0:0:0:0:0:1', '::1']);
    });
  });

  describe('#hexColors', function() {
    it('should find hex colors codes', function() {
      var input = 'Did you knew that Hacker News orange is #ff6600?';

      expect(commonRegex.getHexColors(input)).to.eql(['#ff6600']);
    });
  });

  describe('#acronyms', function() {
    it('should find acronyms', function() {
      // No argument: the method falls back to the instance's own text.
      var found = commonRegex.getAcronyms();

      expect(found).to.eql(['UK', 'J.R.R.']);
    });
  });

  describe('#money', function() {
    it('should find money values', function() {
      var input = 'They said the price was US$5,000.90, actually it is US$3,900.5. It\'s $1100.4 less, can you imagine this?';
      var wanted = ['US$5,000.90', 'US$3,900.5', '$1100.4'];

      expect(commonRegex.getMoney(input)).to.eql(wanted);
    });
  });

  describe('#percentages', function() {
    it('should find percentages', function() {
      var input = 'I\'m 99.9999999% sure that I\'ll get a raise of 5%.';

      expect(commonRegex.getPercentages(input)).to.eql(['99.9999999%', '5%']);
    });
  });

  describe('#creditCards', function() {
    it('should find credit cards numbers', function() {
      var input = 'His credit card number can be writen as 1234567891011121 or 1234-5678-9101-1121, but not 123-4567891011121.';
      var wanted = ['1234567891011121', '1234-5678-9101-1121'];

      expect(commonRegex.getCreditCards(input)).to.eql(wanted);
    });
  });

  describe('#addresses', function() {
    it('should find addresses', function() {
      var input = 'checkout the new place at 101 main st., 504 parkwood drive, 3 elm boulevard, 500 elm street, 101 main straight';
      var wanted = [
        '101 main st.',
        '504 parkwood drive',
        '3 elm boulevard',
        '500 elm street'
      ];

      expect(commonRegex.getAddresses(input)).to.eql(wanted);
    });
  });
});
125 |
--------------------------------------------------------------------------------