├── Makefile
├── LICENSE
├── ChangeLog
├── README
├── grepcidr.1
├── COPYING
└── grepcidr.c


/Makefile:
--------------------------------------------------------------------------------
 1 | #
 2 | # Makefile for grepcidr 3.x
 3 | #
 4 | # Targets:
 5 | #   all      build the grepcidr binary (default)
 6 | #   install  build, then install the binary and man page under $(DESTDIR)
 7 | #   clean    remove the built binary
 8 | #   tar      create ../$(DIR).tjz from the files listed in TFILES
 9 | #
10 | 
11 | # Set to where you'd like grepcidr installed
12 | INSTALL=install
13 | INSTALLDIR=/usr/local
14 | INSTALLDIR_BIN=${INSTALLDIR}/bin
15 | INSTALLDIR_MAN=${INSTALLDIR}/man/man1
16 | #INSTALLDIR_MAN=${INSTALLDIR}/share/man/man1
17 | 
18 | # Set to your favorite C compiler and flags
19 | # with GCC, -O3 makes a lot of difference
20 | # -DDEBUG=1 prints out hex versions of IPs and matches
21 | # CPPFLAGS and LDFLAGS, if set, are passed to the compiler as well
22 | 
23 | CFLAGS=-O3 -Wall -pedantic
24 | #CFLAGS=-g -Wall -pedantic -DDEBUG=1
25 | TFILES=COPYING LICENSE ChangeLog Makefile README grepcidr.1 grepcidr.c
26 | # Name of the directory make is running in; used by the tar target.
27 | # CURDIR is set by make itself, so this is correct under "make -C dir" too.
28 | DIR := $(notdir $(CURDIR))
29 | 
30 | # End of settable values
31 | 
32 | # These targets name actions, not files, so run them even if a file
33 | # with the same name exists
34 | .PHONY: all install clean tar
35 | 
36 | all:	grepcidr
37 | 
38 | grepcidr:	grepcidr.c
39 | 	$(CC) $(CPPFLAGS) $(CFLAGS) $(RPM_OPT_FLAGS) $(LDFLAGS) $(RPM_LD_FLAGS) -o $@ $<
40 | 
41 | # Create the destination directories first so that installing into a
42 | # fresh DESTDIR or prefix works
43 | install:	all  grepcidr.1
44 | 	$(INSTALL) -d $(DESTDIR)$(INSTALLDIR_BIN) $(DESTDIR)$(INSTALLDIR_MAN)
45 | 	$(INSTALL) grepcidr $(DESTDIR)$(INSTALLDIR_BIN)
46 | 	$(INSTALL) -m 0644 grepcidr.1 $(DESTDIR)$(INSTALLDIR_MAN)
47 | 
48 | clean:
49 | 	rm -f grepcidr
50 | 
51 | # Run tar only if the cd succeeded; archive members are prefixed with
52 | # the directory name so the tarball unpacks into its own directory
53 | tar:
54 | 	cd "$(CURDIR)/.." && tar cvjf ${DIR}.tjz $(patsubst %,${DIR}/%,${TFILES})
55 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 2-Clause License
 2 | 
 3 | Copyright (c) 2020, Standcore LLC
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | 1. Redistributions of source code must retain the above copyright notice, this
10 |    list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 |    this list of conditions and the following disclaimer in the documentation
14 |    and/or other materials provided with the distribution.
15 | 
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | Portions of the source code are copyright (C) 2004, 2005  Jem E. Berkes <jberkes@pc-tools.net>
28 | and may be subject to a GNU public license
29 | 


--------------------------------------------------------------------------------
/ChangeLog:
--------------------------------------------------------------------------------
 1 | Version 3.0
 2 | ============
 3 | 
 4 | - Fix -h flag
 5 | 
 6 | Version 2.991
 7 | ============
 8 | 
 9 | - Add -D for partial range matching
10 | - Fix minor bugs
11 | - More extensive debugging info included with -DDEBUG
12 | 
13 | Version 2.99
14 | ============
15 | 
16 | - Add -C to match CIDR ranges in the input
17 | 
18 | Version 2.98
19 | ===========
20 | 
21 | - Make 0.0.0.0/0 work
22 | - Replace fgets() with getline() so it can handle arbitrary line
23 |   lengths in non-mmap files
24 | - Twiddle state machine so ::::ffff:1.2.3.4 and ::::1 are recognized
25 |   as IPv6
26 | - Match IPv6 after 8 groups so 1:2:3:4:5:6:7:8:junk works
27 | - Fix bug that allowed oversize values in first v6 group
28 | - Peek after last colon in low group so 1::2:junk works
29 | 
30 | Version 2.97
31 | ===========
32 | 
33 | - Quick flag doesn't recognize v4 with dots before or after, to limit
34 |   false matches in rDNS names
35 | 
36 | Version 2.96
37 | ===========
38 | 
39 | - New -s flag to stop complaints about sloppy (misaligned) CIDRs
40 | - CIDRs now always properly bit aligned, even if the base address wasn't
41 | - Minor bug fixes
42 | 
43 | Version 2.95
44 | ===========
45 | 
46 | - Rewritten to use a faster state machine recognizer and mapped files,
47 |   about 3 times faster overall, no speed penalty for matching through
48 |   the whole line
49 | - Handles both IPv4 and IPv6 patterns
50 | - Better error checking
51 | - Handle multiple input files
52 | 
53 | Version 1.3
54 | ===========
55 | - Much faster than past versions due to binary search of patterns
56 | - Decreased memory usage
57 | - Applied search improvements suggested by Dick Wesseling <ftu@fi.uu.nl>
58 | - Now supports IP ranges as well as CIDR format
59 | - Improved usage to be more grep-like (e.g. filename on command line)
60 | - Now uses grep-like exit code (0=ok, 1=no match, 2=error)
61 | 
62 | 
63 | Version 1.2
64 | ===========
65 | - Improved algorithm for faster processing with large number of patterns
66 |   (approx. 50 times as fast on test set, with specs from diverse regions)
67 | 
68 | 
69 | version 1.1
70 | ===========
71 | - New algorithm has increased speed by about 2.4 times!
72 | - Added -c (count) option, like grep
73 | - Reduced memory usage in case loading large files (-f)
74 | - Added simple Makefile
75 | 
76 | 
77 | version 1.0
78 | ===========
79 | - First public release
80 | 


--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
 1 | grepcidr 3.0 - Filter IP addresses matching IPv4 and IPv6 CIDR specification
 2 | Parts Copyright (C) 2004-2005  Jem E. Berkes <jberkes@pc-tools.net>
 3 | 	http://www.pc-tools.net/unix/grepcidr/
 4 | 
 5 | Revised by John Levine <johnl@standcore.com> Dec 2013-Dec 2020
 6 | 
 7 | PURPOSE
 8 | -------
 9 | grepcidr can be used to filter a list of IP addresses against one or more
10 | Classless Inter-Domain Routing (CIDR) specifications, or arbitrary networks
11 | specified by an address range. As with grep, there are options to invert
12 | matching and load patterns from a file. grepcidr is capable of comparing
13 | thousands or even millions of IPs to networks with little memory usage and
14 | in reasonable computation time.
15 | 
16 | grepcidr has endless uses in network software, including: mail filtering and
17 | processing, network security, log analysis, and many custom applications.
18 | 
19 | COMPILING & INSTALLING
20 | ----------------------
21 | Edit Makefile to customize the build. Then,
22 | make
23 | make install
24 | 
25 | 
26 | COMMAND USAGE
27 | -------------
28 | Usage:
29 |         grepcidr [-V] [-cCDvhais] PATTERN [FILE ...]
30 |         grepcidr [-V] [-cCDvhais] [-e PATTERN | -f FILE] [FILE ...]
31 | 
32 | -V	Show software version
33 | -a	Anchor matches to beginning of line, otherwise match anywhere
34 | -c	Display count of the lines that would have been shown, instead of showing them
35 | -C	Parse CIDR ranges in input and only match if a search term encompasses the entire range.
36 | -D	Parse CIDR ranges in input and match if a search term matches any part of the range.
37 | -v	Invert the sense of matching, to select non-matching IP addresses
38 | -e	Specify pattern(s) on command-line
39 | -f	Obtain CIDR and range pattern(s) from file
40 | -i	Ignore patterns that are not valid CIDRs or ranges
41 | -h	Do not print filenames when matching multiple files
42 | 
43 | PATTERN specified on the command line may contain multiple patterns
44 | separated by whitespace or commas. For long lists of network patterns,
45 | specify a -f FILE to load where each line contains one pattern. Comment
46 | lines starting with # are ignored, as are comments following white space
47 | after a pattern.  Use -i to ignore invalid pattern lines.
48 | 
49 | Each pattern, whether on the command line or inside a file, may be:
50 | CIDR format	a.b.c.d/xx or aa:bb::cc:dd/xx
51 | IP range	a.b.c.d-e.f.g.h
52 | Single IP	a.b.c.d or aa:bb::cc:dd
53 | 
54 | IPv6 addresses can be written in any format including embedded IPv4.
55 | The zero address :: is accepted as a pattern but does not match in
56 | files.  (Use regular grep if that's what you're looking for.)  grepcidr
57 | does not accept IPv6 ranges, since few people use them.
58 | 
59 | Grepcidr uses a state machine to look for IP addresses in the input,
60 | and a binary search to match addresses against patterns.  Its speed is
61 | roughly O(N) in the size of the input, and O(log N) in the number of
62 | patterns.  A prepass over the patterns merges adjacent and overlapping
63 | patterns so there is negligible speed penalty for specifying, e.g.,
64 | 1.2.2.0/24 and 1.2.3.0/24 rather than 1.2.2.0/23.
65 | 
66 | Input files are mapped into memory if possible, so the state machine
67 | can make one pass over the whole file.  If mapping fails, it reads the
68 | input a line at a time.
69 | 
70 | EXAMPLES
71 | --------
72 | 
73 | grepcidr -f ournetworks blocklist > abuse.log
74 | 	Find our customers that show up in blocklists
75 | 
76 | grepcidr 127.0.0.0/8,::1 iplog
77 | 	Searches for any localnet IP addresses inside the iplog file
78 | 
79 | grepcidr "192.168.0.1-192.168.10.13" iplog
80 | 	Searches for IPs matching indicated range in the iplog file
81 | 
82 | script | grepcidr -ivf whitelist > blocklist
83 | 	Create a blocklist, with whitelisted networks removed (inverse)
84 | 
85 | grepcidr -if list1 list2
86 | 	Cross-reference two lists, outputs IPs common to both lists
87 | 


--------------------------------------------------------------------------------
/grepcidr.1:
--------------------------------------------------------------------------------
  1 | .TH "GREPCIDR" "1" 
  2 | .SH "NAME" 
  3 | grepcidr \(em Filter IP addresses matching IPv4 and IPv6 address specifications
  4 | .SH "SYNOPSIS" 
  5 | .PP 
  6 | \fBgrepcidr\fR [\fB-V\fP]  [\fB-cCDvahisq\fP]  \fIPATTERN\fP [\fIFILE ...\fP]  
  7 | .PP 
  8 | \fBgrepcidr\fR [\fB-V\fP]  [\fB-cCDvahisq\fP] [\fB-e \fIPATTERN\fR\fP | \fB-f \fIFILE\fP]  [\fIFILE ...\fP]
  9 | .SH "DESCRIPTION" 
 10 | .PP 
 11 | \fBgrepcidr\fR can be used to filter a list of IP addresses and ranges against one or more 
 12 | IPv4 and IPv6 address ranges.  As with grep, there are options to invert 
 13 | matching and load patterns from a file.  grepcidr is designed to scale well,
 14 | and handle large numbers of patterns and large input files efficiently.
 15 | This version uses a completely rewritten parser that is typically two or three times
 16 | faster than previous versions.
 17 | .\" 
 18 | .SH "OPTIONS" 
 19 | .IP "\fB-V\fP" 10 
 20 | Show software version 
 21 | .IP "\fB-c\fP" 10 
 22 | Display count of the lines that otherwise would have been printed
 23 | .IP "\fB-v\fP" 10 
 24 | Invert the sense of matching, to select lines with IPs that don't match any pattern
 25 | .IP "\fB-a\fP" 10 
 26 | (anchor) Only match addresses that occur at the beginning of a line
 27 | .IP "\fB-e\fP" 10 
 28 | Specify pattern(s) as an argument
 29 | .IP "\fB-f\fP" 10 
 30 | Obtain pattern(s) from a file 
 31 | .IP "\fB-h\fP" 10 
 32 | Do not print file names with matched lines
 33 | .IP "\fB-i\fP" 10 
 34 | Ignore bad patterns
 35 | .IP "\fB-s\fP" 10 
 36 | (Sloppy) Don't complain about misaligned CIDR ranges.
 37 | .IP "\fB-C\fP" 10 
 38 | Parse CIDR ranges in input and match if a search term covers the entire range.
 39 | .IP "\fB-D\fP" 10 
 40 | Parse CIDR ranges in input and match if a search term covers any of the range.
 41 | .IP "\fB-q\fP" 10 
 42 | (Quick) Ignore IPv4 addresses that are preceded or followed by a dot.
 43 | .SH "USAGE NOTES" 
 44 | .PP 
 45 | PATTERN specified on the command line may contain multiple patterns 
 46 | separated by white space or commas. For long lists of network patterns, 
 47 | use \fB-f\fP to load a file where each line contains one pattern. Comment 
 48 | lines starting with # are ignored.
 49 | In a file, each pattern can be followed by a space and comments.
 50 | .PP 
 51 | Each pattern, whether on the command line or inside a file, may be: 
 52 | .PP 
 53 | .nf 
 54 | a.b.c.d/xx        (IPv4 CIDR range) 
 55 | a.b.c.d-e.f.g.h   (IPv4 range) 
 56 | a.b.c.d           (Single IPv4 address)
 57 | ab:cd::ef	  (Single IPv6 address)
 58 | ab:cd::ef/xx	  (IPv6 CIDR range)
 59 | ab::cd:a.b.c.d	  (IPv4 address embedded in IPv6)
 60 | .fi 
 61 | .PP
 62 | Invalid patterns are ignored with the \fB-i\fP flag, which can be useful for
 63 | using files of IPs or CIDRs as patterns that also contain other material.
 64 | CIDRs are always properly aligned even if the base address wasn't, e.g.,
 65 | 12.34.56.78/24 is treated as 12.34.56.0/24,
 66 | and 1234:5678::abcd/64 is treated as 1234:5678::0/64.
 67 | Complaints about misaligned CIDRs can be suppressed with \fB-s\fP.
 68 | .SH COMPATIBILITY
 69 | .PP 
 70 | This version of \fBgrepcidr\fR normally searches for IP addresses anywhere 
 71 | on the input line.
 72 | It uses a reasonable but ad-hoc parser to look for the beginning of an address.
 73 | This can cause unexpected results in some cases, e.g. ab:c12.34.56.78 will not
 74 | match as an IPv4 address because ab:c12 looks like the beginning of an IPv6
 75 | address.
 76 | Addresses written as IPv6 embedded IPv4 addresses will match either the IPv4
 77 | or IPv6 address pattern, e.g. ::ffff:12.34.56.78 will match both ::ffff:0:0/96 and 12.34.0.0/16.
 78 | .PP
 79 | Even though :: is a valid way to write a zero IPv6 address, grepcidr won't match it.
 80 | (If that's what you want, use regular grep.)
 81 | It will match 0:: or ::0.0.0.0 or 0::0 or any other form.
 82 | .PP
 83 | Normally grepcidr will match every string that looks like
 84 | an IPv4 address, so in 1.2.3.4.5.6.7.8 it will match
 85 | both 1.2.3.4 and 5.6.7.8.
 86 | The \fB-q\fP option ignores addresses preceded or followed by a dot,
 87 | which avoids false matches in some contexts.
 88 | .PP 
 89 | Use the \fB-a\fP option to look for addresses only at the
 90 | start of the line, optionally preceded by white space.
 91 | This type of search is stricter, but not significantly faster.
 92 | .PP 
 93 | Note that \fB-v\fP outputs only lines that do contain IP addresses, but do not match
 94 | any of the patterns.
 95 | .PP
 96 | If there is more than one file named on the command line, each matched line
 97 | is preceded by the file name unless the \fB-h\fP flag is set.
 98 | .SH "EXAMPLES" 
 99 | .PP 
100 | \fI\fBgrepcidr\fR \-f ournetworks blocklist > abuse.log\fP 
101 | .PP 
102 | Find customers (CIDR ranges in file) that appear in blocklist 
103 | .PP 
104 | \fI\fBgrepcidr\fR 127.0.0.0/8,::1 iplog\fP 
105 | .PP 
106 | Searches for any localnet IP addresses inside the iplog file 
107 | .PP 
108 | \fI\fBgrepcidr\fR "192.168.0.1-192.168.10.13" iplog\fP
109 | .br
110 | \fI\fBgrepcidr\fR "2001:3def:1234::/48" iplog\fP 
111 | .PP 
112 | Searches for IPs matching indicated range in the iplog file 
113 | .PP 
114 | \fI\fBscript\fR | \fBgrepcidr\fR \-vf whitelist > blocklist\fP 
115 | .PP 
116 | Create a blocklist, with whitelisted networks removed (inverse) 
117 | .PP 
118 | \fI\fBgrepcidr\fR \-f list1 list2\fP 
119 | .PP 
120 | Cross-reference two lists, outputs IPs common to both lists 
121 | .SH "EXIT STATUS" 
122 | .PP 
123 | As with grep: the exit status is 0 if matching IPs are found, and 1 
124 | if not found.  If an error occurred the exit status is 2.  
125 | .SH "AUTHOR" 
126 | .PP 
127 | This software is based on software written by Jem Berkes <jem@berkes.ca>,
128 | extensively rewritten by John Levine <johnl@taugh.com>. 
129 | Permission is granted to copy, distribute and/or modify this document under 
130 | the terms of the GNU General Public License, Version 2 or any later version 
131 | published by the Free Software Foundation. 
132 | 


--------------------------------------------------------------------------------
/COPYING:
--------------------------------------------------------------------------------
  1 | 		    GNU GENERAL PUBLIC LICENSE
  2 | 		       Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc.
  5 |                        59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 | 			    Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Library General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 | 		    GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 | 			    NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 | 		     END OF TERMS AND CONDITIONS
281 | 
282 | 


--------------------------------------------------------------------------------
/grepcidr.c:
--------------------------------------------------------------------------------
   1 | /*
   2 | 
   3 |   grepcidr 3.0 - Filter IP addresses matching IPv4 and IPv6 CIDR specification
   4 |   Parts copyright (C) 2004, 2005  Jem E. Berkes <jberkes@pc-tools.net>
   5 |   	www.sysdesign.ca
   6 |   Somewhat rewritten by John Levine <johnl@standcore.com>
   7 | */
   8 | 
   9 | #define _WITH_GETLINE /* hint for FreeBSD */
  10 | #include <stdio.h>
  11 | #include <stdlib.h>
  12 | #include <string.h>
  13 | #include <getopt.h>
  14 | #include <ctype.h>
  15 | #include <sys/types.h>
  16 | #include <sys/stat.h>
  17 | #include <sys/mman.h>
  18 | #include <assert.h>
  19 | 
  20 | #define EXIT_OK		0
  21 | #define EXIT_NOMATCH	1
  22 | #define EXIT_ERROR	2
  23 | 
  24 | #define TXT_VERSION	"grepcidr 3.02\nParts copyright (C) 2004, 2005  Jem E. Berkes <jberkes@pc-tools.net>\n"
  25 | #define TXT_USAGE	"Usage:\n" \
  26 | 			"\tgrepcidr [-V] [-cCDvhais] PATTERN [FILE...]\n" \
  27 | 			"\tgrepcidr [-V] [-cCDvhais] [-e PATTERN | -f FILE] [FILE...]\n"
  28 | #define MAXFIELD	512
  29 | #define TOKEN_SEPS	"\t,\r\n"	/* so user can specify multiple patterns on command line */
  30 | #define INIT_NETWORKS	8192
  31 | 
  32 | /*
  33 | 	Specifies a network. Whether originally in CIDR format (IP/mask)
  34 | 	or a range of IPs (IP_start-IP_end), spec is converted to a range.
  35 | 	The range is min to max (32-bit or 128 bit IPs) inclusive.
  36 | */
  37 | struct netspec
  38 | {
  39 | 	unsigned int min;
  40 | 	unsigned int max;
  41 | };
  42 | 
  43 | typedef struct v6addr { unsigned char a[16]; } v6addr;
  44 | 
  45 | /* redefine this if your memcmp is slow, but it probably isn't */
  46 | #define v6cmp(a1, a2) memcmp((a1).a,(a2).a,16)
  47 | 
  48 | struct netspec6
  49 | {
  50 | 	v6addr min;
  51 | 	v6addr max;
  52 | };
  53 | 
  54 | /* Global variables */
  55 | static unsigned int npatterns = 0;		/* total patterns in array */
  56 | static unsigned int n6patterns = 0;		/* total patterns in v6 array */
  57 | static unsigned int capacity = 0;		/* current capacity of array */
  58 | static unsigned int capacity6 = 0;		/* current capacity of v6 array */
  59 | static struct netspec* array = NULL;		/* array of patterns, network specs */
  60 | static struct netspec6* array6 = NULL;		/* array of patterns, v6 network specs */
  61 | static unsigned int counting = 0;		/* when non-zero, counts matches */
  62 | static int invert = 0;				/* flag for inverted mode */
  63 | static int anchor = 0;				/* anchor matches at beginning of line */
  64 | static int nonames = 0;				/* don't show filenames */
  65 | static int nmatch = 0;				/* count of matches for exit code */
  66 | static int igbadpat = 0;			/* ignore bad patterns */
  67 | static int sloppy = 0;				/* don't complain about sloppy CIDR */
  68 | static int cidrsearch = 0;			/* parse and match CIDR in haystack */
  69 | static int didrsearch = 0;			/* match CIDR if overlaps with haystack */
  70 | static int quick = 0;				/* quick match, ignore v4 with dots before or after */
  71 | 
  72 | static void scan_block(char *bp, size_t blen, const char *fn);
  73 | static void scan_read(FILE *f, const char *fn);
  74 | static int applymask6(const v6addr ahi, int size, struct netspec6 *spec);
  75 | 
  76 | /* for getline */
  77 | char *linep = NULL;
  78 | size_t linesize;
  79 | 
  80 | /*
  81 | 	Insert new spec inside array of network spec
  82 | 	Dynamically grow array buffer as needed
  83 | */
  84 | void array_insert(struct netspec* newspec)
  85 | {
  86 | 	/* Initial array allocation */
  87 | 	if(!array) {
  88 | 		capacity = INIT_NETWORKS;
  89 | 		array = (struct netspec*) malloc(capacity*sizeof(struct netspec));
  90 | 		if(!array) {
  91 | 			perror("Out of memory");
  92 | 			exit(EXIT_ERROR);
  93 | 		}
  94 | 	}
  95 | 	if (npatterns == capacity)
  96 | 	{
  97 | 		capacity *= 2;
  98 | 		array = (struct netspec *)realloc(array, capacity*sizeof(struct netspec));
  99 | 		if(!array) {
 100 | 			perror("Out of memory");
 101 | 			exit(EXIT_ERROR);
 102 | 		}
 103 | 	}
 104 | 	array[npatterns++] = *newspec;
 105 | }
 106 | 
 107 | void array_insert6(struct netspec6* newspec)
 108 | {
 109 | 	/* Initial array allocation */
 110 | 	if(!array6) {
 111 | 		capacity6 = INIT_NETWORKS;
 112 | 		array6 = (struct netspec6*) malloc(capacity6*sizeof(struct netspec6));
 113 | 		if(!array6) {
 114 | 			perror("Out of memory");
 115 | 			exit(EXIT_ERROR);
 116 | 		}
 117 | 	}
 118 | 	if (n6patterns == capacity6)
 119 | 	{
 120 | 		capacity6 *= 2;
 121 | 		array6 = (struct netspec6 *)realloc(array6, capacity6*sizeof(struct netspec6));
 122 | 		if(!array6) {
 123 | 			perror("Out of memory");
 124 | 			exit(EXIT_ERROR);
 125 | 		}
 126 | 	}
 127 | 	array6[n6patterns++] = *newspec;
 128 | }
 129 | 
 130 | /*
 131 | 	Given string, fills in the struct netspec (must be allocated)
 132 | 	Accept CIDR IP/mask format or IP_start-IP_end range.
 133 | 	Returns true (nonzero) on success, false (zero) on failure.
 134 | */
 135 | int net_parse(const char* line, struct netspec* spec)
 136 | {
 137 | 	unsigned int minip = 0, maxip = 0;
 138 | 	unsigned int octet = 0;
 139 | 	unsigned int size = 0;	/* if using CIDR IP/mask format */
 140 | 	unsigned int mask;
 141 | 	char *p;
 142 | 	enum iscan {
 143 | 		I_BEG = 0,	/* beginning of line */
 144 | 		I_IP1,		/* first octet*/
 145 | 		I_IP1D,		/* dot after first octet */
 146 | 		I_IP2,		/* second octet */
 147 | 		I_IP2D,		/* dot after second octet */
 148 | 		I_IP3,		/* third octet */
 149 | 		I_IP3D,		/* dot after third octet */
 150 | 		I_IP4,		/* fourth octet */
 151 | 		I_MIP1,		/* first octet of max IP */
 152 | 		I_MIP1D,	/* dot after first octet */
 153 | 		I_MIP2,		/* second octet */
 154 | 		I_MIP2D,	/* dot after second octet */
 155 | 		I_MIP3,		/* third octet */
 156 | 		I_MIP3D,	/* dot after third octet */
 157 | 		I_MIP4,		/* fourth octet */
 158 | 		I_PIP,		/* post first IP */
 159 | 		I_MASK,		/* scanning a mask */
 160 | 		I_PD		/* post dash */
 161 | 
 162 | 	} state;
 163 | 	state = I_BEG;
 164 | 	for(p = (char *)line;;) {
 165 | 		int ch = *p++;
 166 | 
 167 | 		switch(state) {
 168 | 			case I_BEG:
 169 | 				if(isspace(ch))
 170 | 					continue;
 171 | 				if(isdigit(ch)) {	/* start a potential IP */
 172 | 					octet = ch-'0';
 173 | 					state = I_IP1;
 174 | 					continue;
 175 | 				}
 176 | 				break;
 177 | 
 178 | 			case I_IP1:	/* in an IP address */
 179 | 			case I_IP2:
 180 | 			case I_IP3:
 181 | 
 182 | 			case I_MIP1:	/* in a second IP address */
 183 | 			case I_MIP2:
 184 | 			case I_MIP3:
 185 | 				if(isdigit(ch)) {
 186 | 					octet = octet*10 + ch-'0';
 187 | 					continue;
 188 | 				}
 189 | 				if(ch == '.') {
 190 | 					if(octet > 255) { /* not a real address */
 191 | 						return 0;
 192 | 					}
 193 | 					maxip <<= 8;
 194 | 					maxip += octet;
 195 | 					state++;	/* corresponding dot state */
 196 | 					continue;
 197 | 				}
 198 | 				/* otherwise, wasn't a full IP */
 199 | 				return 0;
 200 | 
 201 | 			case I_IP1D:	/* saw dot after an octet */
 202 | 			case I_IP2D:
 203 | 			case I_IP3D:
 204 | 
 205 | 			case I_MIP1D:	/* saw dot after an octet */
 206 | 			case I_MIP2D:
 207 | 			case I_MIP3D:
 208 | 				if(isdigit(ch)) {
 209 | 					octet = ch-'0';
 210 | 					state++;	/* next octet state */
 211 | 					continue;
 212 | 				}
 213 | 				return 0;	/* wasn't an IP */
 214 | 
 215 | 			case I_IP4:	/* in last octet */
 216 | 				if(isdigit(ch)) {
 217 | 					octet = octet*10 + ch-'0';
 218 | 					continue;
 219 | 				}
 220 | 
 221 | 				/* OK, we have the IP */
 222 | 				if(octet > 255) { /* not a real address */
 223 | 					return 0;
 224 | 				}
 225 | 				maxip <<= 8;
 226 | 				maxip += octet;
 227 | 				minip = maxip;	/* until we see otherwise */
 228 | 				if(!ch) break;	/* end of string */
 229 | 				if(ch == '/')
 230 | 					state = I_MASK;
 231 | 				else if(ch == '-')
 232 | 					state = I_PD;
 233 | 				else
 234 | 					state = I_PIP;
 235 | 				continue;
 236 | 
 237 | 			case I_MIP4:	/* in last octet of range max*/
 238 | 				if(isdigit(ch)) {
 239 | 					octet = octet*10 + ch-'0';
 240 | 					continue;
 241 | 				}
 242 | 
 243 | 				/* OK, we have the IP */
 244 | 				if(octet > 255) { /* not a real address */
 245 | 					return 0;
 246 | 				}
 247 | 				maxip <<= 8;
 248 | 				maxip += octet;
 249 | 				if(ch && !isspace(ch))
 250 | 					return 0;	/* junk at end */
 251 | 				break;
 252 | 
 253 | 			case I_PIP:
 254 | 				if(ch == '/')
 255 | 					state = I_MASK;
 256 | 				else if(ch == '-')
 257 | 					state = I_PD;
 258 | 				else if(!ch)
 259 | 					break;	/* single IP with spaces after it */
 260 | 				else if(!isspace(ch))
 261 | 					return 0;	/* junk */
 262 | 				continue;
 263 | 			case I_PD:
 264 | 				if(isspace(ch))
 265 | 					continue;
 266 | 				if(!isdigit(ch))
 267 | 					return 0;	/* junk */
 268 | 				octet = ch-'0';
 269 | 				state = I_MIP1;
 270 | 				continue;
 271 | 					
 272 | 			case I_MASK:	/* CIDR mask size */
 273 | 				if(isdigit(ch)) {
 274 | 					size = size*10 + ch-'0';
 275 | 					continue;
 276 | 				}
 277 | 				if(ch && !isspace(ch))
 278 | 					return 0;	/* junk at end */
 279 | 				if(size > 32)
 280 | 					return 0;	/* not a reasonable cidr */
 281 | 				mask = (1L<<(32-size))-1;
 282 | 				if(maxip&mask && !sloppy)
 283 | 					fprintf(stderr, "Invalid cidr: %s\n", line);
 284 | 				minip &= ~mask;	/* force to CIDR boundary */
 285 | 				maxip |= mask;
 286 | 				
 287 | 				break;
 288 | 		}
 289 | 		if(ch && !isspace(ch)) return 0;	/* crud at end of address */
 290 | 		break;
 291 | 	}
 292 | 	/* got something, return it */
 293 | 	spec->min = minip;
 294 | 	spec->max = maxip;
 295 | #if DEBUG
 296 | 	if(getenv("RANGES"))printf("range %08x - %08x\n", minip, maxip);
 297 | #endif /* DEBUG */
 298 | 	if(minip > maxip)
 299 | 		fprintf(stderr, "Backward range: %s\n", line);
 300 | 	return 1;
 301 | }
 302 | 
 303 | /*
 304 |  * parse IPv6 address or CIDR
 305 |  * no ranges, since they don't seem popular
 306 |  * This should handle the full syntax in RFC 4291 sec 2.2 and 2.3 
 307 |  */
 308 | /* turn a hex digit to a value, has to be a hex digit */
 309 | #define xtod(c) ((c<='9')?(c-'0'):((c&15)+9))
 310 | 
 311 | int net_parse6(const char* line, struct netspec6* spec)
 312 | {
 313 | 	v6addr ahi;	/* high part of address */
 314 | 	v6addr alo;	/* low part of address */
 315 | 	int nhi = 0;	/* how many bytes in ahi */
 316 | 	int nlo = 0;	/* how many bytes in alo */
 317 | 	int octet = -1;	/* current v4 octet, -1 means not an octet */
 318 | 	unsigned int chunk = 0;	/* current 16 bit chunk */
 319 | 	int size = -1;
 320 | 	enum sv6 {
 321 | 		V_BEG = 0,	/* beginning of string */
 322 | 		V_HCH,		/* in a hi chunk */
 323 | 		V_HC1,		/* hi, seen one colon */
 324 | 		V_HC2,		/* hi, seen two colons */
 325 | 		V_LCH,		/* in a low chunk */
 326 | 		V_LC1,		/* seen a low colon */
 327 | 		V_IC1,		/* seen initial colon */
 328 | 		V_EIP1D,	/* dot after first octet of embedded IPv4 */
 329 | 		V_EIP2,		/* second octet */
 330 | 		V_EIP2D,	/* dot after second octet */
 331 | 		V_EIP3,		/* third octet */
 332 | 		V_EIP3D,	/* dot after third octet */
 333 | 		V_EIP4,		/* fourth octet */
 334 | 		V_SIZE		/* CIDR size */
 335 | 	} state;
 336 | 	char *p = (char *)line;
 337 | 	
 338 | 	state = 0;
 339 | 
 340 | 	for(;;) {
 341 | 		int ch = *p++;
 342 | 		
 343 | 		switch(state) {
 344 | 			case V_BEG:
 345 | 				if(isspace(ch)) continue;
 346 | 				if(isxdigit(ch)) {	/* first chunk can't be v4 */
 347 | 					chunk = xtod(ch);
 348 | 					state = V_HCH;
 349 | 					continue;
 350 | 				}
 351 | 				if(ch == ':') {
 352 | 					state = V_IC1;
 353 | 					continue;
 354 | 				}
 355 | 				return 0;	/* not an IP */
 356 | 
 357 | 			case V_IC1:		/* leading colon must be two colons */
 358 | 				if(ch == ':') {
 359 | 					state = V_HC2;
 360 | 					continue;
 361 | 				}
 362 | 				return 0;	/* not an IP */
 363 | 
 364 | 			case V_HCH:
 365 | 				if(isxdigit(ch)) {
 366 | 					chunk = (chunk<<4)+xtod(ch);
 367 | 					if(isdigit(ch)) {
 368 | 						if(octet >= 0) octet = octet*10 + ch-'1';
 369 | 					} else
 370 | 						octet = -1; /* not v4 */
 371 | 					continue;
 372 | 				}
 373 | 				/* finish the current chunk */
 374 | 
 375 | 				if(ch == '.') {
 376 | 					if(nhi == 12 && octet >= 0 && octet <= 255) { /* embedded v4 */
 377 | 						ahi.a[nhi++] = octet;
 378 | 						state = V_EIP1D;
 379 | 						continue;
 380 | 					}
 381 | 					return 0;	/* not an IP */
 382 | 				}
 383 | 
 384 | 				if(nhi > 14) return 0;	/* too many chunks */
 385 | 				ahi.a[nhi++] = chunk >> 8;	/* big-endian for memcmp() */
 386 | 				ahi.a[nhi++] = chunk & 255;
 387 | 				if(ch == ':') {
 388 | 					state = V_HC1;
 389 | 					continue;
 390 | 				}
 391 | 				if(ch == '/') {
 392 | 					state = V_SIZE;
 393 | 					continue;
 394 | 				}
 395 | 				break;	/* end of the number */
 396 | 
 397 | 			case V_HC1:
 398 | 				if(isxdigit(ch)) {
 399 | 					chunk = xtod(ch);
 400 | 					if(isdigit(ch))
 401 | 						octet = chunk;
 402 | 					else
 403 | 						octet = -1;
 404 | 					state = V_HCH;
 405 | 					continue;
 406 | 				}
 407 | 				if(ch == ':') {
 408 | 					state = V_HC2;
 409 | 					continue;
 410 | 				}
 411 | 				return 0;	/* not an IP */
 412 | 				
 413 | 			case V_HC2:
 414 | 				if(isxdigit(ch)) {	/* two colons and digit, start low half */
 415 | 					chunk = xtod(ch);
 416 | 					if(isdigit(ch))
 417 | 						octet = chunk;
 418 | 					else
 419 | 						octet = -1;
 420 | 					state = V_LCH;
 421 | 					continue;
 422 | 				}
 423 | 				if(ch == '/') {
 424 | 					state = V_SIZE;
 425 | 					continue;
 426 | 				}
 427 | 				break;	/* end of only high half */
 428 | 
 429 | 			case V_LCH:
 430 | 				if(isxdigit(ch)) {
 431 | 					chunk = (chunk<<4)+xtod(ch);
 432 | 					if(isdigit(ch)) {
 433 | 						if(octet >= 0) octet = octet*10 + ch-'0';
 434 | 					} else
 435 | 						octet = -1; /* not v4 */
 436 | 					continue;
 437 | 				}
 438 | 				/* finish the current chunk */
 439 | 				if(ch == '.') {
 440 | 					if((nhi+nlo) < 12
 441 | 					   && octet >= 0 && octet <= 255) { /* embedded v4 */
 442 | 						/* move all into ahi */
 443 | 						memset(ahi.a+nhi, 0, 12-(nhi+nlo));
 444 | 						if(nlo) {
 445 | 							memcpy(ahi.a+12-nlo, alo.a, nlo);
 446 | 							nlo = 0;
 447 | 						}
 448 | 						nhi = 12;
 449 | 						ahi.a[nhi++] = octet;
 450 | 						state = V_EIP1D;
 451 | 						continue;
 452 | 					}
 453 | 					return 0;	/* not an embedded v4 */
 454 | 				}
 455 | 
 456 | 				if((nhi+nlo) > 12) return 0;	/* too many chunks */
 457 | 				if(chunk > 0xffff) return 0;	/* too big for a chunk */
 458 | 				alo.a[nlo++] = chunk >> 8;	/* big-endian for memcmp() */
 459 | 				alo.a[nlo++] = chunk & 255;
 460 | 				if(ch == ':') {
 461 | 					state = V_LC1;
 462 | 					continue;
 463 | 				}
 464 | 				if(ch == '/') {
 465 | 					state = V_SIZE;
 466 | 					continue;
 467 | 				}
 468 | 				break;	/* end of the number */
 469 | 				
 470 | 			case V_LC1:
 471 | 				if(isxdigit(ch)) {
 472 | 					chunk = xtod(ch);
 473 | 					if(isdigit(ch))
 474 | 						octet = chunk;
 475 | 					else
 476 | 						octet = -1;
 477 | 					state = V_LCH;
 478 | 					continue;
 479 | 				}
 480 | 				return 0;	/* trailing junk, not an IP */
 481 | 
 482 | 			case V_EIP1D:		/* dot after first octet of embedded IPv4 */
 483 | 			case V_EIP2D:		/* dot after second octet */
 484 | 			case V_EIP3D:		/* dot after third octet */
 485 | 				if(isdigit(ch)) {
 486 | 					octet = ch-'0';
 487 | 					state++;
 488 | 					continue;
 489 | 				}
 490 | 				return 0;	/* not an IP */
 491 | 
 492 | 			case V_EIP2:		/* second octet */
 493 | 			case V_EIP3:		/* third octet */
 494 | 				if(isdigit(ch)) {
 495 | 					octet = octet*10 + ch-'0';
 496 | 					continue;
 497 | 				}
 498 | 				if(ch == '.') {
 499 | 					if(octet > 255) return 0;	/* not an IP */
 500 | 					ahi.a[nhi++] = octet;
 501 | 					state++;
 502 | 					continue;
 503 | 				}
 504 | 				return 0;	/* not an IP */
 505 | 
 506 | 			case V_EIP4:		/* fourth octet */
 507 | 				if(isdigit(ch)) {
 508 | 					octet = octet*10 + ch-'0';
 509 | 					continue;
 510 | 				}
 511 | 				if(octet > 255) break;	/* not an IP */
 512 | 				ahi.a[nhi++] = octet;
 513 | 				if(ch == '/') {
 514 | 					state = V_SIZE;
 515 | 					continue;
 516 | 				}
 517 | 				break;	/* four octets, we're done */
 518 | 
 519 | 			case V_SIZE:
 520 | 				if(isdigit(ch)) {
 521 | 					if (size < 0) size = 0;
 522 | 					size = size*10 + ch-'0';
 523 | 					continue;
 524 | 				}
 525 | 				if(size < 0 || size > 128) return 0;	/* no digits or junk at the end */
 526 | 				break;
 527 | 		}
 528 | 		break;
 529 | 		/* accept if \0 or space after an item */
 530 | 		if(ch && !isspace(ch)) return 0;	/* crud in the item */
 531 | 	}
 532 | 
 533 | 	/* combine ahi and alo */
 534 | 	if(nlo && (nhi+nlo) >= 16) return 0;	/* too many chunks */
 535 | 	if((nhi+nlo) < 16) 
 536 | 		memset(ahi.a+nhi, 0, 16-(nhi+nlo));
 537 | 	if(nlo)memcpy(ahi.a+16-nlo, alo.a, nlo);
 538 | 	if (!applymask6(ahi, size, spec) && !sloppy) {
 539 | 		p = strchr(line, '\n');
 540 | 		if(p) *p = 0;	/* just a string */
 541 | 		fprintf(stderr, "Bad cidr range: %s\n", line);
 542 | 	}
 543 | 	return 1;
 544 | }
 545 | 
 546 | /* Return 0 (softfail) if bits were set in host part of CIDR address */
 547 | static int applymask6(const v6addr ahi, int size, struct netspec6 *spec)
 548 | {
 549 | 	int badbits = 0;	/* bits already set, bad CIDR */
 550 | 	assert(size >= 0 && size <= 128);
 551 | 
 552 | 	spec->min = spec->max = ahi;
 553 | 
 554 | 	if(size >= 0) {	/* set low bits for the range */
 555 | 			/* and also check that they were already zero */
 556 | 		int nbits = size&7; /* bits within a byte */
 557 | 		int nbytes = size >> 3;
 558 | 
 559 | 		if(nbits) {
 560 | 			int mask = 255>>nbits;	
 561 | 
 562 | 			if(ahi.a[nbytes]&mask) badbits = 1;
 563 | 			spec->min.a[nbytes] &= 255-mask;
 564 | 			spec->max.a[nbytes] |= mask;
 565 | 			nbytes++;
 566 | 		}
 567 | 		while(nbytes < 16) {
 568 | 			if(ahi.a[nbytes]) badbits = 1;
 569 | 			spec->min.a[nbytes] = 0;
 570 | 			spec->max.a[nbytes] = 255;
 571 | 			nbytes++;
 572 | 		}
 573 | 	}
 574 | 	return !badbits;
 575 | }
 576 | 
 577 | /* Compare two netspecs, for sorting. Comparison is done on minimum of range */
 578 | int netsort(const void* a, const void* b)
 579 | {
 580 | 	unsigned int c1 = ((struct netspec*)a)->min;
 581 | 	unsigned int c2 = ((struct netspec*)b)->min;
 582 | 	if (c1 < c2) return -1;
 583 | 	if (c1 > c2) return +1;
 584 | 
 585 | 	c1 = ((struct netspec*)a)->max;
 586 | 	c2 = ((struct netspec*)b)->max;
 587 | 	if (c1 < c2) return -1;
 588 | 	if (c1 > c2) return +1;
 589 | 	return 0;
 590 | }
 591 | 
 592 | int netsort6(const void* a, const void* b)
 593 | {
 594 | 	int r;
 595 | 	v6addr *c1 = &((struct netspec6*)a)->min;
 596 | 	v6addr *c2 = &((struct netspec6*)b)->min;
 597 | 	r = v6cmp(*c1, *c2);
 598 | 	if(r != 0) return r;
 599 | 
 600 | 	c1 = &((struct netspec6*)a)->max;
 601 | 	c2 = &((struct netspec6*)b)->max;
 602 | 	return v6cmp(*c1, *c2);
 603 | }
 604 | 
 605 | int main(int argc, char* argv[])
 606 | {
 607 | 	static char shortopts[] = "acCDe:f:hiqsvV";
 608 | 	char* pat_filename = NULL;		/* filename containing patterns */
 609 | 	char* pat_strings = NULL;		/* pattern strings on command line */
 610 | 	int foundopt;
 611 | 
 612 | 	if (argc == 1)
 613 | 	{
 614 | 		fprintf(stderr, TXT_USAGE);
 615 | 		return EXIT_ERROR;
 616 | 	}
 617 | 
 618 | 	while ((foundopt = getopt(argc, argv, shortopts)) != -1)
 619 | 	{
 620 | 		switch (foundopt)
 621 | 		{
 622 | 			case 'V':
 623 | 				puts(TXT_VERSION);
 624 | 				return EXIT_ERROR;
 625 | 				
 626 | 			case 'c':
 627 | 				counting = 1;
 628 | 				break;
 629 | 				
 630 | 			case 'v':
 631 | 				invert = 1;
 632 | 				break;
 633 | 				
 634 | 			case 'h':
 635 | 				nonames = 1;
 636 | 				break;
 637 | 
 638 | 			case 'a':
 639 | 				anchor = 1;
 640 | 				break;
 641 | 
 642 | 			case 'i':
 643 | 				igbadpat = 1;
 644 | 				break;
 645 | 
 646 | 			case 'q':
 647 | 				quick = 1;
 648 | 				break;
 649 | 
 650 | 			case 's':
 651 | 				sloppy = 1;
 652 | 				break;
 653 | 
 654 | 			case 'D':
 655 | 				didrsearch = 1;
 656 | 				/* fall through */
 657 | 
 658 | 			case 'C':
 659 | 				cidrsearch = 1;
 660 | 				break;
 661 | 
 662 | 			case 'e':
 663 | 				pat_strings = optarg;
 664 | 				break;
 665 | 
 666 | 			case 'f':
 667 | 				pat_filename = optarg;
 668 | 				break;
 669 | 				
 670 | 			default:
 671 | 				fprintf(stderr, TXT_USAGE);
 672 | 				return EXIT_ERROR;
 673 | 		}
 674 | 	}
 675 | 	if (!pat_filename && !pat_strings)
 676 | 	{
 677 | 		if (optind < argc)
 678 | 			pat_strings = argv[optind++];
 679 | 		else
 680 | 		{
 681 | 			fprintf(stderr, "Specify PATTERN or -f FILE to read patterns from\n");
 682 | 			return EXIT_ERROR;
 683 | 		}
 684 | 	}
 685 | 	
 686 | 	/* Load patterns defining networks */
 687 | 	if (pat_filename)
 688 | 	{
 689 | 		FILE* data = fopen(pat_filename, "r");
 690 | 		if (data)
 691 | 		{
 692 | 			while (getline(&linep, &linesize, data) > 0)
 693 | 			{
 694 | 				if (*linep != '#') {
 695 | 					if(strchr(linep, ':')) {
 696 | 						struct netspec6 spec6;
 697 | 
 698 | 						if(net_parse6(linep, &spec6))
 699 | 							array_insert6(&spec6);
 700 | 						else if(!igbadpat)
 701 | 							fprintf(stderr, "Not a pattern: %s", linep);
 702 | 					} else {
 703 | 						struct netspec spec;
 704 | 
 705 | 						if (net_parse(linep, &spec))
 706 | 							array_insert(&spec);
 707 | 						else if(!igbadpat)
 708 | 							fprintf(stderr, "Not a pattern: %s", linep);
 709 | 					}
 710 | 				}
 711 | 			}
 712 | 			fclose(data);
 713 | 		}
 714 | 		else
 715 | 		{
 716 | 			perror(pat_filename);
 717 | 			return EXIT_ERROR;
 718 | 		}
 719 | 	}
 720 | 	if (pat_strings)
 721 | 	{
 722 | 		char* token = strtok(pat_strings, TOKEN_SEPS);
 723 | 		while (token)
 724 | 		{
 725 | 			if(strchr(token, ':')) {
 726 | 				struct netspec6 spec6;
 727 | 
 728 | 				if(net_parse6(token, &spec6))
 729 | 					array_insert6(&spec6);
 730 | 				else if(!igbadpat)
 731 | 					fprintf(stderr, "Not a pattern: %s\n", token);
 732 | 			} else {
 733 | 				struct netspec spec;
 734 | 
 735 | 				if (net_parse(token, &spec))
 736 | 					array_insert(&spec);
 737 | 				else if(!igbadpat)
 738 | 					fprintf(stderr, "Not a pattern: %s\n", token);
 739 | 			}
 740 | 			token = strtok(NULL, TOKEN_SEPS);
 741 | 		}
 742 | 	}
 743 | 	
 744 | 	if(!npatterns && !n6patterns) {
 745 | 		fprintf(stderr, "No patterns to match\n");
 746 | 		return EXIT_ERROR;
 747 | 	}
 748 | 
 749 | 	/* Prepare array for rapid searching */
 750 | 	if(npatterns) {
 751 | 		struct netspec *inp, *outp;
 752 | #if DEBUG
 753 | 		char *dnp;
 754 | 		if((dnp = getenv("PRESORT4")) != 0) {
 755 | 			FILE *f = fopen(dnp, "w");
 756 | 			struct netspec *p;
 757 | 			for(p = array; p < array+npatterns; p++)
 758 | 				fprintf(f, "%d.%d.%d.%d-%d.%d.%d.%d\n", p->min>>24,
 759 | 					  (p->min>>16)&255, (p->min>>8)&255, p->min&255,
 760 | 					  p->max>>24, (p->max>>16)&255, (p->max>>8)&255, p->max&255);
 761 | 			fclose(f);
 762 | 		}
 763 | #endif /* DEBUG */		
 764 | 		qsort(array, npatterns, sizeof(struct netspec), netsort);
 765 | #if DEBUG
 766 | 		if((dnp = getenv("POSTSORT4")) != 0) {
 767 | 			FILE *f = fopen(dnp, "w");
 768 | 			struct netspec *p;
 769 | 			for(p = array; p < array+npatterns; p++)
 770 | 				fprintf(f, "%d.%d.%d.%d-%d.%d.%d.%d\n", p->min>>24,
 771 | 					  (p->min>>16)&255, (p->min>>8)&255, p->min&255,
 772 | 					  p->max>>24, (p->max>>16)&255, (p->max>>8)&255, p->max&255);
 773 | 			fclose(f);
 774 | 		}
 775 | #endif /* DEBUG */		
 776 | 
 777 | 		/* combine overlapping ranges
 778 | 		 * outp is clean so far, inp is checked for overlap
 779 | 		 */
 780 | 		outp = array;
 781 | 		for (inp = array+1; inp < array+npatterns; inp++)
 782 | 		{
 783 | 			if (inp->max <= outp->max)
 784 | 				continue;		/* contained within previous range, ignore */
 785 | 
 786 | 			if(inp->min <= outp->max) {	/* overlapping ranges, combine */
 787 | 				outp->max = inp->max;
 788 | 				continue;
 789 | 			}
 790 | 			if(++outp < inp)
 791 | 				*outp = *inp;		/* move down due to previously combined or ignored */
 792 | 		}
 793 | 		npatterns = outp-array+1;		/* adjusted count after combinations */
 794 | #if DEBUG
 795 | 		if((dnp = getenv("POSTMERGE4")) != 0) {
 796 | 			FILE *f = fopen(dnp, "w");
 797 | 			struct netspec *p;
 798 | 			for(p = array; p < array+npatterns; p++)
 799 | 				fprintf(f, "%d.%d.%d.%d-%d.%d.%d.%d\n", p->min>>24,
 800 | 					  (p->min>>16)&255, (p->min>>8)&255, p->min&255,
 801 | 					  p->max>>24, (p->max>>16)&255, (p->max>>8)&255, p->max&255);
 802 | 			fclose(f);
 803 | 		}
 804 | #endif /* DEBUG */		
 805 | 	}
 806 | 	if(n6patterns) {
 807 | 		struct netspec6 *inp, *outp;
 808 | 
 809 | 		qsort(array6, n6patterns, sizeof(struct netspec6), netsort6);
 810 | 
 811 | 		/* combine overlapping ranges
 812 | 		 * outp is clean so far, inp is checked for overlap
 813 | 		 */
 814 | 		outp = array6;
 815 | 		for (inp = array6+1; inp < array6+n6patterns; inp++)
 816 | 		{
 817 | 			if (v6cmp(inp->max, outp->max) <= 0)
 818 | 				continue;		/* contained within previous range, ignore */
 819 | 
 820 | 			if(v6cmp(inp->min, outp->max)<=0) {	/* overlapping ranges, combine */
 821 | 				outp->max = inp->max;
 822 | 				continue;
 823 | 			}
 824 | 			if(++outp < inp)
 825 | 				*outp = *inp;		/* move down due to previously combined or ignored */
 826 | 		}
 827 | 		n6patterns = outp-array6+1;		/* adjusted count after combinations */
 828 | 	}
 829 | 
 830 | # if DEBUG
 831 | 	{	/* DEBUG */
 832 | 		int i,n;
 833 | 		for(n = 0; n < n6patterns; n++) {
 834 | 			printf("min %d:", n);
 835 | 			for(i = 0; i<16; i++) printf(" %02x", array6[n].min.a[i]);
 836 | 			printf("\nmax %d:",n);
 837 | 			for(i = 0; i<16; i++) printf(" %02x", array6[n].max.a[i]);
 838 | 			printf("\n");
 839 | 		}
 840 | 	}
 841 | # endif /* DEBUG */
 842 | 	if (optind >= argc) {
 843 | 		scan_read(stdin, NULL);
 844 | 	} else {
 845 | 		if(optind+1 >= argc) nonames = 1;	/* just one file, no name */
 846 | 
 847 | 		while(optind < argc) {
 848 | 			char *fn = argv[optind++];
 849 | 			FILE *f = fopen(fn, "r");
 850 | 			char *fmap;
 851 | 			size_t flen;
 852 | 			struct stat statbuf;
 853 | 		
 854 | 			if(!f) {
 855 | 				perror(fn);
 856 | 				return EXIT_ERROR;
 857 | 			}
 858 | 			if(fstat(fileno(f), &statbuf) != 0 || (statbuf.st_mode&S_IFMT)!= S_IFREG ) {
 859 | 				scan_read(f, fn);		/* can't stat or not a normal file, fall back to read */
 860 | 				fclose(f);
 861 | 				continue;
 862 | 			}
 863 | 			flen = statbuf.st_size;
 864 | 			if(flen == 0) {
 865 | 				fclose(f);	/* empty file, forget it */
 866 | 				continue;
 867 | 			}
 868 | 
 869 | 			fmap = mmap(NULL, flen, PROT_READ, MAP_SHARED, fileno(f), (off_t)0);
 870 | 			if(fmap == MAP_FAILED) {
 871 | 				perror("map failed");
 872 | 				scan_read(f, fn);	/* can't map, fall back to read */
 873 | 				fclose(f);
 874 | 				continue;
 875 | 			}
 876 | 			/* hint that it'll be read sequentially */
 877 | 			madvise(fmap, flen, MADV_SEQUENTIAL);
 878 | 
 879 | 			scan_block(fmap, flen, fn);
 880 | 			munmap(fmap, flen);
 881 | 			fclose(f);
 882 | 		}
 883 | 	}
 884 | 
 885 | 	/* Cleanup */
 886 | 	if (counting)
 887 | 		printf("%u\n", nmatch);
 888 | 	if (nmatch)
 889 | 		return EXIT_OK;
 890 | 	else
 891 | 		return EXIT_NOMATCH;
 892 | }
 893 | 
 894 | /* scan a line at a time */
 895 | static void scan_read(FILE *f, const char *fn)
 896 | {
 897 | 	ssize_t len;
 898 | 
 899 | 	while((len = getline(&linep, &linesize, f)) > 0)
 900 | 	      scan_block(linep, len, fn);
 901 | }
 902 | 
 903 | static int netmatch(const struct netspec ip4);
 904 | static int netmatch6(const struct netspec6 ip6);
 905 | 
 906 | /* scan some text, must be whole lines
 907 |  * generally either one line or the whole file
 908 |  * bp: pointer to buffer
 909 |  * blen: length of buffer
 910 |  * fn: filename for printing
 911 |  * This should handle the full V6 syntax in RFC 4291 sec 2.2 and 2.3 except for
 912 |  * :: for a zero address
 913 |  * strings of colons may confuse it
 914 |  */
 915 | static void scan_block(char *bp, size_t blen, const char *fn)
 916 | {
 917 | 	enum sscan {
 918 | 		S_BEG = 0,	/* beginning of line */
 919 | 		S_SC,		/* scan for IP */
 920 | 		S_NSC,		/* saw a dot, scan for non-digit */
 921 | 		S_IP1,		/* first octet or maybe first v6 chunk*/
 922 | 		S_IP1D,		/* dot after first octet */
 923 | 		S_IP2,		/* second octet */
 924 | 		S_IP2D,		/* dot after second octet */
 925 | 		S_IP3,		/* third octet */
 926 | 		S_IP3D,		/* dot after third octet */
 927 | 		S_IP4,		/* fourth octet */
 928 | 		S_V4SZ,		/* v4 cidr prefix */
 929 | 		S_HCH,		/* in a hi v6 chunk */
 930 | 		S_HC1,		/* hi, seen one colon */
 931 | 		S_HC2,		/* hi, seen two colons */
 932 | 		S_LCH,		/* in a low chunk */
 933 | 		S_LC1,		/* seen a low colon */
 934 | 		S_IC1,		/* seen initial colon */
 935 | 		S_EIP1D,	/* dot after first octet in embedded v4 */
 936 | 		S_EIP2,		/* second octet */
 937 | 		S_EIP2D,	/* dot after second octet */
 938 | 		S_EIP3,		/* third octet */
 939 | 		S_EIP3D,	/* dot after third octet */
 940 | 		S_EIP4,		/* fourth octet */
 941 | 		S_V6SZ,		/* v6 cidr prefix */
 942 | 		S_SCNL,		/* scan for new line */
 943 | 		S_SCNLP		/* scan for new line and print line */
 944 | 	} state;
 945 | 	enum sscan snext = anchor?S_SCNL:S_SC;	/* state after not an IP */
 946 | 
 947 | 	char *p = bp;		/* current character */
 948 | 	char *plim = bp+blen;	/* end of buffer */
 949 | 	char *lp = bp;		/* beginning of current line */
 950 | 	unsigned int ip4 = 0;	/* IPv4 value */
 951 | 	int octet = 0;		/* current octet */
 952 | 	int size = -1;		/* CIDR size */
 953 | 	v6addr ahi;		/* high part of address */
 954 | 	v6addr alo;		/* low part of address */
 955 | 	struct netspec range4;  /* IPv4 address or range */
 956 | 	struct netspec6 range6; /* IPv6 address or range */
 957 | 	int nhi = 0;		/* how many bytes in ahi */
 958 | 	int nlo = 0;		/* how many bytes in alo */
 959 | 	unsigned int chunk = 0;	/* current 16 bit chunk */
 960 | 	int seenone = 0;	/* seen an address on this line, for -v */
 961 | 
 962 | 	state = S_BEG;
 963 | 	for(p = bp; p < plim;) {
 964 | 		int ch = *p++;
 965 | 
 966 | 		switch(state) {
 967 | 			case S_BEG:	/* beginning of line */
 968 | 				lp = p-1;
 969 | 				seenone = 0;
 970 | 				/* skip leading spaces */
 971 | 				while(p < plim && (ch == ' ' || ch == '\t'))
 972 | 					ch = *p++;
 973 | 				/* fall through */
 974 | 
 975 | 			case S_SC:		/* normal scanning */
 976 | 				if(isdigit(ch)) {	/* start a potential IP of either type */
 977 | 					ip4 = 0;
 978 | 					state = S_IP1;
 979 | 					nhi = nlo = 0;
 980 | 					octet = chunk = ch-'0';
 981 | 					continue;
 982 | 				} else if(isxdigit(ch)) {
 983 | 					state = S_HCH;
 984 | 					nhi = nlo = 0;
 985 | 					octet = -1;	/* hex, not v4 */
 986 | 					chunk = xtod(ch);
 987 | 					continue;
 988 | 				} else if(ch == ':') {
 989 | 					state = S_IC1;
 990 | 					continue;
 991 | 				} else if(quick && ch == '.') {
 992 | 					state = S_NSC;
 993 | 					continue;
 994 | 				}
 995 | 				break;
 996 | 
 997 | 			case S_NSC:		/* ignore crud after a dot */
 998 | 				if(isdigit(ch) || ch == '.')
 999 | 					continue;
1000 | 				state = S_SC;
1001 | 				break;
1002 | 
1003 | 			case S_IC1:		/* initial colon must be two colons and lo part */
1004 | 				if(ch == ':') {
1005 | 					nhi = nlo = 0;
1006 | 					state = S_HC2;
1007 | 					continue;
1008 | 				}
1009 | 				/* rescan as normal in case it was
1010 | 				 * a random colon before an IP
1011 | 				 */
1012 | 				state = S_SC;
1013 | 				p--;
1014 | 				continue;
1015 | 
1016 | 			case S_HCH:	/* high v6 chunk */
1017 | 				if(isxdigit(ch)) {
1018 | 					chunk = (chunk<<4) + xtod(ch);
1019 | 					if(isdigit(ch))
1020 | 						octet = octet*10 + ch-'0';	/* in case it turns out to be v4 */
1021 | 					else
1022 | 						octet = -1;			/* hex, can't be v4 */
1023 | 					continue;
1024 | 				}
1025 | 				/* finish the current chunk */
1026 | 				if(ch == '.' && nhi < 14 && octet >= 0) { /* possible v4 address, is it embedded? */
1027 | 					if(octet > 255) { /* not a real address */
1028 | 						break;
1029 | 					}
1030 | 					/* is it embedded? */
1031 | 					if(nhi == 12) {
1032 | 						ahi.a[nhi++] = octet;
1033 | 						state = S_EIP1D;
1034 | 						continue;
1035 | 					}
1036 | 					/* v6 address was too short,
1037 | 					 * must be a regular v4 address
1038 | 					 */
1039 | 					ip4 = octet;
1040 | 					state = S_IP1D;	/* corresponding dot state */
1041 | 					continue;
1042 | 				}
1043 | 				if(chunk > 0xffff)
1044 | 					break;		/* value too big */
1045 | 				if(nhi < 16) {	/* if too long, keep parsing to avoid strange matches */
1046 | 					ahi.a[nhi++] = chunk >> 8;	/* big-endian for memcmp() */
1047 | 					ahi.a[nhi++] = chunk & 255;
1048 | 				}
1049 | 				if(ch == ':') {
1050 | 					state = S_HC1;
1051 | 					continue;
1052 | 				}
1053 | 				/* was it full address? */
1054 | 				if(nhi == 16) {
1055 | 					if(!n6patterns) break;	/* no v6 patterns */
1056 | 					if(cidrsearch && ch == '/') {
1057 | 						size = 0;
1058 | 						state = S_V6SZ;
1059 | 						continue;
1060 | 					}
1061 | 					seenone = 1;
1062 | 					range6.min = range6.max = ahi;
1063 | 					if(!netmatch6(range6))
1064 | 						break; /* didn't match */
1065 | 					state = S_SCNLP;
1066 | 					goto scnlp;	/* in case it was a \n */
1067 | 				}
1068 | 				break;	/* partial address, not an IP */
1069 | 
1070 | 			case S_HC1:	/* colon separator in hi part */
1071 | 				if(isxdigit(ch)) {
1072 | 					chunk = xtod(ch);
1073 | 					if(isdigit(ch))
1074 | 						octet = ch-'0';
1075 | 					else
1076 | 						octet = -1;
1077 | 					state = S_HCH;
1078 | 					continue;
1079 | 				}
1080 | 				if(ch == ':') {	/* two colons, might be end or lo part can follow */
1081 | 					state = S_HC2;
1082 | 					continue;
1083 | 				}
1084 | 				break;	/* not an IP */
1085 | 
1086 | 			case S_HC2:	/* seen high:: might be end or might be low chunk */
1087 | 				if(isxdigit(ch)) {	/* two colons and digit, start low chunks */
1088 | 					chunk = xtod(ch);
1089 | 					if(isdigit(ch))
1090 | 						octet = chunk;
1091 | 					else
1092 | 						octet = -1;
1093 | 					state = S_LCH;
1094 | 					continue;
1095 | 				}
1096 | 
1097 | 				/* high part only, check it */
1098 | 				if(!nhi) {
1099 | 					if(ch == ':')	/* string of possibly leading colons */
1100 | 						continue;
1101 | 					break;	/* don't match :: as zero address */
1102 | 				}
1103 | 				if(!n6patterns) break;	/* no v6 patterns */
1104 | 				memset(ahi.a+nhi, 0, 16-nhi);	/* zero low bytes */
1105 | 				if(cidrsearch && ch == '/') {
1106 | 					size = 0;
1107 | 					state = S_V6SZ;
1108 | 					continue;
1109 | 				} else
1110 | 					size = -1;
1111 | 
1112 | 				seenone = 1;
1113 | 				range6.min = range6.max = ahi;
1114 | 				if(!netmatch6(range6))
1115 | 					break; /* didn't match */
1116 | 				state = S_SCNLP;
1117 | 				goto scnlp;	/* in case it was a \n */
1118 | 
1119 | 			case S_V6SZ:
1120 | 				if(isdigit(ch)) {
1121 | 					if (size >= 0)
1122 | 						size = size*10 + ch-'0';
1123 | 					if(size > 128) /* gobble up the rest */
1124 | 						size = -1;
1125 | 					continue;
1126 | 				}
1127 | 				if(!n6patterns) break;	/* no v6 patterns */
1128 | 				seenone = 1;
1129 | 				if (size < 0) size = 0; /* ignore bad prefix */
1130 | 				/* TODO: check badbits? naah */
1131 | 				applymask6(ahi, size, &range6);
1132 | 				if(!netmatch6(range6))
1133 | 					break; /* didn't match */
1134 | 				state = S_SCNLP;
1135 | 				goto scnlp;	/* in case it was a \n */
1136 | 
1137 | 			case S_LCH:		/* low chunk */
1138 | 				if(isxdigit(ch)) {
1139 | 					chunk = (chunk<<4)+xtod(ch);
1140 | 					if(isdigit(ch))
1141 | 						octet = octet*10 + ch-'0';	/* in case it turns out to be v4 */
1142 | 					else
1143 | 						octet = -1;
1144 | 					continue;
1145 | 				}
1146 | 				/* finish the current chunk */
1147 | 				if(ch == '.' && octet >= 0 && octet <= 255) { /* maybe a v4 address */
1148 | 					if((nhi+nlo) < 12) { /* embedded v4 */
1149 | 						/* move all into ahi */
1150 | 						memset(ahi.a+nhi, 0, 12-(nhi+nlo));
1151 | 						if(nlo)
1152 | 							memcpy(ahi.a+12-nlo, alo.a, nlo);
1153 | 						nhi = 12;
1154 | 						ahi.a[nhi++] = octet;
1155 | 						state = S_EIP1D;
1156 | 						continue;
1157 | 					}
1158 | 				}
1159 | 				/* doesn't look like an octet, or too
1160 | 				 * long to be embedded, treat as
1161 | 				 * likely v6
1162 | 				 */
1163 | 				if(chunk > 0xffff) break;	/* too big for a chunk */
1164 | 				if(nlo < 16) {	/* keep parsing overlong to avoid strange results */
1165 | 					alo.a[nlo++] = chunk >> 8;	/* big-endian for memcmp() */
1166 | 					alo.a[nlo++] = chunk & 255;
1167 | 				}
1168 | 				if(ch == ':') {
1169 | 					state = S_LC1;
1170 | 					continue;
1171 | 				}
1172 | 				/* end of lo part, check it */
1173 | 				if(!n6patterns) break;		/* no v6 patterns */
1174 | 				if((nhi+nlo) >= 14) break;	/* too many chunks. not an IP */
1175 | 				memset(ahi.a+nhi, 0, 16-(nhi+nlo));	/* combine hi and lo parts */
1176 | 				memcpy(ahi.a+(16-nlo), alo.a, nlo);
1177 | 				if(cidrsearch && ch == '/') {
1178 | 					state = S_V6SZ;
1179 | 					size = 0;
1180 | 					continue;
1181 | 				}
1182 | 				seenone = 1;
1183 | 				range6.min = range6.max = ahi;
1184 | 				if(!netmatch6(range6))
1185 | 					break; /* didn't match */
1186 | 				state = S_SCNLP;
1187 | 				goto scnlp;	/* in case it was a \n */
1188 | 
1189 | 			case S_LC1:	/* seen a colon after a low chunk */
1190 | 				if(isxdigit(ch)) {
1191 | 					chunk = xtod(ch);
1192 | 					if(isdigit(ch))
1193 | 						octet = chunk;
1194 | 					else
1195 | 						octet = -1;
1196 | 					state = S_LCH;
1197 | 					continue;
1198 | 				}
1199 | 				break;	/* trailing junk, not an IP */
1200 | 
1201 | 			case S_IP1:	/* in an IP address, don't know yet which kind */
1202 | 				if(isxdigit(ch)) {
1203 | 					chunk = (chunk<<4) + xtod(ch);
1204 | 					if(!isdigit(ch)) {
1205 | 						state = S_HCH;	/* doesn't look like a v4 address */
1206 | 						octet = -1;
1207 | 						continue;
1208 | 					}
1209 | 				} else if(ch == ':') {
1210 | 					/* finish the current chunk,
1211 | 					 * which must be chunk 0 */
1212 | 					ahi.a[nhi++] = chunk >> 8;	/* big-endian for memcmp() */
1213 | 					ahi.a[nhi++] = chunk & 255;
1214 | 					state = S_HC1;
1215 | 					continue;
1216 | 				}
1217 | 				/* fall through */
1218 | 			case S_IP2:
1219 | 			case S_IP3:
1220 | 				if(isdigit(ch)) {
1221 | 					octet = octet*10 + ch-'0';
1222 | 					continue;
1223 | 				}
1224 | 				if(ch == '.') {
1225 | 					if(octet > 255) { /* not a real address */
1226 | 						break;
1227 | 					}
1228 | 					ip4 <<= 8;
1229 | 					ip4 += octet;
1230 | 					state++;	/* corresponding dot state */
1231 | 					continue;
1232 | 				}
1233 | 				/* otherwise, wasn't a full IP */
1234 | 				break;
1235 | 
1236 | 			case S_IP1D:	/* saw dot after an octet */
1237 | 			case S_IP2D:
1238 | 			case S_IP3D:
1239 | 			case S_EIP1D:	/* saw dot after an embedded octet */
1240 | 			case S_EIP2D:
1241 | 			case S_EIP3D:
1242 | 				if(isdigit(ch)) {
1243 | 					octet = ch-'0';
1244 | 					state++;	/* next digit state */
1245 | 					continue;
1246 | 				}
1247 | 				break;	/* wasn't an IP */
1248 | 
1249 | 			case S_IP4:	/* in last octet */
1250 | 				if(isdigit(ch)) {
1251 | 					octet = octet*10 + ch-'0';
1252 | 					continue;
1253 | 				}
1254 | 				/* OK, we have the IP */
1255 | 				if(quick && ch == '.') {	/* seen crud, skip it */
1256 | 					state = S_NSC;
1257 | 					continue;
1258 | 				}
1259 | 				if(octet > 255) { /* not a real address */
1260 | 					break;
1261 | 				}
1262 | 				ip4 <<= 8;
1263 | 				ip4 += octet;
1264 | 				if(!npatterns) break; /* no v4 patterns */
1265 | 				if(cidrsearch && ch == '/') {
1266 | 					state = S_V4SZ;
1267 | 					size = 0;
1268 | 					continue;
1269 | 				}
1270 | 				seenone = 1;
1271 | 				range4.min = range4.max = ip4;
1272 | 				if(!netmatch(range4))
1273 | 					break; /* didn't match */
1274 | 				state = S_SCNLP;
1275 | 				goto scnlp;	/* in case it was a \n */
1276 | 
1277 |                         case S_V4SZ:    /* cidr size */
1278 | 				if(isdigit(ch)) {
1279 | 					if (size >= 0)
1280 | 						size = size*10 + ch-'0';
1281 | 					if(size > 32) /* gobble up the rest */
1282 | 						size = -1;
1283 | 					continue;
1284 | 				}
1285 | 				seenone = 1;
1286 | 				range4.min = range4.max = ip4;
1287 | 				if(size >= 0) {		/* ignore bad prefix */
1288 | 					int mask = (1L<<(32-size))-1;
1289 | 					range4.min &= ~mask; /* force to CIDR boundary */
1290 | 					range4.max |= mask;
1291 | 				}
1292 | 				if(!netmatch(range4))
1293 | 					break; /* didn't match */
1294 | 				state = S_SCNLP;
1295 | 				goto scnlp;	/* in case it was a \n */
1296 | 				
1297 | 			case S_EIP2:	/* in embedded octet */
1298 | 			case S_EIP3:
1299 | 				if(isdigit(ch)) {
1300 | 					octet = octet*10 + ch-'0';
1301 | 					continue;
1302 | 				}
1303 | 				if(ch == '.') {
1304 | 					if(octet > 255) { /* not a real address */
1305 | 						break;
1306 | 					}
1307 | 					ahi.a[nhi++] = octet;
1308 | 					state++;	/* corresponding dot state */
1309 | 					continue;
1310 | 				}
1311 | 				/* otherwise, wasn't a full IP */
1312 | 				break;
1313 | 
1314 | 			case S_EIP4:	/* in last embedded octet */
1315 | 				if(isdigit(ch)) {
1316 | 					octet = octet*10 + ch-'0';
1317 | 					continue;
1318 | 				}
1319 | 				/* OK, we have the IP */
1320 | 				if(quick && ch == '.') {	/* seen crud, skip it */
1321 | 					state = S_NSC;
1322 | 					continue;
1323 | 				}
1324 | 				if(octet > 255) { /* not a real address */
1325 | 					break;
1326 | 				}
1327 |                                 /* no CIDR allowed with IPv4 embedded in IPv6 */
1328 | 				ahi.a[nhi++] = octet;
1329 | 				seenone = 1;
1330 | 				if(n6patterns) {
1331 | 					range6.min = range6.max = ahi;
1332 | 					if(netmatch6(range6)) {	/* try a v6 pattern */
1333 | 						state = S_SCNLP;
1334 | 						goto scnlp;	/* in case it was a \n */
1335 | 					}
1336 | 				}
1337 | 				/* get the v4 address as an int and try
1338 | 				 * that */
1339 | 				ip4 = (ahi.a[12]<<24)|(ahi.a[13]<<16)|(ahi.a[14]<<8)|ahi.a[15];
1340 | 				if(cidrsearch && ch == '/') {
1341 | 					state = S_V4SZ;
1342 | 					size = 0;
1343 | 					continue;
1344 | 				}
1345 | 				range4.min = range4.max = ip4;
1346 | 				if(!npatterns || !netmatch(range4))
1347 | 					break; /* didn't match */
1348 | 
1349 | 				state = S_SCNLP;
1350 | 				/* fall through, in case it was a \n */
1351 | 
1352 | scnlp:
1353 | 			case S_SCNLP:	/* print this line */
1354 | 				/* HACK scan the rest of the line fast */
1355 | 				while(ch != '\n' && p < plim)
1356 | 					ch = *p++;
1357 | 
1358 | 				if(ch == '\n') {
1359 | 					if(!invert) {
1360 | 						nmatch++;
1361 | 						if(!counting) {
1362 | 							if(fn && !nonames)
1363 | 								printf("%s:", fn);
1364 | 							fwrite(lp, 1, p-lp, stdout);
1365 | 						}
1366 | 					}
1367 | 					state = S_BEG;
1368 | 				}
1369 | 				continue;
1370 | 
1371 | 			case S_SCNL:
1372 | 				/* HACK scan the rest of the line fast */
1373 | 				while(ch != '\n' && p < plim)
1374 | 					ch = *p++;
1375 | 				break;
1376 | 		}
1377 | 		/* default action if it wasn't an IP */
1378 | 		if(ch == '\n') {
1379 | 			if(invert && seenone) {	/* -v prints or counts lines with IPs that didn't match */
1380 | 				nmatch++;
1381 | 				if(!counting) {
1382 | 					if(fn && !nonames)
1383 | 						printf("%s:", fn);
1384 | 					fwrite(lp, 1, p-lp, stdout);
1385 | 				}
1386 | 			}
1387 | 			state = S_BEG;
1388 | 		} else
1389 | 			state = snext;
1390 | 		continue;
1391 | 
1392 | 	}
1393 | } /* scan_block */
1394 | 
1395 | /*
1396 |  * binary range search for a value
1397 |  */
1398 | static int
1399 | netmatch(const struct netspec ip4)
1400 | {
1401 | 	int minx = 0;
1402 | 	int maxx = npatterns-1;
1403 | 	int tryx = 0;
1404 | 
1405 | 	if(!npatterns) return 0;	/* nothing to match */
1406 | 
1407 | # if DEBUG
1408 | 	{	/* DEBUG */
1409 | 
1410 | 		assert(npatterns);	/* don't call this if there are no v4 patterns */
1411 | 		printf("match: %x %d.%d.%d.%d-%x %d.%d.%d.%d\n", ip4.min, ip4.min>>24,
1412 | 		       (ip4.min>>16)&255, (ip4.min>>8)&255, ip4.min&255,
1413 | 		       ip4.max, ip4.max>>24, (ip4.max>>16)&255, (ip4.max>>8)&255, ip4.max&255);
1414 | 	}
1415 | # endif
1416 | 	/* make sure it's in range */
1417 | 	if(ip4.max < array[0].min || ip4.min > array[maxx].max) return 0;
1418 | 
1419 | 	while(minx <= maxx) {
1420 | 		tryx = (minx+maxx)/2;
1421 | # if DEBUG
1422 | 		if(getenv("TRY")) printf("try %d:%d -> %d %x %x\n", minx, maxx, tryx, array[tryx].min, array[tryx].max);
1423 | # endif
1424 | 
1425 | 		if(ip4.max < array[tryx].min) {
1426 | 			maxx = tryx-1;
1427 | 			continue;
1428 | 		}
1429 | 		if(ip4.min > array[tryx].max) {
1430 | 			minx = tryx+1;
1431 | 			continue;
1432 | 		}
1433 | 		break;	/* gee, we may have found it */
1434 | 	}
1435 | 
1436 | 	if(ip4.min >= array[tryx].min && ip4.max <= array[tryx].max) return 1; /* target in pattern */
1437 | 	if(didrsearch) {	/* look for overlap */
1438 | 		if(ip4.min <= array[tryx].min && ip4.max >= array[tryx].max) return 1; /* pattern in target */
1439 | 		if(ip4.min >= array[tryx].min && ip4.min <= array[tryx].max) return 1; /* base of target in pattern */
1440 | 		if(ip4.max >= array[tryx].min && ip4.max <= array[tryx].max) return 1; /* end of target in pattern */
1441 | 	}
1442 | 	return 0;	/* not in the current entry */
1443 | }
1444 | 
1445 | static int
1446 | netmatch6(const struct netspec6 ip6)
1447 | {
1448 | 	int minx = 0;
1449 | 	int maxx = n6patterns-1;
1450 | 	int tryx = 0;
1451 | 
1452 | 	if(!n6patterns) return 0;	/* nothing to match */
1453 | 
1454 | # if DEBUG
1455 | 	{	/* DEBUG */
1456 | 		int i;
1457 | 
1458 | 		assert(n6patterns);	/* don't call this if there are no v6 patterns */
1459 | 		printf("match:");
1460 | 		for(i = 0; i<16; i++) printf(" %02x", ip6.min.a[i]);
1461 | 		printf("-");
1462 | 		for(i = 0; i<16; i++) printf(" %02x", ip6.max.a[i]);
1463 | 		printf("\n");
1464 | 	}
1465 | # endif
1466 | 	/* make sure it's in range */
1467 | 	if(v6cmp(ip6.max, array6[0].min) < 0 || v6cmp(ip6.min, array6[maxx].max) > 0) {
1468 | #if DEBUG
1469 | 		printf("out of range\n");
1470 | #endif
1471 | 		return 0;
1472 | 	}
1473 | 
1474 | 	while(minx <= maxx) {
1475 | 		tryx = (minx+maxx)/2;
1476 | 
1477 | 		if(v6cmp(ip6.max, array6[tryx].min)<0) {
1478 | 			maxx = tryx-1;
1479 | 			continue;
1480 | 		}
1481 | 		if(v6cmp(ip6.min, array6[tryx].max)>0) {
1482 | 			minx = tryx+1;
1483 | 			continue;
1484 | 		}
1485 | 		break; /* gee, we may have found it */
1486 | 	}
1487 | 
1488 | # if DEBUG
1489 | 	{	/* DEBUG */
1490 | 		int i;
1491 | 
1492 | 		assert(n6patterns);	/* don't call this if there are no v6 patterns */
1493 | 		printf("candidate: %d/%d", minx, maxx);
1494 | 		for(i = 0; i<16; i++) printf(" %02x", array6[minx].min.a[i]);
1495 | 		printf("-");
1496 | 		for(i = 0; i<16; i++) printf(" %02x", array6[minx].max.a[i]);
1497 | 		printf("\n");
1498 | 	}
1499 | # endif
1500 | 
1501 | 	if(v6cmp(ip6.min, array6[tryx].min) >= 0 && v6cmp(ip6.max, array6[tryx].max) <= 0) return 1; /* target in pattern */
1502 | 	if(didrsearch) {
1503 | 		if(v6cmp(ip6.min, array6[tryx].min) <= 0 && v6cmp(ip6.max, array6[tryx].max) >= 0) return 1; /* pattern in target */
1504 | 		if(v6cmp(ip6.min, array6[tryx].min) >= 0 && v6cmp(ip6.min, array6[tryx].max) <= 0) return 1; /* base in pattern */
1505 | 		if(v6cmp(ip6.max, array6[tryx].min) >= 0 && v6cmp(ip6.max, array6[tryx].max) <= 0) return 1; /* end in target */
1506 | 	}
1507 | 	return 0;	/* not in the current entry */
1508 | }
1509 | 


--------------------------------------------------------------------------------