├── AUTHORS ├── .gitmodules ├── test.js ├── nodelint.js ├── package.json ├── BENCHMARK.md ├── testdata ├── test.html └── benchmark.html ├── benchmark.js ├── example.js ├── README.md ├── lib └── soupselect.js └── tests └── soupselect.js /AUTHORS: -------------------------------------------------------------------------------- 1 | # Authors ordered by first contribution 2 | 3 | Simon Willison https://github.com/simonw 4 | Harry Fuecks https://github.com/harryf 5 | Chris O'Hara https://github.com/chriso -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "deps/nodeunit"] 2 | path = deps/nodeunit 3 | url = git://github.com/caolan/nodeunit.git 4 | [submodule "deps/htmlparser"] 5 | path = deps/htmlparser 6 | url = http://github.com/tautologistics/node-htmlparser.git 7 | -------------------------------------------------------------------------------- /test.js: -------------------------------------------------------------------------------- 1 | require.paths.unshift('lib') 2 | require.paths.push('.') 3 | require.paths.push('deps/nodeunit/lib') 4 | 5 | var reporter = require('reporters/default'); 6 | var args = process.ARGV.slice(2); 7 | if(args.length > 0) { 8 | reporter.run(args); 9 | } else { 10 | reporter.run(['tests']) 11 | } 12 | -------------------------------------------------------------------------------- /nodelint.js: -------------------------------------------------------------------------------- 1 | // nodelint (JSLint) options 2 | // run like: nodelint lib/soupselect.js --config nodelint.js 3 | var options = { 4 | adsafe : false, 5 | bitwise : false, 6 | browser : false, 7 | error_prefix : "\u001b[1m", 8 | error_suffix : ":\u001b[0m ", 9 | onevar : false, 10 | plusplus : false, 11 | regexp : false, 12 | undef : false, 13 | white : false, 14 | }; -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "soupselect", 3 | "version": "0.2.0", 4 | "engines": { 5 | "node": ">=0.2.0" 6 | }, 7 | "author": { 8 | "name": "Harry Fuecks", 9 | "email": "hfuecks@gmail.com", 10 | "url": "http://twitter.com/hfuecks" 11 | }, 12 | "url": "http://github.com/harryf/node-soupselect", 13 | "dependencies": { 14 | "htmlparser": ">= 1.6.2", 15 | "nodeunit": ">= 0.3.0" 16 | }, 17 | "repository" : [ 18 | { "type":"git", "url":"git://github.com/harryf/node-soupselect.git" } 19 | ], 20 | "main": "./lib/soupselect", 21 | "license": "MIT", 22 | "description": "Adds CSS selector support to htmlparser for scraping activities - port of soupselect (python)" 23 | } 24 | -------------------------------------------------------------------------------- /BENCHMARK.md: -------------------------------------------------------------------------------- 1 | A log of benchmarks to track changes across releases. See benchmark.js 2 | 3 | 2010.10.06, version 0.1.0 4 | ------------------------- 5 | 6 | body : 4ms, 1 elements 7 | div : 3ms, 51 elements 8 | body div : 7ms, 51 elements 9 | div p : 4ms, 137 elements 10 | div p a : 3ms, 29 elements 11 | .note : 3ms, 14 elements 12 | div.example : 2ms, 43 elements 13 | ul .tocline2 : 3ms, 12 elements 14 | #title : 0ms, 1 elements 15 | h1#title : 0ms, 1 elements 16 | div #title : 3ms, 1 elements 17 | ul.toc li.tocline2 : 2ms, 12 elements 18 | div[class] : 3ms, 51 elements 19 | div[class=example] : 2ms, 43 elements 20 | div[class^=exa] : 3ms, 43 elements 21 | div[class$=mple] : 3ms, 43 elements 22 | div[class*=e] : 2ms, 50 elements 23 | div[class|=dialog] : 5ms, 0 elements 24 | div[class!=made_up] : 2ms, 51 elements 25 | div[class~=example] : 6ms, 43 elements 26 | -------------------------------------------------------------------------------- /testdata/test.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | The title 6 | 7 | 8 | 9 | 10 |
11 |
12 |

An H1

13 |

Some text

14 |

Some more text

15 |

An H2

16 |

Another

17 | Bob 18 |

Another H2

19 | me 20 | Test foo1bar1 21 | bar2 22 | OK foo2 seriously 23 |
24 |

English

25 |

English UK

26 |

English US

27 |

French

28 |
29 | 30 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /benchmark.js: -------------------------------------------------------------------------------- 1 | /* 2 | Based on the http://mootools.net/slickspeed/ benchmarks. Uses the same sample document 3 | but only runs those tests where the CSS syntax used is supported by soupselect 4 | */ 5 | 6 | var select = require('soupselect').select, 7 | htmlparser = require("htmlparser"), 8 | fs = require('fs'); 9 | 10 | var html = fs.readFileSync('testdata/benchmark.html', 'utf-8'); 11 | 12 | var selectors = [ 13 | 'body', 14 | 'div', 15 | 'body div', 16 | 'div p', 17 | 'div p a', 18 | '.note', 19 | 'div.example', 20 | 'ul .tocline2', 21 | '#title', 22 | 'h1#title', 23 | 'div #title', 24 | 'ul.toc li.tocline2', 25 | 'div[class]', 26 | 'div[class=example]', 27 | 'div[class^=exa]', 28 | 'div[class$=mple]', 29 | 'div[class*=e]', 30 | 'div[class|=dialog]', 31 | 'div[class!=made_up]', 32 | 'div[class~=example]', 33 | ]; 34 | 35 | selectors.forEach(function(selector) { 36 | 37 | var handler = new htmlparser.DefaultHandler(function(err, dom) { 38 | if (err) { 39 | console.error("Error: " + err); 40 | } else { 41 | var start = new Date().getTime(); 42 | var els = select(dom, selector); 43 | var elapsed = new Date().getTime() - start; 44 | sys.puts(selector + " : " + elapsed + "ms, " + els.length + " elements"); 45 | } 46 | }); 47 | 48 | var parser = new htmlparser.Parser(handler); 49 | parser.parseComplete(html); 50 | 51 | }); -------------------------------------------------------------------------------- /example.js: -------------------------------------------------------------------------------- 1 | var select = require('./lib/soupselect').select, 2 | htmlparser = require("htmlparser"), 3 | http = require('http'); 4 | 5 | // fetch some HTML... 6 | var http = require('http'); 7 | var host = 'www.reddit.com'; 8 | var client = http.createClient(80, host); 9 | var request = client.request('GET', '/',{'host': host}); 10 | 11 | request.on('response', function (response) { 12 | response.setEncoding('utf8'); 13 | 14 | var body = ""; 15 | response.on('data', function (chunk) { 16 | body = body + chunk; 17 | }); 18 | 19 | response.on('end', function() { 20 | 21 | // now we have the whole body, parse it and select the nodes we want... 22 | var handler = new htmlparser.DefaultHandler(function(err, dom) { 23 | if (err) { 24 | console.error("Error: " + err); 25 | } else { 26 | 27 | // soupselect happening here... 28 | var titles = select(dom, 'a.title'); 29 | 30 | sys.puts("Top stories from reddit"); 31 | titles.forEach(function(title) { 32 | sys.puts("- " + title.children[0].raw + " [" + title.attribs.href + "]\n"); 33 | }) 34 | } 35 | }); 36 | 37 | var parser = new htmlparser.Parser(handler); 38 | parser.parseComplete(body); 39 | }); 40 | }); 41 | request.end(); 42 | 43 | 44 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | node-soupselect 2 | --------------- 3 | 4 | A port of Simon Willison's [soupselect](http://code.google.com/p/soupselect/) for use with node.js and node-htmlparser. 5 | 6 | $ npm install soupselect 7 | 8 | Minimal example... 9 | 10 | var select = require('soupselect').select; 11 | // dom provided by htmlparser... 12 | select(dom, "#main a.article").forEach(function(element) {//...}); 13 | 14 | Wanted a friendly way to scrape HTML using node.js. Tried using [jsdom](http://github.com/tmpvar/jsdom), prompted by [this article](http://blog.nodejitsu.com/jsdom-jquery-in-5-lines-on-nodejs) but, unfortunately, [jsdom](http://github.com/tmpvar/jsdom) takes a strict view of lax HTML making it unusable for scraping the kind of soup found in real world web pages. Luckily [htmlparser](http://github.com/tautologistics/node-htmlparser/) is more forgiving. More details on this found [here](http://www.reddit.com/r/node/comments/dm0tz/nodesoupselect_for_scraping_html_with_css/c118r23). 15 | 16 | A complete example including fetching HTML etc...; 17 | 18 | var select = require('soupselect').select, 19 | htmlparser = require("htmlparser"), 20 | http = require('http'); 21 | 22 | // fetch some HTML... 23 | var http = require('http'); 24 | var host = 'www.reddit.com'; 25 | var client = http.createClient(80, host); 26 | var request = client.request('GET', '/',{'host': host}); 27 | 28 | request.on('response', function (response) { 29 | response.setEncoding('utf8'); 30 | 31 | var body = ""; 32 | response.on('data', function (chunk) { 33 | body = body + chunk; 34 | }); 35 | 36 | response.on('end', function() { 37 | 38 | // now we have the whole body, parse it and select the nodes we want... 39 | var handler = new htmlparser.DefaultHandler(function(err, dom) { 40 | if (err) { 41 | console.error("Error: " + err); 42 | } else { 43 | 44 | // soupselect happening here... 45 | var titles = select(dom, 'a.title'); 46 | 47 | sys.puts("Top stories from reddit"); 48 | titles.forEach(function(title) { 49 | sys.puts("- " + title.children[0].raw + " [" + title.attribs.href + "]\n"); 50 | }) 51 | } 52 | }); 53 | 54 | var parser = new htmlparser.Parser(handler); 55 | parser.parseComplete(body); 56 | }); 57 | }); 58 | request.end(); 59 | 60 | Notes: 61 | 62 | * Requires node-htmlparser > 1.6.2 & node.js 2+ 63 | * Calls to select are synchronous - not worth trying to make it asynchronous IMO given the use case 64 | 65 | -------------------------------------------------------------------------------- /lib/soupselect.js: -------------------------------------------------------------------------------- 1 | /** 2 | Port of Simon Willison's Soup Select http://code.google.com/p/soupselect/ 3 | http://www.opensource.org/licenses/mit-license.php 4 | 5 | MIT licensed http://www.opensource.org/licenses/mit-license.php 6 | */ 7 | 8 | var domUtils = require("htmlparser").DomUtils; 9 | 10 | var tagRe = /^[a-z0-9]+$/; 11 | 12 | /* 13 | /^(\w+)?\[(\w+)([=~\|\^\$\*]?)=?"?([^\]"]*)"?\]$/ 14 | \---/ \---/\-------------/ \-------/ 15 | | | | | 16 | | | | The value 17 | | | ~,|,^,$,* or = 18 | | Attribute 19 | Tag 20 | */ 21 | var attrSelectRe = /^(\w+)?\[(\w+)([=~\|\^\$\*]?)=?"?([^\]"]*)"?\]$/; 22 | 23 | /** 24 | Takes an operator and a value and returns a function which can be used to 25 | test other values against test provided value using the given operation 26 | Used to checking attribute values for attribute selectors 27 | */ 28 | function makeValueChecker(operator, value) { 29 | value = typeof(value) === 'string' ? value : ''; 30 | 31 | return operator ? { 32 | '=': function ( test_value ) { return test_value === value; }, 33 | // attribute includes value as one of a set of space separated tokens 34 | '~': function ( test_value ) { return test_value ? test_value.split(/\s+/).indexOf(value) !== -1 : false; }, 35 | // attribute starts with value 36 | '^': function ( test_value ) { return test_value ? test_value.substr(0, value.length) === value : false; }, 37 | // attribute ends with value 38 | '$': function ( test_value ) { return test_value ? test_value.substr(-value.length) === value : false; }, 39 | // attribute contains value 40 | '*': function ( test_value ) { return test_value ? test_value.indexOf(value) !== -1 : false; }, 41 | // attribute is either exactly value or starts with value- 42 | '|': function ( test_value ) { return test_value ? test_value === value || 43 | test_value.substr(0, value.length + 1) === value + '-' : false; }, 44 | // default to just check attribute existence... 45 | }[operator] : function ( test_value ) { return test_value ? true : false; }; 46 | 47 | } 48 | 49 | /** 50 | Takes a dom tree or part of one from htmlparser and applies 51 | the provided selector against. The returned value is also 52 | a valid dom tree, so can be passed by into 53 | htmlparser.DomUtil.* calls 54 | */ 55 | exports = exports.select = function(dom, selector) { 56 | var currentContext = [dom]; 57 | var found, tag, options; 58 | 59 | var tokens = selector.split(/\s+/); 60 | 61 | for ( var i = 0; i < tokens.length; i++ ) { 62 | 63 | // Attribute selectors 64 | var match = attrSelectRe.exec(tokens[i]); 65 | if ( match ) { 66 | var attribute = match[2], operator = match[3], value = match[4]; 67 | tag = match[1]; 68 | options = {}; 69 | options[attribute] = makeValueChecker(operator, value); 70 | 71 | found = []; 72 | for (var j = 0; j < currentContext.length; j++ ) { 73 | found = found.concat(domUtils.getElements(options, currentContext[j])); 74 | }; 75 | 76 | if ( tag ) { 77 | // Filter to only those matching the tag name 78 | found = domUtils.getElements({ 'tag_name': tag }, found, false); 79 | } 80 | 81 | currentContext = found; 82 | 83 | } 84 | 85 | // ID selector 86 | else if ( tokens[i].indexOf('#') !== -1 ) { 87 | found = []; 88 | 89 | var id_selector = tokens[i].split('#', 2)[1]; 90 | 91 | // need to stop on the first id found (in bad HTML)... 92 | var el = null; 93 | for ( var k = 0; k < currentContext.length; k++ ) { 94 | 95 | // the document has no child elements but tags do so we search children to avoid 96 | // returning the current element via a false positive 97 | if ( typeof currentContext[k].children !== 'undefined' ) { 98 | el = domUtils.getElementById(id_selector, currentContext[k].children); 99 | } else { 100 | el = domUtils.getElementById(id_selector, currentContext[k]); 101 | } 102 | 103 | if ( el ) { 104 | found.push(el); 105 | break; 106 | } 107 | } 108 | 109 | if (!found[0]) { 110 | currentContext = []; 111 | break; 112 | } 113 | 114 | currentContext = found; 115 | } 116 | 117 | // Class selector 118 | else if ( tokens[i].indexOf('.') !== -1 ) { 119 | var parts = tokens[i].split('.'); 120 | tag = parts[0]; 121 | options = {}; 122 | options['class'] = function (value) { 123 | if (!value) return false; 124 | var classes = value.split(/\s+/); 125 | for (var i = 1, len = parts.length; i < len; i++) { 126 | if (!~classes.indexOf(parts[i])) return false; 127 | } 128 | return true; 129 | }; 130 | 131 | found = []; 132 | for ( var l = 0; l < currentContext.length; l++ ) { 133 | var context = currentContext[l]; 134 | if ( tag.length > 0 ) { 135 | context = domUtils.getElementsByTagName(tag, context); 136 | // don't recurse in the case we have a tag or we get children we might not want 137 | found = found.concat(domUtils.getElements(options, context, false)); 138 | } else { 139 | found = found.concat(domUtils.getElements(options, context)); 140 | } 141 | 142 | }; 143 | 144 | currentContext = found; 145 | } 146 | 147 | // Star selector 148 | else if ( tokens[i] === '*' ) { 149 | // nothing to do right? 150 | } 151 | 152 | // Tag selector 153 | else { 154 | if (!tagRe.test(tokens[i])) { 155 | currentContext = []; 156 | break; 157 | } 158 | 159 | found = []; 160 | for ( var m = 0; m < currentContext.length; m++ ) { 161 | // htmlparsers document itself has no child property - only nodes do... 162 | if ( typeof currentContext[m].children !== 'undefined' ) { 163 | found = found.concat(domUtils.getElementsByTagName(tokens[i], currentContext[m].children)); 164 | } else if (i === 0) { 165 | found = found.concat(domUtils.getElementsByTagName(tokens[i], currentContext[m])); 166 | } 167 | 168 | }; 169 | 170 | currentContext = found; 171 | } 172 | }; 173 | 174 | return currentContext; 175 | }; 176 | -------------------------------------------------------------------------------- /tests/soupselect.js: -------------------------------------------------------------------------------- 1 | var select = require('soupselect').select, 2 | htmlparser = require("htmlparser"), 3 | fs = require('fs'); 4 | 5 | var html = fs.readFileSync('testdata/test.html', 'utf-8'); 6 | 7 | function runTest(test, callback) { 8 | var handler = new htmlparser.DefaultHandler(function(err, dom) { 9 | if (err) { 10 | console.error("Error: " + err); 11 | } else { 12 | callback(dom); 13 | } 14 | }); 15 | var parser = new htmlparser.Parser(handler); 16 | parser.parseComplete(html); 17 | } 18 | 19 | function assertSelects(test, dom, selector, expected_ids) { 20 | var el_ids = []; 21 | var els = select(dom, selector); 22 | els.forEach(function(el) { 23 | if ( el.attribs && el.attribs.id ) { 24 | el_ids.push(el.attribs.id); 25 | } else { 26 | el_ids.push(''); 27 | } 28 | }); 29 | el_ids.sort(); 30 | expected_ids.sort(); 31 | test.deepEqual( 32 | expected_ids, 33 | el_ids, 34 | "Selector " + selector + ", expected " + sys.inspect(expected_ids)+ ", got " + sys.inspect(el_ids) 35 | ); 36 | } 37 | 38 | function assertSelectMultiple(test, dom, specs) { 39 | specs.forEach(function(spec){ 40 | assertSelects(test, dom, spec[0], spec[1]); 41 | }); 42 | } 43 | 44 | exports.basicSelectors = { 45 | one_tag_one: function(test) { 46 | runTest(test, function(dom) { 47 | var els = select(dom, 'title'); 48 | test.equal(els.length, 1); 49 | test.equal(els[0].name, 'title'); 50 | test.equal(els[0].children[0].raw, 'The title'); 51 | }); 52 | test.done(); 53 | }, 54 | 55 | one_tag_many: function(test) { 56 | runTest(test, function(dom) { 57 | var els = select(dom, 'div'); 58 | test.equal(els.length, 3); 59 | els.forEach(function(div) { 60 | test.equal(div.name, 'div'); 61 | }); 62 | }); 63 | test.done(); 64 | }, 65 | 66 | tag_in_tag_one: function(test) { 67 | runTest(test, function(dom) { 68 | assertSelects(test, dom, 'div div', ['inner']); 69 | }); 70 | test.done(); 71 | }, 72 | 73 | tags_in_tags: function(test) { 74 | runTest(test, function(dom) { 75 | assertSelects(test, dom, 'span span', ['yx', 'yy']); 76 | }); 77 | test.done(); 78 | }, 79 | 80 | tag_in_tag_many: function(test) { 81 | ['html div', 'html body div', 'body div'].forEach(function(selector) { 82 | runTest(test, function(dom) { 83 | assertSelects(test, dom, selector, ['main', 'inner', 'footer']); 84 | }); 85 | }); 86 | test.done(); 87 | }, 88 | 89 | tag_no_match: function(test) { 90 | runTest(test, function(dom) { 91 | test.equal(select(dom, 'del').length, 0); 92 | }); 93 | test.done(); 94 | }, 95 | 96 | tag_invalid_tag: function(test) { 97 | runTest(test, function(dom) { 98 | test.equal(select(dom, 'tag%t').length, 0); 99 | }); 100 | test.done(); 101 | }, 102 | 103 | header_tags: function(test) { 104 | runTest(test, function(dom) { 105 | assertSelectMultiple(test, dom, [ 106 | ['h1', ['header1']], 107 | ['h2', ['header2', 'header3']] 108 | ]); 109 | }); 110 | test.done(); 111 | }, 112 | 113 | class_one: function(test) { 114 | runTest(test, function(dom) { 115 | ['.onep', 'p.onep', 'html p.onep'].forEach(function(selector) { 116 | var els = select(dom, selector); 117 | test.equal(els.length, 1); 118 | test.equal(els[0].name, 'p'); 119 | test.equal(els[0].attribs.class, 'onep'); 120 | }); 121 | }); 122 | test.done(); 123 | }, 124 | 125 | class_mismatched_tag: function(test) { 126 | runTest(test, function(dom) { 127 | var els = select(dom, 'div.onep'); 128 | test.equal(els.length, 0); 129 | }); 130 | test.done(); 131 | }, 132 | 133 | multi_class: function(test) { 134 | runTest(test, function(dom) { 135 | var els = select(dom, 'p.class1.class2.class3'); 136 | test.equal(els.length, 1); 137 | }); 138 | test.done(); 139 | }, 140 | 141 | one_id: function(test) { 142 | runTest(test, function(dom) { 143 | ['div#inner', '#inner', 'div div#inner'].forEach(function(selector) { 144 | assertSelects(test, dom, selector, ['inner']); 145 | }); 146 | }); 147 | test.done(); 148 | }, 149 | 150 | bad_id: function(test) { 151 | runTest(test, function(dom) { 152 | var els = select(dom, '#doesnotexist'); 153 | test.equal(els.length, 0); 154 | }); 155 | test.done(); 156 | }, 157 | 158 | items_in_id: function(test) { 159 | runTest(test, function(dom) { 160 | var els = select(dom, 'div#inner p'); 161 | test.equal(els.length, 3); 162 | els.forEach(function(el) { 163 | test.equal(el.name, 'p'); 164 | }); 165 | test.equal(els[1].attribs.class, 'onep'); 166 | 167 | // attribs not created when none around - should really be checking there's no class attribute 168 | test.ok(typeof els[0].attribs == 'undefined'); 169 | test.done(); 170 | }); 171 | }, 172 | 173 | a_bunch_of_emptys: function(test) { 174 | runTest(test, function(dom) { 175 | ['div#main del', 'div#main div.oops', 'div div#main'].forEach(function(selector) { 176 | test.equal(select(dom, selector).length, 0); 177 | }); 178 | }); 179 | test.done(); 180 | }, 181 | 182 | multi_class_support: function(test) { 183 | runTest(test, function(dom) { 184 | ['.class1', 'p.class1', '.class2', 'p.class2', 185 | '.class3', 'p.class3', 'html p.class2', 186 | 'div#inner .class2'].forEach(function(selector) { 187 | assertSelects(test, dom, selector, ['pmulti']); 188 | }); 189 | }); 190 | test.done(); 191 | }, 192 | 193 | } 194 | 195 | exports.attributeSelectors = { 196 | 197 | attribute_equals: function(test) { 198 | runTest(test, function(dom) { 199 | assertSelectMultiple(test, dom, [ 200 | ['p[class="onep"]', ['p1']], 201 | ['p[id="p1"]', ['p1']], 202 | ['[class="onep"]', ['p1']], 203 | ['[id="p1"]', ['p1']], 204 | ['link[rel="stylesheet"]', ['l1']], 205 | ['link[type="text/css"]', ['l1']], 206 | ['link[href="blah.css"]', ['l1']], 207 | ['link[href="no-blah.css"]', []], 208 | ['[rel="stylesheet"]', ['l1']], 209 | ['[type="text/css"]', ['l1']], 210 | ['[href="blah.css"]', ['l1']], 211 | ['[href="no-blah.css"]', []], 212 | ['p[href="no-blah.css"]', []], 213 | ['[href="no-blah.css"]', []], 214 | ]); 215 | }); 216 | test.done(); 217 | }, 218 | 219 | attribute_tilde: function(test) { 220 | runTest(test, function(dom) { 221 | assertSelectMultiple(test, dom, [ 222 | ['p[class~="class1"]', ['pmulti']], 223 | ['p[class~="class2"]', ['pmulti']], 224 | ['p[class~="class3"]', ['pmulti']], 225 | ['[class~="class1"]', ['pmulti']], 226 | ['[class~="class2"]', ['pmulti']], 227 | ['[class~="class3"]', ['pmulti']], 228 | ['a[rel~="friend"]', ['bob']], 229 | ['a[rel~="met"]', ['bob']], 230 | ['[rel~="friend"]', ['bob']], 231 | ['[rel~="met"]', ['bob']], 232 | ]); 233 | }); 234 | test.done(); 235 | }, 236 | 237 | attribute_startswith: function(test) { 238 | runTest(test, function(dom) { 239 | assertSelectMultiple(test, dom, [ 240 | ['[rel^="style"]', ['l1']], 241 | ['link[rel^="style"]', ['l1']], 242 | ['notlink[rel^="notstyle"]', []], 243 | ['[rel^="notstyle"]', []], 244 | ['link[rel^="notstyle"]', []], 245 | ['link[href^="bla"]', ['l1']], 246 | ['a[href^="http://"]', ['bob', 'me']], 247 | ['[href^="http://"]', ['bob', 'me']], 248 | ['[id^="p"]', ['pmulti', 'p1']], 249 | ['[id^="m"]', ['me', 'main']], 250 | ['div[id^="m"]', ['main']], 251 | ['a[id^="m"]', ['me']], 252 | ]); 253 | }); 254 | test.done(); 255 | }, 256 | 257 | attribute_endswith: function(test) { 258 | runTest(test, function(dom) { 259 | assertSelectMultiple(test, dom, [ 260 | ['[href$=".css"]', ['l1']], 261 | ['link[href$=".css"]', ['l1']], 262 | ['link[id$="1"]', ['l1']], 263 | ['[id$="1"]', ['l1', 'p1', 'header1']], 264 | ['div[id$="1"]', []], 265 | ['[id$="noending"]', []], 266 | ]); 267 | }); 268 | test.done(); 269 | }, 270 | 271 | attribute_contains: function(test) { 272 | runTest(test, function(dom) { 273 | assertSelectMultiple(test, dom, [ 274 | // From test_attribute_startswith 275 | ['[rel*="style"]', ['l1']], 276 | ['link[rel*="style"]', ['l1']], 277 | ['notlink[rel*="notstyle"]', []], 278 | ['[rel*="notstyle"]', []], 279 | ['link[rel*="notstyle"]', []], 280 | ['link[href*="bla"]', ['l1']], 281 | ['a[href*="http://"]', ['bob', 'me']], 282 | ['[href*="http://"]', ['bob', 'me']], 283 | ['[id*="p"]', ['pmulti', 'p1']], 284 | ['div[id*="m"]', ['main']], 285 | ['a[id*="m"]', ['me']], 286 | // From test_attribute_endswith 287 | ['[href*=".css"]', ['l1']], 288 | ['link[href*=".css"]', ['l1']], 289 | ['link[id*="1"]', ['l1']], 290 | ['[id*="1"]', ['l1', 'p1', 'header1']], 291 | ['div[id*="1"]', []], 292 | ['[id*="noending"]', []], 293 | // New for this test 294 | ['[href*="."]', ['bob', 'me', 'l1']], 295 | ['a[href*="."]', ['bob', 'me']], 296 | ['link[href*="."]', ['l1']], 297 | ['div[id*="n"]', ['main', 'inner']], 298 | ['div[id*="nn"]', ['inner']], 299 | ]); 300 | }); 301 | test.done(); 302 | }, 303 | 304 | attribute_exact_or_hypen: function(test) { 305 | runTest(test, function(dom) { 306 | assertSelectMultiple(test, dom, [ 307 | ['p[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']], 308 | ['[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']], 309 | ['p[lang|="fr"]', ['lang-fr']], 310 | ['p[lang|="gb"]', []], 311 | ]); 312 | }); 313 | test.done(); 314 | }, 315 | 316 | attribute_exists: function(test) { 317 | runTest(test, function(dom) { 318 | assertSelectMultiple(test, dom, [ 319 | ['[rel]', ['l1', 'bob', 'me']], 320 | ['link[rel]', ['l1']], 321 | ['a[rel]', ['bob', 'me']], 322 | ['[lang]', ['lang-en', 'lang-en-gb', 'lang-en-us', 'lang-fr']], 323 | ['p[class]', ['p1', 'pmulti']], 324 | ['[blah]', []], 325 | ['p[blah]', []], 326 | ]); 327 | }); 328 | test.done(); 329 | }, 330 | } 331 | -------------------------------------------------------------------------------- /testdata/benchmark.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 39 | 40 | 41 | 42 | 43 | 44 |
45 |

W3C 46 | 47 |

Selectors

48 | 49 |

W3C Working Draft 15 December 2005

50 | 51 |
52 | 53 |
This version: 54 | 55 |
56 | http://www.w3.org/TR/2005/WD-css3-selectors-20051215 57 | 58 |
Latest version: 59 | 60 |
61 | http://www.w3.org/TR/css3-selectors 62 | 63 |
Previous version: 64 | 65 |
66 | http://www.w3.org/TR/2001/CR-css3-selectors-20011113 67 | 68 |
Editors: 69 | 70 |
Daniel Glazman (Invited Expert)
71 | 72 |
Tantek Çelik (Invited Expert) 73 | 74 |
Ian Hickson (Google) 76 | 77 |
Peter Linss (former editor, Netscape/AOL) 79 | 80 |
John Williams (former editor, Quark, Inc.) 82 | 83 |
84 | 85 |
103 | 104 |
105 | 106 |

Abstract

107 | 108 |

Selectors are patterns that match against elements in a 109 | tree. Selectors have been optimized for use with HTML and XML, and 110 | are designed to be usable in performance-critical code.

111 | 112 |

CSS (Cascading 113 | Style Sheets) is a language for describing the rendering of HTML and XML documents on 116 | screen, on paper, in speech, etc. CSS uses Selectors for binding 117 | style properties to elements in the document. This document 118 | describes extensions to the selectors defined in CSS level 2. These 119 | extended selectors will be used by CSS level 3. 120 | 121 |

Selectors define the following function:

122 | 123 |
expression ∗ element → boolean
124 | 125 |

That is, given an element and a selector, this specification 126 | defines whether that element matches the selector.

127 | 128 |

These expressions can also be used, for instance, to select a set 129 | of elements, or a single element from a set of elements, by 130 | evaluating the expression across all the elements in a 131 | subtree. STTS (Simple Tree Transformation Sheets), a 133 | language for transforming XML trees, uses this mechanism. [STTS]

134 | 135 |

Status of this document

136 | 137 |

This section describes the status of this document at the 138 | time of its publication. Other documents may supersede this 139 | document. A list of current W3C publications and the latest revision 140 | of this technical report can be found in the W3C technical reports index at 142 | http://www.w3.org/TR/.

143 | 144 |

This document describes the selectors that already exist in CSS1 and CSS2, and 147 | also proposes new selectors for CSS3 and other languages that may need them.

149 | 150 |

The CSS Working Group doesn't expect that all implementations of 151 | CSS3 will have to implement all selectors. Instead, there will 152 | probably be a small number of variants of CSS3, called profiles. For 153 | example, it may be that only a profile for interactive user agents 154 | will include all of the selectors.

155 | 156 |

This specification is a last call working draft for the the CSS Working Group 158 | (Style Activity). This 159 | document is a revision of the Candidate 161 | Recommendation dated 2001 November 13, and has incorporated 162 | implementation feedback received in the past few years. It is 163 | expected that this last call will proceed straight to Proposed 164 | Recommendation stage since it is believed that interoperability will 165 | be demonstrable.

166 | 167 |

All persons are encouraged to review and implement this 168 | specification and return comments to the (archived) 170 | public mailing list www-style 172 | (see instructions). W3C 173 | Members can also send comments directly to the CSS Working 174 | Group. 175 | The deadline for comments is 14 January 2006.

176 | 177 |

This is still a draft document and may be updated, replaced, or 178 | obsoleted by other documents at any time. It is inappropriate to 179 | cite a W3C Working Draft as other than "work in progress". 180 | 181 |

This document may be available in translation. 183 | The English version of this specification is the only normative 184 | version. 185 | 186 |

187 | 188 |

Table of contents

189 | 190 | 276 | 277 |
278 | 279 |

1. Introduction

280 | 281 |

1.1. Dependencies

282 | 283 |

Some features of this specification are specific to CSS, or have 284 | particular limitations or rules specific to CSS. In this 285 | specification, these have been described in terms of CSS2.1. [CSS21]

287 | 288 |

1.2. Terminology

289 | 290 |

All of the text of this specification is normative except 291 | examples, notes, and sections explicitly marked as 292 | non-normative.

293 | 294 |

1.3. Changes from CSS2

295 | 296 |

This section is non-normative.

297 | 298 |

The main differences between the selectors in CSS2 and those in 299 | Selectors are: 300 | 301 |

333 | 334 |

2. Selectors

335 | 336 |

This section is non-normative, as it merely summarizes the 337 | following sections.

338 | 339 |

A Selector represents a structure. This structure can be used as a 340 | condition (e.g. in a CSS rule) that determines which elements a 341 | selector matches in the document tree, or as a flat description of the 342 | HTML or XML fragment corresponding to that structure.

343 | 344 |

Selectors may range from simple element names to rich contextual 345 | representations.

346 | 347 |

The following table summarizes the Selector syntax:

348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 363 | 364 | 365 | 366 | 367 | 369 | 370 | 371 | 372 | 373 | 376 | 377 | 378 | 379 | 381 | 384 | 385 | 386 | 387 | 389 | 392 | 393 | 394 | 395 | 397 | 400 | 401 | 402 | 403 | 405 | 408 | 409 | 410 | 411 | 413 | 416 | 417 | 418 | 419 | 421 | 424 | 425 | 426 | 427 | 428 | 431 | 432 | 433 | 434 | 435 | 438 | 439 | 440 | 441 | 443 | 446 | 447 | 448 | 449 | 450 | 453 | 454 | 455 | 456 | 458 | 461 | 462 | 463 | 464 | 465 | 468 | 469 | 470 | 471 | 472 | 475 | 476 | 477 | 478 | 479 | 482 | 483 | 484 | 485 | 486 | 489 | 490 | 491 | 492 | 493 | 496 | 497 | 498 | 499 | 500 | 503 | 504 | 505 | 506 | 508 | 511 | 512 | 513 | 514 | 517 | 520 | 521 | 522 | 523 | 524 | 527 | 528 | 529 | 530 | 531 | 534 | 535 | 536 | 537 | 539 | 542 | 543 | 544 | 545 | 547 | 550 | 551 | 552 | 553 | 555 | 558 | 559 | 560 | 561 | 562 | 565 | 566 | 567 | 568 | 569 | 572 | 573 | 574 | 575 | 577 | 580 | 581 | 582 | 583 | 584 | 587 | 588 | 589 | 590 | 591 | 594 | 595 | 596 | 597 | 599 | 602 | 603 | 604 | 605 | 606 | 609 | 610 | 611 | 612 | 613 | 616 | 617 | 618 | 619 | 620 | 623 | 624 | 625 | 626 | 627 | 630 | 631 | 632 | 633 | 634 | 636 | 637 | 638 | 639 | 640 | 642 |
PatternMeaningDescribed in sectionFirst defined in CSS level
*any elementUniversal 362 | selector2
Ean element of type EType selector1
E[foo]an E element with a "foo" attributeAttribute 375 | selectors2
E[foo="bar"]an E element whose "foo" attribute value is exactly 380 | equal to "bar"Attribute 383 | selectors2
E[foo~="bar"]an E element whose "foo" attribute value is a list of 388 | space-separated values, one of which is exactly equal to "bar"Attribute 391 | selectors2
E[foo^="bar"]an E element whose "foo" attribute value begins exactly 396 | with the string "bar"Attribute 399 | selectors3
E[foo$="bar"]an E element whose "foo" attribute value ends exactly 404 | with the string "bar"Attribute 407 | selectors3
E[foo*="bar"]an E element whose "foo" attribute value contains the 412 | substring "bar"Attribute 415 | selectors3
E[hreflang|="en"]an E element whose "hreflang" attribute has a hyphen-separated 420 | list of values beginning (from the left) with "en"Attribute 423 | selectors2
E:rootan E element, root of the documentStructural 430 | pseudo-classes3
E:nth-child(n)an E element, the n-th child of its parentStructural 437 | pseudo-classes3
E:nth-last-child(n)an E element, the n-th child of its parent, counting 442 | from the last oneStructural 445 | pseudo-classes3
E:nth-of-type(n)an E element, the n-th sibling of its typeStructural 452 | pseudo-classes3
E:nth-last-of-type(n)an E element, the n-th sibling of its type, counting 457 | from the last oneStructural 460 | pseudo-classes3
E:first-childan E element, first child of its parentStructural 467 | pseudo-classes2
E:last-childan E element, last child of its parentStructural 474 | pseudo-classes3
E:first-of-typean E element, first sibling of its typeStructural 481 | pseudo-classes3
E:last-of-typean E element, last sibling of its typeStructural 488 | pseudo-classes3
E:only-childan E element, only child of its parentStructural 495 | pseudo-classes3
E:only-of-typean E element, only sibling of its typeStructural 502 | pseudo-classes3
E:emptyan E element that has no children (including text 507 | nodes)Structural 510 | pseudo-classes3
E:link
E:visited
an E element being the source anchor of a hyperlink of 515 | which the target is not yet visited (:link) or already visited 516 | (:visited)The link 519 | pseudo-classes1
E:active
E:hover
E:focus
an E element during certain user actionsThe user 526 | action pseudo-classes1 and 2
E:targetan E element being the target of the referring URIThe target 533 | pseudo-class3
E:lang(fr)an element of type E in language "fr" (the document 538 | language specifies how language is determined)The :lang() 541 | pseudo-class2
E:enabled
E:disabled
a user interface element E which is enabled or 546 | disabledThe UI element states 549 | pseudo-classes3
E:checkeda user interface element E which is checked (for instance a radio-button or checkbox)The UI element states 557 | pseudo-classes3
E::first-linethe first formatted line of an E elementThe ::first-line 564 | pseudo-element1
E::first-letterthe first formatted letter of an E elementThe ::first-letter 571 | pseudo-element1
E::selectionthe portion of an E element that is currently 576 | selected/highlighted by the userThe UI element 579 | fragments pseudo-elements3
E::beforegenerated content before an E elementThe ::before 586 | pseudo-element2
E::aftergenerated content after an E elementThe ::after 593 | pseudo-element2
E.warningan E element whose class is 598 | "warning" (the document language specifies how class is determined).Class 601 | selectors1
E#myidan E element with ID equal to "myid".ID 608 | selectors1
E:not(s)an E element that does not match simple selector sNegation 615 | pseudo-class3
E Fan F element descendant of an E elementDescendant 622 | combinator1
E > Fan F element child of an E elementChild 629 | combinator2
E + Fan F element immediately preceded by an E elementAdjacent sibling combinator2
E ~ Fan F element preceded by an E elementGeneral sibling combinator3
643 | 644 |

The meaning of each selector is derived from the table above by 645 | prepending "matches" to the contents of each cell in the "Meaning" 646 | column.

647 | 648 |

3. Case sensitivity

649 | 650 |

The case sensitivity of document language element names, attribute 651 | names, and attribute values in selectors depends on the document 652 | language. For example, in HTML, element names are case-insensitive, 653 | but in XML, they are case-sensitive.

654 | 655 |

4. Selector syntax

656 | 657 |

A selector is a chain of one 658 | or more sequences of simple selectors 659 | separated by combinators.

660 | 661 |

A sequence of simple selectors 662 | is a chain of simple selectors 663 | that are not separated by a combinator. It 664 | always begins with a type selector or a 665 | universal selector. No other type 666 | selector or universal selector is allowed in the sequence.

667 | 668 |

A simple selector is either a type selector, universal selector, attribute selector, class selector, ID selector, content selector, or pseudo-class. One pseudo-element may be appended to the last 678 | sequence of simple selectors.

679 | 680 |

Combinators are: white space, "greater-than 681 | sign" (U+003E, >), "plus sign" (U+002B, 682 | +) and "tilde" (U+007E, ~). White 683 | space may appear between a combinator and the simple selectors around 684 | it. Only the characters "space" (U+0020), "tab" 685 | (U+0009), "line feed" (U+000A), "carriage return" (U+000D), and "form 686 | feed" (U+000C) can occur in white space. Other space-like characters, 687 | such as "em-space" (U+2003) and "ideographic space" (U+3000), are 688 | never part of white space.

689 | 690 |

The elements of a document tree that are represented by a selector 691 | are the subjects of the selector. A 692 | selector consisting of a single sequence of simple selectors 693 | represents any element satisfying its requirements. Prepending another 694 | sequence of simple selectors and a combinator to a sequence imposes 695 | additional matching constraints, so the subjects of a selector are 696 | always a subset of the elements represented by the last sequence of 697 | simple selectors.

698 | 699 |

An empty selector, containing no sequence of simple selectors and 700 | no pseudo-element, is an invalid 701 | selector.

702 | 703 |

5. Groups of selectors

704 | 705 |

When several selectors share the same declarations, they may be 706 | grouped into a comma-separated list. (A comma is U+002C.)

707 | 708 |
709 |

CSS examples:

710 |

In this example, we condense three rules with identical 711 | declarations into one. Thus,

712 |
h1 { font-family: sans-serif }
 713 | h2 { font-family: sans-serif }
 714 | h3 { font-family: sans-serif }
715 |

is equivalent to:

716 |
h1, h2, h3 { font-family: sans-serif }
717 |
718 | 719 |

Warning: the equivalence is true in this example 720 | because all the selectors are valid selectors. If just one of these 721 | selectors were invalid, the entire group of selectors would be 722 | invalid. This would invalidate the rule for all three heading 723 | elements, whereas in the former case only one of the three individual 724 | heading rules would be invalidated.

725 | 726 | 727 |

6. Simple selectors

728 | 729 |

6.1. Type selector

730 | 731 |

A type selector is the name of a document language 732 | element type. A type selector represents an instance of the element 733 | type in the document tree.

734 | 735 |
736 |

Example:

737 |

The following selector represents an h1 element in the document tree:

738 |
h1
739 |
740 | 741 | 742 |

6.1.1. Type selectors and namespaces

743 | 744 |

Type selectors allow an optional namespace ([XMLNAMES]) component. A namespace prefix 746 | that has been previously declared may be prepended to the element name 747 | separated by the namespace separator "vertical bar" 748 | (U+007C, |).

749 | 750 |

The namespace component may be left empty to indicate that the 751 | selector is only to represent elements with no declared namespace.

752 | 753 |

An asterisk may be used for the namespace prefix, indicating that 754 | the selector represents elements in any namespace (including elements 755 | with no namespace).

756 | 757 |

Element type selectors that have no namespace component (no 758 | namespace separator), represent elements without regard to the 759 | element's namespace (equivalent to "*|") unless a default 760 | namespace has been declared. If a default namespace has been declared, 761 | the selector will represent only elements in the default 762 | namespace.

763 | 764 |

A type selector containing a namespace prefix that has not been 765 | previously declared is an invalid selector. 766 | The mechanism for declaring a namespace prefix is left up to the 767 | language implementing Selectors. In CSS, such a mechanism is defined 768 | in the General Syntax module.

769 | 770 |

In a namespace-aware client, element type selectors will only match 771 | against the local part 773 | of the element's qualified 775 | name. See below for notes about matching 776 | behaviors in down-level clients.

777 | 778 |

In summary:

779 | 780 |
781 |
ns|E
782 |
elements with name E in namespace ns
783 |
*|E
784 |
elements with name E in any namespace, including those without any 785 | declared namespace
786 |
|E
787 |
elements with name E without any declared namespace
788 |
E
789 |
if no default namespace has been specified, this is equivalent to *|E. 790 | Otherwise it is equivalent to ns|E where ns is the default namespace.
791 |
792 | 793 |
794 |

CSS examples:

795 | 796 |
@namespace foo url(http://www.example.com);
 797 |  foo|h1 { color: blue }
 798 |  foo|* { color: yellow }
 799 |  |h1 { color: red }
 800 |  *|h1 { color: green }
 801 |  h1 { color: green }
802 | 803 |

The first rule will match only h1 elements in the 804 | "http://www.example.com" namespace.

805 | 806 |

The second rule will match all elements in the 807 | "http://www.example.com" namespace.

808 | 809 |

The third rule will match only h1 elements without 810 | any declared namespace.

811 | 812 |

The fourth rule will match h1 elements in any 813 | namespace (including those without any declared namespace).

814 | 815 |

The last rule is equivalent to the fourth rule because no default 816 | namespace has been defined.

817 | 818 |
819 | 820 |

6.2. Universal selector

821 | 822 |

The universal selector, written "asterisk" 823 | (*), represents the qualified name of any element 824 | type. It represents any single element in the document tree in any 825 | namespace (including those without any declared namespace) if no 826 | default namespace has been specified. If a default namespace has been 827 | specified, see Universal selector and 828 | Namespaces below.

829 | 830 |

If the universal selector is not the only component of a sequence 831 | of simple selectors, the * may be omitted.

832 | 833 |
834 |

Examples:

835 | 840 |
841 | 842 |

Note: it is recommended that the 843 | *, representing the universal selector, not be 844 | omitted.

845 | 846 |

6.2.1. Universal selector and namespaces

847 | 848 |

The universal selector allows an optional namespace component. It 849 | is used as follows:

850 | 851 |
852 |
ns|*
853 |
all elements in namespace ns
854 |
*|*
855 |
all elements
856 |
|*
857 |
all elements without any declared namespace
858 |
*
859 |
if no default namespace has been specified, this is equivalent to *|*. 860 | Otherwise it is equivalent to ns|* where ns is the default namespace.
861 |
862 | 863 |

A universal selector containing a namespace prefix that has not 864 | been previously declared is an invalid 865 | selector. The mechanism for declaring a namespace prefix is left up 866 | to the language implementing Selectors. In CSS, such a mechanism is 867 | defined in the General Syntax module.

868 | 869 | 870 |

6.3. Attribute selectors

871 | 872 |

Selectors allow the representation of an element's attributes. When 873 | a selector is used as an expression to match against an element, 874 | attribute selectors must be considered to match an element if that 875 | element has an attribute that matches the attribute represented by the 876 | attribute selector.

877 | 878 |

6.3.1. Attribute presence and values 879 | selectors

880 | 881 |

CSS2 introduced four attribute selectors:

882 | 883 |
884 |
[att] 885 |
Represents an element with the att attribute, whatever the value of 886 | the attribute.
887 |
[att=val]
888 |
Represents an element with the att attribute whose value is exactly 889 | "val".
890 |
[att~=val]
891 |
Represents an element with the att attribute whose value is a whitespace-separated list of words, one of 893 | which is exactly "val". If "val" contains whitespace, it will never 894 | represent anything (since the words are separated by 895 | spaces).
896 |
[att|=val] 897 |
Represents an element with the att attribute, its value either 898 | being exactly "val" or beginning with "val" immediately followed by 899 | "-" (U+002D). This is primarily intended to allow language subcode 900 | matches (e.g., the hreflang attribute on the 901 | link element in HTML) as described in RFC 3066 ([RFC3066]). For lang (or 903 | xml:lang) language subcode matching, please see the :lang pseudo-class.
905 |
906 | 907 |

Attribute values must be identifiers or strings. The 908 | case-sensitivity of attribute names and values in selectors depends on 909 | the document language.

910 | 911 |
912 | 913 |

Examples:

914 | 915 |

The following attribute selector represents an h1 916 | element that carries the title attribute, whatever its 917 | value:

918 | 919 |
h1[title]
920 | 921 |

In the following example, the selector represents a 922 | span element whose class attribute has 923 | exactly the value "example":

924 | 925 |
span[class="example"]
926 | 927 |

Multiple attribute selectors can be used to represent several 928 | attributes of an element, or several conditions on the same 929 | attribute. Here, the selector represents a span element 930 | whose hello attribute has exactly the value "Cleveland" 931 | and whose goodbye attribute has exactly the value 932 | "Columbus":

933 | 934 |
span[hello="Cleveland"][goodbye="Columbus"]
935 | 936 |

The following selectors illustrate the differences between "=" 937 | and "~=". The first selector will represent, for example, the value 938 | "copyright copyleft copyeditor" on a rel attribute. The 939 | second selector will only represent an a element with 940 | an href attribute having the exact value 941 | "http://www.w3.org/".

942 | 943 |
a[rel~="copyright"]
 944 | a[href="http://www.w3.org/"]
945 | 946 |

The following selector represents a link element 947 | whose hreflang attribute is exactly "fr".

948 | 949 |
link[hreflang=fr]
950 | 951 |

The following selector represents a link element for 952 | which the values of the hreflang attribute begins with 953 | "en", including "en", "en-US", and "en-cockney":

954 | 955 |
link[hreflang|="en"]
956 | 957 |

Similarly, the following selectors represents a 958 | DIALOGUE element whenever it has one of two different 959 | values for an attribute character:

960 | 961 |
DIALOGUE[character=romeo]
 962 | DIALOGUE[character=juliet]
963 | 964 |
965 | 966 |

6.3.2. Substring matching attribute 967 | selectors

968 | 969 |

Three additional attribute selectors are provided for matching 970 | substrings in the value of an attribute:

971 | 972 |
973 |
[att^=val]
974 |
Represents an element with the att attribute whose value begins 975 | with the prefix "val".
976 |
[att$=val] 977 |
Represents an element with the att attribute whose value ends with 978 | the suffix "val".
979 |
[att*=val] 980 |
Represents an element with the att attribute whose value contains 981 | at least one instance of the substring "val".
982 |
983 | 984 |

Attribute values must be identifiers or strings. The 985 | case-sensitivity of attribute names in selectors depends on the 986 | document language.

987 | 988 |
989 |

Examples:

990 |

The following selector represents an HTML object, referencing an 991 | image:

992 |
object[type^="image/"]
993 |

The following selector represents an HTML anchor a with an 994 | href attribute whose value ends with ".html".

995 |
a[href$=".html"]
996 |

The following selector represents an HTML paragraph with a title 997 | attribute whose value contains the substring "hello"

998 |
p[title*="hello"]
999 |
1000 | 1001 |

6.3.3. Attribute selectors and namespaces

1002 | 1003 |

Attribute selectors allow an optional namespace component to the 1004 | attribute name. A namespace prefix that has been previously declared 1005 | may be prepended to the attribute name separated by the namespace 1006 | separator "vertical bar" (|). In keeping with 1007 | the Namespaces in the XML recommendation, default namespaces do not 1008 | apply to attributes, therefore attribute selectors without a namespace 1009 | component apply only to attributes that have no declared namespace 1010 | (equivalent to "|attr"). An asterisk may be used for the 1011 | namespace prefix indicating that the selector is to match all 1012 | attribute names without regard to the attribute's namespace. 1013 | 1014 |

An attribute selector with an attribute name containing a namespace 1015 | prefix that has not been previously declared is an invalid selector. The mechanism for declaring 1017 | a namespace prefix is left up to the language implementing Selectors. 1018 | In CSS, such a mechanism is defined in the General Syntax module. 1019 | 1020 |

1021 |

CSS examples:

1022 |
@namespace foo "http://www.example.com";
1023 | [foo|att=val] { color: blue }
1024 | [*|att] { color: yellow }
1025 | [|att] { color: green }
1026 | [att] { color: green }
1027 | 1028 |

The first rule will match only elements with the attribute 1029 | att in the "http://www.example.com" namespace with the 1030 | value "val".

1031 | 1032 |

The second rule will match only elements with the attribute 1033 | att regardless of the namespace of the attribute 1034 | (including no declared namespace).

1035 | 1036 |

The last two rules are equivalent and will match only elements 1037 | with the attribute att where the attribute is not 1038 | declared to be in a namespace.

1039 | 1040 |
1041 | 1042 |

6.3.4. Default attribute values in DTDs

1043 | 1044 |

Attribute selectors represent explicitly set attribute values in 1045 | the document tree. Default attribute values may be defined in a DTD or 1046 | elsewhere, but cannot always be selected by attribute 1047 | selectors. Selectors should be designed so that they work even if the 1048 | default values are not included in the document tree.

1049 | 1050 |

More precisely, a UA is not required to read an "external 1051 | subset" of the DTD but is required to look for default 1052 | attribute values in the document's "internal subset." (See [XML10] for definitions of these subsets.)

1054 | 1055 |

A UA that recognizes an XML namespace [XMLNAMES] is not required to use its 1057 | knowledge of that namespace to treat default attribute values as if 1058 | they were present in the document. (For example, an XHTML UA is not 1059 | required to use its built-in knowledge of the XHTML DTD.)

1060 | 1061 |

Note: Typically, implementations 1062 | choose to ignore external subsets.

1063 | 1064 |
1065 |

Example:

1066 | 1067 |

Consider an element EXAMPLE with an attribute "notation" that has a 1068 | default value of "decimal". The DTD fragment might be

1069 | 1070 |
<!ATTLIST EXAMPLE notation (decimal,octal) "decimal">
1071 | 1072 |

If the style sheet contains the rules

1073 | 1074 |
EXAMPLE[notation=decimal] { /*... default property settings ...*/ }
1075 | EXAMPLE[notation=octal]   { /*... other settings...*/ }
1076 | 1077 |

the first rule will not match elements whose "notation" attribute 1078 | is set by default, i.e. not set explicitly. To catch all cases, the 1079 | attribute selector for the default value must be dropped:

1080 | 1081 |
EXAMPLE                   { /*... default property settings ...*/ }
1082 | EXAMPLE[notation=octal]   { /*... other settings...*/ }
1083 | 1084 |

Here, because the selector EXAMPLE[notation=octal] is 1085 | more specific than the tag 1086 | selector alone, the style declarations in the second rule will override 1087 | those in the first for elements that have a "notation" attribute value 1088 | of "octal". Care has to be taken that all property declarations that 1089 | are to apply only to the default case are overridden in the non-default 1090 | cases' style rules.

1091 | 1092 |
1093 | 1094 |

6.4. Class selectors

1095 | 1096 |

Working with HTML, authors may use the period (U+002E, 1097 | .) notation as an alternative to the ~= 1098 | notation when representing the class attribute. Thus, for 1099 | HTML, div.value and div[class~=value] have 1100 | the same meaning. The attribute value must immediately follow the 1101 | "period" (.).

1102 | 1103 |

UAs may apply selectors using the period (.) notation in XML 1104 | documents if the UA has namespace-specific knowledge that allows it to 1105 | determine which attribute is the "class" attribute for the 1106 | respective namespace. One such example of namespace-specific knowledge 1107 | is the prose in the specification for a particular namespace (e.g. SVG 1108 | 1.0 [SVG] describes the SVG 1110 | "class" attribute and how a UA should interpret it, and 1111 | similarly MathML 1.01 [MATH] describes the MathML 1113 | "class" attribute.)

1114 | 1115 |
1116 |

CSS examples:

1117 | 1118 |

We can assign style information to all elements with 1119 | class~="pastoral" as follows:

1120 | 1121 |
*.pastoral { color: green }  /* all elements with class~=pastoral */
1122 | 1123 |

or just

1124 | 1125 |
.pastoral { color: green }  /* all elements with class~=pastoral */
1126 | 1127 |

The following assigns style only to H1 elements with 1128 | class~="pastoral":

1129 | 1130 |
H1.pastoral { color: green }  /* H1 elements with class~=pastoral */
1131 | 1132 |

Given these rules, the first H1 instance below would not have 1133 | green text, while the second would:

1134 | 1135 |
<H1>Not green</H1>
1136 | <H1 class="pastoral">Very green</H1>
1137 | 1138 |
1139 | 1140 |

To represent a subset of "class" values, each value must be preceded 1141 | by a ".", in any order.

1142 | 1143 |
1144 | 1145 |

CSS example:

1146 | 1147 |

The following rule matches any P element whose "class" attribute 1148 | has been assigned a list of whitespace-separated values that includes 1150 | "pastoral" and "marine":

1151 | 1152 |
p.pastoral.marine { color: green }
1153 | 1154 |

This rule matches when class="pastoral blue aqua 1155 | marine" but does not match for class="pastoral 1156 | blue".

1157 | 1158 |
1159 | 1160 |

Note: Because CSS gives considerable 1161 | power to the "class" attribute, authors could conceivably design their 1162 | own "document language" based on elements with almost no associated 1163 | presentation (such as DIV and SPAN in HTML) and assigning style 1164 | information through the "class" attribute. Authors should avoid this 1165 | practice since the structural elements of a document language often 1166 | have recognized and accepted meanings and author-defined classes may 1167 | not.

1168 | 1169 |

Note: If an element has multiple 1170 | class attributes, their values must be concatenated with spaces 1171 | between the values before searching for the class. As of this time the 1172 | working group is not aware of any manner in which this situation can 1173 | be reached, however, so this behavior is explicitly non-normative in 1174 | this specification.

1175 | 1176 |

6.5. ID selectors

1177 | 1178 |

Document languages may contain attributes that are declared to be 1179 | of type ID. What makes attributes of type ID special is that no two 1180 | such attributes can have the same value in a document, regardless of 1181 | the type of the elements that carry them; whatever the document 1182 | language, an ID typed attribute can be used to uniquely identify its 1183 | element. In HTML all ID attributes are named "id"; XML applications 1184 | may name ID attributes differently, but the same restriction 1185 | applies.

1186 | 1187 |

An ID-typed attribute of a document language allows authors to 1188 | assign an identifier to one element instance in the document tree. W3C 1189 | ID selectors represent an element instance based on its identifier. An 1190 | ID selector contains a "number sign" (U+0023, 1191 | #) immediately followed by the ID value, which must be an 1192 | identifier.

1193 | 1194 |

Selectors does not specify how a UA knows the ID-typed attribute of 1195 | an element. The UA may, e.g., read a document's DTD, have the 1196 | information hard-coded or ask the user. 1197 | 1198 |

1199 |

Examples:

1200 |

The following ID selector represents an h1 element 1201 | whose ID-typed attribute has the value "chapter1":

1202 |
h1#chapter1
1203 |

The following ID selector represents any element whose ID-typed 1204 | attribute has the value "chapter1":

1205 |
#chapter1
1206 |

The following selector represents any element whose ID-typed 1207 | attribute has the value "z98y".

1208 |
*#z98y
1209 |
1210 | 1211 |

Note. In XML 1.0 [XML10], the information about which attribute 1213 | contains an element's IDs is contained in a DTD or a schema. When 1214 | parsing XML, UAs do not always read the DTD, and thus may not know 1215 | what the ID of an element is (though a UA may have namespace-specific 1216 | knowledge that allows it to determine which attribute is the ID 1217 | attribute for that namespace). If a style sheet designer knows or 1218 | suspects that a UA may not know what the ID of an element is, he 1219 | should use normal attribute selectors instead: 1220 | [name=p371] instead of #p371. Elements in 1221 | XML 1.0 documents without a DTD do not have IDs at all.

1222 | 1223 |

If an element has multiple ID attributes, all of them must be 1224 | treated as IDs for that element for the purposes of the ID 1225 | selector. Such a situation could be reached using mixtures of xml:id, 1226 | DOM3 Core, XML DTDs, and namespace-specific knowledge.

1227 | 1228 |

6.6. Pseudo-classes

1229 | 1230 |

The pseudo-class concept is introduced to permit selection based on 1231 | information that lies outside of the document tree or that cannot be 1232 | expressed using the other simple selectors.

1233 | 1234 |

A pseudo-class always consists of a "colon" 1235 | (:) followed by the name of the pseudo-class and 1236 | optionally by a value between parentheses.

1237 | 1238 |

Pseudo-classes are allowed in all sequences of simple selectors 1239 | contained in a selector. Pseudo-classes are allowed anywhere in 1240 | sequences of simple selectors, after the leading type selector or 1241 | universal selector (possibly omitted). Pseudo-class names are 1242 | case-insensitive. Some pseudo-classes are mutually exclusive, while 1243 | others can be applied simultaneously to the same 1244 | element. Pseudo-classes may be dynamic, in the sense that an element 1245 | may acquire or lose a pseudo-class while a user interacts with the 1246 | document.

1247 | 1248 | 1249 |

6.6.1. Dynamic pseudo-classes

1250 | 1251 |

Dynamic pseudo-classes classify elements on characteristics other 1252 | than their name, attributes, or content, in principle characteristics 1253 | that cannot be deduced from the document tree.

1254 | 1255 |

Dynamic pseudo-classes do not appear in the document source or 1256 | document tree.

1257 | 1258 | 1259 |
The link pseudo-classes: :link and :visited
1260 | 1261 |

User agents commonly display unvisited links differently from 1262 | previously visited ones. Selectors 1263 | provides the pseudo-classes :link and 1264 | :visited to distinguish them:

1265 | 1266 | 1272 | 1273 |

After some amount of time, user agents may choose to return a 1274 | visited link to the (unvisited) ':link' state.

1275 | 1276 |

The two states are mutually exclusive.

1277 | 1278 |
1279 | 1280 |

Example:

1281 | 1282 |

The following selector represents links carrying class 1283 | external and already visited:

1284 | 1285 |
a.external:visited
1286 | 1287 |
1288 | 1289 |

Note: It is possible for style sheet 1290 | authors to abuse the :link and :visited pseudo-classes to determine 1291 | which sites a user has visited without the user's consent. 1292 | 1293 |

UAs may therefore treat all links as unvisited links, or implement 1294 | other measures to preserve the user's privacy while rendering visited 1295 | and unvisited links differently.

1296 | 1297 |
The user action pseudo-classes 1298 | :hover, :active, and :focus
1299 | 1300 |

Interactive user agents sometimes change the rendering in response 1301 | to user actions. Selectors provides 1302 | three pseudo-classes for the selection of an element the user is 1303 | acting on.

1304 | 1305 | 1328 | 1329 |

There may be document language or implementation specific limits on 1330 | which elements can become :active or acquire 1331 | :focus.

1332 | 1333 |

These pseudo-classes are not mutually exclusive. An element may 1334 | match several pseudo-classes at the same time.

1335 | 1336 |

Selectors doesn't define if the parent of an element that is 1337 | ':active' or ':hover' is also in that state.

1338 | 1339 |
1340 |

Examples:

1341 |
a:link    /* unvisited links */
1342 | a:visited /* visited links */
1343 | a:hover   /* user hovers */
1344 | a:active  /* active links */
1345 |

An example of combining dynamic pseudo-classes:

1346 |
a:focus
1347 | a:focus:hover
1348 |

The last selector matches a elements that are in 1349 | the pseudo-class :focus and in the pseudo-class :hover.

1350 |
1351 | 1352 |

Note: An element can be both ':visited' 1353 | and ':active' (or ':link' and ':active').

1354 | 1355 |

6.6.2. The target pseudo-class :target

1356 | 1357 |

Some URIs refer to a location within a resource. This kind of URI 1358 | ends with a "number sign" (#) followed by an anchor 1359 | identifier (called the fragment identifier).

1360 | 1361 |

URIs with fragment identifiers link to a certain element within the 1362 | document, known as the target element. For instance, here is a URI 1363 | pointing to an anchor named section_2 in an HTML 1364 | document:

1365 | 1366 |
http://example.com/html/top.html#section_2
1367 | 1368 |

A target element can be represented by the :target 1369 | pseudo-class. If the document's URI has no fragment identifier, then 1370 | the document has no target element.

1371 | 1372 |
1373 |

Example:

1374 |
p.note:target
1375 |

This selector represents a p element of class 1376 | note that is the target element of the referring 1377 | URI.

1378 |
1379 | 1380 |
1381 |

CSS example:

1382 |

Here, the :target pseudo-class is used to make the 1383 | target element red and place an image before it, if there is one:

1384 |
*:target { color : red }
1385 | *:target::before { content : url(target.png) }
1386 |
1387 | 1388 |

6.6.3. The language pseudo-class :lang

1389 | 1390 |

If the document language specifies how the human language of an 1391 | element is determined, it is possible to write selectors that 1392 | represent an element based on its language. For example, in HTML [HTML4], the language is determined by a 1394 | combination of the lang attribute, the meta 1395 | element, and possibly by information from the protocol (such as HTTP 1396 | headers). XML uses an attribute called xml:lang, and 1397 | there may be other document language-specific methods for determining 1398 | the language.

1399 | 1400 |

The pseudo-class :lang(C) represents an element that 1401 | is in language C. Whether an element is represented by a 1402 | :lang() selector is based solely on the identifier C 1403 | being either equal to, or a hyphen-separated substring of, the 1404 | element's language value, in the same way as if performed by the '|=' operator in attribute 1406 | selectors. The identifier C does not have to be a valid language 1407 | name.

1408 | 1409 |

C must not be empty. (If it is, the selector is invalid.)

1410 | 1411 |

Note: It is recommended that 1412 | documents and protocols indicate language using codes from RFC 3066 [RFC3066] or its successor, and by means of 1414 | "xml:lang" attributes in the case of XML-based documents [XML10]. See 1417 | "FAQ: Two-letter or three-letter language codes."

1418 | 1419 |
1420 |

Examples:

1421 |

The two following selectors represent an HTML document that is in 1422 | Belgian, French, or German. The two next selectors represent 1423 | q quotations in an arbitrary element in Belgian, French, 1424 | or German.

1425 |
html:lang(fr-be)
1426 | html:lang(de)
1427 | :lang(fr-be) > q
1428 | :lang(de) > q
1429 |
1430 | 1431 |

6.6.4. The UI element states pseudo-classes

1432 | 1433 |
The :enabled and :disabled pseudo-classes
1434 | 1435 |

The :enabled pseudo-class allows authors to customize 1436 | the look of user interface elements that are enabled — which the 1437 | user can select or activate in some fashion (e.g. clicking on a button 1438 | with a mouse). There is a need for such a pseudo-class because there 1439 | is no way to programmatically specify the default appearance of say, 1440 | an enabled input element without also specifying what it 1441 | would look like when it was disabled.

1442 | 1443 |

Similar to :enabled, :disabled allows the 1444 | author to specify precisely how a disabled or inactive user interface 1445 | element should look.

1446 | 1447 |

Most elements will be neither enabled nor disabled. An element is 1448 | enabled if the user can either activate it or transfer the focus to 1449 | it. An element is disabled if it could be enabled, but the user cannot 1450 | presently activate it or transfer focus to it.

1451 | 1452 | 1453 |
The :checked pseudo-class
1454 | 1455 |

Radio and checkbox elements can be toggled by the user. Some menu 1456 | items are "checked" when the user selects them. When such elements are 1457 | toggled "on" the :checked pseudo-class applies. The 1458 | :checked pseudo-class initially applies to such elements 1459 | that have the HTML4 selected and checked 1460 | attributes as described in Section 1462 | 17.2.1 of HTML4, but of course the user can toggle "off" such 1463 | elements in which case the :checked pseudo-class would no 1464 | longer apply. While the :checked pseudo-class is dynamic 1465 | in nature, and is altered by user action, since it can also be based 1466 | on the presence of the semantic HTML4 selected and 1467 | checked attributes, it applies to all media. 1468 | 1469 | 1470 |

The :indeterminate pseudo-class
1471 | 1472 |
1473 | 1474 |

Radio and checkbox elements can be toggled by the user, but are 1475 | sometimes in an indeterminate state, neither checked nor unchecked. 1476 | This can be due to an element attribute, or DOM manipulation.

1477 | 1478 |

A future version of this specification may introduce an 1479 | :indeterminate pseudo-class that applies to such elements. 1480 |

1486 | 1487 |
1488 | 1489 | 1490 |

6.6.5. Structural pseudo-classes

1491 | 1492 |

Selectors introduces the concept of structural 1493 | pseudo-classes to permit selection based on extra information that lies in 1494 | the document tree but cannot be represented by other simple selectors or 1495 | combinators. 1496 | 1497 |

Note that standalone pieces of PCDATA (text nodes in the DOM) are 1498 | not counted when calculating the position of an element in the list of 1499 | children of its parent. When calculating the position of an element in 1500 | the list of children of its parent, the index numbering starts at 1. 1501 | 1502 | 1503 |

:root pseudo-class
1504 | 1505 |

The :root pseudo-class represents an element that is 1506 | the root of the document. In HTML 4, this is always the 1507 | HTML element. 1508 | 1509 | 1510 |

:nth-child() pseudo-class
1511 | 1512 |

The 1513 | :nth-child(an+b) 1514 | pseudo-class notation represents an element that has 1515 | an+b-1 siblings 1516 | before it in the document tree, for a given positive 1517 | integer or zero value of n, and has a parent element. In 1518 | other words, this matches the bth child of an element after 1519 | all the children have been split into groups of a elements 1520 | each. For example, this allows the selectors to address every other 1521 | row in a table, and could be used to alternate the color 1522 | of paragraph text in a cycle of four. The a and 1523 | b values must be zero, negative integers or positive 1524 | integers. The index of the first child of an element is 1. 1525 | 1526 |

In addition to this, :nth-child() can take 1527 | 'odd' and 'even' as arguments instead. 1528 | 'odd' has the same signification as 2n+1, 1529 | and 'even' has the same signification as 2n. 1530 | 1531 | 1532 |

1533 |

Examples:

1534 |
tr:nth-child(2n+1) /* represents every odd row of an HTML table */
1535 | tr:nth-child(odd)  /* same */
1536 | tr:nth-child(2n)   /* represents every even row of an HTML table */
1537 | tr:nth-child(even) /* same */
1538 | 
1539 | /* Alternate paragraph colours in CSS */
1540 | p:nth-child(4n+1) { color: navy; }
1541 | p:nth-child(4n+2) { color: green; }
1542 | p:nth-child(4n+3) { color: maroon; }
1543 | p:nth-child(4n+4) { color: purple; }
1544 |
1545 | 1546 |

When a=0, no repeating is used, so for example 1547 | :nth-child(0n+5) matches only the fifth child. When 1548 | a=0, the an part need not be 1549 | included, so the syntax simplifies to 1550 | :nth-child(b) and the last example simplifies 1551 | to :nth-child(5). 1552 | 1553 |

1554 |

Examples:

1555 |
foo:nth-child(0n+1)   /* represents an element foo, first child of its parent element */
1556 | foo:nth-child(1)      /* same */
1557 |
1558 | 1559 |

When a=1, the number may be omitted from the rule. 1560 | 1561 |

1562 |

Examples:

1563 |

The following selectors are therefore equivalent:

1564 |
bar:nth-child(1n+0)   /* represents all bar elements, specificity (0,1,1) */
1565 | bar:nth-child(n+0)    /* same */
1566 | bar:nth-child(n)      /* same */
1567 | bar                   /* same but lower specificity (0,0,1) */
1568 |
1569 | 1570 |

If b=0, then every ath element is picked. In 1571 | such a case, the b part may be omitted. 1572 | 1573 |

1574 |

Examples:

1575 |
tr:nth-child(2n+0) /* represents every even row of an HTML table */
1576 | tr:nth-child(2n) /* same */
1577 |
1578 | 1579 |

If both a and b are equal to zero, the 1580 | pseudo-class represents no element in the document tree.

1581 | 1582 |

The value a can be negative, but only the positive 1583 | values of an+b, for 1584 | n≥0, may represent an element in the document 1585 | tree.

1586 | 1587 |
1588 |

Example:

1589 |
html|tr:nth-child(-n+6)  /* represents the 6 first rows of XHTML tables */
1590 |
1591 | 1592 |

When the value b is negative, the "+" character in the 1593 | expression must be removed (it is effectively replaced by the "-" 1594 | character indicating the negative value of b).

1595 | 1596 |
1597 |

Examples:

1598 |
:nth-child(10n-1)  /* represents the 9th, 19th, 29th, etc, element */
1599 | :nth-child(10n+9)  /* Same */
1600 | :nth-child(10n+-1) /* Syntactically invalid, and would be ignored */
1601 |
1602 | 1603 | 1604 |
:nth-last-child() pseudo-class
1605 | 1606 |

The :nth-last-child(an+b) 1607 | pseudo-class notation represents an element that has 1608 | an+b-1 siblings 1609 | after it in the document tree, for a given positive 1610 | integer or zero value of n, and has a parent element. See 1611 | :nth-child() pseudo-class for the syntax of its argument. 1612 | It also accepts the 'even' and 'odd' values 1613 | as arguments. 1614 | 1615 | 1616 |

1617 |

Examples:

1618 |
tr:nth-last-child(-n+2)    /* represents the two last rows of an HTML table */
1619 | 
1620 | foo:nth-last-child(odd)    /* represents all odd foo elements in their parent element,
1621 |                               counting from the last one */
1622 |
1623 | 1624 | 1625 |
:nth-of-type() pseudo-class
1626 | 1627 |

The :nth-of-type(an+b) 1628 | pseudo-class notation represents an element that has 1629 | an+b-1 siblings with the same 1630 | element name before it in the document tree, for a 1631 | given zero or positive integer value of n, and has a 1632 | parent element. In other words, this matches the bth child 1633 | of that type after all the children of that type have been split into 1634 | groups of a elements each. See :nth-child() pseudo-class 1635 | for the syntax of its argument. It also accepts the 1636 | 'even' and 'odd' values. 1637 | 1638 | 1639 |

1640 |

CSS example:

1641 |

This allows an author to alternate the position of floated images:

1642 |
img:nth-of-type(2n+1) { float: right; }
1643 | img:nth-of-type(2n) { float: left; }
1644 |
1645 | 1646 | 1647 |
:nth-last-of-type() pseudo-class
1648 | 1649 |

The :nth-last-of-type(an+b) 1650 | pseudo-class notation represents an element that has 1651 | an+b-1 siblings with the same 1652 | element name after it in the document tree, for a 1653 | given zero or positive integer value of n, and has a 1654 | parent element. See :nth-child() pseudo-class for the 1655 | syntax of its argument. It also accepts the 'even' and 'odd' values. 1656 | 1657 | 1658 |

1659 |

Example:

1660 |

To represent all h2 children of an XHTML 1661 | body except the first and last, one could use the 1662 | following selector:

1663 |
body > h2:nth-of-type(n+2):nth-last-of-type(n+2)
1664 |

In this case, one could also use :not(), although the 1665 | selector ends up being just as long:

1666 |
body > h2:not(:first-of-type):not(:last-of-type)
1667 |
1668 | 1669 | 1670 |
:first-child pseudo-class
1671 | 1672 |

Same as :nth-child(1). The :first-child pseudo-class 1673 | represents an element that is the first child of some other element. 1674 | 1675 | 1676 |

1677 |

Examples:

1678 |

The following selector represents a p element that is 1679 | the first child of a div element:

1680 |
div > p:first-child
1681 |

This selector can represent the p inside the 1682 | div of the following fragment:

1683 |
<p> The last P before the note.</p>
1684 | <div class="note">
1685 |    <p> The first P inside the note.</p>
1686 | </div>
but cannot represent the second p in the following 1687 | fragment: 1688 |
<p> The last P before the note.</p>
1689 | <div class="note">
1690 |    <h2> Note </h2>
1691 |    <p> The first P inside the note.</p>
1692 | </div>
1693 |

The following two selectors are usually equivalent:

1694 |
* > a:first-child /* a is first child of any element */
1695 | a:first-child /* Same (assuming a is not the root element) */
1696 |
1697 | 1698 |
:last-child pseudo-class
1699 | 1700 |

Same as :nth-last-child(1). The :last-child pseudo-class 1701 | represents an element that is the last child of some other element. 1702 | 1703 |

1704 |

Example:

1705 |

The following selector represents a list item li that 1706 | is the last child of an ordered list ol. 1707 |

ol > li:last-child
1708 |
1709 | 1710 |
:first-of-type pseudo-class
1711 | 1712 |

Same as :nth-of-type(1). The :first-of-type pseudo-class 1713 | represents an element that is the first sibling of its type in the list of 1714 | children of its parent element. 1715 | 1716 |

1717 |

Example:

1718 |

The following selector represents a definition title 1719 | dt inside a definition list dl, this 1720 | dt being the first of its type in the list of children of 1721 | its parent element.

1722 |
dl dt:first-of-type
1723 |

It is a valid description for the first two dt 1724 | elements in the following example but not for the third one:

1725 |
<dl>
1726 |  <dt>gigogne</dt>
1727 |  <dd>
1728 |   <dl>
1729 |    <dt>fusée</dt>
1730 |    <dd>multistage rocket</dd>
1731 |    <dt>table</dt>
1732 |    <dd>nest of tables</dd>
1733 |   </dl>
1734 |  </dd>
1735 | </dl>
1736 |
1737 | 1738 |
:last-of-type pseudo-class
1739 | 1740 |

Same as :nth-last-of-type(1). The 1741 | :last-of-type pseudo-class represents an element that is 1742 | the last sibling of its type in the list of children of its parent 1743 | element.

1744 | 1745 |
1746 |

Example:

1747 |

The following selector represents the last data cell 1748 | td of a table row.

1749 |
tr > td:last-of-type
1750 |
1751 | 1752 |
:only-child pseudo-class
1753 | 1754 |

Represents an element that has a parent element and whose parent 1755 | element has no other element children. Same as 1756 | :first-child:last-child or 1757 | :nth-child(1):nth-last-child(1), but with a lower 1758 | specificity.

1759 | 1760 |
:only-of-type pseudo-class
1761 | 1762 |

Represents an element that has a parent element and whose parent 1763 | element has no other element children with the same element name. Same 1764 | as :first-of-type:last-of-type or 1765 | :nth-of-type(1):nth-last-of-type(1), but with a lower 1766 | specificity.

1767 | 1768 | 1769 |
:empty pseudo-class
1770 | 1771 |

The :empty pseudo-class represents an element that has 1772 | no children at all. In terms of the DOM, only element nodes and text 1773 | nodes (including CDATA nodes and entity references) whose data has a 1774 | non-zero length must be considered as affecting emptiness; comments, 1775 | PIs, and other nodes must not affect whether an element is considered 1776 | empty or not.

1777 | 1778 |
1779 |

Examples:

1780 |

p:empty is a valid representation of the following fragment:

1781 |
<p></p>
1782 |

foo:empty is not a valid representation for the 1783 | following fragments:

1784 |
<foo>bar</foo>
1785 |
<foo><bar>bla</bar></foo>
1786 |
<foo>this is not <bar>:empty</bar></foo>
1787 |
1788 | 1789 |

6.6.6. Blank

1790 | 1791 |

This section intentionally left blank.

1792 | 1793 | 1794 |

6.6.7. The negation pseudo-class

1795 | 1796 |

The negation pseudo-class, :not(X), is a 1797 | functional notation taking a simple 1798 | selector (excluding the negation pseudo-class itself and 1799 | pseudo-elements) as an argument. It represents an element that is not 1800 | represented by the argument. 1801 | 1802 | 1804 | 1805 |

1806 |

Examples:

1807 |

The following CSS selector matches all button 1808 | elements in an HTML document that are not disabled.

1809 |
button:not([DISABLED])
1810 |

The following selector represents all but FOO 1811 | elements.

1812 |
*:not(FOO)
1813 |

The following group of selectors represents all HTML elements 1814 | except links.

1815 |
html|*:not(:link):not(:visited)
1816 |
1817 | 1818 |

Default namespace declarations do not affect the argument of the 1819 | negation pseudo-class unless the argument is a universal selector or a 1820 | type selector.

1821 | 1822 |
1823 |

Examples:

1824 |

Assuming that the default namespace is bound to 1825 | "http://example.com/", the following selector represents all 1826 | elements that are not in that namespace:

1827 |
*|*:not(*)
1828 |

The following CSS selector matches any element being hovered, 1829 | regardless of its namespace. In particular, it is not limited to 1830 | only matching elements in the default namespace that are not being 1831 | hovered, and elements not in the default namespace don't match the 1832 | rule when they are being hovered.

1833 |
*|*:not(:hover)
1834 |
1835 | 1836 |

Note: the :not() pseudo allows 1837 | useless selectors to be written. For instance :not(*|*), 1838 | which represents no element at all, or foo:not(bar), 1839 | which is equivalent to foo but with a higher 1840 | specificity.

1841 | 1842 |

7. Pseudo-elements

1843 | 1844 |

Pseudo-elements create abstractions about the document tree beyond 1845 | those specified by the document language. For instance, document 1846 | languages do not offer mechanisms to access the first letter or first 1847 | line of an element's content. Pseudo-elements allow designers to refer 1848 | to this otherwise inaccessible information. Pseudo-elements may also 1849 | provide designers a way to refer to content that does not exist in the 1850 | source document (e.g., the ::before and 1851 | ::after pseudo-elements give access to generated 1852 | content).

1853 | 1854 |

A pseudo-element is made of two colons (::) followed 1855 | by the name of the pseudo-element.

1856 | 1857 |

This :: notation is introduced by the current document 1858 | in order to establish a discrimination between pseudo-classes and 1859 | pseudo-elements. For compatibility with existing style sheets, user 1860 | agents must also accept the previous one-colon notation for 1861 | pseudo-elements introduced in CSS levels 1 and 2 (namely, 1862 | :first-line, :first-letter, 1863 | :before and :after). This compatibility is 1864 | not allowed for the new pseudo-elements introduced in CSS level 3.

1865 | 1866 |

Only one pseudo-element may appear per selector, and if present it 1867 | must appear after the sequence of simple selectors that represents the 1868 | subjects of the selector. A 1869 | future version of this specification may allow multiple 1870 | pesudo-elements per selector.

1871 | 1872 |

7.1. The ::first-line pseudo-element

1873 | 1874 |

The ::first-line pseudo-element describes the contents 1875 | of the first formatted line of an element. 1876 | 1877 |

1878 |

CSS example:

1879 |
p::first-line { text-transform: uppercase }
1880 |

The above rule means "change the letters of the first line of every 1881 | paragraph to uppercase".

1882 |
1883 | 1884 |

The selector p::first-line does not match any real 1885 | HTML element. It does match a pseudo-element that conforming user 1886 | agents will insert at the beginning of every paragraph.

1887 | 1888 |

Note that the length of the first line depends on a number of 1889 | factors, including the width of the page, the font size, etc. Thus, 1890 | an ordinary HTML paragraph such as:

1891 | 1892 |
1893 | <P>This is a somewhat long HTML 
1894 | paragraph that will be broken into several 
1895 | lines. The first line will be identified
1896 | by a fictional tag sequence. The other lines 
1897 | will be treated as ordinary lines in the 
1898 | paragraph.</P>
1899 | 
1900 | 1901 |

the lines of which happen to be broken as follows: 1902 | 1903 |

1904 | THIS IS A SOMEWHAT LONG HTML PARAGRAPH THAT
1905 | will be broken into several lines. The first
1906 | line will be identified by a fictional tag 
1907 | sequence. The other lines will be treated as 
1908 | ordinary lines in the paragraph.
1909 | 
1910 | 1911 |

This paragraph might be "rewritten" by user agents to include the 1912 | fictional tag sequence for ::first-line. This 1913 | fictional tag sequence helps to show how properties are inherited.

1914 | 1915 |
1916 | <P><P::first-line> This is a somewhat long HTML 
1917 | paragraph that </P::first-line> will be broken into several
1918 | lines. The first line will be identified 
1919 | by a fictional tag sequence. The other lines 
1920 | will be treated as ordinary lines in the 
1921 | paragraph.</P>
1922 | 
1923 | 1924 |

If a pseudo-element breaks up a real element, the desired effect 1925 | can often be described by a fictional tag sequence that closes and 1926 | then re-opens the element. Thus, if we mark up the previous paragraph 1927 | with a span element:

1928 | 1929 |
1930 | <P><SPAN class="test"> This is a somewhat long HTML
1931 | paragraph that will be broken into several
1932 | lines.</SPAN> The first line will be identified
1933 | by a fictional tag sequence. The other lines 
1934 | will be treated as ordinary lines in the 
1935 | paragraph.</P>
1936 | 
1937 | 1938 |

the user agent could simulate start and end tags for 1939 | span when inserting the fictional tag sequence for 1940 | ::first-line. 1941 | 1942 |

1943 | <P><P::first-line><SPAN class="test"> This is a
1944 | somewhat long HTML
1945 | paragraph that will </SPAN></P::first-line><SPAN class="test"> be
1946 | broken into several
1947 | lines.</SPAN> The first line will be identified
1948 | by a fictional tag sequence. The other lines
1949 | will be treated as ordinary lines in the 
1950 | paragraph.</P>
1951 | 
1952 | 1953 |

In CSS, the ::first-line pseudo-element can only be 1954 | attached to a block-level element, an inline-block, a table-caption, 1955 | or a table-cell.

1956 | 1957 |

The "first formatted line" of an 1958 | element may occur inside a 1959 | block-level descendant in the same flow (i.e., a block-level 1960 | descendant that is not positioned and not a float). E.g., the first 1961 | line of the div in <DIV><P>This 1962 | line...</P></DIV> is the first line of the p (assuming 1963 | that both p and div are block-level). 1964 | 1965 |

The first line of a table-cell or inline-block cannot be the first 1966 | formatted line of an ancestor element. Thus, in <DIV><P 1967 | STYLE="display: inline-block">Hello<BR>Goodbye</P> 1968 | etcetera</DIV> the first formatted line of the 1969 | div is not the line "Hello". 1970 | 1971 |

Note that the first line of the p in this 1972 | fragment: <p><br>First... doesn't contain any 1973 | letters (assuming the default style for br in HTML 1974 | 4). The word "First" is not on the first formatted line. 1975 | 1976 |

A UA should act as if the fictional start tags of the 1977 | ::first-line pseudo-elements were nested just inside the 1978 | innermost enclosing block-level element. (Since CSS1 and CSS2 were 1979 | silent on this case, authors should not rely on this behavior.) Here 1980 | is an example. The fictional tag sequence for

1981 | 1982 |
1983 | <DIV>
1984 |   <P>First paragraph</P>
1985 |   <P>Second paragraph</P>
1986 | </DIV>
1987 | 
1988 | 1989 |

is

1990 | 1991 |
1992 | <DIV>
1993 |   <P><DIV::first-line><P::first-line>First paragraph</P::first-line></DIV::first-line></P>
1994 |   <P><P::first-line>Second paragraph</P::first-line></P>
1995 | </DIV>
1996 | 
1997 | 1998 |

The ::first-line pseudo-element is similar to an 1999 | inline-level element, but with certain restrictions. In CSS, the 2000 | following properties apply to a ::first-line 2001 | pseudo-element: font properties, color property, background 2002 | properties, 'word-spacing', 'letter-spacing', 'text-decoration', 2003 | 'vertical-align', 'text-transform', 'line-height'. UAs may apply other 2004 | properties as well.

2005 | 2006 | 2007 |

7.2. The ::first-letter pseudo-element

2008 | 2009 |

The ::first-letter pseudo-element represents the first 2010 | letter of the first line of a block, if it is not preceded by any 2011 | other content (such as images or inline tables) on its line. The 2012 | ::first-letter pseudo-element may be used for "initial caps" and "drop 2013 | caps", which are common typographical effects. This type of initial 2014 | letter is similar to an inline-level element if its 'float' property 2015 | is 'none'; otherwise, it is similar to a floated element.

2016 | 2017 |

In CSS, these are the properties that apply to ::first-letter 2018 | pseudo-elements: font properties, 'text-decoration', 'text-transform', 2019 | 'letter-spacing', 'word-spacing' (when appropriate), 'line-height', 2020 | 'float', 'vertical-align' (only if 'float' is 'none'), margin 2021 | properties, padding properties, border properties, color property, 2022 | background properties. UAs may apply other properties as well. To 2023 | allow UAs to render a typographically correct drop cap or initial cap, 2024 | the UA may choose a line-height, width and height based on the shape 2025 | of the letter, unlike for normal elements.

2026 | 2027 |
2028 |

Example:

2029 |

This example shows a possible rendering of an initial cap. Note 2030 | that the 'line-height' that is inherited by the ::first-letter 2031 | pseudo-element is 1.1, but the UA in this example has computed the 2032 | height of the first letter differently, so that it doesn't cause any 2033 | unnecessary space between the first two lines. Also note that the 2034 | fictional start tag of the first letter is inside the span, and thus 2035 | the font weight of the first letter is normal, not bold as the span: 2036 |

2037 | p { line-height: 1.1 }
2038 | p::first-letter { font-size: 3em; font-weight: normal }
2039 | span { font-weight: bold }
2040 | ...
2041 | <p><span>Het hemelsche</span> gerecht heeft zich ten lange lesten<br>
2042 | Erbarremt over my en mijn benaeuwde vesten<br>
2043 | En arme burgery, en op mijn volcx gebed<br>
2044 | En dagelix geschrey de bange stad ontzet.
2045 | 
2046 |
2047 |

Image illustrating the ::first-letter pseudo-element 2048 |

2049 |
2050 | 2051 |
2052 |

The following CSS will make a drop cap initial letter span about two lines:

2053 | 2054 |
2055 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN">
2056 | <HTML>
2057 |  <HEAD>
2058 |   <TITLE>Drop cap initial letter</TITLE>
2059 |   <STYLE type="text/css">
2060 |    P               { font-size: 12pt; line-height: 1.2 }
2061 |    P::first-letter { font-size: 200%; font-weight: bold; float: left }
2062 |    SPAN            { text-transform: uppercase }
2063 |   </STYLE>
2064 |  </HEAD>
2065 |  <BODY>
2066 |   <P><SPAN>The first</SPAN> few words of an article
2067 |     in The Economist.</P>
2068 |  </BODY>
2069 | </HTML>
2070 | 
2071 | 2072 |

This example might be formatted as follows:

2073 | 2074 |
2075 |

Image illustrating the combined effect of the ::first-letter and ::first-line pseudo-elements

2076 |
2077 | 2078 |

The fictional tag sequence is:

2080 | 2081 |
2082 | <P>
2083 | <SPAN>
2084 | <P::first-letter>
2085 | T
2086 | </P::first-letter>he first
2087 | </SPAN> 
2088 | few words of an article in the Economist.
2089 | </P>
2090 | 
2091 | 2092 |

Note that the ::first-letter pseudo-element tags abut 2093 | the content (i.e., the initial character), while the ::first-line 2094 | pseudo-element start tag is inserted right after the start tag of the 2095 | block element.

2096 | 2097 |

In order to achieve traditional drop caps formatting, user agents 2098 | may approximate font sizes, for example to align baselines. Also, the 2099 | glyph outline may be taken into account when formatting.

2100 | 2101 |

Punctuation (i.e, characters defined in Unicode in the "open" (Ps), 2102 | "close" (Pe), "initial" (Pi). "final" (Pf) and "other" (Po) 2103 | punctuation classes), that precedes or follows the first letter should 2104 | be included. [UNICODE]

2105 | 2106 |
2107 |

Quotes that precede the
2108 | first letter should be included.

2109 |
2110 | 2111 |

The ::first-letter also applies if the first letter is 2112 | in fact a digit, e.g., the "6" in "67 million dollars is a lot of 2113 | money."

2114 | 2115 |

In CSS, the ::first-letter pseudo-element applies to 2116 | block, list-item, table-cell, table-caption, and inline-block 2117 | elements. A future version of this specification 2118 | may allow this pesudo-element to apply to more element 2119 | types.

2120 | 2121 |

The ::first-letter pseudo-element can be used with all 2122 | such elements that contain text, or that have a descendant in the same 2123 | flow that contains text. A UA should act as if the fictional start tag 2124 | of the ::first-letter pseudo-element is just before the first text of 2125 | the element, even if that first text is in a descendant.

2126 | 2127 |
2128 |

Example:

2129 |

The fictional tag sequence for this HTMLfragment: 2130 |

<div>
2131 | <p>The first text.
2132 |

is: 2133 |

<div>
2134 | <p><div::first-letter><p::first-letter>T</...></...>he first text.
2135 |
2136 | 2137 |

The first letter of a table-cell or inline-block cannot be the 2138 | first letter of an ancestor element. Thus, in <DIV><P 2139 | STYLE="display: inline-block">Hello<BR>Goodbye</P> 2140 | etcetera</DIV> the first letter of the div is not the 2141 | letter "H". In fact, the div doesn't have a first letter. 2142 | 2143 |

The first letter must occur on the first formatted line. For example, in 2145 | this fragment: <p><br>First... the first line 2146 | doesn't contain any letters and ::first-letter doesn't 2147 | match anything (assuming the default style for br in HTML 2148 | 4). In particular, it does not match the "F" of "First." 2149 | 2150 |

In CSS, if an element is a list item ('display: list-item'), the 2151 | ::first-letter applies to the first letter in the 2152 | principal box after the marker. UAs may ignore 2153 | ::first-letter on list items with 'list-style-position: 2154 | inside'. If an element has ::before or 2155 | ::after content, the ::first-letter applies 2156 | to the first letter of the element including that content. 2157 | 2158 |

2159 |

Example:

2160 |

After the rule 'p::before {content: "Note: "}', the selector 2161 | 'p::first-letter' matches the "N" of "Note".

2162 |
2163 | 2164 |

Some languages may have specific rules about how to treat certain 2165 | letter combinations. In Dutch, for example, if the letter combination 2166 | "ij" appears at the beginning of a word, both letters should be 2167 | considered within the ::first-letter pseudo-element. 2168 | 2169 |

If the letters that would form the ::first-letter are not in the 2170 | same element, such as "'T" in <p>'<em>T..., the UA 2171 | may create a ::first-letter pseudo-element from one of the elements, 2172 | both elements, or simply not create a pseudo-element.

2173 | 2174 |

Similarly, if the first letter(s) of the block are not at the start 2175 | of the line (for example due to bidirectional reordering), then the UA 2176 | need not create the pseudo-element(s). 2177 | 2178 |

2179 |

Example:

2180 |

The following example illustrates 2181 | how overlapping pseudo-elements may interact. The first letter of 2182 | each P element will be green with a font size of '24pt'. The rest of 2183 | the first formatted line will be 'blue' while the rest of the 2184 | paragraph will be 'red'.

2185 | 2186 |
p { color: red; font-size: 12pt }
2187 | p::first-letter { color: green; font-size: 200% }
2188 | p::first-line { color: blue }
2189 | 
2190 | <P>Some text that ends up on two lines</P>
2191 | 2192 |

Assuming that a line break will occur before the word "ends", the 2193 | fictional tag 2194 | sequence for this fragment might be:

2195 | 2196 |
<P>
2197 | <P::first-line>
2198 | <P::first-letter> 
2199 | S 
2200 | </P::first-letter>ome text that 
2201 | </P::first-line> 
2202 | ends up on two lines 
2203 | </P>
2204 | 2205 |

Note that the ::first-letter element is inside the ::first-line 2206 | element. Properties set on ::first-line are inherited by 2207 | ::first-letter, but are overridden if the same property is set on 2208 | ::first-letter.

2209 |
2210 | 2211 | 2212 |

7.3. The ::selection pseudo-element

2213 | 2214 |

The ::selection pseudo-element applies to the portion 2215 | of a document that has been highlighted by the user. This also 2216 | applies, for example, to selected text within an editable text 2217 | field. This pseudo-element should not be confused with the :checked pseudo-class (which used to be 2219 | named :selected) 2220 | 2221 |

Although the ::selection pseudo-element is dynamic in 2222 | nature, and is altered by user action, it is reasonable to expect that 2223 | when a UA re-renders to a static medium (such as a printed page, see 2224 | [CSS21]) which was originally rendered to a 2225 | dynamic medium (like screen), the UA may wish to transfer the current 2226 | ::selection state to that other medium, and have all the 2227 | appropriate formatting and rendering take effect as well. This is not 2228 | required — UAs may omit the ::selection 2229 | pseudo-element for static media. 2230 | 2231 |

These are the CSS properties that apply to ::selection 2232 | pseudo-elements: color, background, cursor (optional), outline 2233 | (optional). The computed value of the 'background-image' property on 2234 | ::selection may be ignored. 2235 | 2236 | 2237 |

7.4. The ::before and ::after pseudo-elements

2238 | 2239 |

The ::before and ::after pseudo-elements 2240 | can be used to describe generated content before or after an element's 2241 | content. They are explained in CSS 2.1 [CSS21].

2243 | 2244 |

When the ::first-letter and ::first-line 2245 | pseudo-elements are combined with ::before and 2246 | ::after, they apply to the first letter or line of the 2247 | element including the inserted text.

2248 | 2249 |

8. Combinators

2250 | 2251 |

8.1. Descendant combinator

2252 | 2253 |

At times, authors may want selectors to describe an element that is 2254 | the descendant of another element in the document tree (e.g., "an 2255 | EM element that is contained within an H1 2256 | element"). Descendant combinators express such a relationship. A 2257 | descendant combinator is white space that 2258 | separates two sequences of simple selectors. A selector of the form 2259 | "A B" represents an element B that is an 2260 | arbitrary descendant of some ancestor element A. 2261 | 2262 |

2263 |

Examples:

2264 |

For example, consider the following selector:

2265 |
h1 em
2266 |

It represents an em element being the descendant of 2267 | an h1 element. It is a correct and valid, but partial, 2268 | description of the following fragment:

2269 |
<h1>This <span class="myclass">headline
2270 | is <em>very</em> important</span></h1>
2271 |

The following selector:

2272 |
div * p
2273 |

represents a p element that is a grandchild or later 2274 | descendant of a div element. Note the whitespace on 2275 | either side of the "*" is not part of the universal selector; the 2276 | whitespace is a combinator indicating that the DIV must be the 2277 | ancestor of some element, and that that element must be an ancestor 2278 | of the P.

2279 |

The following selector, which combines descendant combinators and 2280 | attribute selectors, represents an 2281 | element that (1) has the href attribute set and (2) is 2282 | inside a p that is itself inside a div:

2283 |
div p *[href]
2284 |
2285 | 2286 |

8.2. Child combinators

2287 | 2288 |

A child combinator describes a childhood relationship 2289 | between two elements. A child combinator is made of the 2290 | "greater-than sign" (>) character and 2291 | separates two sequences of simple selectors. 2292 | 2293 | 2294 |

2295 |

Examples:

2296 |

The following selector represents a p element that is 2297 | child of body:

2298 |
body > p
2299 |

The following example combines descendant combinators and child 2300 | combinators.

2301 |
div ol>li p
2302 |

It represents a p element that is a descendant of an 2303 | li element; the li element must be the 2304 | child of an ol element; the ol element must 2305 | be a descendant of a div. Notice that the optional white 2306 | space around the ">" combinator has been left out.

2307 |
2308 | 2309 |

For information on selecting the first child of an element, please 2310 | see the section on the :first-child pseudo-class 2312 | above.

2313 | 2314 |

8.3. Sibling combinators

2315 | 2316 |

There are two different sibling combinators: the adjacent sibling 2317 | combinator and the general sibling combinator. In both cases, 2318 | non-element nodes (e.g. text between elements) are ignored when 2319 | considering adjacency of elements.

2320 | 2321 |

8.3.1. Adjacent sibling combinator

2322 | 2323 |

The adjacent sibling combinator is made of the "plus 2324 | sign" (U+002B, +) character that separates two 2325 | sequences of simple selectors. The elements represented by the two 2326 | sequences share the same parent in the document tree and the element 2327 | represented by the first sequence immediately precedes the element 2328 | represented by the second one.

2329 | 2330 |
2331 |

Examples:

2332 |

The following selector represents a p element 2333 | immediately following a math element:

2334 |
math + p
2335 |

The following selector is conceptually similar to the one in the 2336 | previous example, except that it adds an attribute selector — it 2337 | adds a constraint to the h1 element, that it must have 2338 | class="opener":

2339 |
h1.opener + h2
2340 |
2341 | 2342 | 2343 |

8.3.2. General sibling combinator

2344 | 2345 |

The general sibling combinator is made of the "tilde" 2346 | (U+007E, ~) character that separates two sequences of 2347 | simple selectors. The elements represented by the two sequences share 2348 | the same parent in the document tree and the element represented by 2349 | the first sequence precedes (not necessarily immediately) the element 2350 | represented by the second one.

2351 | 2352 |
2353 |

Example:

2354 |
h1 ~ pre
2355 |

represents a pre element following an h1. It 2356 | is a correct and valid, but partial, description of:

2357 |
<h1>Definition of the function a</h1>
2358 | <p>Function a(x) has to be applied to all figures in the table.</p>
2359 | <pre>function a(x) = 12x/13.5</pre>
2360 |
2361 | 2362 |

9. Calculating a selector's specificity

2363 | 2364 |

A selector's specificity is calculated as follows:

2365 | 2366 | 2372 | 2373 |

Selectors inside the negation pseudo-class 2374 | are counted like any other, but the negation itself does not count as 2375 | a pseudo-class.

2376 | 2377 |

Concatenating the three numbers a-b-c (in a number system with a 2378 | large base) gives the specificity.

2379 | 2380 |
2381 |

Examples:

2382 |
*               /* a=0 b=0 c=0 -> specificity =   0 */
2383 | LI              /* a=0 b=0 c=1 -> specificity =   1 */
2384 | UL LI           /* a=0 b=0 c=2 -> specificity =   2 */
2385 | UL OL+LI        /* a=0 b=0 c=3 -> specificity =   3 */
2386 | H1 + *[REL=up]  /* a=0 b=1 c=1 -> specificity =  11 */
2387 | UL OL LI.red    /* a=0 b=1 c=3 -> specificity =  13 */
2388 | LI.red.level    /* a=0 b=2 c=1 -> specificity =  21 */
2389 | #x34y           /* a=1 b=0 c=0 -> specificity = 100 */
2390 | #s12:not(FOO)   /* a=1 b=0 c=1 -> specificity = 101 */
2391 | 
2392 |
2393 | 2394 |

Note: the specificity of the styles 2395 | specified in an HTML style attribute is described in CSS 2396 | 2.1. [CSS21].

2397 | 2398 |

10. The grammar of Selectors

2399 | 2400 |

10.1. Grammar

2401 | 2402 |

The grammar below defines the syntax of Selectors. It is globally 2403 | LL(1) and can be locally LL(2) (but note that most UA's should not use 2404 | it directly, since it doesn't express the parsing conventions). The 2405 | format of the productions is optimized for human consumption and some 2406 | shorthand notations beyond Yacc (see [YACC]) 2407 | are used:

2408 | 2409 | 2416 | 2417 |

The productions are:

2418 | 2419 |
selectors_group
2420 |   : selector [ COMMA S* selector ]*
2421 |   ;
2422 | 
2423 | selector
2424 |   : simple_selector_sequence [ combinator simple_selector_sequence ]*
2425 |   ;
2426 | 
2427 | combinator
2428 |   /* combinators can be surrounded by white space */
2429 |   : PLUS S* | GREATER S* | TILDE S* | S+
2430 |   ;
2431 | 
2432 | simple_selector_sequence
2433 |   : [ type_selector | universal ]
2434 |     [ HASH | class | attrib | pseudo | negation ]*
2435 |   | [ HASH | class | attrib | pseudo | negation ]+
2436 |   ;
2437 | 
2438 | type_selector
2439 |   : [ namespace_prefix ]? element_name
2440 |   ;
2441 | 
2442 | namespace_prefix
2443 |   : [ IDENT | '*' ]? '|'
2444 |   ;
2445 | 
2446 | element_name
2447 |   : IDENT
2448 |   ;
2449 | 
2450 | universal
2451 |   : [ namespace_prefix ]? '*'
2452 |   ;
2453 | 
2454 | class
2455 |   : '.' IDENT
2456 |   ;
2457 | 
2458 | attrib
2459 |   : '[' S* [ namespace_prefix ]? IDENT S*
2460 |         [ [ PREFIXMATCH |
2461 |             SUFFIXMATCH |
2462 |             SUBSTRINGMATCH |
2463 |             '=' |
2464 |             INCLUDES |
2465 |             DASHMATCH ] S* [ IDENT | STRING ] S*
2466 |         ]? ']'
2467 |   ;
2468 | 
2469 | pseudo
2470 |   /* '::' starts a pseudo-element, ':' a pseudo-class */
2471 |   /* Exceptions: :first-line, :first-letter, :before and :after. */
2472 |   /* Note that pseudo-elements are restricted to one per selector and */
2473 |   /* occur only in the last simple_selector_sequence. */
2474 |   : ':' ':'? [ IDENT | functional_pseudo ]
2475 |   ;
2476 | 
2477 | functional_pseudo
2478 |   : FUNCTION S* expression ')'
2479 |   ;
2480 | 
2481 | expression
2482 |   /* In CSS3, the expressions are identifiers, strings, */
2483 |   /* or of the form "an+b" */
2484 |   : [ [ PLUS | '-' | DIMENSION | NUMBER | STRING | IDENT ] S* ]+
2485 |   ;
2486 | 
2487 | negation
2488 |   : NOT S* negation_arg S* ')'
2489 |   ;
2490 | 
2491 | negation_arg
2492 |   : type_selector | universal | HASH | class | attrib | pseudo
2493 |   ;
2494 | 2495 | 2496 |

10.2. Lexical scanner

2497 | 2498 |

The following is the tokenizer, written in Flex (see 2499 | [FLEX]) notation. The tokenizer is 2500 | case-insensitive.

2501 | 2502 |

The two occurrences of "\377" represent the highest character 2503 | number that current versions of Flex can deal with (decimal 255). They 2504 | should be read as "\4177777" (decimal 1114111), which is the highest 2505 | possible code point in Unicode/ISO-10646. [UNICODE]

2507 | 2508 |
%option case-insensitive
2509 | 
2510 | ident     [-]?{nmstart}{nmchar}*
2511 | name      {nmchar}+
2512 | nmstart   [_a-z]|{nonascii}|{escape}
2513 | nonascii  [^\0-\177]
2514 | unicode   \\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?
2515 | escape    {unicode}|\\[^\n\r\f0-9a-f]
2516 | nmchar    [_a-z0-9-]|{nonascii}|{escape}
2517 | num       [0-9]+|[0-9]*\.[0-9]+
2518 | string    {string1}|{string2}
2519 | string1   \"([^\n\r\f\\"]|\\{nl}|{nonascii}|{escape})*\"
2520 | string2   \'([^\n\r\f\\']|\\{nl}|{nonascii}|{escape})*\'
2521 | invalid   {invalid1}|{invalid2}
2522 | invalid1  \"([^\n\r\f\\"]|\\{nl}|{nonascii}|{escape})*
2523 | invalid2  \'([^\n\r\f\\']|\\{nl}|{nonascii}|{escape})*
2524 | nl        \n|\r\n|\r|\f
2525 | w         [ \t\r\n\f]*
2526 | 
2527 | %%
2528 | 
2529 | [ \t\r\n\f]+     return S;
2530 | 
2531 | "~="             return INCLUDES;
2532 | "|="             return DASHMATCH;
2533 | "^="             return PREFIXMATCH;
2534 | "$="             return SUFFIXMATCH;
2535 | "*="             return SUBSTRINGMATCH;
2536 | {ident}          return IDENT;
2537 | {string}         return STRING;
2538 | {ident}"("       return FUNCTION;
2539 | {num}            return NUMBER;
2540 | "#"{name}        return HASH;
2541 | {w}"+"           return PLUS;
2542 | {w}">"           return GREATER;
2543 | {w}","           return COMMA;
2544 | {w}"~"           return TILDE;
2545 | ":not("          return NOT;
2546 | @{ident}         return ATKEYWORD;
2547 | {invalid}        return INVALID;
2548 | {num}%           return PERCENTAGE;
2549 | {num}{ident}     return DIMENSION;
2550 | "<!--"           return CDO;
2551 | "-->"            return CDC;
2552 | 
2553 | "url("{w}{string}{w}")"                           return URI;
2554 | "url("{w}([!#$%&*-~]|{nonascii}|{escape})*{w}")"  return URI;
2555 | U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?                return UNICODE_RANGE;
2556 | 
2557 | \/\*[^*]*\*+([^/*][^*]*\*+)*\/                    /* ignore comments */
2558 | 
2559 | .                return *yytext;
2560 | 2561 | 2562 | 2563 |

11. Namespaces and down-level clients

2564 | 2565 |

An important issue is the interaction of CSS selectors with XML 2566 | documents in web clients that were produced prior to this 2567 | document. Unfortunately, due to the fact that namespaces must be 2568 | matched based on the URI which identifies the namespace, not the 2569 | namespace prefix, some mechanism is required to identify namespaces in 2570 | CSS by their URI as well. Without such a mechanism, it is impossible 2571 | to construct a CSS style sheet which will properly match selectors in 2572 | all cases against a random set of XML documents. However, given 2573 | complete knowledge of the XML document to which a style sheet is to be 2574 | applied, and a limited use of namespaces within the XML document, it 2575 | is possible to construct a style sheet in which selectors would match 2576 | elements and attributes correctly.

2577 | 2578 |

It should be noted that a down-level CSS client will (if it 2579 | properly conforms to CSS forward compatible parsing rules) ignore all 2580 | @namespace at-rules, as well as all style rules that make 2581 | use of namespace qualified element type or attribute selectors. The 2582 | syntax of delimiting namespace prefixes in CSS was deliberately chosen 2583 | so that down-level CSS clients would ignore the style rules rather 2584 | than possibly match them incorrectly.

2585 | 2586 |

The use of default namespaces in CSS makes it possible to write 2587 | element type selectors that will function in both namespace aware CSS 2588 | clients as well as down-level clients. It should be noted that 2589 | down-level clients may incorrectly match selectors against XML 2590 | elements in other namespaces.

2591 | 2592 |

The following are scenarios and examples in which it is possible to 2593 | construct style sheets which would function properly in web clients 2594 | that do not implement this proposal.

2595 | 2596 |
    2597 |
  1. 2598 | 2599 |

    The XML document does not use namespaces.

    2600 | 2601 | 2617 | 2618 |
  2. 2619 | 2620 |
  3. 2621 | 2622 |

    The XML document defines a single, default namespace used 2623 | throughout the document. No namespace prefixes are used in element 2624 | names.

    2625 | 2626 | 2634 | 2635 |
  4. 2636 | 2637 |
  5. 2638 | 2639 |

    The XML document does not use a default namespace, all 2640 | namespace prefixes used are known to the style sheet author, and 2641 | there is a direct mapping between namespace prefixes and namespace 2642 | URIs. (A given prefix may only be mapped to one namespace URI 2643 | throughout the XML document; there may be multiple prefixes mapped 2644 | to the same URI).

    2645 | 2646 | 2669 | 2670 |
  6. 2671 | 2672 |
2673 | 2674 |

In other scenarios: when the namespace prefixes used in the XML are 2675 | not known in advance by the style sheet author; or a combination of 2676 | elements with no namespace are used in conjunction with elements using 2677 | a default namespace; or the same namespace prefix is mapped to 2678 | different namespace URIs within the same document, or in 2679 | different documents; it is impossible to construct a CSS style sheet 2680 | that will function properly against all elements in those documents, 2681 | unless, the style sheet is written using a namespace URI syntax (as 2682 | outlined in this document or similar) and the document is processed by 2683 | a CSS and XML namespace aware client.

2684 | 2685 |

12. Profiles

2686 | 2687 |

Each specification using Selectors must define the subset of W3C 2688 | Selectors it allows and excludes, and describe the local meaning of 2689 | all the components of that subset.

2690 | 2691 |

Non normative examples: 2692 | 2693 |

2694 | 2695 | 2696 | 2697 | 2698 | 2699 | 2700 | 2701 | 2702 | 2703 | 2706 | 2707 | 2708 | 2718 | 2719 | 2720 |
Selectors profile
SpecificationCSS level 1
Acceptstype selectors
class selectors
ID selectors
:link, 2704 | :visited and :active pseudo-classes
descendant combinator 2705 |
::first-line and ::first-letter pseudo-elements
Excludes 2709 | 2710 |

universal selector
attribute selectors
:hover and :focus 2711 | pseudo-classes
:target pseudo-class
:lang() pseudo-class
all UI 2712 | element states pseudo-classes
all structural 2713 | pseudo-classes
negation pseudo-class
all 2714 | UI element fragments pseudo-elements
::before and ::after 2715 | pseudo-elements
child combinators
sibling combinators 2716 | 2717 |

namespaces

Extra constraintsonly one class selector allowed per sequence of simple 2721 | selectors


2722 | 2723 | 2724 | 2725 | 2726 | 2727 | 2728 | 2729 | 2730 | 2731 | 2737 | 2738 | 2739 | 2748 | 2749 | 2750 |
Selectors profile
SpecificationCSS level 2
Acceptstype selectors
universal selector
attribute presence and 2732 | values selectors
class selectors
ID selectors
:link, :visited, 2733 | :active, :hover, :focus, :lang() and :first-child pseudo-classes 2734 |
descendant combinator
child combinator
adjacent sibling 2735 | combinator
::first-line and ::first-letter pseudo-elements
::before 2736 | and ::after pseudo-elements
Excludes 2740 | 2741 |

content selectors
substring matching attribute 2742 | selectors
:target pseudo-classes
all UI element 2743 | states pseudo-classes
all structural pseudo-classes other 2744 | than :first-child
negation pseudo-class
all UI element 2745 | fragments pseudo-elements
general sibling combinators 2746 | 2747 |

namespaces

Extra constraintsmore than one class selector per sequence of simple selectors (CSS1 2751 | constraint) allowed
2752 | 2753 |

In CSS, selectors express pattern matching rules that determine which style 2754 | rules apply to elements in the document tree. 2755 | 2756 |

The following selector (CSS level 2) will match all anchors a 2757 | with attribute name set inside a section 1 header h1: 2758 |

h1 a[name]
2759 | 2760 |

All CSS declarations attached to such a selector are applied to elements 2761 | matching it.

2762 | 2763 |
2764 | 2765 | 2766 | 2767 | 2768 | 2769 | 2770 | 2771 | 2772 | 2773 | 2774 | 2781 | 2782 | 2783 | 2784 | 2785 | 2786 |
Selectors profile
SpecificationSTTS 3
Accepts 2775 | 2776 |

type selectors
universal selectors
attribute selectors
class 2777 | selectors
ID selectors
all structural pseudo-classes
2778 | all combinators 2779 | 2780 |

namespaces

Excludesnon-accepted pseudo-classes
pseudo-elements
Extra constraintssome selectors and combinators are not allowed in fragment 2787 | descriptions on the right side of STTS declarations.
2788 | 2789 |

Selectors can be used in STTS 3 in two different 2790 | manners: 2791 |

    2792 |
  1. a selection mechanism equivalent to CSS selection mechanism: declarations 2793 | attached to a given selector are applied to elements matching that selector, 2794 |
  2. fragment descriptions that appear on the right side of declarations. 2795 |
2796 | 2797 |

13. Conformance and requirements

2798 | 2799 |

This section defines conformance with the present specification only. 2800 | 2801 |

The inability of a user agent to implement part of this specification due to 2802 | the limitations of a particular device (e.g., non interactive user agents will 2803 | probably not implement dynamic pseudo-classes because they make no sense without 2804 | interactivity) does not imply non-conformance. 2805 | 2806 |

All specifications reusing Selectors must contain a Profile listing the 2808 | subset of Selectors it accepts or excludes, and describing the constraints 2809 | it adds to the current specification. 2810 | 2811 |

Invalidity is caused by a parsing error, e.g. an unrecognized token or a token 2812 | which is not allowed at the current parsing point. 2813 | 2814 |

User agents must observe the rules for handling parsing errors: 2815 |

2821 | 2822 |

Specifications reusing Selectors must define how to handle parsing 2823 | errors. (In the case of CSS, the entire rule in which the selector is 2824 | used is dropped.)

2825 | 2826 | 2835 | 2836 |

14. Tests

2837 | 2838 |

This specification has a test 2840 | suite allowing user agents to verify their basic conformance to 2841 | the specification. This test suite does not pretend to be exhaustive 2842 | and does not cover all possible combined cases of Selectors.

2843 | 2844 |

15. Acknowledgements

2845 | 2846 |

The CSS working group would like to thank everyone who has sent 2847 | comments on this specification over the years.

2848 | 2849 |

The working group would like to extend special thanks to Donna 2850 | McManus, Justin Baker, Joel Sklar, and Molly Ives Brower who perfermed 2851 | the final editorial review.

2852 | 2853 |

16. References

2854 | 2855 |
2856 | 2857 |
[CSS1] 2858 |
Bert Bos, Håkon Wium Lie; "Cascading Style Sheets, level 1", W3C Recommendation, 17 Dec 1996, revised 11 Jan 1999 2859 |
(http://www.w3.org/TR/REC-CSS1) 2860 | 2861 |
[CSS21] 2862 |
Bert Bos, Tantek Çelik, Ian Hickson, Håkon Wium Lie, editors; "Cascading Style Sheets, level 2 revision 1", W3C Working Draft, 13 June 2005 2863 |
(http://www.w3.org/TR/CSS21) 2864 | 2865 |
[CWWW] 2866 |
Martin J. Dürst, François Yergeau, Misha Wolf, Asmus Freytag, Tex Texin, editors; "Character Model for the World Wide Web", W3C Recommendation, 15 February 2005 2867 |
(http://www.w3.org/TR/charmod/) 2868 | 2869 |
[FLEX] 2870 |
"Flex: The Lexical Scanner Generator", Version 2.3.7, ISBN 1882114213 2871 | 2872 |
[HTML4] 2873 |
Dave Ragget, Arnaud Le Hors, Ian Jacobs, editors; "HTML 4.01 Specification", W3C Recommendation, 24 December 1999 2874 |
(http://www.w3.org/TR/html4/) 2875 | 2876 |
[MATH] 2877 |
Patrick Ion, Robert Miner, editors; "Mathematical Markup Language (MathML) 1.01", W3C Recommendation, revision of 7 July 1999 2878 |
(http://www.w3.org/TR/REC-MathML/) 2879 | 2880 |
[RFC3066] 2881 |
H. Alvestrand; "Tags for the Identification of Languages", Request for Comments 3066, January 2001 2882 |
(http://www.ietf.org/rfc/rfc3066.txt) 2883 | 2884 |
[STTS] 2885 |
Daniel Glazman; "Simple Tree Transformation Sheets 3", Electricité de France, submission to the W3C, 11 November 1998 2886 |
(http://www.w3.org/TR/NOTE-STTS3) 2887 | 2888 |
[SVG] 2889 |
Jon Ferraiolo, 藤沢 淳, Dean Jackson, editors; "Scalable Vector Graphics (SVG) 1.1 Specification", W3C Recommendation, 14 January 2003 2890 |
(http://www.w3.org/TR/SVG/) 2891 | 2892 |
[UNICODE]
2893 |
The Unicode Standard, Version 4.1, The Unicode Consortium. Boston, MA, Addison-Wesley, March 2005. ISBN 0-321-18578-1, as amended by Unicode 4.0.1 and Unicode 4.1.0. 2895 |
(http://www.unicode.org/versions/)
2896 | 2897 |
[XML10] 2898 |
Tim Bray, Jean Paoli, C. M. Sperberg-McQueen, Eve Maler, François Yergeau, editors; "Extensible Markup Language (XML) 1.0 (Third Edition)", W3C Recommendation, 4 February 2004 2899 |
(http://www.w3.org/TR/REC-xml/) 2900 | 2901 |
[XMLNAMES] 2902 |
Tim Bray, Dave Hollander, Andrew Layman, editors; "Namespaces in XML", W3C Recommendation, 14 January 1999 2903 |
(http://www.w3.org/TR/REC-xml-names/) 2904 | 2905 |
[YACC] 2906 |
S. C. Johnson; "YACC — Yet another compiler compiler", Technical Report, Murray Hill, 1975 2907 | 2908 |
2909 | 2910 | --------------------------------------------------------------------------------